summaryrefslogtreecommitdiffstats
path: root/sys/geom
diff options
context:
space:
mode:
Diffstat (limited to 'sys/geom')
-rw-r--r--sys/geom/bde/g_bde.c286
-rw-r--r--sys/geom/bde/g_bde.h211
-rw-r--r--sys/geom/bde/g_bde_crypt.c393
-rw-r--r--sys/geom/bde/g_bde_lock.c482
-rw-r--r--sys/geom/bde/g_bde_work.c763
-rw-r--r--sys/geom/geom.h313
-rw-r--r--sys/geom/geom_aes.c374
-rw-r--r--sys/geom/geom_apple.c260
-rw-r--r--sys/geom/geom_bsd.c739
-rw-r--r--sys/geom/geom_bsd_enc.c194
-rw-r--r--sys/geom/geom_ccd.c855
-rw-r--r--sys/geom/geom_ctl.c495
-rw-r--r--sys/geom/geom_ctl.h82
-rw-r--r--sys/geom/geom_dev.c468
-rw-r--r--sys/geom/geom_disk.c419
-rw-r--r--sys/geom/geom_disk.h97
-rw-r--r--sys/geom/geom_dump.c306
-rw-r--r--sys/geom/geom_event.c324
-rw-r--r--sys/geom/geom_fox.c468
-rw-r--r--sys/geom/geom_gpt.c227
-rw-r--r--sys/geom/geom_int.h88
-rw-r--r--sys/geom/geom_io.c416
-rw-r--r--sys/geom/geom_kern.c241
-rw-r--r--sys/geom/geom_mbr.c451
-rw-r--r--sys/geom/geom_mbr_enc.c72
-rw-r--r--sys/geom/geom_mirror.c237
-rw-r--r--sys/geom/geom_pc98.c319
-rw-r--r--sys/geom/geom_pc98_enc.c78
-rw-r--r--sys/geom/geom_slice.c488
-rw-r--r--sys/geom/geom_slice.h89
-rw-r--r--sys/geom/geom_subr.c809
-rw-r--r--sys/geom/geom_sunlabel.c281
-rw-r--r--sys/geom/geom_sunlabel_enc.c127
-rw-r--r--sys/geom/geom_vol_ffs.c143
-rw-r--r--sys/geom/notes140
35 files changed, 11735 insertions, 0 deletions
diff --git a/sys/geom/bde/g_bde.c b/sys/geom/bde/g_bde.c
new file mode 100644
index 0000000..e3e06ec
--- /dev/null
+++ b/sys/geom/bde/g_bde.c
@@ -0,0 +1,286 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+
+#include <crypto/rijndael/rijndael.h>
+#include <crypto/sha2/sha2.h>
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+#define BDE_CLASS_NAME "BDE"
+
+static void
+g_bde_start(struct bio *bp)
+{
+
+ switch (bp->bio_cmd) {
+ case BIO_DELETE:
+ case BIO_READ:
+ case BIO_WRITE:
+ g_bde_start1(bp);
+ break;
+ case BIO_GETATTR:
+ g_io_deliver(bp, EOPNOTSUPP);
+ break;
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ return;
+ }
+ return;
+}
+
+static void
+g_bde_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ struct g_provider *pp;
+ struct g_bde_softc *sc;
+ int error;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_orphan(%p/%s)", cp, cp->provider->name);
+ g_topology_assert();
+ KASSERT(cp->provider->error != 0,
+ ("g_bde_orphan with error == 0"));
+
+ gp = cp->geom;
+ sc = gp->softc;
+ gp->flags |= G_GEOM_WITHER;
+ error = cp->provider->error;
+ LIST_FOREACH(pp, &gp->provider, provider)
+ g_orphan_provider(pp, error);
+ bzero(sc, sizeof(struct g_bde_softc)); /* destroy evidence */
+ return;
+}
+
+static int
+g_bde_access(struct g_provider *pp, int dr, int dw, int de)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+
+ gp = pp->geom;
+ cp = LIST_FIRST(&gp->consumer);
+ if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0) {
+ de++;
+ dr++;
+ }
+ /* ... and let go of it on last close */
+ if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1) {
+ de--;
+ dr--;
+ }
+ return (g_access_rel(cp, dr, dw, de));
+}
+
+static void
+g_bde_create_geom(struct gctl_req *req, struct g_class *mp, struct g_provider *pp)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_bde_key *kp;
+ int error, i;
+ u_int sectorsize;
+ off_t mediasize;
+ struct g_bde_softc *sc;
+ void *pass;
+ void *key;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_create_geom(%s, %s)", mp->name, pp->name);
+ g_topology_assert();
+ gp = NULL;
+
+
+ gp = g_new_geomf(mp, "%s.bde", pp->name);
+ gp->start = g_bde_start;
+ gp->orphan = g_bde_orphan;
+ gp->access = g_bde_access;
+ gp->spoiled = g_std_spoiled;
+ cp = g_new_consumer(gp);
+ g_attach(cp, pp);
+ error = g_access_rel(cp, 1, 1, 1);
+ if (error) {
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ g_destroy_geom(gp);
+ gctl_error(req, "could not access consumer");
+ }
+ pass = NULL;
+ key = NULL;
+ do {
+ pass = gctl_get_param(req, "pass", &i);
+ if (pass == NULL || i != SHA512_DIGEST_LENGTH) {
+ gctl_error(req, "No usable key presented");
+ break;
+ }
+ key = gctl_get_param(req, "key", &i);
+ if (key != NULL && i != 16) {
+ gctl_error(req, "Invalid key presented");
+ break;
+ }
+ sectorsize = cp->provider->sectorsize;
+ mediasize = cp->provider->mediasize;
+ sc = g_malloc(sizeof(struct g_bde_softc), M_WAITOK | M_ZERO);
+ gp->softc = sc;
+ sc->geom = gp;
+ sc->consumer = cp;
+
+ error = g_bde_decrypt_lock(sc, pass, key,
+ mediasize, sectorsize, NULL);
+ bzero(sc->sha2, sizeof sc->sha2);
+ if (error)
+ break;
+ kp = &sc->key;
+
+ /* Initialize helper-fields */
+ kp->keys_per_sector = kp->sectorsize / G_BDE_SKEYLEN;
+ kp->zone_cont = kp->keys_per_sector * kp->sectorsize;
+ kp->zone_width = kp->zone_cont + kp->sectorsize;
+ kp->media_width = kp->sectorN - kp->sector0 -
+ G_BDE_MAXKEYS * kp->sectorsize;
+
+ /* Our external parameters */
+ sc->zone_cont = kp->zone_cont;
+ sc->mediasize = g_bde_max_sector(kp);
+ sc->sectorsize = kp->sectorsize;
+
+ TAILQ_INIT(&sc->freelist);
+ TAILQ_INIT(&sc->worklist);
+ mtx_init(&sc->worklist_mutex, "g_bde_worklist", NULL, MTX_DEF);
+ mtx_lock(&Giant);
+ /* XXX: error check */
+ kthread_create(g_bde_worker, gp, &sc->thread, 0, 0,
+ "g_bde %s", gp->name);
+ mtx_unlock(&Giant);
+ pp = g_new_providerf(gp, gp->name);
+#if 0
+ /*
+	 * XXX: Disable this for now. Apparently UFS no longer
+ * XXX: issues BIO_DELETE requests correctly, with the obvious
+ * XXX: outcome that userdata is trashed.
+ */
+ pp->flags |= G_PF_CANDELETE;
+#endif
+ pp->stripesize = kp->zone_cont;
+ pp->stripeoffset = 0;
+ pp->mediasize = sc->mediasize;
+ pp->sectorsize = sc->sectorsize;
+ g_error_provider(pp, 0);
+ break;
+ } while (0);
+ if (pass != NULL)
+ bzero(pass, SHA512_DIGEST_LENGTH);
+ if (key != NULL)
+ bzero(key, 16);
+ if (error == 0)
+ return;
+ g_access_rel(cp, -1, -1, -1);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (gp->softc != NULL)
+ g_free(gp->softc);
+ g_destroy_geom(gp);
+ return;
+}
+
+
+static int
+g_bde_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
+{
+ struct g_consumer *cp;
+ struct g_provider *pp;
+ int error;
+ struct g_bde_softc *sc;
+
+ g_trace(G_T_TOPOLOGY, "g_bde_destroy_geom(%s, %s)", mp->name, gp->name);
+ g_topology_assert();
+ /*
+ * Orderly detachment.
+ */
+ KASSERT(gp != NULL, ("NULL geom"));
+ pp = LIST_FIRST(&gp->provider);
+ KASSERT(pp != NULL, ("NULL provider"));
+ if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
+ return (EBUSY);
+ sc = gp->softc;
+ cp = LIST_FIRST(&gp->consumer);
+ KASSERT(cp != NULL, ("NULL consumer"));
+ sc->dead = 1;
+ wakeup(sc);
+ error = g_access_rel(cp, -1, -1, -1);
+ KASSERT(error == 0, ("error on close"));
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ while (sc->dead != 2 && !LIST_EMPTY(&pp->consumers))
+ tsleep(sc, PRIBIO, "g_bdedie", hz);
+ mtx_destroy(&sc->worklist_mutex);
+ bzero(&sc->key, sizeof sc->key);
+ g_free(sc);
+ g_wither_geom(gp, ENXIO);
+ return (0);
+}
+
+static void
+g_bde_ctlreq(struct gctl_req *req, struct g_class *mp, char const *verb)
+{
+ struct g_geom *gp;
+ struct g_provider *pp;
+
+ if (!strcmp(verb, "create geom")) {
+ pp = gctl_get_provider(req, "provider");
+ if (pp != NULL)
+ g_bde_create_geom(req, mp, pp);
+ } else if (!strcmp(verb, "destroy geom")) {
+ gp = gctl_get_geom(req, mp, "geom");
+ if (gp != NULL)
+ g_bde_destroy_geom(req, mp, gp);
+ } else {
+ gctl_error(req, "unknown verb");
+ }
+}
+
+static struct g_class g_bde_class = {
+ .name = BDE_CLASS_NAME,
+ .destroy_geom = g_bde_destroy_geom,
+ .ctlreq = g_bde_ctlreq,
+};
+
+DECLARE_GEOM_CLASS(g_bde_class, g_bde);
diff --git a/sys/geom/bde/g_bde.h b/sys/geom/bde/g_bde.h
new file mode 100644
index 0000000..b162e96
--- /dev/null
+++ b/sys/geom/bde/g_bde.h
@@ -0,0 +1,211 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_GEOM_BDE_G_BDE_H_
+#define _SYS_GEOM_BDE_G_BDE_H_ 1
+
+/*
+ * These are quite, but not entirely unlike constants.
+ *
+ * They are not commented in details here, to prevent unadvisable
+ * experimentation. Please consult the code where they are used before you
+ * even think about modifying these.
+ */
+
+#define G_BDE_MKEYLEN (2048/8)
+#define G_BDE_SKEYBITS 128
+#define G_BDE_SKEYLEN (G_BDE_SKEYBITS/8)
+#define G_BDE_KKEYBITS 128
+#define G_BDE_KKEYLEN (G_BDE_KKEYBITS/8)
+#define G_BDE_MAXKEYS 4
+#define G_BDE_LOCKSIZE 384
+#define NLOCK_FIELDS 13
+
+
+/* This just needs to be "large enough" */
+#define G_BDE_KEYBYTES 304
+
+struct g_bde_work;
+struct g_bde_softc;
+
+struct g_bde_sector {
+ struct g_bde_work *owner;
+ struct g_bde_softc *softc;
+ off_t offset;
+ u_int size;
+ u_int ref;
+ void *data;
+ TAILQ_ENTRY(g_bde_sector) list;
+ u_char valid;
+ u_char malloc;
+ enum {JUNK, IO, VALID} state;
+ int error;
+ time_t used;
+};
+
+struct g_bde_work {
+ struct mtx mutex;
+ off_t offset;
+ off_t length;
+ void *data;
+ struct bio *bp;
+ struct g_bde_softc *softc;
+ off_t so;
+ off_t kso;
+ u_int ko;
+ struct g_bde_sector *sp;
+ struct g_bde_sector *ksp;
+ TAILQ_ENTRY(g_bde_work) list;
+ enum {SETUP, WAIT, FINISH} state;
+ int error;
+};
+
+/*
+ * The decrypted contents of the lock sectors. Notice that this is not
+ * the same as the on-disk layout. The on-disk layout is dynamic and
+ * dependent on the pass-phrase.
+ */
+struct g_bde_key {
+ uint64_t sector0;
+ /* Physical byte offset of 1st byte used */
+ uint64_t sectorN;
+ /* Physical byte offset of 1st byte not used */
+ uint64_t keyoffset;
+ /* Number of bytes the disk image is skewed. */
+ uint64_t lsector[G_BDE_MAXKEYS];
+ /* Physical byte offsets of lock sectors */
+ uint32_t sectorsize;
+ /* Our "logical" sector size */
+ uint32_t flags;
+ /* 1 = lockfile in sector 0 */
+ uint8_t salt[16];
+	/* Used to frustrate the kkey generation */
+ uint8_t spare[32];
+ /* For future use, random contents */
+ uint8_t mkey[G_BDE_MKEYLEN];
+ /* Our masterkey. */
+
+ /* Non-stored help-fields */
+ uint64_t zone_width; /* On-disk width of zone */
+ uint64_t zone_cont; /* Payload width of zone */
+ uint64_t media_width; /* Non-magic width of zone */
+ u_int keys_per_sector;
+};
+
+struct g_bde_softc {
+ off_t mediasize;
+ u_int sectorsize;
+ uint64_t zone_cont;
+ struct g_geom *geom;
+ struct g_consumer *consumer;
+ TAILQ_HEAD(, g_bde_sector) freelist;
+ TAILQ_HEAD(, g_bde_work) worklist;
+ struct mtx worklist_mutex;
+ struct proc *thread;
+ struct g_bde_key key;
+ int dead;
+ u_int nwork;
+ u_int nsect;
+ u_int ncache;
+ u_char sha2[SHA512_DIGEST_LENGTH];
+};
+
+/* g_bde_crypt.c */
+void g_bde_crypt_delete(struct g_bde_work *wp);
+void g_bde_crypt_read(struct g_bde_work *wp);
+void g_bde_crypt_write(struct g_bde_work *wp);
+
+/* g_bde_key.c */
+void g_bde_zap_key(struct g_bde_softc *sc);
+int g_bde_get_key(struct g_bde_softc *sc, void *ptr, int len);
+int g_bde_init_keybytes(struct g_bde_softc *sc, char *passp, int len);
+
+/* g_bde_lock .c */
+int g_bde_encode_lock(struct g_bde_softc *sc, struct g_bde_key *gl, u_char *ptr);
+int g_bde_decode_lock(struct g_bde_softc *sc, struct g_bde_key *gl, u_char *ptr);
+int g_bde_keyloc_encrypt(struct g_bde_softc *sc, uint64_t *input, void *output);
+int g_bde_keyloc_decrypt(struct g_bde_softc *sc, void *input, uint64_t *output);
+int g_bde_decrypt_lock(struct g_bde_softc *sc, u_char *keymat, u_char *meta, off_t mediasize, u_int sectorsize, u_int *nkey);
+void g_bde_hash_pass(struct g_bde_softc *sc, const void *input, u_int len);
+
+/* g_bde_math .c */
+uint64_t g_bde_max_sector(struct g_bde_key *lp);
+void g_bde_map_sector(struct g_bde_work *wp);
+
+/* g_bde_work.c */
+void g_bde_start1(struct bio *bp);
+void g_bde_worker(void *arg);
+
+/*
+ * These four functions wrap the raw Rijndael functions and make sure we
+ * explode if something fails which shouldn't.
+ */
+
+static __inline void
+AES_init(cipherInstance *ci)
+{
+ int error;
+
+ error = rijndael_cipherInit(ci, MODE_CBC, NULL);
+ KASSERT(error > 0, ("rijndael_cipherInit %d", error));
+}
+
+static __inline void
+AES_makekey(keyInstance *ki, int dir, u_int len, void *key)
+{
+ int error;
+
+ error = rijndael_makeKey(ki, dir, len, key);
+ KASSERT(error > 0, ("rijndael_makeKey %d", error));
+}
+
+static __inline void
+AES_encrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len)
+{
+ int error;
+
+ error = rijndael_blockEncrypt(ci, ki, in, len * 8, out);
+ KASSERT(error > 0, ("rijndael_blockEncrypt %d", error));
+}
+
+static __inline void
+AES_decrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len)
+{
+ int error;
+
+ error = rijndael_blockDecrypt(ci, ki, in, len * 8, out);
+ KASSERT(error > 0, ("rijndael_blockDecrypt %d", error));
+}
+
+#endif /* _SYS_GEOM_BDE_G_BDE_H_ */
diff --git a/sys/geom/bde/g_bde_crypt.c b/sys/geom/bde/g_bde_crypt.c
new file mode 100644
index 0000000..97fe8d2
--- /dev/null
+++ b/sys/geom/bde/g_bde_crypt.c
@@ -0,0 +1,393 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This source file contains the functions responsible for the crypto, keying
+ * and mapping operations on the I/O requests.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/libkern.h>
+#include <sys/endian.h>
+#include <sys/md5.h>
+
+#include <crypto/rijndael/rijndael.h>
+#include <crypto/sha2/sha2.h>
+
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+/*
+ * XXX: Debugging DO NOT ENABLE
+ */
+#undef MD5_KEY
+
+/*
+ * Derive kkey from mkey + sector offset.
+ *
+ * Security objective: Derive a potentially very large number of distinct skeys
+ * from the comparatively small key material in our mkey, in such a way that
+ * if one, more or even many of the kkeys are compromised, this does not
+ * significantly help an attack on other kkeys and in particular does not
+ * weaken or compromise the mkey.
+ *
+ * First we MD5 hash the sectornumber with the salt from the lock sector.
+ * The salt prevents the precalculation and statistical analysis of the MD5
+ * output which would be possible if we only gave it the sectornumber.
+ *
+ * The MD5 hash is used to pick out 16 bytes from the masterkey, which
+ * are then hashed with MD5 together with the sector number.
+ *
+ * The resulting MD5 hash is the kkey.
+ */
+
+static void
+g_bde_kkey(struct g_bde_softc *sc, keyInstance *ki, int dir, off_t sector)
+{
+ u_int t;
+ MD5_CTX ct;
+ u_char buf[16];
+ u_char buf2[8];
+
+ /* We have to be architecture neutral */
+ le64enc(buf2, sector);
+
+ MD5Init(&ct);
+ MD5Update(&ct, sc->key.salt, 8);
+ MD5Update(&ct, buf2, sizeof buf2);
+ MD5Update(&ct, sc->key.salt + 8, 8);
+ MD5Final(buf, &ct);
+
+ MD5Init(&ct);
+ for (t = 0; t < 16; t++) {
+ MD5Update(&ct, &sc->key.mkey[buf[t]], 1);
+ if (t == 8)
+ MD5Update(&ct, buf2, sizeof buf2);
+ }
+ bzero(buf2, sizeof buf2);
+ MD5Final(buf, &ct);
+ bzero(&ct, sizeof ct);
+ AES_makekey(ki, dir, G_BDE_KKEYBITS, buf);
+ bzero(buf, sizeof buf);
+}
+
+/*
+ * Encryption work for read operation.
+ *
+ * Security objective: Find the kkey, find the skey, decrypt the sector data.
+ */
+
+void
+g_bde_crypt_read(struct g_bde_work *wp)
+{
+	struct g_bde_softc *sc;
+	u_char *d;
+	u_int n;
+	off_t o;
+	u_char skey[G_BDE_SKEYLEN];
+	keyInstance ki;
+	cipherInstance ci;
+
+
+	AES_init(&ci);
+	sc = wp->softc;
+	o = 0;
+	for (n = 0; o < wp->length; n++, o += sc->sectorsize) {
+		d = (u_char *)wp->ksp->data + wp->ko + n * G_BDE_SKEYLEN;
+		g_bde_kkey(sc, &ki, DIR_DECRYPT, wp->offset + o);
+		AES_decrypt(&ci, &ki, d, skey, sizeof skey);
+		d = (u_char *)wp->data + o;
+#ifdef MD5_KEY
+		{
+		MD5_CTX ct;
+		u_char rkey[16];
+		int i;
+
+		MD5Init(&ct);
+		MD5Update(&ct, d, sc->sectorsize);
+		MD5Final(rkey, &ct);
+		if (bcmp(rkey, skey, 16) != 0) {
+#if 0
+			printf("MD5_KEY failed at %jd (t=%d)\n",
+			    (intmax_t)(wp->offset + o), time_second);
+#endif
+			for (i = 0; i < sc->sectorsize; i++)
+				d[i] = 'A' + i % 26;
+			sprintf(d, "MD5_KEY failed at %jd (t=%d)",
+			    (intmax_t)(wp->offset + o), time_second);
+		}
+		}
+#else
+		AES_makekey(&ki, DIR_DECRYPT, G_BDE_SKEYBITS, skey);
+		AES_decrypt(&ci, &ki, d, d, sc->sectorsize);
+#endif
+	}
+	bzero(skey, sizeof skey);
+	bzero(&ci, sizeof ci);
+	bzero(&ki, sizeof ki);	/* was "sizeof ci": wrong object's size, key material not fully wiped */
+}
+
+/*
+ * Encryption work for write operation.
+ *
+ * Security objective: Create random skey, encrypt sector data,
+ * encrypt skey with the kkey.
+ */
+
+void
+g_bde_crypt_write(struct g_bde_work *wp)
+{
+	u_char *s, *d;
+	struct g_bde_softc *sc;
+	u_int n;
+	off_t o;
+	u_char skey[G_BDE_SKEYLEN];
+	keyInstance ki;
+	cipherInstance ci;
+
+	sc = wp->softc;
+	AES_init(&ci);
+	o = 0;
+	for (n = 0; o < wp->length; n++, o += sc->sectorsize) {
+
+		s = (u_char *)wp->data + o;
+		d = (u_char *)wp->sp->data + o;
+#ifdef MD5_KEY
+		{
+		MD5_CTX ct;
+
+		MD5Init(&ct);
+		MD5Update(&ct, s, sc->sectorsize);
+		MD5Final(skey, &ct);
+		bcopy(s, d, sc->sectorsize);
+		}
+#else
+		arc4rand(skey, sizeof skey, 0);
+		AES_makekey(&ki, DIR_ENCRYPT, G_BDE_SKEYBITS, skey);
+		AES_encrypt(&ci, &ki, s, d, sc->sectorsize);
+#endif
+
+		d = (u_char *)wp->ksp->data + wp->ko + n * G_BDE_SKEYLEN;
+		g_bde_kkey(sc, &ki, DIR_ENCRYPT, wp->offset + o);
+		AES_encrypt(&ci, &ki, skey, d, sizeof skey);
+		bzero(skey, sizeof skey);
+	}
+	bzero(skey, sizeof skey);
+	bzero(&ci, sizeof ci);
+	bzero(&ki, sizeof ki);	/* was "sizeof ci": wrong object's size, key material not fully wiped */
+}
+
+/*
+ * Encryption work for delete operation.
+ *
+ * Security objective: Write random data to the sectors.
+ *
+ * XXX: At a hit in performance we would trash the encrypted skey as well.
+ * XXX: This would add frustration to the cleaning lady attack by making
+ * XXX: deletes look like writes.
+ */
+
+void
+g_bde_crypt_delete(struct g_bde_work *wp)
+{
+ struct g_bde_softc *sc;
+ u_char *d;
+ off_t o;
+ u_char skey[G_BDE_SKEYLEN];
+ keyInstance ki;
+ cipherInstance ci;
+
+ sc = wp->softc;
+ d = wp->sp->data;
+ AES_init(&ci);
+ /*
+ * Do not unroll this loop!
+ * Our zone may be significantly wider than the amount of random
+ * bytes arc4rand likes to give in one reseeding, whereas our
+ * sectorsize is far more likely to be in the same range.
+ */
+ for (o = 0; o < wp->length; o += sc->sectorsize) {
+ arc4rand(d, sc->sectorsize, 0);
+ arc4rand(skey, sizeof skey, 0);
+ AES_makekey(&ki, DIR_ENCRYPT, G_BDE_SKEYBITS, skey);
+ AES_encrypt(&ci, &ki, d, d, sc->sectorsize);
+ d += sc->sectorsize;
+ }
+ /*
+ * Having written a long random sequence to disk here, we want to
+ * force a reseed, to avoid weakening the next time we use random
+ * data for something important.
+ */
+ arc4rand(&o, sizeof o, 1);
+}
+
+/*
+ * Calculate the total payload size of the encrypted device.
+ *
+ * Security objectives: none.
+ *
+ * This function needs to agree with g_bde_map_sector() about things.
+ */
+
+uint64_t
+g_bde_max_sector(struct g_bde_key *kp)
+{
+ uint64_t maxsect;
+
+ maxsect = kp->media_width;
+ maxsect /= kp->zone_width;
+ maxsect *= kp->zone_cont;
+ return (maxsect);
+}
+
+/*
+ * Convert an unencrypted side offset to offsets on the encrypted side.
+ *
+ * Security objective: Make it harder to identify what sectors contain what
+ * on a "cold" disk image.
+ *
+ * We do this by adding the "keyoffset" from the lock to the physical sector
+ * number modulo the available number of sectors. Since all physical sectors
+ * presumably look the same cold, this will do.
+ *
+ * As part of the mapping we have to skip the lock sectors which we know
+ * the physical address of. We also truncate the work packet, respecting
+ * zone boundaries and lock sectors, so that we end up with a sequence of
+ * sectors which are physically contiguous.
+ *
+ * Shuffling things further is an option, but the incremental frustration is
+ * not currently deemed worth the run-time performance hit resulting from the
+ * increased number of disk arm movements it would incur.
+ *
+ * This function offers nothing but a trivial diversion for an attacker able
+ * to do "the cleaning lady attack" in its current static mapping form.
+ */
+
+void
+g_bde_map_sector(struct g_bde_work *wp)
+{
+
+ u_int zone, zoff, u, len;
+ uint64_t ko;
+ struct g_bde_softc *sc;
+ struct g_bde_key *kp;
+
+ sc = wp->softc;
+ kp = &sc->key;
+
+ /* find which zone and the offset in it */
+ zone = wp->offset / kp->zone_cont;
+ zoff = wp->offset % kp->zone_cont;
+
+ /* Calculate the offset of the key in the key sector */
+ wp->ko = (zoff / kp->sectorsize) * G_BDE_SKEYLEN;
+
+ /* restrict length to that zone */
+ len = kp->zone_cont - zoff;
+
+ /* ... and in general */
+ if (len > DFLTPHYS)
+ len = DFLTPHYS;
+
+ if (len < wp->length)
+ wp->length = len;
+
+ /* Find physical sector address */
+ wp->so = zone * kp->zone_width + zoff;
+ wp->so += kp->keyoffset;
+ wp->so %= kp->media_width;
+ if (wp->so + wp->length > kp->media_width)
+ wp->length = kp->media_width - wp->so;
+ wp->so += kp->sector0;
+
+ /* The key sector is the last in this zone. */
+ wp->kso = zone * kp->zone_width + kp->zone_cont;
+ wp->kso += kp->keyoffset;
+ wp->kso %= kp->media_width;
+ wp->kso += kp->sector0;
+
+ /* Compensate for lock sectors */
+ for (u = 0; u < G_BDE_MAXKEYS; u++) {
+ /* Find the start of this lock sector */
+ ko = kp->lsector[u] & ~(kp->sectorsize - 1);
+
+ if (wp->kso >= ko)
+ wp->kso += kp->sectorsize;
+
+ if (wp->so >= ko) {
+ /* lock sector before work packet */
+ wp->so += kp->sectorsize;
+ } else if ((wp->so + wp->length) > ko) {
+ /* lock sector in work packet, truncate */
+ wp->length = ko - wp->so;
+ }
+ }
+
+#if 0
+ printf("off %jd len %jd so %jd ko %jd kso %u\n",
+ (intmax_t)wp->offset,
+ (intmax_t)wp->length,
+ (intmax_t)wp->so,
+ (intmax_t)wp->kso,
+ wp->ko);
+#endif
+ KASSERT(wp->so + wp->length <= kp->sectorN,
+ ("wp->so (%jd) + wp->length (%jd) > EOM (%jd), offset = %jd",
+ (intmax_t)wp->so,
+ (intmax_t)wp->length,
+ (intmax_t)kp->sectorN,
+ (intmax_t)wp->offset));
+
+ KASSERT(wp->kso + kp->sectorsize <= kp->sectorN,
+ ("wp->kso (%jd) + kp->sectorsize > EOM (%jd), offset = %jd",
+ (intmax_t)wp->kso,
+ (intmax_t)kp->sectorN,
+ (intmax_t)wp->offset));
+
+ KASSERT(wp->so >= kp->sector0,
+ ("wp->so (%jd) < BOM (%jd), offset = %jd",
+ (intmax_t)wp->so,
+ (intmax_t)kp->sector0,
+ (intmax_t)wp->offset));
+
+ KASSERT(wp->kso >= kp->sector0,
+ ("wp->kso (%jd) <BOM (%jd), offset = %jd",
+ (intmax_t)wp->kso,
+ (intmax_t)kp->sector0,
+ (intmax_t)wp->offset));
+}
diff --git a/sys/geom/bde/g_bde_lock.c b/sys/geom/bde/g_bde_lock.c
new file mode 100644
index 0000000..b06f279
--- /dev/null
+++ b/sys/geom/bde/g_bde_lock.c
@@ -0,0 +1,482 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This souce file contains routines which operates on the lock sectors, both
+ * for the kernel and the userland program gbde(1).
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/endian.h>
+#include <sys/md5.h>
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#else
+#include <err.h>
+#define CTASSERT(foo)
+#define KASSERT(foo, bar) do { if(!(foo)) { warn bar ; exit (1); } } while (0)
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#define g_free(foo) free(foo)
+#endif
+
+#include <crypto/rijndael/rijndael.h>
+#include <crypto/sha2/sha2.h>
+
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+/*
+ * Hash the raw pass-phrase.
+ *
+ * Security objectives: produce from the pass-phrase a fixed length
+ * bytesequence with PRN like properties in a reproducible way retaining
+ * as much entropy from the pass-phrase as possible.
+ *
+ * SHA2-512 makes this easy.
+ */
+
+void
+g_bde_hash_pass(struct g_bde_softc *sc, const void *input, u_int len)
+{
+	SHA512_CTX ctx;
+
+	/* One-shot SHA2-512; the digest lands directly in sc->sha2. */
+	SHA512_Init(&ctx);
+	SHA512_Update(&ctx, input, len);
+	SHA512_Final(sc->sha2, &ctx);
+}
+
+/*
+ * Encode/Decode the lock structure in byte-sequence format.
+ *
+ * Security objectives: Store in pass-phrase dependent variant format.
+ *
+ * C-structure packing and byte-endianess depends on architecture, compiler
+ * and compiler options. Writing raw structures to disk is therefore a bad
+ * idea in these enlightend days.
+ *
+ * We spend a fraction of the key-material on shuffling the fields around
+ * so they will be stored in an unpredictable sequence.
+ *
+ * For each byte of the key-material we derive two field indexes, and swap
+ * the position of those two fields.
+ *
+ * I have not worked out the statistical properties of this shuffle, but
+ * given that the key-material has PRN properties, the primary objective
+ * of making it hard to figure out which bits are where in the lock sector
+ * is sufficiently fulfilled.
+ *
+ * We include (and shuffle) an extra hash field in the stored version for
+ * identification and versioning purposes. This field contains the MD5 hash
+ * of a version identifier (currently "0000") followed by the stored lock
+ * sector byte-sequence substituting zero bytes for the hash field.
+ *
+ * The stored keysequence is protected by AES/256/CBC elsewhere in the code
+ * so the fact that the generated byte sequence has a much higher than
+ * average density of zero bits (from the numeric fields) is not currently
+ * a concern.
+ *
+ * Should this later become a concern, a simple software update and
+ * pass-phrase change can remedy the situation. One possible solution
+ * could be to XOR the numeric fields with a key-material derived PRN.
+ *
+ * The chosen shuffle algorithm only works as long as we have no more than 16
+ * fields in the stored part of the lock structure (hence the CTASSERT below).
+ */
+
+/*
+ * Each key byte must split into two independent field indexes via
+ * "% NLOCK_FIELDS" and "/ NLOCK_FIELDS", which requires
+ * NLOCK_FIELDS * NLOCK_FIELDS <= 256.
+ */
+CTASSERT(NLOCK_FIELDS <= 16);
+
+static void
+g_bde_shuffle_lock(struct g_bde_softc *sc, int *buf)
+{
+	int a, b, tmp;
+	u_int i;
+
+	/* Start from the identity permutation... */
+	for (i = 0; i < NLOCK_FIELDS; i++)
+		buf[i] = i;
+
+	/*
+	 * ...then swap field pairs driven by the tail of the key-material.
+	 * Only bytes 48..63 are used here; earlier bytes serve as AES keys
+	 * elsewhere (sha2+0 and sha2+16).
+	 */
+	for (i = 48; i < sizeof(sc->sha2); i++) {
+		a = sc->sha2[i] % NLOCK_FIELDS;
+		b = (sc->sha2[i] / NLOCK_FIELDS) % NLOCK_FIELDS;
+		tmp = buf[a];
+		buf[a] = buf[b];
+		buf[b] = tmp;
+	}
+}
+
+/*
+ * Serialize the lock structure "gl" into the G_BDE_LOCKSIZE on-disk
+ * byte-sequence at "ptr".  The field order is the pass-phrase derived
+ * permutation from g_bde_shuffle_lock(); all numeric fields are stored
+ * little-endian.  Returns 0 on success, -1 if the encoded length does
+ * not come out to exactly G_BDE_LOCKSIZE or no hash slot was emitted
+ * (either would indicate a programming error).
+ */
+int
+g_bde_encode_lock(struct g_bde_softc *sc, struct g_bde_key *gl, u_char *ptr)
+{
+	int shuffle[NLOCK_FIELDS];
+	u_char *hash, *p;
+	int i;
+	MD5_CTX c;
+
+	p = ptr;
+	hash = NULL;
+	g_bde_shuffle_lock(sc, shuffle);
+	for (i = 0; i < NLOCK_FIELDS; i++) {
+		switch(shuffle[i]) {
+		case 0:
+			le64enc(p, gl->sector0);
+			p += 8;
+			break;
+		case 1:
+			le64enc(p, gl->sectorN);
+			p += 8;
+			break;
+		case 2:
+			le64enc(p, gl->keyoffset);
+			p += 8;
+			break;
+		case 3:
+			le32enc(p, gl->sectorsize);
+			p += 4;
+			break;
+		case 4:
+			le32enc(p, gl->flags);
+			p += 4;
+			break;
+		case 5:
+		case 6:
+		case 7:
+		case 8:
+			/* The four lock-sector addresses, fields 5..8. */
+			le64enc(p, gl->lsector[shuffle[i] - 5]);
+			p += 8;
+			break;
+		case 9:
+			bcopy(gl->spare, p, sizeof gl->spare);
+			p += sizeof gl->spare;
+			break;
+		case 10:
+			bcopy(gl->salt, p, sizeof gl->salt);
+			p += sizeof gl->salt;
+			break;
+		case 11:
+			bcopy(gl->mkey, p, sizeof gl->mkey);
+			p += sizeof gl->mkey;
+			break;
+		case 12:
+			/*
+			 * Reserve a zeroed slot for the MD5 hash; it is
+			 * filled in after the full sequence is laid out.
+			 */
+			bzero(p, 16);
+			hash = p;
+			p += 16;
+			break;
+		}
+	}
+	/* Sanity: we must have consumed exactly G_BDE_LOCKSIZE bytes. */
+	if(ptr + G_BDE_LOCKSIZE != p)
+		return(-1);
+	if (hash == NULL)
+		return(-1);
+	/*
+	 * Hash the version string plus the sequence (hash field still
+	 * zeroed) and drop the digest into the reserved slot.
+	 */
+	MD5Init(&c);
+	MD5Update(&c, "0000", 4);	/* Versioning */
+	MD5Update(&c, ptr, G_BDE_LOCKSIZE);
+	MD5Final(hash, &c);
+	return(0);
+}
+
+/*
+ * Inverse of g_bde_encode_lock(): deserialize the byte-sequence at
+ * "ptr" into "gl" using the same pass-phrase derived field order.
+ * Note that the sequence at "ptr" is modified in place (the hash field
+ * is zeroed for re-computation).  Returns 0 on success, 1 if the MD5
+ * hash does not match (wrong pass-phrase or corrupted sector), and -1
+ * on length mismatch (programming error) — callers distinguish these.
+ */
+int
+g_bde_decode_lock(struct g_bde_softc *sc, struct g_bde_key *gl, u_char *ptr)
+{
+	int shuffle[NLOCK_FIELDS];
+	u_char *p;
+	u_char hash[16], hash2[16];
+	MD5_CTX c;
+	int i;
+
+	p = ptr;
+	g_bde_shuffle_lock(sc, shuffle);
+	for (i = 0; i < NLOCK_FIELDS; i++) {
+		switch(shuffle[i]) {
+		case 0:
+			gl->sector0 = le64dec(p);
+			p += 8;
+			break;
+		case 1:
+			gl->sectorN = le64dec(p);
+			p += 8;
+			break;
+		case 2:
+			gl->keyoffset = le64dec(p);
+			p += 8;
+			break;
+		case 3:
+			gl->sectorsize = le32dec(p);
+			p += 4;
+			break;
+		case 4:
+			gl->flags = le32dec(p);
+			p += 4;
+			break;
+		case 5:
+		case 6:
+		case 7:
+		case 8:
+			/* The four lock-sector addresses, fields 5..8. */
+			gl->lsector[shuffle[i] - 5] = le64dec(p);
+			p += 8;
+			break;
+		case 9:
+			bcopy(p, gl->spare, sizeof gl->spare);
+			p += sizeof gl->spare;
+			break;
+		case 10:
+			bcopy(p, gl->salt, sizeof gl->salt);
+			p += sizeof gl->salt;
+			break;
+		case 11:
+			bcopy(p, gl->mkey, sizeof gl->mkey);
+			p += sizeof gl->mkey;
+			break;
+		case 12:
+			/*
+			 * Save the stored hash, then zero the field in
+			 * place so the recomputation below matches the
+			 * encode-time computation.
+			 */
+			bcopy(p, hash2, sizeof hash2);
+			bzero(p, sizeof hash2);
+			p += sizeof hash2;
+			break;
+		}
+	}
+	/* Sanity: we must have consumed exactly G_BDE_LOCKSIZE bytes. */
+	if(ptr + G_BDE_LOCKSIZE != p)
+		return(-1);
+	MD5Init(&c);
+	MD5Update(&c, "0000", 4);	/* Versioning */
+	MD5Update(&c, ptr, G_BDE_LOCKSIZE);
+	MD5Final(hash, &c);
+	if (bcmp(hash, hash2, sizeof hash2))
+		return (1);
+	return (0);
+}
+
+/*
+ * Encode/Decode the locksector address ("metadata") with key-material.
+ *
+ * Security objectives: Encode/Decode the metadata encrypted by key-material.
+ *
+ * A simple AES/128/CBC will do. We take care to always store the metadata
+ * in the same endianess to make it MI.
+ *
+ * In the typical case the metadata is stored in encrypted format in sector
+ * zero on the media, but at the users discretion or if the piece of the
+ * device used (sector0...sectorN) does not contain sector zero, it can
+ * be stored in a filesystem or on a PostIt.
+ *
+ * The inability to easily locate the lock sectors makes an attack on a
+ * cold disk much less attractive, without unduly inconveniencing the
+ * legitimate user who can feasibly do a brute-force scan if the metadata
+ * was lost.
+ */
+
+int
+g_bde_keyloc_encrypt(struct g_bde_softc *sc, uint64_t *input, void *output)
+{
+	cipherInstance ci;
+	keyInstance ki;
+	u_char clear[16];
+
+	/* Marshal the two 64-bit words in little-endian order... */
+	le64enc(clear, input[0]);
+	le64enc(clear + 8, input[1]);
+	/* ...and encrypt them under the first part of the key-material. */
+	AES_init(&ci);
+	AES_makekey(&ki, DIR_ENCRYPT, G_BDE_KKEYBITS, sc->sha2 + 0);
+	AES_encrypt(&ci, &ki, clear, output, sizeof clear);
+	/* Scrub the plaintext and key schedule off the stack. */
+	bzero(clear, sizeof clear);
+	bzero(&ki, sizeof ki);
+	bzero(&ci, sizeof ci);
+	return (0);
+}
+
+int
+g_bde_keyloc_decrypt(struct g_bde_softc *sc, void *input, uint64_t *output)
+{
+	cipherInstance ci;
+	keyInstance ki;
+	u_char clear[16];
+
+	/* Decrypt under the first part of the key-material... */
+	AES_init(&ci);
+	AES_makekey(&ki, DIR_DECRYPT, G_BDE_KKEYBITS, sc->sha2 + 0);
+	AES_decrypt(&ci, &ki, input, clear, sizeof clear);
+	/* ...and unmarshal the two little-endian 64-bit words. */
+	output[0] = le64dec(clear);
+	output[1] = le64dec(clear + 8);
+	/* Scrub the plaintext and key schedule off the stack. */
+	bzero(clear, sizeof clear);
+	bzero(&ki, sizeof ki);
+	bzero(&ci, sizeof ci);
+	return (0);
+}
+
+/*
+ * Find and Encode/Decode lock sectors.
+ *
+ * Security objective: given the pass-phrase, find, decrypt, decode and
+ * validate the lock sector contents.
+ *
+ * For ondisk metadata we cannot know beforehand which of the lock sectors
+ * a given pass-phrase opens so we must try each of the metadata copies in
+ * sector zero in turn. If metadata was passed as an argument, we don't
+ * have this problem.
+ *
+ */
+
+/*
+ * Given one 16-byte metadata candidate, locate, read, decrypt, decode
+ * and validate the lock sector it points to.  On success the decoded
+ * lock lands in sc->key and, if the candidate matched one of the four
+ * lock-sector addresses, *nkey is set to its index.
+ *
+ * Returns 0 on success; EINVAL for implausible metadata or an unsorted
+ * lock sequence; ESRCH if the lock sector was zeroed; ENOTDIR on hash
+ * mismatch (wrong pass-phrase); ENOENT if the master key was destroyed;
+ * EDOOFUS on internal decode error; or the I/O error from g_read_data().
+ */
+static int
+g_bde_decrypt_lockx(struct g_bde_softc *sc, u_char *meta, off_t mediasize, u_int sectorsize, u_int *nkey)
+{
+	u_char *buf, *q;
+	struct g_bde_key *gl;
+	uint64_t off[2];
+	int error, m, i;
+	keyInstance ki;
+	cipherInstance ci;
+
+	gl = &sc->key;
+
+	/* Try to decrypt the metadata */
+	error = g_bde_keyloc_decrypt(sc, meta, off);
+	if (error)
+		return(error);
+
+	/* loose the random part */
+	off[1] = 0;
+
+	/* If it points into thin blue air, forget it */
+	if (off[0] + G_BDE_LOCKSIZE > (uint64_t)mediasize) {
+		off[0] = 0;
+		return (EINVAL);
+	}
+
+	/* The lock data may span two physical sectors. */
+	m = 1;
+	if (off[0] % sectorsize > sectorsize - G_BDE_LOCKSIZE)
+		m++;
+
+	/* Read the suspected sector(s) */
+	buf = g_read_data(sc->consumer,
+		off[0] - (off[0] % sectorsize),
+		m * sectorsize, &error);
+	if (buf == NULL) {
+		off[0] = 0;
+		return(error);
+	}
+
+	/* Find the byte-offset of the stored byte sequence */
+	q = buf + off[0] % sectorsize;
+
+	/* If it is all zero, somebody nuked our lock sector */
+	for (i = 0; i < G_BDE_LOCKSIZE; i++)
+		off[1] += q[i];
+	if (off[1] == 0) {
+		off[0] = 0;
+		g_free(buf);
+		return (ESRCH);
+	}
+
+	/* Decrypt the byte-sequence in place */
+	AES_init(&ci);
+	AES_makekey(&ki, DIR_DECRYPT, 256, sc->sha2 + 16);
+	AES_decrypt(&ci, &ki, q, q, G_BDE_LOCKSIZE);
+	/* Scrub the key schedule, as the keyloc helpers do. */
+	bzero(&ki, sizeof ki);
+	bzero(&ci, sizeof ci);
+
+	/* Decode the byte-sequence */
+	i = g_bde_decode_lock(sc, gl, q);
+	q = NULL;
+
+	/*
+	 * Scrub and release the I/O buffer before acting on the decode
+	 * verdict; it holds decrypted lock-sector plaintext and was
+	 * previously leaked (unscrubbed) on the two error returns below.
+	 */
+	bzero(buf, sectorsize * m);
+	g_free(buf);
+
+	if (i < 0) {
+		off[0] = 0;
+		return (EDOOFUS);	/* Programming error */
+	} else if (i > 0) {
+		off[0] = 0;
+		return (ENOTDIR);	/* Hash didn't match */
+	}
+
+	/* If the masterkey is all zeros, user destroyed it */
+	off[1] = 0;
+	for (i = 0; i < (int)sizeof(gl->mkey); i++)
+		off[1] += gl->mkey[i];
+	if (off[1] == 0)
+		return (ENOENT);
+
+	/* If we have an unsorted lock-sequence, refuse */
+	if (gl->lsector[0] > gl->lsector[1] ||
+	    gl->lsector[1] > gl->lsector[2] ||
+	    gl->lsector[2] > gl->lsector[3])
+		return (EINVAL);
+
+	/* Finally, find out which key was used by matching the byte offset */
+	for (i = 0; i < G_BDE_MAXKEYS; i++)
+		if (nkey != NULL && off[0] == gl->lsector[i])
+			*nkey = i;
+	off[0] = 0;
+	return (0);
+}
+
+int
+g_bde_decrypt_lock(struct g_bde_softc *sc, u_char *keymat, u_char *meta, off_t mediasize, u_int sectorsize, u_int *nkey)
+{
+	u_char zero[16], *sector;
+	int i, err, ret;
+
+	/* Install the key-material for the helper functions. */
+	bcopy(keymat, sc->sha2, SHA512_DIGEST_LENGTH);
+
+	/* A non-zero metadata argument overrides the on-disk copies. */
+	bzero(zero, sizeof zero);
+	if (meta != NULL && bcmp(zero, meta, sizeof zero))
+		return (g_bde_decrypt_lockx(sc, meta, mediasize,
+		    sectorsize, nkey));
+
+	/* Otherwise sector zero holds the metadata copies. */
+	sector = g_read_data(sc->consumer, 0, sectorsize, &err);
+	if (sector == NULL)
+		return(err);
+
+	/*
+	 * Try each copy in turn; keep the first error more indicative
+	 * than the EINVAL default.
+	 */
+	ret = EINVAL;
+	for (i = 0; i < G_BDE_MAXKEYS; i++) {
+		err = g_bde_decrypt_lockx(sc, sector + i * 16, mediasize,
+		    sectorsize, nkey);
+		if (err == 0 || err == ENOENT) {
+			/* Opened, or master key deliberately destroyed. */
+			ret = err;
+			break;
+		}
+		if (ret == EINVAL)
+			ret = err;
+	}
+	g_free(sector);
+	return (ret);
+}
diff --git a/sys/geom/bde/g_bde_work.c b/sys/geom/bde/g_bde_work.c
new file mode 100644
index 0000000..b2f5aa9
--- /dev/null
+++ b/sys/geom/bde/g_bde_work.c
@@ -0,0 +1,763 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This source file contains the state-engine which makes things happen in the
+ * right order.
+ *
+ * Outline:
+ * 1) g_bde_start1()
+ * Break the struct bio into multiple work packets one per zone.
+ * 2) g_bde_start2()
+ * Setup the necessary sector buffers and start those read operations
+ * which we can start at this time and put the item on the work-list.
+ * 3) g_bde_worker()
+ * Scan the work-list for items which are ready for crypto processing
+ * and call the matching crypto function in g_bde_crypt.c and schedule
+ * any writes needed. Read operations finish here by releasing the
+ * sector buffers and delivering the original bio request.
+ * 4) g_bde_write_done()
+ * Release sector buffers and deliver the original bio request.
+ *
+ * Because of the C-scope rules, the functions are almost perfectly in the
+ * opposite order in this source file.
+ *
+ * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
+ * XXX: additional states to this state-engine. Since no hardware available
+ * XXX: at this time has AES support, implementing this has been postponed
+ * XXX: until such time as it would result in a benefit.
+ */
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+
+#include <crypto/rijndael/rijndael.h>
+#include <crypto/sha2/sha2.h>
+#include <geom/geom.h>
+#include <geom/bde/g_bde.h>
+
+/*
+ * Forward declarations.  Parameter names match the definitions below
+ * (the first parameter of g_bde_delete_sector() was previously named
+ * "wp" here but "sc" in its definition).
+ */
+static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
+static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
+static void g_bde_release_keysector(struct g_bde_work *wp);
+static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
+static int g_bde_start_read(struct g_bde_sector *sp);
+static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
+
+/*
+ * Work item allocation.
+ *
+ * C++ would call these constructors and destructors.
+ */
+/* Instrumentation: number of work items currently allocated, all instances. */
+static u_int g_bde_nwork;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
+
+static MALLOC_DEFINE(M_GBDE, "GBDE", "GBDE data structures");
+
+static struct g_bde_work *
+g_bde_new_work(struct g_bde_softc *sc)
+{
+	struct g_bde_work *wp;
+
+	/* Called from the request path, so we must not sleep for memory. */
+	wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
+	if (wp != NULL) {
+		wp->state = SETUP;
+		wp->softc = sc;
+		g_bde_nwork++;
+		sc->nwork++;
+		TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
+	}
+	return (wp);
+}
+
+static void
+g_bde_delete_work(struct g_bde_work *wp)
+{
+	struct g_bde_softc *sc;
+
+	/* Unhook from the instance worklist, retire the counters, free. */
+	sc = wp->softc;
+	TAILQ_REMOVE(&sc->worklist, wp, list);
+	sc->nwork--;
+	g_bde_nwork--;
+	free(wp, M_GBDE);
+}
+
+/*
+ * Sector buffer allocation
+ *
+ * These two functions allocate and free back variable sized sector buffers
+ */
+
+/* Instrumentation: number of sector buffers currently allocated. */
+static u_int g_bde_nsect;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
+
+static void
+g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
+{
+
+	sc->nsect--;
+	g_bde_nsect--;
+	/* Only data buffers we allocated ourselves are ours to free. */
+	if (sp->malloc)
+		free(sp->data, M_GBDE);
+	free(sp, M_GBDE);
+}
+
+static struct g_bde_sector *
+g_bde_new_sector(struct g_bde_work *wp, u_int len)
+{
+	struct g_bde_sector *sp;
+
+	sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
+	if (sp == NULL)
+		return (NULL);
+	if (len > 0) {
+		/* len > 0: the caller wants us to own a data buffer too. */
+		sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
+		if (sp->data == NULL) {
+			free(sp, M_GBDE);
+			return (NULL);
+		}
+		sp->malloc = 1;
+	}
+	g_bde_nsect++;
+	wp->softc->nsect++;
+	/* Fresh sectors start owned by the creating work item. */
+	sp->softc = wp->softc;
+	sp->owner = wp;
+	sp->offset = wp->so;
+	sp->size = len;
+	sp->ref = 1;
+	sp->state = JUNK;
+	return (sp);
+}
+
+/*
+ * Skey sector cache.
+ *
+ * Nothing prevents two separate I/O requests from addressing the same zone
+ * and thereby needing the same skey sector. We therefore need to sequence
+ * I/O operations to the skey sectors. A certain amount of caching is also
+ * desirable, although the extent of benefit from this is not at this point
+ * determined.
+ *
+ * XXX: GEOM may be able to grow a generic caching facility at some point
+ * XXX: to support such needs.
+ */
+
+/* Instrumentation: number of key sectors currently cached. */
+static u_int g_bde_ncache;
+SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
+
+static void
+g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
+{
+
+	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
+	if (sp->ref != 0)
+		return;		/* Still referenced; not eligible. */
+	TAILQ_REMOVE(&sc->freelist, sp, list);
+	g_bde_ncache--;
+	sc->ncache--;
+	/* Key material lives in here; scrub before releasing. */
+	bzero(sp->data, sp->size);
+	g_bde_delete_sector(sc, sp);
+}
+
+/*
+ * Find or create the cached key sector for wp->kso and take a reference
+ * on it, installing it as wp->ksp.  The freelist doubles as the cache in
+ * LRU order (tail = most recently used).  Returns NULL if no buffer
+ * could be obtained under memory pressure.
+ */
+static struct g_bde_sector *
+g_bde_get_keysector(struct g_bde_work *wp)
+{
+	struct g_bde_sector *sp;
+	struct g_bde_softc *sc;
+	off_t offset;
+
+	offset = wp->kso;
+	g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
+	sc = wp->softc;
+
+	/*
+	 * Heuristic: a recent malloc failure (malloc_last_fail() is the
+	 * seconds since the last one) means memory is tight; shrink the
+	 * cache to pressure.
+	 */
+	if (malloc_last_fail() < g_bde_ncache)
+		g_bde_purge_sector(sc, -1);
+
+	/* Expire the LRU head if it has sat unreferenced for 300 seconds. */
+	sp = TAILQ_FIRST(&sc->freelist);
+	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
+		g_bde_purge_one_sector(sc, sp);
+
+	/* Cache lookup by media offset. */
+	TAILQ_FOREACH(sp, &sc->freelist, list) {
+		if (sp->offset == offset)
+			break;
+	}
+	if (sp != NULL) {
+		/* Cache hit: take a reference; first taker becomes owner. */
+		sp->ref++;
+		KASSERT(sp->offset == offset, ("wrong offset"));
+		KASSERT(sp->softc == wp->softc, ("wrong softc"));
+		if (sp->ref == 1)
+			sp->owner = wp;
+	} else {
+		/*
+		 * Cache miss.  Under memory pressure, recycle an existing
+		 * unreferenced buffer rather than allocating a new one.
+		 */
+		if (malloc_last_fail() < g_bde_ncache) {
+			TAILQ_FOREACH(sp, &sc->freelist, list)
+				if (sp->ref == 0)
+					break;
+		}
+		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
+			sp = TAILQ_FIRST(&sc->freelist);
+		if (sp != NULL && sp->ref > 0)
+			sp = NULL;
+		if (sp == NULL) {
+			/* Allocate a fresh one; malloc == 2 marks it cached. */
+			sp = g_bde_new_sector(wp, sc->sectorsize);
+			if (sp != NULL) {
+				g_bde_ncache++;
+				sc->ncache++;
+				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+				sp->malloc = 2;
+			}
+		}
+		if (sp != NULL) {
+			/* (Re)initialize the buffer for the new offset. */
+			sp->offset = offset;
+			sp->softc = wp->softc;
+			sp->ref = 1;
+			sp->owner = wp;
+			sp->state = JUNK;
+			sp->error = 0;
+		}
+	}
+	if (sp != NULL) {
+		/* Move to the LRU tail and stamp the access time. */
+		TAILQ_REMOVE(&sc->freelist, sp, list);
+		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+		sp->used = time_uptime;
+	}
+	wp->ksp = sp;
+	return(sp);
+}
+
+/*
+ * Drop the work item's reference on its key sector.  If other work
+ * items still reference the sector, hand ownership to the next one;
+ * otherwise an errored sector is invalidated and queued for reuse.
+ *
+ * Fix: the trailing TAILQ_REMOVE/TAILQ_INSERT_HEAD was previously
+ * unconditional, which immediately undid the INSERT_TAIL done in the
+ * "still referenced" branch and demoted healthy unreferenced sectors
+ * to the freelist head, defeating the LRU ordering.  It belongs only
+ * in the error branch.
+ */
+static void
+g_bde_release_keysector(struct g_bde_work *wp)
+{
+	struct g_bde_softc *sc;
+	struct g_bde_work *wp2;
+	struct g_bde_sector *sp;
+
+	sp = wp->ksp;
+	g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
+	KASSERT(sp->malloc == 2, ("Wrong sector released"));
+	sc = sp->softc;
+	KASSERT(sc != NULL, ("NULL sp->softc"));
+	KASSERT(wp == sp->owner, ("Releasing, not owner"));
+	sp->owner = NULL;
+	wp->ksp = NULL;
+	sp->ref--;
+	if (sp->ref > 0) {
+		/*
+		 * Still in use elsewhere: refresh its LRU position and
+		 * pass ownership to the next waiting work item.
+		 */
+		TAILQ_REMOVE(&sc->freelist, sp, list);
+		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
+		TAILQ_FOREACH(wp2, &sc->worklist, list) {
+			if (wp2->ksp == sp) {
+				KASSERT(wp2 != wp, ("Self-reowning"));
+				sp->owner = wp2;
+				wakeup(sp->softc);
+				break;
+			}
+		}
+		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
+	} else if (sp->error != 0) {
+		/*
+		 * I/O error: the cached content is invalid.  Mark it JUNK
+		 * and park it at the freelist head so it is recycled first.
+		 */
+		sp->offset = ~0;
+		sp->error = 0;
+		sp->state = JUNK;
+		TAILQ_REMOVE(&sc->freelist, sp, list);
+		TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
+	}
+}
+
+static void
+g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
+{
+	struct g_bde_sector *sp;
+	int n;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
+	/*
+	 * Purge either a fixed fraction of the cache, or (fraction <= 0)
+	 * enough to relieve the reported memory pressure.
+	 */
+	if (fraction > 0)
+		n = sc->ncache / fraction + 1;
+	else
+		n = g_bde_ncache - malloc_last_fail();
+	if (n < 0)
+		return;
+	if (n > sc->ncache)
+		n = sc->ncache;
+	for (; n > 0; n--) {
+		/* Drop the first unreferenced sector found, if any. */
+		TAILQ_FOREACH(sp, &sc->freelist, list) {
+			if (sp->ref != 0)
+				continue;
+			TAILQ_REMOVE(&sc->freelist, sp, list);
+			g_bde_ncache--;
+			sc->ncache--;
+			bzero(sp->data, sp->size);
+			g_bde_delete_sector(sc, sp);
+			break;
+		}
+	}
+}
+
+static struct g_bde_sector *
+g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
+{
+	struct g_bde_sector *sp;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
+	sp = g_bde_get_keysector(wp);
+	if (sp == NULL) {
+		/* Trim the cache and try once more. */
+		g_bde_purge_sector(sc, -1);
+		sp = g_bde_get_keysector(wp);
+		if (sp == NULL)
+			return (NULL);
+	}
+	/* Another work item owns the pending read, or it is already valid. */
+	if (sp->owner != wp || sp->state == VALID)
+		return (sp);
+	/* We own it; kick off the read ourselves. */
+	if (g_bde_start_read(sp) != 0) {
+		g_bde_release_keysector(wp);
+		sp = NULL;
+	}
+	return (sp);
+}
+
+/*
+ * Contribute to the completion of the original bio request.
+ *
+ * We have no simple way to tell how many bits the original bio request has
+ * been segmented into, so the easiest way to determine when we can deliver
+ * it is to keep track of the number of bytes we have completed. We keep
+ * track of any errors underway and latch onto the first one.
+ *
+ * We always report "nothing done" in case of error, because random bits here
+ * and there may be completed and returning a number of completed bytes does
+ * not convey any useful information about which bytes they were. If some
+ * piece of broken code somewhere interprets this to mean that nothing has
+ * changed on the underlying media they deserve the lossage headed for them.
+ *
+ * A single mutex per g_bde instance is used to prevent contention.
+ */
+
+static void
+g_bde_contribute(struct bio *bp, off_t bytes, int error)
+{
+
+	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
+	    bp, (intmax_t)bytes, error);
+	/* Latch the first error reported. */
+	if (bp->bio_error == 0)
+		bp->bio_error = error;
+	bp->bio_completed += bytes;
+	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
+	if (bp->bio_completed != bp->bio_length)
+		return;
+	/* All fragments accounted for; deliver the original request. */
+	if (bp->bio_error != 0)
+		bp->bio_completed = 0;
+	g_io_deliver(bp, bp->bio_error);
+}
+
+/*
+ * A write operation has finished. When we have all expected cows in the
+ * barn close the door and call it a day.
+ */
+
+/*
+ * bio completion callback for writes issued by g_bde_start_write().
+ * A BIO_DELETE work item completes on its single data-sector write; a
+ * BIO_WRITE work item completes only when both its data sector and its
+ * key sector write have finished (in either order).
+ */
+static void
+g_bde_write_done(struct bio *bp)
+{
+	struct g_bde_sector *sp;
+	struct g_bde_work *wp;
+	struct g_bde_softc *sc;
+
+	sp = bp->bio_caller1;
+	sc = bp->bio_caller2;
+	mtx_lock(&sc->worklist_mutex);
+	KASSERT(sp != NULL, ("NULL sp"));
+	KASSERT(sc != NULL, ("NULL sc"));
+	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
+	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
+	/* A short write without an error code is still an error. */
+	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
+		bp->bio_error = EIO;
+	sp->error = bp->bio_error;
+	g_destroy_bio(bp);
+	wp = sp->owner;
+	/* Latch the first error onto the work item. */
+	if (wp->error == 0)
+		wp->error = sp->error;
+
+	if (wp->bp->bio_cmd == BIO_DELETE) {
+		/* Deletes write only the data sector; we are done. */
+		KASSERT(sp == wp->sp, ("trashed delete op"));
+		g_bde_contribute(wp->bp, wp->length, wp->error);
+		g_bde_delete_sector(sc, sp);
+		g_bde_delete_work(wp);
+		mtx_unlock(&sc->worklist_mutex);
+		return;
+	}
+
+	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
+	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
+	if (wp->sp == sp) {
+		/* Data sector done: release it and record completion. */
+		g_bde_delete_sector(sc, wp->sp);
+		wp->sp = NULL;
+	} else {
+		/* Key sector done: its cached content is valid again. */
+		sp->state = VALID;
+	}
+	/* Only when both halves have landed is the work item finished. */
+	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
+		g_bde_contribute(wp->bp, wp->length, wp->error);
+		g_bde_release_keysector(wp);
+		g_bde_delete_work(wp);
+	}
+	mtx_unlock(&sc->worklist_mutex);
+	return;
+}
+
+/*
+ * Send a write request for the given sector down the pipeline.
+ */
+
+static int
+g_bde_start_write(struct g_bde_sector *sp)
+{
+	struct g_bde_softc *sc;
+	struct bio *bp;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
+	sc = sp->softc;
+	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
+	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
+	bp = g_new_bio();
+	if (bp == NULL)
+		return (ENOMEM);
+	/* Clone the sector parameters into the request. */
+	bp->bio_cmd = BIO_WRITE;
+	bp->bio_offset = sp->offset;
+	bp->bio_data = sp->data;
+	bp->bio_length = sp->size;
+	bp->bio_done = g_bde_write_done;
+	bp->bio_caller1 = sp;
+	bp->bio_caller2 = sc;
+	/* Mark the sector busy before the request can complete. */
+	sp->state = IO;
+	g_io_request(bp, sc->consumer);
+	return (0);
+}
+
+/*
+ * A read operation has finished. Mark the sector no longer iobusy and
+ * wake up the worker thread and let it do its thing.
+ */
+
+static void
+g_bde_read_done(struct bio *bp)
+{
+	struct g_bde_softc *sc;
+	struct g_bde_sector *sp;
+
+	sp = bp->bio_caller1;
+	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
+	sc = bp->bio_caller2;
+	mtx_lock(&sc->worklist_mutex);
+	/* A short read without an error code is still an error. */
+	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
+		bp->bio_error = EIO;
+	sp->error = bp->bio_error;
+	sp->state = (sp->error == 0) ? VALID : JUNK;
+	/* Poke the worker thread: this sector changed state. */
+	wakeup(sc);
+	g_destroy_bio(bp);
+	mtx_unlock(&sc->worklist_mutex);
+}
+
+/*
+ * Send a read request for the given sector down the pipeline.
+ */
+
+static int
+g_bde_start_read(struct g_bde_sector *sp)
+{
+	struct g_bde_softc *sc;
+	struct bio *bp;
+
+	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
+	sc = sp->softc;
+	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
+	bp = g_new_bio();
+	if (bp == NULL)
+		return (ENOMEM);
+	/* Clone the sector parameters into the request. */
+	bp->bio_cmd = BIO_READ;
+	bp->bio_offset = sp->offset;
+	bp->bio_data = sp->data;
+	bp->bio_length = sp->size;
+	bp->bio_done = g_bde_read_done;
+	bp->bio_caller1 = sp;
+	bp->bio_caller2 = sc;
+	/* Mark the sector busy before the request can complete. */
+	sp->state = IO;
+	g_io_request(bp, sc->consumer);
+	return (0);
+}
+
+/*
+ * The worker thread.
+ *
+ * The up/down path of GEOM is not allowed to sleep or do any major work
+ * so we use this thread to do the actual crypto operations and to push
+ * the state engine onwards.
+ *
+ * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
+ * XXX: using a thread here is probably not needed.
+ */
+
+/*
+ * Per-instance worker thread main loop.  Repeatedly scans the worklist
+ * for items in WAIT state whose sector I/O has completed, performs the
+ * crypto transformations (with the mutex dropped), and advances or
+ * retires each work item.  Sleeps when nothing is runnable; exits when
+ * sc->dead is set and the instance is idle.
+ */
+void
+g_bde_worker(void *arg)
+{
+	struct g_bde_softc *sc;
+	struct g_bde_work *wp;
+	struct g_geom *gp;
+	int busy, error;
+
+	gp = arg;
+	sc = gp->softc;
+
+	mtx_lock(&sc->worklist_mutex);
+	for (;;) {
+		busy = 0;
+		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
+		TAILQ_FOREACH(wp, &sc->worklist, list) {
+			KASSERT(wp != NULL, ("NULL wp"));
+			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
+			if (wp->state != WAIT)
+				continue;	/* Not interesting here */
+
+			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
+			KASSERT(wp->sp != NULL, ("NULL wp->sp"));
+
+			/*
+			 * The key sector must be ours and out of I/O
+			 * before we may touch this work item.
+			 */
+			if (wp->ksp != NULL) {
+				if (wp->ksp->owner != wp)
+					continue;
+				if (wp->ksp->state == IO)
+					continue;
+				KASSERT(wp->ksp->state == VALID,
+				    ("Illegal sector state (JUNK ?)"));
+			}
+
+			/* For reads, the data sector must be in as well. */
+			if (wp->bp->bio_cmd == BIO_READ &&
+			    wp->sp->state == IO)
+				continue;
+
+			/* A failed key-sector read fails the work item. */
+			if (wp->ksp != NULL && wp->ksp->error != 0) {
+				g_bde_contribute(wp->bp, wp->length,
+				    wp->ksp->error);
+				g_bde_delete_sector(sc, wp->sp);
+				g_bde_release_keysector(wp);
+				g_bde_delete_work(wp);
+				busy++;
+				break;
+			}
+			switch(wp->bp->bio_cmd) {
+			case BIO_READ:
+				if (wp->ksp == NULL) {
+					/* Key sector allocation failed in start2. */
+					KASSERT(wp->error != 0,
+					    ("BIO_READ, no ksp and no error"));
+					g_bde_contribute(wp->bp, wp->length,
+					    wp->error);
+				} else {
+					/* Decrypt without holding the mutex. */
+					if (wp->sp->error == 0) {
+						mtx_unlock(&sc->worklist_mutex);
+						g_bde_crypt_read(wp);
+						mtx_lock(&sc->worklist_mutex);
+					}
+					g_bde_contribute(wp->bp, wp->length,
+					    wp->sp->error);
+				}
+				g_bde_delete_sector(sc, wp->sp);
+				if (wp->ksp != NULL)
+					g_bde_release_keysector(wp);
+				g_bde_delete_work(wp);
+				break;
+			case BIO_WRITE:
+				wp->state = FINISH;
+				KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
+				KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
+				/* Encrypt without holding the mutex. */
+				mtx_unlock(&sc->worklist_mutex);
+				g_bde_crypt_write(wp);
+				mtx_lock(&sc->worklist_mutex);
+				error = g_bde_start_write(wp->sp);
+				if (error) {
+					g_bde_contribute(wp->bp, wp->length, error);
+					g_bde_release_keysector(wp);
+					g_bde_delete_sector(sc, wp->sp);
+					g_bde_delete_work(wp);
+					break;
+				}
+				/* Completion continues in g_bde_write_done(). */
+				error = g_bde_start_write(wp->ksp);
+				if (wp->error == 0)
+					wp->error = error;
+				break;
+			case BIO_DELETE:
+				wp->state = FINISH;
+				mtx_unlock(&sc->worklist_mutex);
+				g_bde_crypt_delete(wp);
+				mtx_lock(&sc->worklist_mutex);
+				/*
+				 * NOTE(review): the return value of
+				 * g_bde_start_write() is ignored here; an
+				 * ENOMEM would appear to leave this work
+				 * item stuck in FINISH — confirm intended.
+				 */
+				g_bde_start_write(wp->sp);
+				break;
+			}
+			busy++;
+			break;
+		}
+		if (!busy) {
+			/*
+			 * We don't look for our death-warrant until we are
+			 * idle. Shouldn't make a difference in practice.
+			 */
+			if (sc->dead)
+				break;
+			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
+			error = msleep(sc, &sc->worklist_mutex,
+			    PRIBIO, "g_bde", hz);
+			if (error == EWOULDBLOCK) {
+				/*
+				 * Loose our skey cache in an orderly fashion.
+				 * The exact rate can be tuned to be less
+				 * aggressive if this is desirable. 10% per
+				 * second means that the cache is gone in a
+				 * few minutes.
+				 */
+				g_bde_purge_sector(sc, 10);
+			}
+		}
+	}
+	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
+	/* Drain the entire cache before reporting ourselves dead. */
+	g_bde_purge_sector(sc, 1);
+	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
+	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
+	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
+	mtx_unlock(&sc->worklist_mutex);
+	/* Signal whoever is waiting in the destroy path. */
+	sc->dead = 2;
+	wakeup(sc);
+	mtx_lock(&Giant);
+	kthread_exit(0);
+}
+
+/*
+ * g_bde_start1 has chopped the incoming request up so all the requests
+ * we see here are inside a single zone. Map the data and key locations
+ * grab the buffers we need and fire off the first volley of read requests.
+ */
+
+static void
+g_bde_start2(struct g_bde_work *wp)
+{
+	struct g_bde_softc *sc;
+
+	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
+	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
+	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
+	sc = wp->softc;
+	switch (wp->bp->bio_cmd) {
+	case BIO_READ:
+		/* Reads decrypt in place: borrow the caller's buffer. */
+		wp->sp = g_bde_new_sector(wp, 0);
+		if (wp->sp == NULL) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_work(wp);
+			return;
+		}
+		wp->sp->size = wp->length;
+		wp->sp->data = wp->data;
+		if (g_bde_start_read(wp->sp) != 0) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_sector(sc, wp->sp);
+			g_bde_delete_work(wp);
+			return;
+		}
+		g_bde_read_keysector(sc, wp);
+		if (wp->ksp == NULL)
+			wp->error = ENOMEM;
+		break;
+	case BIO_DELETE:
+		/* Deletes need only a scratch sector to overwrite with. */
+		wp->sp = g_bde_new_sector(wp, wp->length);
+		if (wp->sp == NULL) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_work(wp);
+			return;
+		}
+		break;
+	case BIO_WRITE:
+		/* Writes need a scratch sector and the key sector. */
+		wp->sp = g_bde_new_sector(wp, wp->length);
+		if (wp->sp == NULL) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_work(wp);
+			return;
+		}
+		g_bde_read_keysector(sc, wp);
+		if (wp->ksp == NULL) {
+			g_bde_contribute(wp->bp, wp->length, ENOMEM);
+			g_bde_delete_sector(sc, wp->sp);
+			g_bde_delete_work(wp);
+			return;
+		}
+		break;
+	default:
+		KASSERT(0 == 1,
+		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
+	}
+
+	/* Hand over to the worker thread. */
+	wp->state = WAIT;
+	wakeup(sc);
+}
+
+/*
+ * Create a sequence of work structures, and have g_bde_map_sector() determine
+ * how long they each can be. Feed them to g_bde_start2().
+ */
+
+void
+g_bde_start1(struct bio *bp)
+{
+	struct g_bde_softc *sc;
+	struct g_bde_work *wp;
+	off_t done;
+
+	sc = bp->bio_to->geom->softc;
+	bp->bio_driver1 = sc;
+
+	mtx_lock(&sc->worklist_mutex);
+	done = 0;
+	while (done < bp->bio_length) {
+		wp = g_bde_new_work(sc);
+		if (wp == NULL) {
+			/* Out of memory: fail the remainder in one go. */
+			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
+			break;
+		}
+		wp->bp = bp;
+		wp->offset = bp->bio_offset + done;
+		wp->data = bp->bio_data + done;
+		wp->length = bp->bio_length - done;
+		g_bde_map_sector(wp);	/* trims wp->length to one zone */
+		done += wp->length;
+		g_bde_start2(wp);
+		if (bp->bio_error != 0) {
+			/* start2 failed this piece; fail the rest too. */
+			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
+			break;
+		}
+	}
+	mtx_unlock(&sc->worklist_mutex);
+}
diff --git a/sys/geom/geom.h b/sys/geom/geom.h
new file mode 100644
index 0000000..53f7356
--- /dev/null
+++ b/sys/geom/geom.h
@@ -0,0 +1,313 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_GEOM_H_
+#define _GEOM_GEOM_H_
+
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/queue.h>
+#include <sys/ioccom.h>
+#include <sys/sbuf.h>
+#include <sys/module.h>
+
+struct g_class;
+struct g_geom;
+struct g_consumer;
+struct g_provider;
+struct g_stat;
+struct thread;
+struct bio;
+struct sbuf;
+struct gctl_req;
+struct g_configargs;
+
+typedef int g_config_t (struct g_configargs *ca);
+typedef void g_ctl_req_t (struct gctl_req *, struct g_class *cp, char const *verb);
+typedef int g_ctl_create_geom_t (struct gctl_req *, struct g_class *cp, struct g_provider *pp);
+typedef int g_ctl_destroy_geom_t (struct gctl_req *, struct g_class *cp, struct g_geom *gp);
+typedef int g_ctl_config_geom_t (struct gctl_req *, struct g_geom *gp, const char *verb);
+typedef void g_init_t (struct g_class *mp);
+typedef void g_fini_t (struct g_class *mp);
+typedef struct g_geom * g_taste_t (struct g_class *, struct g_provider *,
+ int flags);
+#define G_TF_NORMAL 0
+#define G_TF_INSIST 1
+#define G_TF_TRANSPARENT 2
+typedef int g_access_t (struct g_provider *, int, int, int);
+/* XXX: not sure about the thread arg */
+typedef void g_orphan_t (struct g_consumer *);
+
+typedef void g_start_t (struct bio *);
+typedef void g_spoiled_t (struct g_consumer *);
+typedef void g_dumpconf_t (struct sbuf *, const char *indent, struct g_geom *,
+ struct g_consumer *, struct g_provider *);
+
+/*
+ * The g_class structure describes a transformation class. In other words
+ * all BSD disklabel handlers share one g_class, all MBR handlers share
+ * one common g_class and so on.
+ * Certain operations are instantiated on the class, most notably the
+ * taste and config_geom functions.
+ */
+struct g_class {
+	const char *name;		/* class name, e.g. "MBR", "BSD" */
+	g_taste_t *taste;		/* probe a provider for a match */
+	g_config_t *config;
+	g_ctl_req_t *ctlreq;		/* userland control-verb handler */
+	g_init_t *init;			/* called when the class is loaded */
+	g_fini_t *fini;			/* called when the class is unloaded */
+	g_ctl_destroy_geom_t *destroy_geom;
+	/*
+	 * The remaining elements are private
+	 */
+	LIST_ENTRY(g_class) class;	/* linkage on the global class list */
+	LIST_HEAD(,g_geom) geom;	/* all geoms instantiated from this class */
+};
+
+/*
+ * The g_geom is an instance of a g_class.
+ */
+struct g_geom {
+	char *name;			/* instance name */
+	struct g_class *class;		/* class we are an instance of */
+	LIST_ENTRY(g_geom) geom;	/* linkage on the class' geom list */
+	LIST_HEAD(,g_consumer) consumer; /* our attachments downwards */
+	LIST_HEAD(,g_provider) provider; /* what we offer upwards */
+	TAILQ_ENTRY(g_geom) geoms;	/* XXX: better name */
+	int rank;			/* topological rank (taste funcs test rank == 2) */
+	g_start_t *start;		/* I/O request entry point */
+	g_spoiled_t *spoiled;		/* a provider below us was spoiled */
+	g_dumpconf_t *dumpconf;		/* configuration dump hook */
+	g_access_t *access;		/* open/close accounting hook */
+	g_orphan_t *orphan;		/* a provider below us went away */
+	void *softc;			/* method-private state */
+	unsigned flags;
+#define G_GEOM_WITHER 1
+};
+
+/*
+ * The g_bioq is a queue of struct bio's.
+ * XXX: possibly collection point for statistics.
+ * XXX: should (possibly) be collapsed with sys/bio.h::bio_queue_head.
+ */
+struct g_bioq {
+	TAILQ_HEAD(, bio) bio_queue;	/* the queued bio requests */
+	struct mtx bio_queue_lock;	/* protects queue and length */
+	int bio_queue_length;		/* number of bios on the queue */
+};
+
+/*
+ * A g_consumer is an attachment point for a g_provider. One g_consumer
+ * can only be attached to one g_provider, but multiple g_consumers
+ * can be attached to one g_provider.
+ */
+
+struct g_consumer {
+	struct g_geom *geom;		/* geom owning this consumer */
+	LIST_ENTRY(g_consumer) consumer; /* linkage on the geom's list */
+	struct g_provider *provider;	/* provider we are attached to */
+	LIST_ENTRY(g_consumer) consumers; /* XXX: better name */
+	int acr, acw, ace;		/* read/write/exclusive access counts */
+	int spoiled;			/* set when our provider was spoiled */
+	struct devstat *stat;		/* statistics collection */
+	u_int nstart, nend;		/* started/completed bio counters */
+};
+
+/*
+ * A g_provider is a "logical disk".
+ */
+struct g_provider {
+	char *name;			/* provider name */
+	LIST_ENTRY(g_provider) provider; /* linkage on the geom's list */
+	struct g_geom *geom;		/* geom offering this provider */
+	LIST_HEAD(,g_consumer) consumers; /* consumers attached to us */
+	int acr, acw, ace;		/* summed access counts */
+	int error;			/* provider-wide error, see g_error_provider() */
+	TAILQ_ENTRY(g_provider) orphan;	/* pending-orphan queue linkage */
+	u_int index;			/* method-private index (e.g. slice #) */
+	off_t mediasize;		/* size of the media in bytes */
+	u_int sectorsize;		/* native sector size in bytes */
+	u_int stripesize;		/* stripe geometry hint -- TODO confirm units */
+	u_int stripeoffset;		/* offset of first stripe */
+	struct devstat *stat;		/* statistics collection */
+	u_int nstart, nend;		/* started/completed bio counters */
+	u_int flags;
+#define G_PF_CANDELETE	0x1
+};
+
+/* geom_dev.c */
+void g_dev_print(void);
+
+/* geom_dump.c */
+void g_hexdump(void *ptr, int length);
+void g_trace(int level, const char *, ...);
+# define G_T_TOPOLOGY 1
+# define G_T_BIO 2
+# define G_T_ACCESS 4
+
+
+/* geom_event.c */
+typedef void g_event_t(void *, int flag);
+#define EV_CANCEL 1
+int g_post_event(g_event_t *func, void *arg, int flag, ...);
+int g_waitfor_event(g_event_t *func, void *arg, int flag, ...);
+void g_cancel_event(void *ref);
+void g_orphan_provider(struct g_provider *pp, int error);
+void g_waitidle(void);
+
+/* geom_subr.c */
+int g_access_abs(struct g_consumer *cp, int nread, int nwrite, int nexcl);
+int g_access_rel(struct g_consumer *cp, int nread, int nwrite, int nexcl);
+int g_attach(struct g_consumer *cp, struct g_provider *pp);
+void g_destroy_consumer(struct g_consumer *cp);
+void g_destroy_geom(struct g_geom *pp);
+void g_destroy_provider(struct g_provider *pp);
+void g_detach(struct g_consumer *cp);
+void g_error_provider(struct g_provider *pp, int error);
+struct g_provider *g_provider_by_name(char const *arg);
+int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len);
+#define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof *(v))
+int g_handleattr(struct bio *bp, const char *attribute, void *val, int len);
+int g_handleattr_int(struct bio *bp, const char *attribute, int val);
+int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val);
+struct g_consumer * g_new_consumer(struct g_geom *gp);
+struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...);
+struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...);
+void g_sanity(void const *ptr);
+void g_spoil(struct g_provider *pp, struct g_consumer *cp);
+int g_std_access(struct g_provider *pp, int dr, int dw, int de);
+void g_std_done(struct bio *bp);
+void g_std_spoiled(struct g_consumer *cp);
+void g_wither_geom(struct g_geom *gp, int error);
+
+int g_modevent(module_t, int, void *);
+
+/* geom_io.c */
+struct bio * g_clone_bio(struct bio *);
+void g_destroy_bio(struct bio *);
+void g_io_deliver(struct bio *bp, int error);
+int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
+void g_io_request(struct bio *bp, struct g_consumer *cp);
+struct bio *g_new_bio(void);
+void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error);
+int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length);
+
+/* geom_kern.c / geom_kernsim.c */
+
+#ifndef _SYS_CONF_H_
+typedef int d_ioctl_t(dev_t dev, u_long cmd, caddr_t data,
+ int fflag, struct thread *td);
+#endif
+
+/*
+ * Container for shipping an ioctl through the GEOM stack.
+ * NOTE(review): presumably carried as a bio attribute down to the
+ * device layer -- confirm against geom_dev.c.
+ */
+struct g_ioctl {
+	u_long cmd;			/* ioctl command */
+	void *data;			/* ioctl argument buffer */
+	int fflag;			/* open-file flags of the caller */
+	struct thread *td;		/* calling thread */
+	d_ioctl_t *func;		/* handler to invoke */
+	void *dev;			/* device argument for the handler */
+};
+
+#ifdef _KERNEL
+
+/*
+ * Byte window of a provider.  NOTE(review): presumably describes where
+ * a kernel crash dump may be written -- confirm against dump consumers.
+ */
+struct g_kerneldump {
+	off_t offset;			/* start of the window */
+	off_t length;			/* size of the window */
+};
+
+MALLOC_DECLARE(M_GEOM);
+
+/*
+ * Allocate from the M_GEOM malloc pool.  g_sanity() is run on the
+ * result (debug self-check hook -- see geom_subr.c for its effect).
+ */
+static __inline void *
+g_malloc(int size, int flags)
+{
+	void *p;
+
+	p = malloc(size, M_GEOM, flags);
+	g_sanity(p);
+	/* printf("malloc(%d, %x) -> %p\n", size, flags, p); */
+	return (p);
+}
+
+/*
+ * Release memory obtained with g_malloc(), running the g_sanity()
+ * debug self-check first.
+ */
+static __inline void
+g_free(void *ptr)
+{
+	g_sanity(ptr);
+	/* printf("free(%p)\n", ptr); */
+	free(ptr, M_GEOM);
+}
+
+extern struct sx topology_lock;
+
+/*
+ * The topology lock is an sx lock protecting the geom/consumer/provider
+ * graph.  It must not be acquired while holding Giant; unlock and
+ * assert both run the g_sanity() debug self-check.
+ */
+#define g_topology_lock() 					\
+	do {							\
+		mtx_assert(&Giant, MA_NOTOWNED);		\
+		sx_xlock(&topology_lock);			\
+	} while (0)
+
+#define g_topology_unlock()					\
+	do {							\
+		g_sanity(NULL);					\
+		sx_xunlock(&topology_lock);			\
+	} while (0)
+
+#define g_topology_assert()					\
+	do {							\
+		g_sanity(NULL);					\
+		sx_assert(&topology_lock, SX_XLOCKED);		\
+	} while (0)
+
+/* Boilerplate for registering a GEOM class as a kernel module. */
+#define DECLARE_GEOM_CLASS(class, name) 			\
+	static moduledata_t name##_mod = {			\
+		#name, g_modevent, &class			\
+	};							\
+	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
+
+#endif /* _KERNEL */
+
+/* geom_ctl.c */
+void gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len);
+void *gctl_get_param(struct gctl_req *req, const char *param, int *len);
+char const *gctl_get_asciiparam(struct gctl_req *req, const char *param);
+void *gctl_get_paraml(struct gctl_req *req, const char *param, int len);
+int gctl_error(struct gctl_req *req, const char *fmt, ...);
+struct g_class *gctl_get_class(struct gctl_req *req, char const *arg);
+struct g_geom *gctl_get_geom(struct gctl_req *req, struct g_class *mpr, char const *arg);
+struct g_provider *gctl_get_provider(struct gctl_req *req, char const *arg);
+
+#endif /* _GEOM_GEOM_H_ */
diff --git a/sys/geom/geom_aes.c b/sys/geom/geom_aes.c
new file mode 100644
index 0000000..867efd9
--- /dev/null
+++ b/sys/geom/geom_aes.c
@@ -0,0 +1,374 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This method provides AES encryption with a compiled in key (default
+ * all zeroes).
+ *
+ * XXX: This could probably save a lot of code by pretending to be a slicer.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/bio.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/libkern.h>
+#include <sys/endian.h>
+#include <sys/md5.h>
+#include <sys/errno.h>
+#include <geom/geom.h>
+
+#include <crypto/rijndael/rijndael.h>
+
+#define AES_CLASS_NAME "AES"
+
+#define MASTER_KEY_LENGTH (1024/8)
+
+static const u_char *aes_magic = "<<FreeBSD-GEOM-AES>>";
+static const u_char *aes_magic_random = "<<FreeBSD-GEOM-AES-RANDOM>>";
+static const u_char *aes_magic_test = "<<FreeBSD-GEOM-AES-TEST>>";
+
+
+struct g_aes_softc {
+	enum {
+		KEY_ZERO,	/* use the (all-zero) master key directly */
+		KEY_RANDOM,	/* master key from arc4random() at taste time */
+		KEY_TEST	/* deterministic 0,1,2,... master key */
+	} keying;
+	u_int sectorsize;	/* sector size of the underlying provider */
+	off_t mediasize;	/* payload size: provider minus the magic sector */
+	cipherInstance ci;	/* rijndael cipher instance (CBC mode) */
+	u_char master_key[MASTER_KEY_LENGTH];
+};
+
+/*
+ * Generate a sectorkey from the masterkey and the offset position.
+ *
+ * For KEY_ZERO we just return a key of all zeros.
+ *
+ * We feed the sector byte offset, 16 bytes of the master-key and
+ * the sector byte offset once more to MD5.
+ * The sector byte offset is converted to little-endian format first
+ * to support multi-architecture operation.
+ * We use 16 bytes from the master-key starting at the logical sector
+ * number modulo the length of the master-key. If need be we wrap
+ * around to the start of the master-key.
+ */
+
+static void
+g_aes_makekey(struct g_aes_softc *sc, off_t off, keyInstance *ki, int dir)
+{
+	MD5_CTX cx;
+	u_int64_t u64;
+	u_int u, u1;
+	u_char *p, buf[16];
+
+	/* KEY_ZERO: the (all zero) master key is used directly. */
+	if (sc->keying == KEY_ZERO) {
+		rijndael_makeKey(ki, dir, 128, sc->master_key);
+		return;
+	}
+	MD5Init(&cx);
+	/* Feed the byte offset in little-endian form for portability. */
+	u64 = htole64(off);
+	MD5Update(&cx, (u_char *)&u64, sizeof(u64));
+	/* Select 16 master-key bytes starting at sector# % key length. */
+	u = off / sc->sectorsize;
+	u %= sizeof sc->master_key;
+	p = sc->master_key + u;
+	if (u + 16 <= sizeof(sc->master_key)) {
+		MD5Update(&cx, p, 16);
+	} else {
+		/* The 16 bytes wrap; feed the tail then the head. */
+		u1 = sizeof sc->master_key - u;
+		MD5Update(&cx, p, u1);
+		MD5Update(&cx, sc->master_key, 16 - u1);
+		u1 = 0;		/* destroy evidence */
+	}
+	u = 0;			/* destroy evidence */
+	MD5Update(&cx, (u_char *)&u64, sizeof(u64));
+	u64 = 0;		/* destroy evidence */
+	MD5Final(buf, &cx);
+	bzero(&cx, sizeof cx);	/* destroy evidence */
+	/* The MD5 digest becomes the 128-bit sector key. */
+	rijndael_makeKey(ki, dir, 128, buf);
+	bzero(buf, sizeof buf);	/* destroy evidence */
+
+}
+
+/*
+ * Read completion: decrypt the payload in place, one sector at a time,
+ * with a per-sector key derived from the logical (unshifted) offset.
+ * Decryption goes through a bounce buffer and is copied back
+ * (in-place decryption apparently not supported -- TODO confirm).
+ */
+static void
+g_aes_read_done(struct bio *bp)
+{
+	struct g_geom *gp;
+	struct g_aes_softc *sc;
+	u_char *p, *b, *e, *sb;
+	keyInstance dkey;
+	off_t o;
+
+	gp = bp->bio_from->geom;
+	sc = gp->softc;
+	sb = g_malloc(sc->sectorsize, M_WAITOK);	/* bounce buffer */
+	b = bp->bio_data;
+	e = bp->bio_data;
+	e += bp->bio_length;
+	/* Undo the one-sector shift applied in g_aes_start(). */
+	o = bp->bio_offset - sc->sectorsize;
+	for (p = b; p < e; p += sc->sectorsize) {
+		g_aes_makekey(sc, o, &dkey, DIR_DECRYPT);
+		/* NB: rijndael_blockDecrypt() takes the length in bits. */
+		rijndael_blockDecrypt(&sc->ci, &dkey, p, sc->sectorsize * 8, sb);
+		bcopy(sb, p, sc->sectorsize);
+		o += sc->sectorsize;
+	}
+	bzero(&dkey, sizeof dkey);	/* destroy evidence */
+	bzero(sb, sc->sectorsize);	/* destroy evidence */
+	g_free(sb);
+	g_std_done(bp);
+}
+
+/*
+ * Write completion: the ciphertext was written from a private buffer
+ * allocated in g_aes_start(); scrub it, free it, and finish the parent
+ * bio via g_std_done().
+ */
+static void
+g_aes_write_done(struct bio *bp)
+{
+
+	bzero(bp->bio_data, bp->bio_length);	/* destroy evidence */
+	g_free(bp->bio_data);
+	g_std_done(bp);
+}
+
+/*
+ * I/O entry point.  All offsets are shifted down by one sector so the
+ * magic sector at media offset 0 stays invisible to our consumers.
+ * READ passes through and decrypts on completion; WRITE encrypts into
+ * a private buffer so the caller's data is never modified.
+ */
+static void
+g_aes_start(struct bio *bp)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_aes_softc *sc;
+	struct bio *bp2;
+	u_char *p1, *p2, *b, *e;
+	keyInstance ekey;
+	off_t o;
+
+	gp = bp->bio_to->geom;
+	cp = LIST_FIRST(&gp->consumer);
+	sc = gp->softc;
+	switch (bp->bio_cmd) {
+	case BIO_READ:
+		/* Pass down; g_aes_read_done() decrypts the result. */
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			g_io_deliver(bp, ENOMEM);
+			return;
+		}
+		bp2->bio_done = g_aes_read_done;
+		bp2->bio_offset += sc->sectorsize;
+		g_io_request(bp2, cp);
+		break;
+	case BIO_WRITE:
+		/*
+		 * Encrypt sector by sector into a fresh buffer;
+		 * g_aes_write_done() scrubs and frees it.
+		 */
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			g_io_deliver(bp, ENOMEM);
+			return;
+		}
+		bp2->bio_done = g_aes_write_done;
+		bp2->bio_offset += sc->sectorsize;
+		bp2->bio_data = g_malloc(bp->bio_length, M_WAITOK);
+		b = bp->bio_data;
+		e = bp->bio_data;
+		e += bp->bio_length;
+		p2 = bp2->bio_data;
+		/* Keys derive from the logical (unshifted) offset. */
+		o = bp->bio_offset;
+		for (p1 = b; p1 < e; p1 += sc->sectorsize) {
+			g_aes_makekey(sc, o, &ekey, DIR_ENCRYPT);
+			/* NB: blockEncrypt() length argument is in bits. */
+			rijndael_blockEncrypt(&sc->ci, &ekey,
+			    p1, sc->sectorsize * 8, p2);
+			p2 += sc->sectorsize;
+			o += sc->sectorsize;
+		}
+		bzero(&ekey, sizeof ekey);	/* destroy evidence */
+		g_io_request(bp2, cp);
+		break;
+	case BIO_GETATTR:
+		/*
+		 * Pass through.  NOTE(review): the offset shift is
+		 * presumably irrelevant for attribute requests -- confirm.
+		 */
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			g_io_deliver(bp, ENOMEM);
+			return;
+		}
+		bp2->bio_done = g_std_done;
+		bp2->bio_offset += sc->sectorsize;
+		g_io_request(bp2, cp);
+		break;
+	default:
+		g_io_deliver(bp, EOPNOTSUPP);
+		return;
+	}
+	return;
+}
+
+/*
+ * The provider below us went away (with an error, per the KASSERT):
+ * wither the geom and scrub the key material.
+ * NOTE(review): sc is freed here while the geom may still be
+ * withering; confirm nothing dereferences gp->softc afterwards.
+ */
+static void
+g_aes_orphan(struct g_consumer *cp)
+{
+	struct g_geom *gp;
+	struct g_aes_softc *sc;
+
+	g_trace(G_T_TOPOLOGY, "g_aes_orphan(%p/%s)", cp, cp->provider->name);
+	g_topology_assert();
+	KASSERT(cp->provider->error != 0,
+		("g_aes_orphan with error == 0"));
+
+	gp = cp->geom;
+	sc = gp->softc;
+	g_wither_geom(gp, cp->provider->error);
+	bzero(sc, sizeof(struct g_aes_softc));	/* destroy evidence */
+	g_free(sc);
+	return;
+}
+
+/*
+ * Access accounting: mirror the deltas onto our consumer, but hold one
+ * extra exclusive count on the underlying provider for as long as the
+ * transformed device is open at all.
+ */
+static int
+g_aes_access(struct g_provider *pp, int dr, int dw, int de)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+
+	gp = pp->geom;
+	cp = LIST_FIRST(&gp->consumer);
+	/* On first open, grab an extra "exclusive" bit */
+	if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0)
+		de++;
+	/* ... and let go of it on last close */
+	if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1)
+		de--;
+	return (g_access_rel(cp, dr, dw, de));
+}
+
+/*
+ * Taste function: attach on top of a direct disk provider (rank 2)
+ * whose first sector starts with one of the g_aes magic strings.  The
+ * magic sector is hidden: the new provider is one sector smaller and
+ * all I/O is shifted by one sector in g_aes_start().
+ *
+ * Fix: clear `buf' after freeing it on the magic-match path; the old
+ * code freed it again in the common cleanup below (double free).
+ */
+static struct g_geom *
+g_aes_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_aes_softc *sc;
+	int error;
+	u_int sectorsize;
+	off_t mediasize;
+	u_char *buf;
+
+	g_trace(G_T_TOPOLOGY, "aes_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+	gp = g_new_geomf(mp, "%s.aes", pp->name);
+	gp->start = g_aes_start;
+	gp->orphan = g_aes_orphan;
+	gp->spoiled = g_std_spoiled;
+	cp = g_new_consumer(gp);
+	g_attach(cp, pp);
+	error = g_access_rel(cp, 1, 0, 0);
+	if (error) {
+		g_detach(cp);
+		g_destroy_consumer(cp);
+		g_destroy_geom(gp);
+		return (NULL);
+	}
+	buf = NULL;
+	g_topology_unlock();
+	do {
+		/* Only directly on top of a disk-like provider. */
+		if (gp->rank != 2)
+			break;
+		sectorsize = cp->provider->sectorsize;
+		mediasize = cp->provider->mediasize;
+		buf = g_read_data(cp, 0, sectorsize, &error);
+		if (buf == NULL || error != 0) {
+			break;
+		}
+		sc = g_malloc(sizeof(struct g_aes_softc), M_WAITOK | M_ZERO);
+		if (!memcmp(buf, aes_magic, strlen(aes_magic))) {
+			sc->keying = KEY_ZERO;
+		} else if (!memcmp(buf, aes_magic_random,
+		    strlen(aes_magic_random))) {
+			sc->keying = KEY_RANDOM;
+		} else if (!memcmp(buf, aes_magic_test,
+		    strlen(aes_magic_test))) {
+			sc->keying = KEY_TEST;
+		} else {
+			g_free(sc);
+			break;
+		}
+		g_free(buf);
+		buf = NULL;	/* prevent double free in the cleanup below */
+		gp->softc = sc;
+		gp->access = g_aes_access;
+		sc->sectorsize = sectorsize;
+		sc->mediasize = mediasize - sectorsize;
+		rijndael_cipherInit(&sc->ci, MODE_CBC, NULL);
+		if (sc->keying == KEY_TEST) {
+			int i;
+			u_char *p;
+
+			/* Deterministic 0,1,2,... key for testing. */
+			p = sc->master_key;
+			for (i = 0; i < (int)sizeof sc->master_key; i ++)
+				*p++ = i;
+		}
+		if (sc->keying == KEY_RANDOM) {
+			int i;
+			u_int32_t u;
+			u_char *p;
+
+			/* Fresh random master key; it is not persisted. */
+			p = sc->master_key;
+			for (i = 0; i < (int)sizeof sc->master_key; i += sizeof u) {
+				u = arc4random();
+				*p++ = u;
+				*p++ = u >> 8;
+				*p++ = u >> 16;
+				*p++ = u >> 24;
+			}
+		}
+		g_topology_lock();
+		pp = g_new_providerf(gp, gp->name);
+		pp->mediasize = mediasize - sectorsize;
+		pp->sectorsize = sectorsize;
+		g_error_provider(pp, 0);
+		g_topology_unlock();
+	} while(0);
+	g_topology_lock();
+	if (buf)
+		g_free(buf);
+	g_access_rel(cp, -1, 0, 0);
+	if (gp->softc != NULL)
+		return (gp);
+	g_detach(cp);
+	g_destroy_consumer(cp);
+	g_destroy_geom(gp);
+	return (NULL);
+}
+
+/* Class glue: only taste is needed; geoms set their own I/O hooks. */
+static struct g_class g_aes_class = {
+	.name = AES_CLASS_NAME,
+	.taste = g_aes_taste,
+};
+
+DECLARE_GEOM_CLASS(g_aes_class, g_aes);
diff --git a/sys/geom/geom_apple.c b/sys/geom/geom_apple.c
new file mode 100644
index 0000000..328b835
--- /dev/null
+++ b/sys/geom/geom_apple.c
@@ -0,0 +1,260 @@
+/*-
+ * Copyright (c) 2002 Peter Grehan.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * GEOM module for Apple Partition Maps
+ * As described in "Inside Macintosh Vol 3: About the SCSI Manager -
+ * The Structure of Block Devices"
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <sys/sbuf.h>
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+
+#define APPLE_CLASS_NAME "APPLE"
+
+#define NAPMPART 16 /* Max partitions */
+
+/*
+ * In-core copy of the interesting fields of one Apple partition-map
+ * entry (on disk: one big-endian entry per 512-byte sector).
+ */
+struct apm_partition {
+	char am_sig[2];		/* "PM" signature */
+	u_int32_t am_mapcnt;	/* total entries in the partition map */
+	u_int32_t am_start;	/* first block of the partition */
+	u_int32_t am_partcnt;	/* number of blocks in the partition */
+	char am_name[32];	/* partition name */
+	char am_type[32];	/* partition type string */
+};
+
+struct g_apple_softc {
+	u_int16_t dd_bsiz;	/* Block0 sbBlkSize: device block size */
+	u_int32_t dd_blkcnt;	/* Block0 sbBlkCount: blocks on the device */
+	u_int16_t dd_drvrcnt;	/* Block0 sbDrvrCount: # driver descriptors */
+	u_int32_t am_mapcnt0;	/* mapcnt of the first entry, for validation */
+	struct apm_partition apmpart[NAPMPART];
+};
+
+/*
+ * Decode the big-endian Block0 driver-descriptor fields we care about.
+ * Layout per Inside Macintosh: sbSig(0,2) sbBlkSize(2,2) sbBlkCount(4,4)
+ * sbDevType(8,2) sbDevId(10,2) sbData(12,4) sbDrvrCount(16,2).
+ */
+static void
+g_dec_drvrdesc(u_char *ptr, struct g_apple_softc *sc)
+{
+	sc->dd_bsiz = be16dec(ptr + 2);
+	sc->dd_blkcnt = be32dec(ptr + 4);
+	/*
+	 * sbDrvrCount is a 16-bit field; the previous be32dec() read four
+	 * bytes and the u_int16_t assignment kept the wrong two (bytes
+	 * 18-19, which belong to the next field).
+	 */
+	sc->dd_drvrcnt = be16dec(ptr + 16);
+}
+
+/*
+ * Convert one big-endian on-disk partition-map entry into the in-core
+ * host-order representation.
+ */
+static void
+g_dec_apple_partition(u_char *ptr, struct apm_partition *ap)
+{
+	bcopy(ptr, ap->am_sig, sizeof(ap->am_sig));
+	ap->am_mapcnt = be32dec(ptr + 4);
+	ap->am_start = be32dec(ptr + 8);
+	ap->am_partcnt = be32dec(ptr + 12);
+	bcopy(ptr + 16, ap->am_name, sizeof(ap->am_name));
+	bcopy(ptr + 48, ap->am_type, sizeof(ap->am_type));
+}
+
+/*
+ * Slice start hook: answer the "APM::offset" attribute for the target
+ * slice ourselves (return 1 = handled); everything else falls through
+ * to the generic slice code (return 0).
+ */
+static int
+g_apple_start(struct bio *bp)
+{
+	struct g_provider *pp;
+	struct g_geom *gp;
+	struct g_slicer *gsp;
+
+	pp = bp->bio_to;
+	gp = pp->geom;
+	gsp = gp->softc;
+	if (bp->bio_cmd == BIO_GETATTR) {
+		if (g_handleattr_off_t(bp, "APM::offset",
+		    gsp->slices[pp->index].offset))
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Config dump hook: after the generic slice info, emit the slice's
+ * Apple partition name and type.  indent == NULL selects the terse
+ * one-line form; otherwise XML-style elements are produced.
+ * NOTE(review): cp is marked __unused but is passed through to
+ * g_slice_dumpconf() -- the annotation looks stale (harmless).
+ */
+static void
+g_apple_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
+	struct g_consumer *cp __unused, struct g_provider *pp)
+{
+	struct g_apple_softc *mp;
+	struct g_slicer *gsp;
+
+	gsp = gp->softc;
+	mp = gsp->softc;
+	g_slice_dumpconf(sb, indent, gp, cp, pp);
+	if (pp != NULL) {
+		if (indent == NULL)
+			sbuf_printf(sb, " n %s ty %s",
+			    mp->apmpart[pp->index].am_name,
+			    mp->apmpart[pp->index].am_type);
+		else {
+			sbuf_printf(sb, "%s<name>%s</name>\n", indent,
+			    mp->apmpart[pp->index].am_name);
+			sbuf_printf(sb, "%s<type>%s</type>\n", indent,
+			    mp->apmpart[pp->index].am_type);
+		}
+	}
+}
+
+#if 0
+static void
+g_apple_print()
+{
+
+ /* XXX */
+}
+#endif
+
+/*
+ * Taste: look for an Apple driver descriptor record ("ER") in sector 0
+ * and a valid partition map starting in sector 1; configure one slice
+ * per valid map entry.
+ *
+ * Fix: the sector-0 signature test used &&, which only rejected the
+ * disk when BOTH bytes mismatched; both must match, so reject with ||.
+ */
+static struct g_geom *
+g_apple_taste(struct g_class *mp, struct g_provider *pp, int insist)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	int error, i;
+	struct g_apple_softc *ms;
+	struct apm_partition *apm;
+	u_int sectorsize;
+	u_char *buf;
+
+	g_trace(G_T_TOPOLOGY, "apple_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+	gp = g_slice_new(mp, NAPMPART, pp, &cp, &ms, sizeof *ms, g_apple_start);
+	if (gp == NULL)
+		return (NULL);
+	g_topology_unlock();
+	gp->dumpconf = g_apple_dumpconf;
+	do {
+		if (gp->rank != 2 && insist == 0)
+			break;
+
+		sectorsize = cp->provider->sectorsize;
+		if (sectorsize != 512)
+			break;
+
+		buf = g_read_data(cp, 0, sectorsize, &error);
+		if (buf == NULL || error != 0)
+			break;
+
+		/*
+		 * Test for the sector 0 driver record signature, and
+		 * validate sector and disk size
+		 */
+		if (buf[0] != 'E' || buf[1] != 'R') {
+			g_free(buf);
+			break;
+		}
+		g_dec_drvrdesc(buf, ms);
+		g_free(buf);
+
+		if (ms->dd_bsiz != 512) {
+			break;
+		}
+
+		/*
+		 * Read in the first partition map
+		 */
+		buf = g_read_data(cp, sectorsize, sectorsize, &error);
+		if (buf == NULL || error != 0)
+			break;
+
+		/*
+		 * Decode the first partition: it's another indication of
+		 * validity, as well as giving the size of the partition
+		 * map
+		 */
+		apm = &ms->apmpart[0];
+		g_dec_apple_partition(buf, apm);
+		g_free(buf);
+
+		if (apm->am_sig[0] != 'P' || apm->am_sig[1] != 'M')
+			break;
+		ms->am_mapcnt0 = apm->am_mapcnt;
+
+		/* Read the remaining map entries (one per sector). */
+		buf = g_read_data(cp, 2 * sectorsize,
+			(NAPMPART - 1) * sectorsize, &error);
+		if (buf == NULL || error != 0)
+			break;
+
+		for (i = 1; i < NAPMPART; i++) {
+			g_dec_apple_partition(buf + ((i - 1) * sectorsize),
+			    &ms->apmpart[i]);
+		}
+
+		for (i = 0; i < NAPMPART; i++) {
+			apm = &ms->apmpart[i];
+
+			/*
+			 * Validate partition sig and global mapcount
+			 */
+			if (apm->am_sig[0] != 'P' ||
+			    apm->am_sig[1] != 'M')
+				continue;
+			if (apm->am_mapcnt != ms->am_mapcnt0)
+				continue;
+
+			if (bootverbose) {
+				printf("APM Slice %d (%s/%s) on %s:\n",
+				    i + 1, apm->am_name, apm->am_type,
+				    gp->name);
+				/* g_apple_print(i, dp + i); */
+			}
+			/* Blocks are 512 bytes here, hence the << 9. */
+			g_topology_lock();
+			g_slice_config(gp, i, G_SLICE_CONFIG_SET,
+			    (off_t)apm->am_start << 9ULL,
+			    (off_t)apm->am_partcnt << 9ULL,
+			    sectorsize,
+			    "%ss%d", gp->name, i + 1);
+			g_topology_unlock();
+		}
+		g_free(buf);
+		break;
+	} while(0);
+	g_topology_lock();
+	g_access_rel(cp, -1, 0, 0);
+	if (LIST_EMPTY(&gp->provider)) {
+		g_slice_spoiled(cp);
+		return (NULL);
+	}
+	return (gp);
+}
+
+
+/* Class glue: only a taste method; slice code handles the rest. */
+static struct g_class g_apple_class = {
+	.name = APPLE_CLASS_NAME,
+	.taste = g_apple_taste,
+};
+
+DECLARE_GEOM_CLASS(g_apple_class, g_apple);
diff --git a/sys/geom/geom_bsd.c b/sys/geom/geom_bsd.c
new file mode 100644
index 0000000..4f4d565
--- /dev/null
+++ b/sys/geom/geom_bsd.c
@@ -0,0 +1,739 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This is the method for dealing with BSD disklabels. It has been
+ * extensively (by my standards at least) commented, in the vain hope that
+ * it will serve as the source in future copy&paste operations.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/bio.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/md5.h>
+#include <sys/errno.h>
+#include <sys/disklabel.h>
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+
+#define BSD_CLASS_NAME "BSD"
+
+#define ALPHA_LABEL_OFFSET 64
+
+#define LABELSIZE (148 + 16 * MAXPARTITIONS)
+
+static void g_bsd_hotwrite(void *arg, int flag);
+/*
+ * Our private data about one instance. All the rest is handled by the
+ * slice code and stored in its softc, so this is just the stuff
+ * specific to BSD disklabels.
+ */
+struct g_bsd_softc {
+ off_t labeloffset;
+ off_t mbroffset;
+ off_t rawoffset;
+ struct disklabel ondisk;
+ u_char label[LABELSIZE];
+ u_char labelsum[16];
+};
+
+/*
+ * Modify our slicer to match proposed disklabel, if possible.
+ * This is where we make sure we don't do something stupid.
+ */
+static int
+g_bsd_modify(struct g_geom *gp, u_char *label)
+{
+ int i, error;
+ struct partition *ppp;
+ struct g_slicer *gsp;
+ struct g_consumer *cp;
+ struct g_bsd_softc *ms;
+ u_int secsize, u;
+ off_t rawoffset, o;
+ struct disklabel dl;
+ MD5_CTX md5sum;
+
+ g_topology_assert();
+ gsp = gp->softc;
+ ms = gsp->softc;
+
+ error = bsd_disklabel_le_dec(label, &dl, MAXPARTITIONS);
+ if (error) {
+ return (error);
+ }
+
+ /* Get dimensions of our device. */
+ cp = LIST_FIRST(&gp->consumer);
+ secsize = cp->provider->sectorsize;
+
+ /* ... or a smaller sector size. */
+ if (dl.d_secsize < secsize) {
+ return (EINVAL);
+ }
+
+ /* ... or a non-multiple sector size. */
+ if (dl.d_secsize % secsize != 0) {
+ return (EINVAL);
+ }
+
+ /* Historical braindamage... */
+ rawoffset = (off_t)dl.d_partitions[RAW_PART].p_offset * dl.d_secsize;
+
+ for (i = 0; i < dl.d_npartitions; i++) {
+ ppp = &dl.d_partitions[i];
+ if (ppp->p_size == 0)
+ continue;
+ o = (off_t)ppp->p_offset * dl.d_secsize;
+
+ if (o < rawoffset)
+ rawoffset = 0;
+ }
+
+ if (rawoffset != 0 && (off_t)rawoffset != ms->mbroffset)
+ printf("WARNING: Expected rawoffset %jd, found %jd\n",
+ (intmax_t)ms->mbroffset/dl.d_secsize,
+ (intmax_t)rawoffset/dl.d_secsize);
+
+ /* Don't munge open partitions. */
+ for (i = 0; i < dl.d_npartitions; i++) {
+ ppp = &dl.d_partitions[i];
+
+ o = (off_t)ppp->p_offset * dl.d_secsize;
+ if (o == 0)
+ o = rawoffset;
+ error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
+ o - rawoffset,
+ (off_t)ppp->p_size * dl.d_secsize,
+ dl.d_secsize,
+ "%s%c", gp->name, 'a' + i);
+ if (error)
+ return (error);
+ }
+
+ /* Look good, go for it... */
+ for (u = 0; u < gsp->nslice; u++) {
+ ppp = &dl.d_partitions[u];
+ o = (off_t)ppp->p_offset * dl.d_secsize;
+ if (o == 0)
+ o = rawoffset;
+ g_slice_config(gp, u, G_SLICE_CONFIG_SET,
+ o - rawoffset,
+ (off_t)ppp->p_size * dl.d_secsize,
+ dl.d_secsize,
+ "%s%c", gp->name, 'a' + u);
+ }
+
+ /* Update our softc */
+ ms->ondisk = dl;
+ if (label != ms->label)
+ bcopy(label, ms->label, LABELSIZE);
+ ms->rawoffset = rawoffset;
+
+ /*
+ * In order to avoid recursively attaching to the same
+ * on-disk label (it's usually visible through the 'c'
+ * partition) we calculate an MD5 and ask if other BSD's
+ * below us love that label. If they do, we don't.
+ */
+ MD5Init(&md5sum);
+ MD5Update(&md5sum, ms->label, sizeof(ms->label));
+ MD5Final(ms->labelsum, &md5sum);
+
+ return (0);
+}
+
+/*
+ * This is an internal helper function, called multiple times from the taste
+ * function to try to locate a disklabel on the disk. More civilized formats
+ * will not need this, as there is only one possible place on disk to look
+ * for the magic spot.
+ */
+
+static int
+g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset)
+{
+ int error;
+ u_char *buf;
+ struct disklabel *dl;
+ off_t secoff;
+
+ /*
+ * We need to read entire aligned sectors, and we assume that the
+ * disklabel does not span sectors, so one sector is enough.
+ */
+ error = 0;
+ secoff = offset % secsize;
+ buf = g_read_data(cp, offset - secoff, secsize, &error);
+ if (buf == NULL || error != 0)
+ return (ENOENT);
+
+ /* Decode into our native format. */
+ dl = &ms->ondisk;
+ error = bsd_disklabel_le_dec(buf + secoff, dl, MAXPARTITIONS);
+ if (!error)
+ bcopy(buf + secoff, ms->label, LABELSIZE);
+
+ /* Remember to free the buffer g_read_data() gave us. */
+ g_free(buf);
+
+ ms->labeloffset = offset;
+ return (error);
+}
+
+/*
+ * This function writes the current label to disk, possibly updating
+ * the alpha SRM checksum.
+ */
+
+static int
+g_bsd_writelabel(struct g_geom *gp, u_char *bootcode)
+{
+ off_t secoff;
+ u_int secsize;
+ struct g_consumer *cp;
+ struct g_slicer *gsp;
+ struct g_bsd_softc *ms;
+ u_char *buf;
+ uint64_t sum;
+ int error, i;
+
+ gsp = gp->softc;
+ ms = gsp->softc;
+ cp = LIST_FIRST(&gp->consumer);
+ /* Get sector size, we need it to read data. */
+ secsize = cp->provider->sectorsize;
+ secoff = ms->labeloffset % secsize;
+ if (bootcode == NULL) {
+ buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error);
+ if (buf == NULL || error != 0)
+ return (error);
+ bcopy(ms->label, buf + secoff, sizeof(ms->label));
+ } else {
+ buf = bootcode;
+ bcopy(ms->label, buf + ms->labeloffset, sizeof(ms->label));
+ }
+ if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
+ sum = 0;
+ for (i = 0; i < 63; i++)
+ sum += le64dec(buf + i * 8);
+ le64enc(buf + 504, sum);
+ }
+ if (bootcode == NULL) {
+ error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize);
+ g_free(buf);
+ } else {
+ error = g_write_data(cp, 0, bootcode, BBSIZE);
+ }
+ return(error);
+}
+
+
+/*
+ * Implement certain ioctls to modify disklabels with. This function
+ * is called by the event handler thread with topology locked as result
+ * of the g_post_event() in g_bsd_start(). It is not necessary to keep
+ * topology locked all the time but make sure to return with topology
+ * locked as well.
+ */
+
+static void
+g_bsd_ioctl(void *arg, int flag)
+{
+ struct bio *bp;
+ struct g_geom *gp;
+ struct g_ioctl *gio;
+ u_char *label;
+ int error;
+
+ g_topology_assert();
+ bp = arg;
+ if (flag == EV_CANCEL) {
+ g_io_deliver(bp, ENXIO);
+ return;
+ }
+
+ gp = bp->bio_to->geom;
+ gio = (struct g_ioctl *)bp->bio_data;
+
+ label = g_malloc(LABELSIZE, M_WAITOK);
+
+ /* The disklabel to set is the ioctl argument. */
+ bsd_disklabel_le_enc(label, gio->data);
+
+ /* Validate and modify our slice instance to match. */
+ error = g_bsd_modify(gp, label); /* Picks up topology lock on success. */
+ g_free(label);
+ if (error || gio->cmd == DIOCSDINFO) {
+ g_io_deliver(bp, error);
+ return;
+ }
+
+ KASSERT(gio->cmd == DIOCWDINFO, ("Unknown ioctl in g_bsd_ioctl"));
+ g_io_deliver(bp, g_bsd_writelabel(gp, NULL));
+}
+
+/*
+ * Rewrite the bootblock, which is BBSIZE bytes from the start of the disk.
+ * We punch down the disklabel where we expect it to be before writing.
+ */
+static int
+g_bsd_diocbsdbb(dev_t dev, u_long cmd __unused, caddr_t data, int fflag __unused, struct thread *td __unused)
+{
+ struct g_geom *gp;
+ struct g_slicer *gsp;
+ struct g_bsd_softc *ms;
+ struct g_consumer *cp;
+ u_char *buf;
+ void *p;
+ int error, i;
+ uint64_t sum;
+
+ /* Get hold of the interesting bits from the bio. */
+ gp = (void *)dev;
+ gsp = gp->softc;
+ ms = gsp->softc;
+
+ /* The disklabel to set is the ioctl argument. */
+ buf = g_malloc(BBSIZE, M_WAITOK);
+ p = *(void **)data;
+ error = copyin(p, buf, BBSIZE);
+ if (!error) {
+ DROP_GIANT();
+ g_topology_lock();
+ /* Validate and modify our slice instance to match. */
+ error = g_bsd_modify(gp, buf + ms->labeloffset);
+ if (!error) {
+ cp = LIST_FIRST(&gp->consumer);
+ if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
+ sum = 0;
+ for (i = 0; i < 63; i++)
+ sum += le64dec(buf + i * 8);
+ le64enc(buf + 504, sum);
+ }
+ error = g_write_data(cp, 0, buf, BBSIZE);
+ }
+ g_topology_unlock();
+ PICKUP_GIANT();
+ }
+ g_free(buf);
+ return (error);
+}
+
+/*
+ * If the user tries to overwrite our disklabel through an open partition
+ * or via a magicwrite config call, we end up here and try to prevent
+ * footshooting as best we can.
+ */
+static void
+g_bsd_hotwrite(void *arg, int flag)
+{
+ struct bio *bp;
+ struct g_geom *gp;
+ struct g_slicer *gsp;
+ struct g_slice *gsl;
+ struct g_bsd_softc *ms;
+ u_char *p;
+ int error;
+
+ g_topology_assert();
+ /*
+ * We should never get canceled, because that would amount to a removal
+ * of the geom while there was outstanding I/O requests.
+ */
+ KASSERT(flag != EV_CANCEL, ("g_bsd_hotwrite cancelled"));
+ bp = arg;
+ gp = bp->bio_to->geom;
+ gsp = gp->softc;
+ ms = gsp->softc;
+ gsl = &gsp->slices[bp->bio_to->index];
+ p = (u_char*)bp->bio_data + ms->labeloffset
+ - (bp->bio_offset + gsl->offset);
+ error = g_bsd_modify(gp, p);
+ if (error) {
+ g_io_deliver(bp, EPERM);
+ return;
+ }
+ g_slice_finish_hot(bp);
+}
+
+/*-
+ * This start routine is only called for non-trivial requests, all the
+ * trivial ones are handled autonomously by the slice code.
+ * For requests we handle here, we must call the g_io_deliver() on the
+ * bio, and return non-zero to indicate to the slice code that we did so.
+ * This code executes in the "DOWN" I/O path, this means:
+ * * No sleeping.
+ * * Don't grab the topology lock.
+ * * Don't call biowait, g_getattr(), g_setattr() or g_read_data()
+ */
+
+static int
+g_bsd_start(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct g_bsd_softc *ms;
+ struct g_slicer *gsp;
+ struct g_ioctl *gio;
+ int error;
+
+ gp = bp->bio_to->geom;
+ gsp = gp->softc;
+ ms = gsp->softc;
+ switch(bp->bio_cmd) {
+ case BIO_GETATTR:
+ if (g_handleattr(bp, "BSD::labelsum", ms->labelsum,
+ sizeof(ms->labelsum)))
+ return (1);
+ break;
+ default:
+ KASSERT(0 == 1, ("Unknown bio_cmd in g_bsd_start (%d)",
+ bp->bio_cmd));
+ }
+
+ /* We only handle ioctl(2) requests of the right format. */
+ if (strcmp(bp->bio_attribute, "GEOM::ioctl"))
+ return (0);
+ else if (bp->bio_length != sizeof(*gio))
+ return (0);
+
+ /* Get hold of the ioctl parameters. */
+ gio = (struct g_ioctl *)bp->bio_data;
+
+ switch (gio->cmd) {
+ case DIOCGDINFO:
+ /* Return a copy of the disklabel to userland. */
+ bsd_disklabel_le_dec(ms->label, gio->data, MAXPARTITIONS);
+ g_io_deliver(bp, 0);
+ return (1);
+ case DIOCBSDBB:
+ gio->func = g_bsd_diocbsdbb;
+ gio->dev = (void *)gp;
+ g_io_deliver(bp, EDIRIOCTL);
+ return (1);
+ case DIOCSDINFO:
+ case DIOCWDINFO:
+ /*
+ * These we cannot do without the topology lock and some
+ * some I/O requests. Ask the event-handler to schedule
+ * us in a less restricted environment.
+ */
+ error = g_post_event(g_bsd_ioctl, bp, M_NOWAIT, gp, NULL);
+ if (error)
+ g_io_deliver(bp, error);
+ /*
+ * We must return non-zero to indicate that we will deal
+ * with this bio, even though we have not done so yet.
+ */
+ return (1);
+ default:
+ return (0);
+ }
+}
+
+/*
+ * Dump configuration information in XML format.
+ * Notice that the function is called once for the geom and once for each
+ * consumer and provider. We let g_slice_dumpconf() do most of the work.
+ */
+static void
+g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
+{
+ struct g_bsd_softc *ms;
+ struct g_slicer *gsp;
+
+ gsp = gp->softc;
+ ms = gsp->softc;
+ g_slice_dumpconf(sb, indent, gp, cp, pp);
+ if (indent != NULL && pp == NULL && cp == NULL) {
+ sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n",
+ indent, (intmax_t)ms->labeloffset);
+ sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n",
+ indent, (intmax_t)ms->rawoffset);
+ sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n",
+ indent, (intmax_t)ms->mbroffset);
+ } else if (pp != NULL) {
+ if (indent == NULL)
+ sbuf_printf(sb, " ty %d",
+ ms->ondisk.d_partitions[pp->index].p_fstype);
+ else
+ sbuf_printf(sb, "%s<type>%d</type>\n", indent,
+ ms->ondisk.d_partitions[pp->index].p_fstype);
+ }
+}
+
+/*
+ * The taste function is called from the event-handler, with the topology
+ * lock already held and a provider to examine. The flags are unused.
+ *
+ * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and
+ * if we find valid, consistent magic on it, build a geom on it.
+ * any magic bits which indicate that we should automatically put a BSD
+ * geom on it.
+ *
+ * There may be cases where the operator would like to put a BSD-geom on
+ * providers which do not meet all of the requirements. This can be done
+ * by instead passing the G_TF_INSIST flag, which will override these
+ * checks.
+ *
+ * The final flags value is G_TF_TRANSPARENT, which instructs the method
+ * to put a geom on top of the provider and configure it to be as transparent
+ * as possible. This is not really relevant to the BSD method and therefore
+ * not implemented here.
+ */
+
+static struct g_geom *
+g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ int error, i;
+ struct g_bsd_softc *ms;
+ u_int secsize;
+ struct g_slicer *gsp;
+ u_char hash[16];
+ MD5_CTX md5sum;
+
+ g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name);
+ g_topology_assert();
+
+ /* We don't implement transparent inserts. */
+ if (flags == G_TF_TRANSPARENT)
+ return (NULL);
+
+ /*
+ * BSD labels are a subclass of the general "slicing" topology so
+ * a lot of the work can be done by the common "slice" code.
+ * Create a geom with space for MAXPARTITIONS providers, one consumer
+ * and a softc structure for us. Specify the provider to attach
+ * the consumer to and our "start" routine for special requests.
+ * The provider is opened with mode (1,0,0) so we can do reads
+ * from it.
+ */
+ gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms,
+ sizeof(*ms), g_bsd_start);
+ if (gp == NULL)
+ return (NULL);
+
+ /*
+ * Fill in the optional details, in our case we have a dumpconf
+ * routine which the "slice" code should call at the right time
+ */
+ gp->dumpconf = g_bsd_dumpconf;
+
+ /* Get the geom_slicer softc from the geom. */
+ gsp = gp->softc;
+
+ /*
+ * The do...while loop here allows us to have multiple escapes
+ * using a simple "break". This improves code clarity without
+ * ending up in deep nesting and without using goto or come from.
+ */
+ do {
+ /*
+ * If the provider is an MBR we will only auto attach
+ * to type 165 slices in the G_TF_NORMAL case. We will
+ * attach to any other type.
+ */
+ error = g_getattr("MBR::type", cp, &i);
+ if (!error) {
+ if (i != 165 && flags == G_TF_NORMAL)
+ break;
+ error = g_getattr("MBR::offset", cp, &ms->mbroffset);
+ if (error)
+ break;
+ }
+
+ /* Same thing if we are inside a PC98 */
+ error = g_getattr("PC98::type", cp, &i);
+ if (!error) {
+ if (i != 0xc494 && flags == G_TF_NORMAL)
+ break;
+ error = g_getattr("PC98::offset", cp, &ms->mbroffset);
+ if (error)
+ break;
+ }
+
+ /* Get sector size, we need it to read data. */
+ secsize = cp->provider->sectorsize;
+ if (secsize < 512)
+ break;
+
+ /* First look for a label at the start of the second sector. */
+ error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize);
+
+ /* Next, look for alpha labels */
+ if (error)
+ error = g_bsd_try(gp, gsp, cp, secsize, ms,
+ ALPHA_LABEL_OFFSET);
+
+ /* If we didn't find a label, punt. */
+ if (error)
+ break;
+
+ /*
+ * In order to avoid recursively attaching to the same
+ * on-disk label (it's usually visible through the 'c'
+ * partition) we calculate an MD5 and ask if other BSD's
+ * below us love that label. If they do, we don't.
+ */
+ MD5Init(&md5sum);
+ MD5Update(&md5sum, ms->label, sizeof(ms->label));
+ MD5Final(ms->labelsum, &md5sum);
+
+ error = g_getattr("BSD::labelsum", cp, &hash);
+ if (!error && !bcmp(ms->labelsum, hash, sizeof(hash)))
+ break;
+
+ /*
+ * Process the found disklabel, and modify our "slice"
+ * instance to match it, if possible.
+ */
+ error = g_bsd_modify(gp, ms->label);
+ } while (0);
+
+ /* Success or failure, we can close our provider now. */
+ error = g_access_rel(cp, -1, 0, 0);
+
+ /* If we have configured any providers, return the new geom. */
+ if (gsp->nprovider > 0) {
+ g_slice_conf_hot(gp, 0, ms->labeloffset, LABELSIZE,
+ G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL);
+ gsp->hot = g_bsd_hotwrite;
+ return (gp);
+ }
+ /*
+ * ...else push the "self-destruct" button, by spoiling our own
+ * consumer. This triggers a call to g_slice_spoiled which will
+ * dismantle what was setup.
+ */
+ g_slice_spoiled(cp);
+ return (NULL);
+}
+
/*
 * Argument bundle for g_bsd_callconfig(), so a whole "write label"
 * operation can travel as a single pointer.
 */
struct h0h0 {
	struct g_geom *gp;	/* Geom whose label is being replaced. */
	struct g_bsd_softc *ms;	/* Its BSD softc. */
	u_char *label;		/* Byte-image of the new label. */
	int error;		/* Result; initialized to -1 by the caller. */
};
+
+static void
+g_bsd_callconfig(void *arg, int flag)
+{
+ struct h0h0 *hp;
+
+ hp = arg;
+ hp->error = g_bsd_modify(hp->gp, hp->label);
+ if (!hp->error)
+ hp->error = g_bsd_writelabel(hp->gp, NULL);
+}
+
+/*
+ * NB! curthread is user process which GCTL'ed.
+ */
+static void
+g_bsd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
+{
+ u_char *label;
+ int error;
+ struct h0h0 h0h0;
+ struct g_geom *gp;
+ struct g_slicer *gsp;
+ struct g_consumer *cp;
+ struct g_bsd_softc *ms;
+
+ g_topology_assert();
+ gp = gctl_get_geom(req, mp, "geom");
+ if (gp == NULL)
+ return;
+ cp = LIST_FIRST(&gp->consumer);
+ gsp = gp->softc;
+ ms = gsp->softc;
+ if (!strcmp(verb, "read mbroffset")) {
+ gctl_set_param(req, "mbroffset",
+ &ms->mbroffset, sizeof(ms->mbroffset));
+ return;
+ } else if (!strcmp(verb, "write label")) {
+ label = gctl_get_paraml(req, "label", LABELSIZE);
+ if (label == NULL)
+ return;
+ h0h0.gp = gp;
+ h0h0.ms = gsp->softc;
+ h0h0.label = label;
+ h0h0.error = -1;
+ /* XXX: Does this reference register with our selfdestruct code ? */
+ error = g_access_rel(cp, 1, 1, 1);
+ if (error) {
+ gctl_error(req, "could not access consumer");
+ return;
+ }
+ g_bsd_callconfig(&h0h0, 0);
+ error = h0h0.error;
+ g_access_rel(cp, -1, -1, -1);
+ } else if (!strcmp(verb, "write bootcode")) {
+ label = gctl_get_paraml(req, "bootcode", BBSIZE);
+ if (label == NULL)
+ return;
+ /* XXX: Does this reference register with our selfdestruct code ? */
+ error = g_access_rel(cp, 1, 1, 1);
+ if (error) {
+ gctl_error(req, "could not access consumer");
+ return;
+ }
+ error = g_bsd_writelabel(gp, label);
+ g_access_rel(cp, -1, -1, -1);
+ } else {
+ gctl_error(req, "Unknown verb parameter");
+ }
+
+ return;
+}
+
/* Finally, register with GEOM infrastructure. */
static struct g_class g_bsd_class = {
	.name = BSD_CLASS_NAME,		/* "BSD" */
	.taste = g_bsd_taste,		/* Probe providers for disklabels. */
	.ctlreq = g_bsd_config,		/* Handle gctl verbs from userland. */
};

DECLARE_GEOM_CLASS(g_bsd_class, g_bsd);
diff --git a/sys/geom/geom_bsd_enc.c b/sys/geom/geom_bsd_enc.c
new file mode 100644
index 0000000..dfdeb85
--- /dev/null
+++ b/sys/geom/geom_bsd_enc.c
@@ -0,0 +1,194 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Functions to encode and decode struct disklabel and struct partition into
+ * a bytestream of little endianness and correct packing.
+ *
+ * NB! This file must be usable both in kernel and userland.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/endian.h>
+#include <sys/disklabel.h>
+#include <sys/errno.h>
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+void
+bsd_partition_le_dec(u_char *ptr, struct partition *d)
+{
+ d->p_size = le32dec(ptr + 0);
+ d->p_offset = le32dec(ptr + 4);
+ d->p_fsize = le32dec(ptr + 8);
+ d->p_fstype = ptr[12];
+ d->p_frag = ptr[13];
+ d->p_cpg = le16dec(ptr + 14);
+}
+
+int
+bsd_disklabel_le_dec(u_char *ptr, struct disklabel *d, int maxpart)
+{
+ int i;
+ u_char *p, *pe;
+ uint16_t sum;
+
+ d->d_magic = le32dec(ptr + 0);
+ if (d->d_magic != DISKMAGIC)
+ return(EINVAL);
+
+ d->d_magic2 = le32dec(ptr + 132);
+ if (d->d_magic2 != DISKMAGIC) {
+ return(EINVAL);
+ }
+
+ d->d_npartitions = le16dec(ptr + 138);
+ if (d->d_npartitions > maxpart) {
+ return(EINVAL);
+ }
+
+ pe = ptr + 148 + 16 * d->d_npartitions;
+ sum = 0;
+ for (p = ptr; p < pe; p += 2)
+ sum ^= le16dec(p);
+ if (sum != 0) {
+ return(EINVAL);
+ }
+
+ d->d_type = le16dec(ptr + 4);
+ d->d_subtype = le16dec(ptr + 6);
+ bcopy(ptr + 8, d->d_typename, 16);
+ bcopy(ptr + 24, d->d_packname, 16);
+ d->d_secsize = le32dec(ptr + 40);
+ d->d_nsectors = le32dec(ptr + 44);
+ d->d_ntracks = le32dec(ptr + 48);
+ d->d_ncylinders = le32dec(ptr + 52);
+ d->d_secpercyl = le32dec(ptr + 56);
+ d->d_secperunit = le32dec(ptr + 60);
+ d->d_sparespertrack = le16dec(ptr + 64);
+ d->d_sparespercyl = le16dec(ptr + 66);
+ d->d_acylinders = le32dec(ptr + 68);
+ d->d_rpm = le16dec(ptr + 72);
+ d->d_interleave = le16dec(ptr + 74);
+ d->d_trackskew = le16dec(ptr + 76);
+ d->d_cylskew = le16dec(ptr + 78);
+ d->d_headswitch = le32dec(ptr + 80);
+ d->d_trkseek = le32dec(ptr + 84);
+ d->d_flags = le32dec(ptr + 88);
+ d->d_drivedata[0] = le32dec(ptr + 92);
+ d->d_drivedata[1] = le32dec(ptr + 96);
+ d->d_drivedata[2] = le32dec(ptr + 100);
+ d->d_drivedata[3] = le32dec(ptr + 104);
+ d->d_drivedata[4] = le32dec(ptr + 108);
+ d->d_spare[0] = le32dec(ptr + 112);
+ d->d_spare[1] = le32dec(ptr + 116);
+ d->d_spare[2] = le32dec(ptr + 120);
+ d->d_spare[3] = le32dec(ptr + 124);
+ d->d_spare[4] = le32dec(ptr + 128);
+ d->d_checksum = le16dec(ptr + 136);
+ d->d_npartitions = le16dec(ptr + 138);
+ d->d_bbsize = le32dec(ptr + 140);
+ d->d_sbsize = le32dec(ptr + 144);
+ for (i = 0; i < MAXPARTITIONS; i++)
+ bsd_partition_le_dec(ptr + 148 + 16 * i, &d->d_partitions[i]);
+ return(0);
+}
+
+void
+bsd_partition_le_enc(u_char *ptr, struct partition *d)
+{
+ le32enc(ptr + 0, d->p_size);
+ le32enc(ptr + 4, d->p_offset);
+ le32enc(ptr + 8, d->p_fsize);
+ ptr[12] = d->p_fstype;
+ ptr[13] = d->p_frag;
+ le16enc(ptr + 14, d->p_cpg);
+}
+
+void
+bsd_disklabel_le_enc(u_char *ptr, struct disklabel *d)
+{
+ int i;
+ u_char *p, *pe;
+ uint16_t sum;
+
+ le32enc(ptr + 0, d->d_magic);
+ le16enc(ptr + 4, d->d_type);
+ le16enc(ptr + 6, d->d_subtype);
+ bcopy(d->d_typename, ptr + 8, 16);
+ bcopy(d->d_packname, ptr + 24, 16);
+ le32enc(ptr + 40, d->d_secsize);
+ le32enc(ptr + 44, d->d_nsectors);
+ le32enc(ptr + 48, d->d_ntracks);
+ le32enc(ptr + 52, d->d_ncylinders);
+ le32enc(ptr + 56, d->d_secpercyl);
+ le32enc(ptr + 60, d->d_secperunit);
+ le16enc(ptr + 64, d->d_sparespertrack);
+ le16enc(ptr + 66, d->d_sparespercyl);
+ le32enc(ptr + 68, d->d_acylinders);
+ le16enc(ptr + 72, d->d_rpm);
+ le16enc(ptr + 74, d->d_interleave);
+ le16enc(ptr + 76, d->d_trackskew);
+ le16enc(ptr + 78, d->d_cylskew);
+ le32enc(ptr + 80, d->d_headswitch);
+ le32enc(ptr + 84, d->d_trkseek);
+ le32enc(ptr + 88, d->d_flags);
+ le32enc(ptr + 92, d->d_drivedata[0]);
+ le32enc(ptr + 96, d->d_drivedata[1]);
+ le32enc(ptr + 100, d->d_drivedata[2]);
+ le32enc(ptr + 104, d->d_drivedata[3]);
+ le32enc(ptr + 108, d->d_drivedata[4]);
+ le32enc(ptr + 112, d->d_spare[0]);
+ le32enc(ptr + 116, d->d_spare[1]);
+ le32enc(ptr + 120, d->d_spare[2]);
+ le32enc(ptr + 124, d->d_spare[3]);
+ le32enc(ptr + 128, d->d_spare[4]);
+ le32enc(ptr + 132, d->d_magic2);
+ le16enc(ptr + 136, 0);
+ le16enc(ptr + 138, d->d_npartitions);
+ le32enc(ptr + 140, d->d_bbsize);
+ le32enc(ptr + 144, d->d_sbsize);
+ for (i = 0; i < d->d_npartitions; i++)
+ bsd_partition_le_enc(ptr + 148 + 16 * i, &d->d_partitions[i]);
+ pe = ptr + 148 + 16 * d->d_npartitions;
+ sum = 0;
+ for (p = ptr; p < pe; p += 2)
+ sum ^= le16dec(p);
+ le16enc(ptr + 136, sum);
+}
diff --git a/sys/geom/geom_ccd.c b/sys/geom/geom_ccd.c
new file mode 100644
index 0000000..51f70c3
--- /dev/null
+++ b/sys/geom/geom_ccd.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright (c) 2003 Poul-Henning Kamp.
+ * Copyright (c) 1995 Jason R. Thorpe.
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * All rights reserved.
+ * Copyright (c) 1988 University of Utah.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project
+ * by Jason R. Thorpe.
+ * 4. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Dynamic configuration and disklabel support by:
+ * Jason R. Thorpe <thorpej@nas.nasa.gov>
+ * Numerical Aerodynamic Simulation Facility
+ * Mail Stop 258-6
+ * NASA Ames Research Center
+ * Moffett Field, CA 94035
+ *
+ * from: Utah $Hdr: cd.c 1.6 90/11/28$
+ * @(#)cd.c 8.2 (Berkeley) 11/16/93
+ * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/bio.h>
+#include <sys/malloc.h>
+#include <geom/geom.h>
+
+/*
+ * Number of blocks to leave untouched in front of a component partition.
+ * This is to avoid violating its disklabel area when it starts at the
+ * beginning of the slice.
+ */
+#if !defined(CCD_OFFSET)
+#define CCD_OFFSET 16
+#endif
+
+/* sc_flags */
+#define CCDF_UNIFORM 0x02 /* use LCCD of sizes for uniform interleave */
+#define CCDF_MIRROR 0x04 /* use mirroring */
+
+/* Mask of user-settable ccd flags. */
+#define CCDF_USERMASK (CCDF_UNIFORM|CCDF_MIRROR)
+
+/*
+ * Interleave description table.
+ * Computed at boot time to speed irregular-interleave lookups.
+ * The idea is that we interleave in "groups". First we interleave
+ * evenly over all component disks up to the size of the smallest
+ * component (the first group), then we interleave evenly over all
+ * remaining disks up to the size of the next-smallest (second group),
+ * and so on.
+ *
+ * Each table entry describes the interleave characteristics of one
+ * of these groups. For example if a concatenated disk consisted of
+ * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
+ * DEV_BSIZE (1), the table would have three entries:
+ *
+ * ndisk startblk startoff dev
+ * 3 0 0 0, 1, 2
+ * 2 9 3 0, 2
+ * 1 13 5 2
+ * 0 - - -
+ *
+ * which says that the first nine blocks (0-8) are interleaved over
+ * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
+ * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
+ * at component block 3, and the remaining blocks (13-14) are on disk
+ * 2 starting at offset 5.
+ */
+struct ccdiinfo {
+	int	ii_ndisk;	/* # of disks range is interleaved over;
+				 * 0 terminates the table */
+	daddr_t	ii_startblk;	/* starting scaled block # for range */
+	daddr_t	ii_startoff;	/* starting component offset (block #) */
+	int	*ii_index;	/* ordered list of components in range */
+};
+
+/*
+ * Component info table.
+ * Describes a single component of a concatenated disk.
+ */
+struct ccdcinfo {
+	size_t	ci_size;	/* size in DEV_BSIZE blocks (see ccdinit()) */
+	struct g_provider	*ci_provider;	/* provider */
+	struct g_consumer	*ci_consumer;	/* consumer */
+};
+
+/*
+ * A concatenated disk is described by this structure.
+ */
+
+struct ccd_s {
+	LIST_ENTRY(ccd_s) list;
+
+	int		sc_unit;	/* logical unit number */
+	int		sc_flags;	/* CCDF_* flags */
+	size_t		sc_size;	/* size of ccd in DEV_BSIZE blocks */
+	int		sc_ileave;	/* interleave (0 = concatenate) */
+	u_int		sc_ndisks;	/* number of components */
+	struct ccdcinfo	*sc_cinfo;	/* component info */
+	struct ccdiinfo	*sc_itable;	/* interleave table */
+	u_int32_t	sc_secsize;	/* # bytes per sector */
+	int		sc_pick;	/* side of mirror picked last */
+	daddr_t		sc_blk[2];	/* last block issued per mirror side,
+					 * used to localize mirrored reads */
+};
+
+static g_start_t g_ccd_start;
+static void ccdiodone(struct bio *bp);
+static void ccdinterleave(struct ccd_s *);
+static int ccdinit(struct gctl_req *req, struct ccd_s *);
+static int ccdbuffer(struct bio **ret, struct ccd_s *,
+ struct bio *, daddr_t, caddr_t, long);
+
+/*
+ * GEOM orphan method: called when a component's provider goes away.
+ */
+static void
+g_ccd_orphan(struct g_consumer *cp)
+{
+	/*
+	 * XXX: We don't do anything here.  It is not obvious
+	 * XXX: what DTRT would be, so we do what the previous
+	 * XXX: code did: ignore it and let the user cope.
+	 */
+}
+
+/*
+ * GEOM access method: propagate an access count change to every
+ * component consumer.  Exclusive access is additionally requested
+ * whenever read or write access is, so components cannot change
+ * underneath an open ccd.
+ *
+ * On failure the consumers that were already granted access are
+ * rolled back.  (Bug fix: the rollback loop used to call
+ * g_access_rel() on cp1 -- the failing consumer -- once per
+ * already-granted consumer, instead of backing out cp2.)
+ */
+static int
+g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp1, *cp2;
+	int error;
+
+	de += dr;
+	de += dw;
+
+	gp = pp->geom;
+	error = ENXIO;
+	LIST_FOREACH(cp1, &gp->consumer, consumer) {
+		error = g_access_rel(cp1, dr, dw, de);
+		if (error) {
+			/* Undo the grants on every consumer before cp1. */
+			LIST_FOREACH(cp2, &gp->consumer, consumer) {
+				if (cp1 == cp2)
+					break;
+				g_access_rel(cp2, -dr, -dw, -de);
+			}
+			break;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Free the softc and its substructures.
+ */
+/*
+ * Free the softc and its substructures (component array and
+ * interleave table, including each entry's index vector).
+ */
+static void
+g_ccd_freesc(struct ccd_s *sc)
+{
+	struct ccdiinfo *ii;
+
+	g_free(sc->sc_cinfo);
+	if (sc->sc_itable != NULL) {
+		/* The table is terminated by an entry with ii_ndisk == 0. */
+		for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
+			if (ii->ii_index != NULL)
+				g_free(ii->ii_index);
+		g_free(sc->sc_itable);
+	}
+	g_free(sc);
+}
+
+
+/*
+ * Validate the component set, compute each component's usable size and
+ * the total size of the ccd, and build the interleave table.
+ * On error an explanation is recorded in "req" and an errno returned.
+ */
+static int
+ccdinit(struct gctl_req *req, struct ccd_s *cs)
+{
+	struct ccdcinfo *ci;
+	size_t size;
+	int ix;
+	size_t minsize;
+	int maxsecsize;
+	off_t mediasize;
+	u_int sectorsize;
+
+	cs->sc_size = 0;
+
+	maxsecsize = 0;
+	minsize = 0;
+	for (ix = 0; ix < cs->sc_ndisks; ix++) {
+		ci = &cs->sc_cinfo[ix];
+
+		mediasize = ci->ci_provider->mediasize;
+		sectorsize = ci->ci_provider->sectorsize;
+		if (sectorsize > maxsecsize)
+			maxsecsize = sectorsize;
+		/* Leave CCD_OFFSET blocks for the component's label area. */
+		size = mediasize / DEV_BSIZE - CCD_OFFSET;
+
+		/* Truncate to interleave boundary */
+
+		if (cs->sc_ileave > 1)
+			size -= size % cs->sc_ileave;
+
+		if (size == 0) {
+			gctl_error(req, "Component %s has effective size zero",
+			    ci->ci_provider->name);
+			return(ENODEV);
+		}
+
+		if (minsize == 0 || size < minsize)
+			minsize = size;
+		ci->ci_size = size;
+		cs->sc_size += size;
+	}
+
+	/*
+	 * Don't allow the interleave to be smaller than
+	 * the biggest component sector.
+	 */
+	if ((cs->sc_ileave > 0) &&
+	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
+		/* Fixed grammar of the error message ("to small"). */
+		gctl_error(req, "Interleave too small for sector size");
+		return(EINVAL);
+	}
+
+	/*
+	 * If uniform interleave is desired set all sizes to that of
+	 * the smallest component.  This will guarantee that a single
+	 * interleave table is generated.
+	 *
+	 * Lost space must be taken into account when calculating the
+	 * overall size.  Half the space is lost when CCDF_MIRROR is
+	 * specified.
+	 */
+	if (cs->sc_flags & CCDF_UNIFORM) {
+		for (ix = 0; ix < cs->sc_ndisks; ix++) {
+			ci = &cs->sc_cinfo[ix];
+			ci->ci_size = minsize;
+		}
+		cs->sc_size = cs->sc_ndisks * minsize;
+	}
+
+	if (cs->sc_flags & CCDF_MIRROR) {
+		/*
+		 * Check to see if an even number of components
+		 * have been specified.  The interleave must also
+		 * be non-zero in order for us to be able to
+		 * guarantee the topology.
+		 */
+		if (cs->sc_ndisks % 2) {
+			gctl_error(req,
+			    "Mirroring requires an even number of disks");
+			return(EINVAL);
+		}
+		if (cs->sc_ileave == 0) {
+			gctl_error(req,
+			    "An interleave must be specified when mirroring");
+			return(EINVAL);
+		}
+		cs->sc_size = (cs->sc_ndisks/2) * minsize;
+	}
+
+	/*
+	 * Construct the interleave table.
+	 */
+	ccdinterleave(cs);
+
+	/*
+	 * Create pseudo-geometry based on 1MB cylinders.  It's
+	 * pretty close.
+	 */
+	cs->sc_secsize = maxsecsize;
+
+	return (0);
+}
+
+/*
+ * Build the interleave table (sc_itable) described above.  The table
+ * is terminated by an entry with ii_ndisk == 0 and is consulted by
+ * ccdbuffer() to map a ccd block number to a component and offset.
+ */
+static void
+ccdinterleave(struct ccd_s *cs)
+{
+	struct ccdcinfo *ci, *smallci;
+	struct ccdiinfo *ii;
+	daddr_t bn, lbn;
+	int ix;
+	u_long size;
+
+
+	/*
+	 * Allocate an interleave table.  The worst case occurs when each
+	 * of N disks is of a different size, resulting in N interleave
+	 * tables.
+	 *
+	 * Chances are this is too big, but we don't care.
+	 */
+	size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
+	cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
+
+	/*
+	 * Trivial case: no interleave (actually interleave of disk size).
+	 * Each table entry represents a single component in its entirety.
+	 *
+	 * An interleave of 0 may not be used with a mirror setup.
+	 */
+	if (cs->sc_ileave == 0) {
+		bn = 0;
+		ii = cs->sc_itable;
+
+		for (ix = 0; ix < cs->sc_ndisks; ix++) {
+			/* Allocate space for ii_index. */
+			ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
+			ii->ii_ndisk = 1;
+			ii->ii_startblk = bn;
+			ii->ii_startoff = 0;
+			ii->ii_index[0] = ix;
+			bn += cs->sc_cinfo[ix].ci_size;
+			ii++;
+		}
+		/* Terminate the table. */
+		ii->ii_ndisk = 0;
+		return;
+	}
+
+	/*
+	 * The following isn't fast or pretty; it doesn't have to be.
+	 */
+	size = 0;
+	bn = lbn = 0;
+	for (ii = cs->sc_itable; ; ii++) {
+		/*
+		 * Allocate space for ii_index.  We might allocate more then
+		 * we use.
+		 */
+		ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
+		    M_WAITOK);
+
+		/*
+		 * Locate the smallest of the remaining components
+		 * (strictly larger than the previous group's size).
+		 */
+		smallci = NULL;
+		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
+		    ci++) {
+			if (ci->ci_size > size &&
+			    (smallci == NULL ||
+			     ci->ci_size < smallci->ci_size)) {
+				smallci = ci;
+			}
+		}
+
+		/*
+		 * Nobody left, all done
+		 */
+		if (smallci == NULL) {
+			ii->ii_ndisk = 0;
+			g_free(ii->ii_index);
+			ii->ii_index = NULL;
+			break;
+		}
+
+		/*
+		 * Record starting logical block using an sc_ileave blocksize.
+		 */
+		ii->ii_startblk = bn / cs->sc_ileave;
+
+		/*
+		 * Record starting component block using an sc_ileave
+		 * blocksize.  This value is relative to the beginning of
+		 * a component disk.
+		 */
+		ii->ii_startoff = lbn;
+
+		/*
+		 * Determine how many disks take part in this interleave
+		 * and record their indices.
+		 */
+		ix = 0;
+		for (ci = cs->sc_cinfo;
+		    ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
+			if (ci->ci_size >= smallci->ci_size) {
+				ii->ii_index[ix++] = ci - cs->sc_cinfo;
+			}
+		}
+		ii->ii_ndisk = ix;
+		bn += ix * (smallci->ci_size - size);
+		lbn = smallci->ci_size / cs->sc_ileave;
+		size = smallci->ci_size;
+	}
+}
+
+/*
+ * GEOM start method: split the request into per-component child bios
+ * and fire them off.  Mirrored reads are steered towards the side
+ * whose arm is presumed nearest the request.
+ */
+static void
+g_ccd_start(struct bio *bp)
+{
+	long bcount, rcount;
+	struct bio *cbp[2];
+	caddr_t addr;
+	daddr_t bn;
+	int err;
+	struct ccd_s *cs;
+
+	cs = bp->bio_to->geom->softc;
+
+	/*
+	 * Translate the partition-relative block number to an absolute.
+	 */
+	bn = bp->bio_offset / cs->sc_secsize;
+
+	/*
+	 * Allocate component buffers and fire off the requests
+	 */
+	addr = bp->bio_data;
+	for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
+		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
+		if (err) {
+			bp->bio_completed += bcount;
+			/*
+			 * Record the first error.  (Bug fix: the test was
+			 * "bio_error != 0", which could never store the
+			 * error since bio_error starts out zero, so failed
+			 * requests were delivered as successes.)
+			 */
+			if (bp->bio_error == 0)
+				bp->bio_error = err;
+			if (bp->bio_completed == bp->bio_length)
+				g_io_deliver(bp, bp->bio_error);
+			return;
+		}
+		rcount = cbp[0]->bio_length;
+
+		if (cs->sc_flags & CCDF_MIRROR) {
+			/*
+			 * Mirroring.  Writes go to both disks, reads are
+			 * taken from whichever disk seems most appropriate.
+			 *
+			 * We attempt to localize reads to the disk whos arm
+			 * is nearest the read request.  We ignore seeks due
+			 * to writes when making this determination and we
+			 * also try to avoid hogging.
+			 */
+			if (cbp[0]->bio_cmd != BIO_READ) {
+				g_io_request(cbp[0], cbp[0]->bio_from);
+				g_io_request(cbp[1], cbp[1]->bio_from);
+			} else {
+				int pick = cs->sc_pick;
+				daddr_t range = cs->sc_size / 16;
+
+				if (bn < cs->sc_blk[pick] - range ||
+				    bn > cs->sc_blk[pick] + range
+				) {
+					cs->sc_pick = pick = 1 - pick;
+				}
+				cs->sc_blk[pick] = bn + btodb(rcount);
+				g_io_request(cbp[pick], cbp[pick]->bio_from);
+			}
+		} else {
+			/*
+			 * Not mirroring
+			 */
+			g_io_request(cbp[0], cbp[0]->bio_from);
+		}
+		bn += btodb(rcount);
+		addr += rcount;
+	}
+}
+
+/*
+ * Build a component buffer header: clone "bp" into one (or, for
+ * mirrors, two) child bios aimed at the component(s) covering block
+ * "bn".  The clones are returned in cb[]; the caller reads cb[0]'s
+ * bio_length to learn how much of the request was covered.
+ * Returns 0 or ENOMEM.
+ */
+static int
+ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
+{
+	struct ccdcinfo *ci, *ci2 = NULL;
+	struct bio *cbp;
+	daddr_t cbn, cboff;
+	off_t cbc;
+
+	/*
+	 * Determine which component bn falls in.
+	 */
+	cbn = bn;
+	cboff = 0;
+
+	if (cs->sc_ileave == 0) {
+		/*
+		 * Serially concatenated and neither a mirror nor a parity
+		 * config.  This is a special case.
+		 */
+		daddr_t sblk;
+
+		sblk = 0;
+		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
+			sblk += ci->ci_size;
+		cbn -= sblk;
+	} else {
+		struct ccdiinfo *ii;
+		int ccdisk, off;
+
+		/*
+		 * Calculate cbn, the logical superblock (sc_ileave chunks),
+		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
+		 * to cbn.
+		 */
+		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
+		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
+
+		/*
+		 * Figure out which interleave table to use.
+		 */
+		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
+			if (ii->ii_startblk > cbn)
+				break;
+		}
+		ii--;
+
+		/*
+		 * off is the logical superblock relative to the beginning
+		 * of this interleave block.
+		 */
+		off = cbn - ii->ii_startblk;
+
+		/*
+		 * We must calculate which disk component to use (ccdisk),
+		 * and recalculate cbn to be the superblock relative to
+		 * the beginning of the component.  This is typically done by
+		 * adding 'off' and ii->ii_startoff together.  However, 'off'
+		 * must typically be divided by the number of components in
+		 * this interleave array to be properly convert it from a
+		 * CCD-relative logical superblock number to a
+		 * component-relative superblock number.
+		 */
+		if (ii->ii_ndisk == 1) {
+			/*
+			 * When we have just one disk, it can't be a mirror
+			 * or a parity config.
+			 */
+			ccdisk = ii->ii_index[0];
+			cbn = ii->ii_startoff + off;
+		} else {
+			if (cs->sc_flags & CCDF_MIRROR) {
+				/*
+				 * We have forced a uniform mapping, resulting
+				 * in a single interleave array.  We double
+				 * up on the first half of the available
+				 * components and our mirror is in the second
+				 * half.  This only works with a single
+				 * interleave array because doubling up
+				 * doubles the number of sectors, so there
+				 * cannot be another interleave array because
+				 * the next interleave array's calculations
+				 * would be off.
+				 */
+				int ndisk2 = ii->ii_ndisk / 2;
+				ccdisk = ii->ii_index[off % ndisk2];
+				cbn = ii->ii_startoff + off / ndisk2;
+				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
+			} else {
+				ccdisk = ii->ii_index[off % ii->ii_ndisk];
+				cbn = ii->ii_startoff + off / ii->ii_ndisk;
+			}
+		}
+
+		ci = &cs->sc_cinfo[ccdisk];
+
+		/*
+		 * Convert cbn from a superblock to a normal block so it
+		 * can be used to calculate (along with cboff) the normal
+		 * block index into this particular disk.
+		 */
+		cbn *= cs->sc_ileave;
+	}
+
+	/*
+	 * Fill in the component buf structure.
+	 */
+	cbp = g_clone_bio(bp);
+	if (cbp == NULL)
+		return (ENOMEM);
+	cbp->bio_done = g_std_done;
+	/* Skip the CCD_OFFSET blocks reserved at the component's start. */
+	cbp->bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
+	cbp->bio_data = addr;
+	if (cs->sc_ileave == 0)
+		cbc = dbtob((off_t)(ci->ci_size - cbn));
+	else
+		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
+	cbp->bio_length = (cbc < bcount) ? cbc : bcount;
+
+	cbp->bio_from = ci->ci_consumer;
+	cb[0] = cbp;
+
+	if (cs->sc_flags & CCDF_MIRROR) {
+		cbp = g_clone_bio(bp);
+		if (cbp == NULL)
+			/*
+			 * NOTE(review): cb[0] is not released on this
+			 * path; looks like a clone leak -- verify against
+			 * g_clone_bio()'s child accounting before fixing.
+			 */
+			return (ENOMEM);
+		/* The halves point at each other through bio_caller1. */
+		cbp->bio_done = cb[0]->bio_done = ccdiodone;
+		cbp->bio_offset = cb[0]->bio_offset;
+		cbp->bio_data = cb[0]->bio_data;
+		cbp->bio_length = cb[0]->bio_length;
+		cbp->bio_from = ci2->ci_consumer;
+		cbp->bio_caller1 = cb[0];
+		cb[0]->bio_caller1 = cbp;
+		cb[1] = cbp;
+	}
+	return (0);
+}
+
+/*
+ * Completion handler, called only for mirrored operations.  Each half
+ * finds its partner through bio_caller1 (NULL once the partner has
+ * been consumed).  Reads succeed from either half and retry on the
+ * partner after an error; writes complete when both halves have.
+ */
+static void
+ccdiodone(struct bio *cbp)
+{
+	struct bio *mbp, *pbp;
+
+	mbp = cbp->bio_caller1;
+	pbp = cbp->bio_parent;
+
+	if (pbp->bio_cmd == BIO_READ) {
+		if (cbp->bio_error == 0) {
+			/* We will not be needing the partner bio */
+			if (mbp != NULL) {
+				pbp->bio_inbed++;
+				g_destroy_bio(mbp);
+			}
+			g_std_done(cbp);
+			return;
+		}
+		if (mbp != NULL) {
+			/* Try partner the bio instead */
+			mbp->bio_caller1 = NULL;
+			pbp->bio_inbed++;
+			g_destroy_bio(cbp);
+			g_io_request(mbp, mbp->bio_from);
+			/*
+			 * XXX: If this comes back OK, we should actually
+			 * try to write the good data on the failed mirror
+			 */
+			return;
+		}
+		/*
+		 * Both halves failed; deliver the error.  Return here so
+		 * the write-path accounting below cannot run a second
+		 * completion on cbp (bug fix: missing return caused a
+		 * double g_std_done() when a retried mirror read failed).
+		 */
+		g_std_done(cbp);
+		return;
+	}
+	if (mbp != NULL) {
+		/*
+		 * First write half to finish: detach the partner, account
+		 * for this half and fold its error into the parent.  Also
+		 * release this clone (bug fix: it was abandoned here,
+		 * unlike the read retry path above which destroys it).
+		 */
+		mbp->bio_caller1 = NULL;
+		pbp->bio_inbed++;
+		if (cbp->bio_error != 0 && pbp->bio_error == 0)
+			pbp->bio_error = cbp->bio_error;
+		g_destroy_bio(cbp);
+		return;
+	}
+	g_std_done(cbp);
+}
+
+/*
+ * "create geom" verb handler: build a new ccd from the providers named
+ * in the request, attach consumers, size it via ccdinit() and expose
+ * the resulting provider.  A human-readable summary is returned in the
+ * "output" parameter.
+ */
+static void
+g_ccd_create(struct gctl_req *req, struct g_class *mp)
+{
+	int *unit, *ileave, *nprovider;
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_provider *pp;
+	struct ccd_s *sc;
+	struct sbuf *sb;
+	char buf[20];
+	int i, error;
+
+	g_topology_assert();
+	unit = gctl_get_paraml(req, "unit", sizeof (*unit));
+	ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
+	nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
+
+	/* Check for duplicate unit */
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		sc = gp->softc;
+		if (sc->sc_unit == *unit) {
+			gctl_error(req, "Unit %d already configured", *unit);
+			return;
+		}
+	}
+
+	if (*nprovider <= 0) {
+		gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
+		return;
+	}
+
+	/* Check all providers are valid */
+	for (i = 0; i < *nprovider; i++) {
+		sprintf(buf, "provider%d", i);
+		pp = gctl_get_provider(req, buf);
+		if (pp == NULL)
+			return;
+	}
+
+	gp = g_new_geomf(mp, "ccd%d", *unit);
+	gp->start = g_ccd_start;
+	gp->orphan = g_ccd_orphan;
+	gp->access = g_ccd_access;
+	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
+	gp->softc = sc;
+	sc->sc_ndisks = *nprovider;
+
+	/* Allocate space for the component info. */
+	sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
+	    M_WAITOK | M_ZERO);
+
+	/* Create consumers and attach to all providers */
+	for (i = 0; i < *nprovider; i++) {
+		sprintf(buf, "provider%d", i);
+		pp = gctl_get_provider(req, buf);
+		cp = g_new_consumer(gp);
+		/* Attach cannot fail: the provider was validated above. */
+		error = g_attach(cp, pp);
+		KASSERT(error == 0, ("attach to %s failed", pp->name));
+		sc->sc_cinfo[i].ci_consumer = cp;
+		sc->sc_cinfo[i].ci_provider = pp;
+	}
+
+	sc->sc_unit = *unit;
+	sc->sc_ileave = *ileave;
+
+	if (gctl_get_param(req, "uniform", NULL))
+		sc->sc_flags |= CCDF_UNIFORM;
+	if (gctl_get_param(req, "mirror", NULL))
+		sc->sc_flags |= CCDF_MIRROR;
+
+	if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
+		printf("%s: disabling mirror, interleave is 0\n", gp->name);
+		sc->sc_flags &= ~(CCDF_MIRROR);
+	}
+
+	if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
+		printf("%s: mirror/parity forces uniform flag\n", gp->name);
+		sc->sc_flags |= CCDF_UNIFORM;
+	}
+
+	error = ccdinit(req, sc);
+	if (error != 0) {
+		g_ccd_freesc(sc);
+		gp->softc = NULL;
+		g_wither_geom(gp, ENXIO);
+		return;
+	}
+
+	pp = g_new_providerf(gp, "%s", gp->name);
+	pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
+	pp->sectorsize = sc->sc_secsize;
+	g_error_provider(pp, 0);
+
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	sbuf_clear(sb);
+	sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
+	for (i = 0; i < *nprovider; i++) {
+		sbuf_printf(sb, "%s%s",
+		    i == 0 ? "(" : ", ",
+		    sc->sc_cinfo[i].ci_provider->name);
+	}
+	/* %jd takes an intmax_t; the argument was an off_t expression. */
+	sbuf_printf(sb, "), %jd blocks ",
+	    (intmax_t)(pp->mediasize / DEV_BSIZE));
+	if (sc->sc_ileave != 0)
+		sbuf_printf(sb, "interleaved at %d blocks\n",
+		    sc->sc_ileave);
+	else
+		sbuf_printf(sb, "concatenated\n");
+	sbuf_finish(sb);
+	gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+}
+
+/*
+ * "destroy geom" verb handler: tear down an unopened ccd, releasing
+ * its softc and withering the geom.
+ */
+static void
+g_ccd_destroy(struct gctl_req *req, struct g_class *mp)
+{
+	struct ccd_s *cs;
+	struct g_provider *pp;
+	struct g_geom *gp;
+
+	g_topology_assert();
+	gp = gctl_get_geom(req, mp, "geom");
+	if (gp == NULL)
+		return;
+	pp = LIST_FIRST(&gp->provider);
+	/* Refuse while anybody holds the provider open. */
+	if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
+		gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
+		    pp->acr, pp->acw, pp->ace);
+		return;
+	}
+	cs = gp->softc;
+	gp->softc = NULL;
+	g_ccd_freesc(cs);
+	g_wither_geom(gp, ENXIO);
+}
+
+/*
+ * "list" verb handler: emit one line per configured ccd (or just the
+ * requested unit when "unit" is non-negative) into "output".
+ */
+static void
+g_ccd_list(struct gctl_req *req, struct g_class *mp)
+{
+	struct g_geom *gp;
+	struct ccd_s *sc;
+	struct sbuf *sb;
+	int *unitp, unit, i;
+
+	unitp = gctl_get_paraml(req, "unit", sizeof (int));
+	unit = *unitp;
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	sbuf_clear(sb);
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		sc = gp->softc;
+		if (unit >= 0 && unit != sc->sc_unit)
+			continue;
+		sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
+		    sc->sc_unit, sc->sc_ileave, sc->sc_flags & CCDF_USERMASK);
+		for (i = 0; i < sc->sc_ndisks; ++i)
+			sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
+			    sc->sc_cinfo[i].ci_provider->name);
+		sbuf_printf(sb, "\n");
+	}
+	sbuf_finish(sb);
+	gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+}
+
+/*
+ * Dispatch a control request to the handler for its verb.
+ */
+static void
+g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
+{
+
+	g_topology_assert();
+	if (strcmp(verb, "create geom") == 0)
+		g_ccd_create(req, mp);
+	else if (strcmp(verb, "destroy geom") == 0)
+		g_ccd_destroy(req, mp);
+	else if (strcmp(verb, "list") == 0)
+		g_ccd_list(req, mp);
+	else
+		gctl_error(req, "unknown verb");
+}
+
+/* GEOM class glue: configured through ctl requests only. */
+static struct g_class g_ccd_class = {
+	.name = "CCD",
+	.ctlreq = g_ccd_config,
+};
+
+DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
diff --git a/sys/geom/geom_ctl.c b/sys/geom/geom_ctl.c
new file mode 100644
index 0000000..d543129
--- /dev/null
+++ b/sys/geom/geom_ctl.c
@@ -0,0 +1,495 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_geom.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/bio.h>
+#include <sys/conf.h>
+#include <sys/disk.h>
+#include <sys/malloc.h>
+#include <sys/sysctl.h>
+#include <sys/sbuf.h>
+
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+#define GCTL_TABLE 1
+#include <geom/geom_ctl.h>
+
+#include <machine/stdarg.h>
+
+static d_ioctl_t g_ctl_ioctl;
+
+/* Character device entry points for the GEOM control device (ioctl only). */
+static struct cdevsw g_ctl_cdevsw = {
+	.d_open = nullopen,
+	.d_close = nullclose,
+	.d_ioctl = g_ctl_ioctl,
+	.d_name = "g_ctl",
+};
+
+/*
+ * Create the control device (PATH_GEOM_CTL) through which libgeom
+ * submits requests.  The KASSERTs pin the assumption that the
+ * GCTL_PARAM_RD/WR flag values equal the matching VM_PROT bits.
+ */
+void
+g_ctl_init(void)
+{
+
+	make_dev(&g_ctl_cdevsw, 0,
+	    UID_ROOT, GID_OPERATOR, 0640, PATH_GEOM_CTL);
+	KASSERT(GCTL_PARAM_RD == VM_PROT_READ,
+	    ("GCTL_PARAM_RD != VM_PROT_READ"));
+	KASSERT(GCTL_PARAM_WR == VM_PROT_WRITE,
+	    ("GCTL_PARAM_WR != VM_PROT_WRITE"));
+}
+
+/*
+ * Record an error in a control request and copy its text out to the
+ * user's error buffer.  Only the first error is recorded.  Returns
+ * whatever copyout returned, or EINVAL if it succeeded, so the caller
+ * can pass the value straight back up.
+ */
+int
+gctl_error(struct gctl_req *req, const char *fmt, ...)
+{
+	va_list ap;
+
+	if (req == NULL)
+		return (EINVAL);
+
+	/* We only record the first error */
+	if (req->nerror)
+		return (req->nerror);
+
+	va_start(ap, fmt);
+	sbuf_vprintf(req->serror, fmt, ap);
+	va_end(ap);
+	sbuf_finish(req->serror);
+	if (g_debugflags & G_F_CTLDUMP)
+		printf("gctl %p error \"%s\"\n", req, sbuf_data(req->serror));
+	/* Truncate to the user's buffer; always leave a NUL. */
+	req->nerror = copyout(sbuf_data(req->serror), req->error,
+	    imin(req->lerror, sbuf_len(req->serror) + 1));
+	if (!req->nerror)
+		req->nerror = EINVAL;
+	return (req->nerror);
+}
+
+/*
+ * Allocate space and copyin() something.  Returns the kernel copy, or
+ * NULL with req->nerror set on failure.
+ * XXX: this should really be a standard function in the kernel.
+ */
+static void *
+geom_alloc_copyin(struct gctl_req *req, void *uaddr, size_t len)
+{
+	void *ptr;
+
+	/*
+	 * NOTE(review): with M_WAITOK, g_malloc presumably cannot
+	 * return NULL, making the ENOMEM branch dead -- verify.
+	 */
+	ptr = g_malloc(len, M_WAITOK);
+	if (ptr == NULL)
+		req->nerror = ENOMEM;
+	else
+		req->nerror = copyin(uaddr, ptr, len);
+	if (!req->nerror)
+		return (ptr);
+	if (ptr != NULL)
+		g_free(ptr);
+	return (NULL);
+}
+
+/*
+ * Copy the argument vector and all parameter names/values in from
+ * user space.  On failure req->nerror is set (via gctl_error()) and
+ * any partial copies remain flagged so gctl_free() can release them.
+ */
+static void
+gctl_copyin(struct gctl_req *req)
+{
+	int error, i;
+	struct gctl_req_arg *ap;
+	char *p;
+
+	ap = geom_alloc_copyin(req, req->arg, req->narg * sizeof(*ap));
+	if (ap == NULL) {
+		req->nerror = ENOMEM;
+		req->arg = NULL;
+		return;
+	}
+
+	/* Nothing has been copyin()'ed yet */
+	for (i = 0; i < req->narg; i++) {
+		ap[i].flag &= ~(GCTL_PARAM_NAMEKERNEL|GCTL_PARAM_VALUEKERNEL);
+		ap[i].flag &= ~GCTL_PARAM_CHANGED;
+		ap[i].kvalue = NULL;
+	}
+
+	error = 0;
+	for (i = 0; i < req->narg; i++) {
+		if (ap[i].nlen < 1 || ap[i].nlen > SPECNAMELEN) {
+			error = gctl_error(req,
+			    "wrong param name length %d: %d", i, ap[i].nlen);
+			break;
+		}
+		p = geom_alloc_copyin(req, ap[i].name, ap[i].nlen);
+		if (p == NULL)
+			break;
+		if (p[ap[i].nlen - 1] != '\0') {
+			error = gctl_error(req, "unterminated param name");
+			g_free(p);
+			break;
+		}
+		ap[i].name = p;
+		ap[i].flag |= GCTL_PARAM_NAMEKERNEL;
+		if (ap[i].len < 0) {
+			error = gctl_error(req, "negative param length");
+			break;
+		}
+		if (ap[i].len == 0) {
+			/* Zero-length value: keep the user pointer as-is. */
+			ap[i].kvalue = ap[i].value;
+			ap[i].flag |= GCTL_PARAM_VALUEKERNEL;
+			continue;
+		}
+		p = geom_alloc_copyin(req, ap[i].value, ap[i].len);
+		if (p == NULL)
+			break;
+		if ((ap[i].flag & GCTL_PARAM_ASCII) &&
+		    p[ap[i].len - 1] != '\0') {
+			error = gctl_error(req, "unterminated param value");
+			g_free(p);
+			break;
+		}
+		ap[i].kvalue = p;
+		ap[i].flag |= GCTL_PARAM_VALUEKERNEL;
+	}
+	req->arg = ap;
+	return;
+}
+
+/*
+ * Copy every parameter value the kernel changed back out to user
+ * space.  Skipped entirely when the request already failed.
+ */
+static void
+gctl_copyout(struct gctl_req *req)
+{
+	struct gctl_req_arg *ap;
+	int i;
+
+	if (req->nerror)
+		return;
+	for (i = 0; i < req->narg; i++) {
+		ap = &req->arg[i];
+		if (!(ap->flag & GCTL_PARAM_CHANGED))
+			continue;
+		req->nerror = copyout(ap->kvalue, ap->value, ap->len);
+		if (req->nerror)
+			return;
+	}
+}
+
+/*
+ * Release everything the kernel holds for a request: the error sbuf
+ * and any copied-in argument names and values.
+ */
+static void
+gctl_free(struct gctl_req *req)
+{
+	int i;
+
+	/*
+	 * The error sbuf exists even when copying in the argument
+	 * vector failed (req->arg == NULL), so delete it before the
+	 * early return below -- it used to leak on that path.
+	 */
+	sbuf_delete(req->serror);
+	if (req->arg == NULL)
+		return;
+	for (i = 0; i < req->narg; i++) {
+		if (req->arg[i].flag & GCTL_PARAM_NAMEKERNEL)
+			g_free(req->arg[i].name);
+		if ((req->arg[i].flag & GCTL_PARAM_VALUEKERNEL) &&
+		    req->arg[i].len > 0)
+			g_free(req->arg[i].kvalue);
+	}
+	g_free(req->arg);
+}
+
+/*
+ * Debugging dump of a request and its parameters, enabled with the
+ * G_F_CTLDUMP debug flag.
+ */
+static void
+gctl_dump(struct gctl_req *req)
+{
+	u_int i;
+	int j;
+	struct gctl_req_arg *ap;
+
+	printf("Dump of gctl request at %p:\n", req);
+	if (req->nerror > 0) {
+		printf("  nerror:\t%d\n", req->nerror);
+		if (sbuf_len(req->serror) > 0)
+			printf("  error:\t\"%s\"\n", sbuf_data(req->serror));
+	}
+	for (i = 0; i < req->narg; i++) {
+		ap = &req->arg[i];
+		/* Names/values still in user space are printed as pointers. */
+		if (!(ap->flag & GCTL_PARAM_NAMEKERNEL))
+			printf("  param:\t%d@%p", ap->nlen, ap->name);
+		else
+			printf("  param:\t\"%s\"", ap->name);
+		printf(" [%s%s%d] = ",
+		    ap->flag & GCTL_PARAM_RD ? "R" : "",
+		    ap->flag & GCTL_PARAM_WR ? "W" : "",
+		    ap->len);
+		if (!(ap->flag & GCTL_PARAM_VALUEKERNEL)) {
+			printf(" =@ %p", ap->value);
+		} else if (ap->flag & GCTL_PARAM_ASCII) {
+			printf("\"%s\"", (char *)ap->kvalue);
+		} else if (ap->len > 0) {
+			for (j = 0; j < ap->len; j++)
+				printf(" %02x", ((u_char *)ap->kvalue)[j]);
+		} else {
+			printf(" = %p", ap->kvalue);
+		}
+		printf("\n");
+	}
+}
+
+/*
+ * Store "len" bytes from "ptr" into the named writable parameter and
+ * mark it changed so gctl_copyout() writes it back to user space.
+ * Records an error when the parameter is missing, read-only or too
+ * short.
+ */
+void
+gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len)
+{
+	struct gctl_req_arg *ap;
+	int i;
+
+	for (i = 0; i < req->narg; i++) {
+		ap = &req->arg[i];
+		if (strcmp(param, ap->name) != 0)
+			continue;
+		if (!(ap->flag & GCTL_PARAM_WR)) {
+			gctl_error(req, "No write access %s argument", param);
+		} else if (ap->len < len) {
+			gctl_error(req, "Wrong length %s argument", param);
+		} else {
+			bcopy(ptr, ap->kvalue, len);
+			ap->flag |= GCTL_PARAM_CHANGED;
+		}
+		return;
+	}
+	gctl_error(req, "Missing %s argument", param);
+}
+
+/*
+ * Look up a readable parameter by name.  Returns its kernel value (and
+ * optionally its length through "len"), or NULL when absent.
+ */
+void *
+gctl_get_param(struct gctl_req *req, const char *param, int *len)
+{
+	struct gctl_req_arg *ap;
+	int i;
+
+	for (i = 0; i < req->narg; i++) {
+		ap = &req->arg[i];
+		if (strcmp(param, ap->name) != 0 ||
+		    !(ap->flag & GCTL_PARAM_RD))
+			continue;
+		if (len != NULL)
+			*len = ap->len;
+		return (ap->kvalue);
+	}
+	return (NULL);
+}
+
+/*
+ * Look up a readable parameter by name and return it as a
+ * NUL-terminated string.  Returns NULL when the parameter is absent
+ * (no error recorded) or, with an error recorded, when it is empty or
+ * not NUL-terminated.
+ */
+char const *
+gctl_get_asciiparam(struct gctl_req *req, const char *param)
+{
+	int i;
+	char const *p;
+	struct gctl_req_arg *ap;
+
+	for (i = 0; i < req->narg; i++) {
+		ap = &req->arg[i];
+		if (strcmp(param, ap->name))
+			continue;
+		if (!(ap->flag & GCTL_PARAM_RD))
+			continue;
+		p = ap->kvalue;
+		if (ap->len < 1) {
+			gctl_error(req, "No length argument (%s)", param);
+			return (NULL);
+		}
+		if (p[ap->len - 1] != '\0') {
+			gctl_error(req, "Unterminated argument (%s)", param);
+			return (NULL);
+		}
+		return (p);
+	}
+	return (NULL);
+}
+
+/*
+ * Like gctl_get_param(), but insist the parameter exists and has
+ * exactly "len" bytes; otherwise record an error and return NULL.
+ */
+void *
+gctl_get_paraml(struct gctl_req *req, const char *param, int len)
+{
+	void *p;
+	int plen;
+
+	p = gctl_get_param(req, param, &plen);
+	if (p == NULL) {
+		gctl_error(req, "Missing %s argument", param);
+		return (NULL);
+	}
+	if (plen != len) {
+		gctl_error(req, "Wrong length %s argument", param);
+		return (NULL);
+	}
+	return (p);
+}
+
+/*
+ * Resolve the GEOM class named by request parameter "arg"; records an
+ * error and returns NULL when it cannot be found.
+ */
+struct g_class *
+gctl_get_class(struct gctl_req *req, char const *arg)
+{
+	struct g_class *mp;
+	char const *name;
+
+	name = gctl_get_asciiparam(req, arg);
+	if (name == NULL)
+		return (NULL);
+	LIST_FOREACH(mp, &g_classes, class)
+		if (strcmp(name, mp->name) == 0)
+			return (mp);
+	gctl_error(req, "Class not found");
+	return (NULL);
+}
+
+/*
+ * Resolve the geom named by request parameter "arg".  When "mpr" is
+ * non-NULL only that class is searched.  Records "Geom not found" on
+ * any failure, including a missing parameter.
+ */
+struct g_geom *
+gctl_get_geom(struct gctl_req *req, struct g_class *mpr, char const *arg)
+{
+	char const *p;
+	struct g_class *mp;
+	struct g_geom *gp;
+
+	p = gctl_get_asciiparam(req, arg);
+	if (p != NULL) {
+		LIST_FOREACH(mp, &g_classes, class) {
+			if (mpr != NULL && mpr != mp)
+				continue;
+			LIST_FOREACH(gp, &mp->geom, geom) {
+				if (!strcmp(p, gp->name))
+					return (gp);
+			}
+		}
+	}
+	gctl_error(req, "Geom not found");
+	return (NULL);
+}
+
+/*
+ * Resolve the provider named by request parameter "arg"; records an
+ * error and returns NULL when it cannot be found.
+ */
+struct g_provider *
+gctl_get_provider(struct gctl_req *req, char const *arg)
+{
+	char const *name;
+	struct g_provider *pp;
+
+	name = gctl_get_asciiparam(req, arg);
+	if (name == NULL)
+		return (NULL);
+	pp = g_provider_by_name(name);
+	if (pp == NULL)
+		gctl_error(req, "Provider not found");
+	return (pp);
+}
+
+/*
+ * Event-queue handler: runs a validated control request under the
+ * topology lock and hands it to the target class's ctlreq method.
+ */
+static void
+g_ctl_req(void *arg, int flag __unused)
+{
+	struct g_class *mp;
+	struct gctl_req *req;
+	char const *verb;
+
+	g_topology_assert();
+	req = arg;
+	mp = gctl_get_class(req, "class");
+	if (mp == NULL) {
+		gctl_error(req, "Class not found");
+		return;
+	}
+	verb = gctl_get_param(req, "verb", NULL);
+	/*
+	 * Guard against a missing verb: class handlers strcmp() it, so
+	 * passing NULL through would dereference a null pointer.
+	 */
+	if (verb == NULL) {
+		gctl_error(req, "Verb missing");
+		return;
+	}
+	if (mp->ctlreq == NULL)
+		gctl_error(req, "Class takes no requests");
+	else
+		mp->ctlreq(req, mp, verb);
+	g_topology_assert();
+}
+
+
+/*
+ * GEOM_CTL ioctl backend: validate the request header, copy the
+ * arguments in, run the request on the event queue and copy results
+ * back out.
+ */
+static int
+g_ctl_ioctl_ctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
+{
+	struct gctl_req *req;
+	int nerror;
+
+	req = (void *)data;
+	req->nerror = 0;
+	/* It is an error if we cannot return an error text */
+	if (req->lerror < 2)
+		return (EINVAL);
+	if (!useracc(req->error, req->lerror, VM_PROT_WRITE))
+		return (EINVAL);
+
+	/*
+	 * Allocate the error sbuf only after the sanity checks above,
+	 * so their early returns cannot leak it (it used to be created
+	 * first and never freed on those paths).
+	 */
+	req->serror = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+
+	/* Check the version */
+	if (req->version != GCTL_VERSION) {
+		nerror = gctl_error(req,
+		    "kernel and libgeom version mismatch.");
+		/* gctl_free() is not reached on this path. */
+		sbuf_delete(req->serror);
+		return (nerror);
+	}
+
+	/* Get things on board */
+	gctl_copyin(req);
+
+	if (g_debugflags & G_F_CTLDUMP)
+		gctl_dump(req);
+
+	if (!req->nerror) {
+		g_waitfor_event(g_ctl_req, req, M_WAITOK, NULL);
+		gctl_copyout(req);
+	}
+
+	gctl_free(req);
+	return (req->nerror);
+}
+
+/*
+ * Character device ioctl entry point; GEOM_CTL is the only command.
+ */
+static int
+g_ctl_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
+{
+
+	if (cmd == GEOM_CTL)
+		return (g_ctl_ioctl_ctl(dev, cmd, data, fflag, td));
+	return (ENOIOCTL);
+}
diff --git a/sys/geom/geom_ctl.h b/sys/geom/geom_ctl.h
new file mode 100644
index 0000000..fd68bda
--- /dev/null
+++ b/sys/geom/geom_ctl.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 2003 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_GEOM_CTL_H_
+#define _GEOM_GEOM_CTL_H_
+
+#include <sys/ioccom.h>
+
+/*
+ * Version number. Used to check consistency between kernel and libgeom.
+ */
+#define GCTL_VERSION 2
+
+/* One named argument of a control request, shared with libgeom. */
+struct gctl_req_arg {
+	u_int				nlen;	/* length of name incl. NUL */
+	char				*name;
+	off_t				offset;
+	int				flag;	/* GCTL_PARAM_* bits below */
+	int				len;	/* length of value */
+	void				*value;
+	/* kernel only fields */
+	void				*kvalue; /* kernel copy of value */
+};
+
+#define GCTL_PARAM_RD 1 /* Must match VM_PROT_READ */
+#define GCTL_PARAM_WR 2 /* Must match VM_PROT_WRITE */
+#define GCTL_PARAM_RW (GCTL_PARAM_RD | GCTL_PARAM_WR)
+#define GCTL_PARAM_ASCII 4
+
+/* These are used in the kernel only */
+#define GCTL_PARAM_NAMEKERNEL 8
+#define GCTL_PARAM_VALUEKERNEL 16
+#define GCTL_PARAM_CHANGED 32
+
+/* Control request header passed through the GEOM_CTL ioctl. */
+struct gctl_req {
+	u_int				version;	/* must be GCTL_VERSION */
+	u_int				serial;
+	u_int				narg;		/* number of arguments */
+	struct gctl_req_arg 		*arg;
+	u_int				lerror;		/* size of error buffer */
+	char				*error;		/* userland error buffer */
+	struct gctl_req_table 		*reqt;
+
+	/* kernel only fields */
+	int				nerror;		/* numeric error status */
+	struct sbuf			*serror;	/* accumulated error text */
+};
+
+#define GEOM_CTL _IOW('G', GCTL_VERSION, struct gctl_req)
+
+#define PATH_GEOM_CTL "geom.ctl"
+
+
+#endif /* _GEOM_GEOM_CTL_H_ */
diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c
new file mode 100644
index 0000000..2dc713a
--- /dev/null
+++ b/sys/geom/geom_dev.c
@@ -0,0 +1,468 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/disk.h>
+#include <sys/fcntl.h>
+#include <sys/limits.h>
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+
+static d_open_t g_dev_open;
+static d_close_t g_dev_close;
+static d_strategy_t g_dev_strategy;
+static d_ioctl_t g_dev_ioctl;
+
+/* cdevsw exposing every GEOM provider as a disk-like device node. */
+static struct cdevsw g_dev_cdevsw = {
+	.d_open =	g_dev_open,
+	.d_close =	g_dev_close,
+	.d_read =	physread,
+	.d_write =	physwrite,
+	.d_ioctl =	g_dev_ioctl,
+	.d_strategy =	g_dev_strategy,
+	.d_name =	"g_dev",
+	.d_maj =	GEOM_MAJOR,
+	/* D_TRACKCLOSE: we need the final-close callback in g_dev_close(). */
+	.d_flags =	D_DISK | D_TRACKCLOSE,
+};
+
+static g_taste_t g_dev_taste;
+static g_orphan_t g_dev_orphan;
+
+/* The DEV class tastes every provider and gives it a dev_t. */
+static struct g_class g_dev_class	= {
+	.name = "DEV",
+	.taste = g_dev_taste,
+};
+
+/*
+ * Print the names of all DEV geoms on a single line, space separated.
+ */
+void
+g_dev_print(void)
+{
+	struct g_geom *gp;
+	char const *sep = "";
+
+	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
+		printf("%s%s", sep, gp->name);
+		sep = " ";
+	}
+	printf("\n");
+}
+
+/*
+ * XXX: This is disgusting and wrong in every way imaginable: The only reason
+ * XXX: we have a clone function is because of the root-mount hack we currently
+ * XXX: employ. An improvment would be to unregister this cloner once we know
+ * XXX: we no longer need it. Ideally, root-fs would be mounted through DEVFS
+ * XXX: eliminating the need for this hack.
+ */
+static void
+g_dev_clone(void *arg __unused, char *name, int namelen __unused, dev_t *dev)
+{
+	struct g_geom *gp;
+
+	/* Another cloner has already resolved the lookup. */
+	if (*dev != NODEV)
+		return;
+
+	/* Let pending tasting settle so new providers are visible. */
+	g_waitidle();
+
+	/* NOTE(review): walks the geom list without the topology lock;
+	 * see the commented-out lock calls -- confirm this is safe here. */
+	/* g_topology_lock(); */
+	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
+		if (strcmp(gp->name, name))
+			continue;
+		/* softc holds the dev_t created in g_dev_taste(). */
+		*dev = gp->softc;
+		g_trace(G_T_TOPOLOGY, "g_dev_clone(%s) = %p", name, *dev);
+		return;
+	}
+	/* g_topology_unlock(); */
+	return;
+}
+
+/*
+ * Register the dev_clone event handler exactly once.
+ */
+static void
+g_dev_register_cloner(void *foo __unused)
+{
+	static int done;
+
+	/* XXX: why would this happen more than once ?? */
+	if (done)
+		return;
+	done = 1;
+	EVENTHANDLER_REGISTER(dev_clone, g_dev_clone, 0, 1000);
+}
+
+SYSINIT(geomdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,g_dev_register_cloner,NULL);
+
+/*
+ * Taste method: create one DEV geom, one consumer and one dev_t per
+ * provider, making the provider reachable from userland.
+ */
+static struct g_geom *
+g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	static int unit = GEOM_MINOR_PROVIDERS;
+	int error;
+	dev_t dev;
+
+	g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+	/* Decline if we already have a consumer on this provider. */
+	LIST_FOREACH(cp, &pp->consumers, consumers)
+		if (cp->geom->class == mp)
+			return (NULL);
+	gp = g_new_geomf(mp, pp->name);
+	gp->orphan = g_dev_orphan;
+	cp = g_new_consumer(gp);
+	error = g_attach(cp, pp);
+	KASSERT(error == 0,
+	    ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error));
+	/*
+	 * XXX: I'm not 100% sure we can call make_dev(9) without Giant
+	 * yet.  Once we can, we don't need to drop topology here either.
+	 */
+	g_topology_unlock();
+	mtx_lock(&Giant);
+	dev = make_dev(&g_dev_cdevsw, unit2minor(unit++),
+	    UID_ROOT, GID_OPERATOR, 0640, gp->name);
+	if (pp->flags & G_PF_CANDELETE)
+		dev->si_flags |= SI_CANDELETE;
+	mtx_unlock(&Giant);
+	g_topology_lock();
+	dev->si_iosize_max = MAXPHYS;
+	dev->si_stripesize = pp->stripesize;
+	dev->si_stripeoffset = pp->stripeoffset;
+	/* Cross-link geom and dev_t for the cdevsw entry points. */
+	gp->softc = dev;
+	dev->si_drv1 = gp;
+	dev->si_drv2 = cp;
+	return (gp);
+}
+
+/*
+ * cdev open: translate FREAD/FWRITE into GEOM access count increments
+ * on our consumer.
+ */
+static int
+g_dev_open(dev_t dev, int flags, int fmt, struct thread *td)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	int error, r, w, e;
+
+	gp = dev->si_drv1;
+	cp = dev->si_drv2;
+	if (gp == NULL || cp == NULL || gp->softc != dev)
+		return(ENXIO);		/* g_dev_taste() not done yet */
+
+	g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)",
+	    gp->name, flags, fmt, td);
+	r = flags & FREAD ? 1 : 0;
+	w = flags & FWRITE ? 1 : 0;
+#ifdef notyet
+	/* Exclusive access is not wired up yet. */
+	e = flags & O_EXCL ? 1 : 0;
+#else
+	e = 0;
+#endif
+	DROP_GIANT();
+	g_topology_lock();
+	if (dev->si_devsw == NULL)
+		error = ENXIO;		/* We were orphaned */
+	else
+		error = g_access_rel(cp, r, w, e);
+	g_topology_unlock();
+	PICKUP_GIANT();
+	g_waitidle();
+	/* Publish the provider's sector size for physio(9). */
+	if (!error)
+		dev->si_bsize_phys = cp->provider->sectorsize;
+	return(error);
+}
+
+/*
+ * cdev close: drop the access counts gained in g_dev_open() and, on
+ * final close, wait (bounded) for outstanding I/O to drain.
+ */
+static int
+g_dev_close(dev_t dev, int flags, int fmt, struct thread *td)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	int error, r, w, e, i;
+
+	gp = dev->si_drv1;
+	cp = dev->si_drv2;
+	if (gp == NULL || cp == NULL)
+		return(ENXIO);
+	g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)",
+	    gp->name, flags, fmt, td);
+	r = flags & FREAD ? -1 : 0;
+	w = flags & FWRITE ? -1 : 0;
+#ifdef notyet
+	e = flags & O_EXCL ? -1 : 0;
+#else
+	e = 0;
+#endif
+	DROP_GIANT();
+	g_topology_lock();
+	if (dev->si_devsw == NULL)
+		error = ENXIO;		/* We were orphaned */
+	else
+		error = g_access_rel(cp, r, w, e);
+	/*
+	 * On final close (acr == acw == 0), poll every hz/10 ticks for
+	 * up to 10 seconds until in-flight requests have completed
+	 * (nstart == nend).
+	 */
+	for (i = 0; i < 10 * hz;) {
+		if (cp->acr != 0 || cp->acw != 0)
+			break;
+		if (cp->nstart == cp->nend)
+			break;
+		tsleep(&i, PRIBIO, "gdevwclose", hz / 10);
+		i += hz / 10;
+	}
+	if (cp->acr == 0 && cp->acw == 0 && cp->nstart != cp->nend) {
+		printf("WARNING: Final close of geom_dev(%s) %s %s",
+		    gp->name,
+		    "still has outstanding I/O after 10 seconds.",
+		    "Completing close anyway, panic may happen later.");
+	}
+	g_topology_unlock();
+	PICKUP_GIANT();
+	g_waitidle();
+	return (error);
+}
+
+/*
+ * XXX: Until we have unmessed the ioctl situation, there is a race against
+ * XXX: a concurrent orphanization. We cannot close it by holding topology
+ * XXX: since that would prevent us from doing our job, and stalling events
+ * XXX: will break (actually: stall) the BSD disklabel hacks.
+ */
+/*
+ * cdev ioctl: answer the common disk queries from provider metadata and
+ * forward everything else down the stack as a "GEOM::ioctl" attribute.
+ */
+static int
+g_dev_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_kerneldump kd;
+	int i, error;
+	u_int u;
+	struct g_ioctl *gio;
+
+	gp = dev->si_drv1;
+	cp = dev->si_drv2;
+	/* Initialize once; the duplicate assignment was redundant. */
+	gio = NULL;
+
+	error = 0;
+	KASSERT(cp->acr || cp->acw,
+	    ("Consumer with zero access count in g_dev_ioctl"));
+	DROP_GIANT();
+
+	i = IOCPARM_LEN(cmd);
+	switch (cmd) {
+	case DIOCGSECTORSIZE:
+		*(u_int *)data = cp->provider->sectorsize;
+		if (*(u_int *)data == 0)
+			error = ENOENT;
+		break;
+	case DIOCGMEDIASIZE:
+		*(off_t *)data = cp->provider->mediasize;
+		if (*(off_t *)data == 0)
+			error = ENOENT;
+		break;
+	case DIOCGFWSECTORS:
+		error = g_io_getattr("GEOM::fwsectors", cp, &i, data);
+		if (error == 0 && *(u_int *)data == 0)
+			error = ENOENT;
+		break;
+	case DIOCGFWHEADS:
+		error = g_io_getattr("GEOM::fwheads", cp, &i, data);
+		if (error == 0 && *(u_int *)data == 0)
+			error = ENOENT;
+		break;
+	case DIOCGFRONTSTUFF:
+		error = g_io_getattr("GEOM::frontstuff", cp, &i, data);
+		break;
+	case DIOCSKERNELDUMP:
+		/* Zero argument means "clear the dump device". */
+		u = *((u_int *)data);
+		if (!u) {
+			set_dumper(NULL);
+			error = 0;
+			break;
+		}
+		kd.offset = 0;
+		kd.length = OFF_MAX;
+		i = sizeof kd;
+		error = g_io_getattr("GEOM::kerneldump", cp, &i, &kd);
+		if (!error)
+			dev->si_flags |= SI_DUMPDEV;
+		break;
+
+	default:
+		gio = g_malloc(sizeof *gio, M_WAITOK | M_ZERO);
+		gio->cmd = cmd;
+		gio->data = data;
+		gio->fflag = fflag;
+		gio->td = td;
+		i = sizeof *gio;
+		/*
+		 * We always issue ioctls as getattr since the direction of data
+		 * movement in ioctl is no indication of the ioctl being a "set"
+		 * or "get" type ioctl or if such simplistic terms even apply
+		 */
+		error = g_io_getattr("GEOM::ioctl", cp, &i, gio);
+		break;
+	}
+
+	PICKUP_GIANT();
+	/* A driver asked us to call its ioctl function directly. */
+	if (error == EDIRIOCTL) {
+		KASSERT(gio != NULL, ("NULL gio but EDIRIOCTL"));
+		KASSERT(gio->func != NULL, ("NULL function but EDIRIOCTL"));
+		error = (gio->func)(gio->dev, cmd, data, fflag, td);
+	}
+	g_waitidle();
+	if (gio != NULL && (error == EOPNOTSUPP || error == ENOIOCTL)) {
+		/* Optionally log unhandled ioctls for debugging. */
+		if (g_debugflags & G_T_TOPOLOGY) {
+			i = IOCGROUP(cmd);
+			printf("IOCTL(0x%lx) \"%s\"", cmd, gp->name);
+			if (i > ' ' && i <= '~')
+				printf(" '%c'", (int)IOCGROUP(cmd));
+			else
+				printf(" 0x%lx", IOCGROUP(cmd));
+			printf("/%ld ", cmd & 0xff);
+			if (cmd & IOC_IN)
+				printf("I");
+			if (cmd & IOC_OUT)
+				printf("O");
+			printf("(%ld) = ENOIOCTL\n", IOCPARM_LEN(cmd));
+		}
+		error = ENOTTY;
+	}
+	if (gio != NULL)
+		g_free(gio);
+	return (error);
+}
+
+/*
+ * Completion routine for the cloned bio: propagate error and residual
+ * back to the original request and biodone() it (under Giant).
+ */
+static void
+g_dev_done(struct bio *bp2)
+{
+	struct bio *bp;
+
+	bp = bp2->bio_parent;
+	bp->bio_error = bp2->bio_error;
+	if (bp->bio_error != 0) {
+		g_trace(G_T_BIO, "g_dev_done(%p) had error %d",
+		    bp2, bp->bio_error);
+		bp->bio_flags |= BIO_ERROR;
+	} else {
+		g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd",
+		    bp2, bp, bp->bio_resid, (intmax_t)bp2->bio_completed);
+	}
+	/* Residual is what the clone did not manage to complete. */
+	bp->bio_resid = bp->bio_bcount - bp2->bio_completed;
+	g_destroy_bio(bp2);
+	/* biodone(9) still requires Giant at this point. */
+	mtx_lock(&Giant);
+	biodone(bp);
+	mtx_unlock(&Giant);
+}
+
+/*
+ * cdev strategy: clone the bio, convert the block number to a byte
+ * offset and hand it to our consumer.
+ */
+static void
+g_dev_strategy(struct bio *bp)
+{
+	struct g_consumer *cp;
+	struct bio *bp2;
+	dev_t dev;
+
+	KASSERT(bp->bio_cmd == BIO_READ ||
+	    bp->bio_cmd == BIO_WRITE ||
+	    bp->bio_cmd == BIO_DELETE,
+	    ("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd));
+	dev = bp->bio_dev;
+	cp = dev->si_drv2;
+	KASSERT(cp->acr || cp->acw,
+	    ("Consumer with zero access count in g_dev_strategy"));
+
+	bp2 = g_clone_bio(bp);
+	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
+	/* DEV_BSHIFT: bio_blkno is in DEV_BSIZE units, offset in bytes. */
+	bp2->bio_offset = (off_t)bp->bio_blkno << DEV_BSHIFT;
+	KASSERT(bp2->bio_offset >= 0,
+	    ("Negative bio_offset (%jd) on bio %p",
+	    (intmax_t)bp2->bio_offset, bp));
+	bp2->bio_length = (off_t)bp->bio_bcount;
+	bp2->bio_done = g_dev_done;
+	g_trace(G_T_BIO,
+	    "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d",
+	    bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length,
+	    bp2->bio_data, bp2->bio_cmd);
+	g_io_request(bp2, cp);
+	KASSERT(cp->acr || cp->acw,
+	    ("g_dev_strategy raced with g_dev_close and lost"));
+
+}
+
+/*
+ * g_dev_orphan()
+ *
+ * Called from below when the provider orphaned us.
+ * - Clear any dump settings.
+ * - Destroy the dev_t to prevent any more request from coming in. The
+ * provider is already marked with an error, so anything which comes in
+ * in the interrim will be returned immediately.
+ * - Wait for any outstanding I/O to finish.
+ * - Set our access counts to zero, whatever they were.
+ * - Detach and self-destruct.
+ */
+
+static void
+g_dev_orphan(struct g_consumer *cp)
+{
+	struct g_geom *gp;
+	dev_t dev;
+
+	g_topology_assert();
+	gp = cp->geom;
+	dev = gp->softc;
+	g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, gp->name);
+
+	/* Reset any dump-area set on this device */
+	if (dev->si_flags & SI_DUMPDEV)
+		set_dumper(NULL);
+
+	/* Destroy the dev_t so we get no more requests */
+	destroy_dev(dev);
+
+	/* Wait for the cows to come home: poll until in-flight I/O drains. */
+	while (cp->nstart != cp->nend)
+		msleep(&dev, NULL, PRIBIO, "gdevorphan", hz / 10);
+
+	/* Force the access counts to zero, whatever they were. */
+	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
+		g_access_rel(cp, -cp->acr, -cp->acw, -cp->ace);
+
+	/* Detach and self-destruct. */
+	g_detach(cp);
+	g_destroy_consumer(cp);
+	g_destroy_geom(gp);
+}
+
+DECLARE_GEOM_CLASS(g_dev_class, g_dev);
diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c
new file mode 100644
index 0000000..9b5f79e
--- /dev/null
+++ b/sys/geom/geom_disk.c
@@ -0,0 +1,419 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_geom.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/bio.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/sysctl.h>
+#include <sys/devicestat.h>
+#include <machine/md_var.h>
+
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <geom/geom.h>
+#include <geom/geom_disk.h>
+#include <geom/geom_int.h>
+
+static struct mtx g_disk_done_mtx;
+
+static g_access_t g_disk_access;
+static g_init_t g_disk_init;
+static g_fini_t g_disk_fini;
+
+/* The DISK class: roots of the GEOM tree, fed by disk drivers. */
+struct g_class g_disk_class = {
+	.name = "DISK",
+	.init = g_disk_init,
+	.fini = g_disk_fini,
+};
+
+/* Class init: set up the mutex serializing g_disk_done(). */
+static void
+g_disk_init(struct g_class *mp __unused)
+{
+
+	mtx_init(&g_disk_done_mtx, "g_disk_done", MTX_DEF, 0);
+}
+
+/* Class fini: tear down the completion mutex. */
+static void
+g_disk_fini(struct g_class *mp __unused)
+{
+
+	mtx_destroy(&g_disk_done_mtx);
+}
+
+DECLARE_GEOM_CLASS(g_disk_class, g_disk);
+
+/*
+ * Take Giant around driver entry points unless the driver has
+ * declared itself Giant-free with DISKFLAG_NOGIANT.
+ */
+static void __inline
+g_disk_lock_giant(struct disk *dp)
+{
+	if ((dp->d_flags & DISKFLAG_NOGIANT) == 0)
+		mtx_lock(&Giant);
+}
+
+/*
+ * Release Giant taken by g_disk_lock_giant(); no-op for
+ * DISKFLAG_NOGIANT drivers.
+ */
+static void __inline
+g_disk_unlock_giant(struct disk *dp)
+{
+	if ((dp->d_flags & DISKFLAG_NOGIANT) == 0)
+		mtx_unlock(&Giant);
+}
+
+/*
+ * Access method: call the driver's open on the first reference and its
+ * close on the last; refresh media parameters on open.
+ */
+static int
+g_disk_access(struct g_provider *pp, int r, int w, int e)
+{
+	struct disk *dp;
+	int error;
+
+	g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)",
+	    pp->name, r, w, e);
+	g_topology_assert();
+	/* r/w/e arrive as deltas; make them the resulting counts. */
+	r += pp->acr;
+	w += pp->acw;
+	e += pp->ace;
+	dp = pp->geom->softc;
+	if (dp == NULL)
+		return (ENXIO);
+	error = 0;
+	/* Transition closed -> open. */
+	if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
+		if (dp->d_open != NULL) {
+			g_disk_lock_giant(dp);
+			error = dp->d_open(dp);
+			if (error != 0)
+				printf("Opened disk %s -> %d\n",
+				    pp->name, error);
+			g_disk_unlock_giant(dp);
+		}
+		pp->mediasize = dp->d_mediasize;
+		pp->sectorsize = dp->d_sectorsize;
+		dp->d_flags |= DISKFLAG_OPEN;
+		if (dp->d_maxsize == 0) {
+			printf("WARNING: Disk drive %s%d has no d_maxsize\n",
+			    dp->d_name, dp->d_unit);
+			dp->d_maxsize = DFLTPHYS;
+		}
+	/* Transition open -> closed. */
+	} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
+		if (dp->d_close != NULL) {
+			g_disk_lock_giant(dp);
+			error = dp->d_close(dp);
+			if (error != 0)
+				printf("Closed disk %s -> %d\n",
+				    pp->name, error);
+			g_disk_unlock_giant(dp);
+		}
+		dp->d_flags &= ~DISKFLAG_OPEN;
+	}
+	return (error);
+}
+
+/*
+ * Handle the "GEOM::kerneldump" attribute: configure the driver's dump
+ * routine as the system dump target for the requested extent.
+ */
+static void
+g_disk_kerneldump(struct bio *bp, struct disk *dp)
+{
+	int error;
+	struct g_kerneldump *gkd;
+	struct dumperinfo di;
+	struct g_geom *gp;
+
+	gkd = (struct g_kerneldump*)bp->bio_data;
+	gp = bp->bio_to->geom;
+	/* Fixed typo in the trace message ("kernedump"). */
+	g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)",
+	    gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
+	di.dumper = dp->d_dump;
+	di.priv = dp;
+	di.blocksize = dp->d_sectorsize;
+	di.mediaoffset = gkd->offset;
+	di.mediasize = gkd->length;
+	error = set_dumper(&di);
+	g_io_deliver(bp, error);
+}
+
+/*
+ * Driver completion callback: fold a finished chunk back into the
+ * parent bio and deliver it when all chunks are in.
+ */
+static void
+g_disk_done(struct bio *bp)
+{
+	struct bio *bp2;
+	struct disk *dp;
+
+	/* See "notes" for why we need a mutex here */
+	/* XXX: will witness accept a mix of Giant/unGiant drivers here ? */
+	mtx_lock(&g_disk_done_mtx);
+	bp->bio_completed = bp->bio_length - bp->bio_resid;
+
+	bp2 = bp->bio_parent;
+	dp = bp2->bio_to->geom->softc;
+	/* Keep the first error seen across all chunks. */
+	if (bp2->bio_error == 0)
+		bp2->bio_error = bp->bio_error;
+	bp2->bio_completed += bp->bio_completed;
+	g_destroy_bio(bp);
+	bp2->bio_inbed++;
+	/* Last chunk: close devstat accounting and deliver. */
+	if (bp2->bio_children == bp2->bio_inbed) {
+		bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed;
+		devstat_end_transaction_bio(dp->d_devstat, bp2);
+		g_io_deliver(bp2, bp2->bio_error);
+	}
+	mtx_unlock(&g_disk_done_mtx);
+}
+
+/*
+ * Start method: split each request into d_maxsize-sized chunks and
+ * pass them to the driver's strategy routine; answer GETATTR queries.
+ */
+static void
+g_disk_start(struct bio *bp)
+{
+	struct bio *bp2, *bp3;
+	struct disk *dp;
+	struct g_ioctl *gio;
+	int error;
+	off_t off;
+
+	dp = bp->bio_to->geom->softc;
+	if (dp == NULL) {
+		/*
+		 * disk_destroy() clears the softc while the geom is
+		 * still withering; without this return we would
+		 * dereference the NULL pointer below.
+		 */
+		g_io_deliver(bp, ENXIO);
+		return;
+	}
+	error = EJUSTRETURN;
+	switch(bp->bio_cmd) {
+	case BIO_DELETE:
+		if (!(dp->d_flags & DISKFLAG_CANDELETE)) {
+			/* Deleting is advisory; succeed silently. */
+			error = 0;
+			break;
+		}
+		/* fall-through */
+	case BIO_READ:
+	case BIO_WRITE:
+		off = 0;
+		bp3 = NULL;
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		devstat_start_transaction_bio(dp->d_devstat, bp);
+		do {
+			bp2->bio_offset += off;
+			bp2->bio_length -= off;
+			bp2->bio_data += off;
+			if (bp2->bio_length > dp->d_maxsize) {
+				/*
+				 * XXX: If we have a stripesize we should really
+				 * use it here.
+				 */
+				bp2->bio_length = dp->d_maxsize;
+				off += dp->d_maxsize;
+				/*
+				 * To avoid a race, we need to grab the next bio
+				 * before we schedule this one.  See "notes".
+				 */
+				bp3 = g_clone_bio(bp);
+				if (bp3 == NULL)
+					bp->bio_error = ENOMEM;
+			}
+			bp2->bio_done = g_disk_done;
+			bp2->bio_blkno = bp2->bio_offset >> DEV_BSHIFT;
+			bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
+			bp2->bio_bcount = bp2->bio_length;
+			bp2->bio_disk = dp;
+			g_disk_lock_giant(dp);
+			dp->d_strategy(bp2);
+			g_disk_unlock_giant(dp);
+			bp2 = bp3;
+			bp3 = NULL;
+		} while (bp2 != NULL);
+		break;
+	case BIO_GETATTR:
+		if (g_handleattr_int(bp, "GEOM::fwsectors", dp->d_fwsectors))
+			break;
+		else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads))
+			break;
+		else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0))
+			break;
+		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
+			g_disk_kerneldump(bp, dp);
+		else if ((g_debugflags & G_F_DISKIOCTL) &&
+		    (dp->d_ioctl != NULL) &&
+		    !strcmp(bp->bio_attribute, "GEOM::ioctl") &&
+		    bp->bio_length == sizeof *gio) {
+			gio = (struct g_ioctl *)bp->bio_data;
+			gio->dev = dp;
+			gio->func = (d_ioctl_t *)(dp->d_ioctl);
+			/* Ask g_dev_ioctl() to call the function directly. */
+			error = EDIRIOCTL;
+		} else
+			error = ENOIOCTL;
+		break;
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+	/* EJUSTRETURN means the bio was already delivered or handed off. */
+	if (error != EJUSTRETURN)
+		g_io_deliver(bp, error);
+	return;
+}
+
+/*
+ * dumpconf method: emit firmware geometry, both in terse (indent ==
+ * NULL) and XML form.
+ */
+static void
+g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
+{
+	struct disk *dp;
+
+	dp = gp->softc;
+	/* disk_destroy() clears the softc; guard against NULL deref. */
+	if (dp == NULL)
+		return;
+	if (indent == NULL) {
+		sbuf_printf(sb, " hd %u", dp->d_fwheads);
+		sbuf_printf(sb, " sc %u", dp->d_fwsectors);
+		return;
+	}
+	if (pp != NULL) {
+		sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n",
+		    indent, dp->d_fwheads);
+		sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n",
+		    indent, dp->d_fwsectors);
+	}
+}
+
+/*
+ * Event-queue half of disk_create(): build the geom and provider with
+ * the topology lock held.
+ */
+static void
+g_disk_create(void *arg, int flag)
+{
+	struct g_geom *gp;
+	struct g_provider *pp;
+	struct disk *dp;
+
+	/* The event was cancelled (disk destroyed before we ran). */
+	if (flag == EV_CANCEL)
+		return;
+	g_topology_assert();
+	dp = arg;
+	gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit);
+	gp->start = g_disk_start;
+	gp->access = g_disk_access;
+	gp->softc = dp;
+	gp->dumpconf = g_disk_dumpconf;
+	pp = g_new_providerf(gp, "%s", gp->name);
+	pp->mediasize = dp->d_mediasize;
+	pp->sectorsize = dp->d_sectorsize;
+	if (dp->d_flags & DISKFLAG_CANDELETE)
+		pp->flags |= G_PF_CANDELETE;
+	pp->stripeoffset = dp->d_stripeoffset;
+	pp->stripesize = dp->d_stripesize;
+	if (bootverbose)
+		printf("GEOM: new disk %s\n", gp->name);
+	dp->d_geom = gp;
+	/* Clear the error so the provider starts accepting I/O. */
+	g_error_provider(pp, 0);
+}
+
+
+
+/*
+ * Driver-facing entry point: register a disk with GEOM.  The geom and
+ * provider are created asynchronously on the event queue.
+ */
+void
+disk_create(int unit, struct disk *dp, int flags, void *unused __unused, void * unused2 __unused)
+{
+
+	dp->d_unit = unit;
+	dp->d_flags = flags;
+	KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy"));
+	KASSERT(dp->d_name != NULL, ("disk_create need d_name"));
+	KASSERT(*dp->d_name != 0, ("disk_create need d_name"));
+	KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long"));
+	dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit,
+	    dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
+	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
+	dp->d_geom = NULL;
+	/* dp doubles as the cancellation tag used by disk_destroy(). */
+	g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL);
+}
+
+/*
+ * XXX: There is a race if disk_destroy() is called while the g_disk_create()
+ * XXX: event is running. I belive the current result is that disk_destroy()
+ * XXX: actually doesn't do anything. Considering that the driver owns the
+ * XXX: struct disk and is likely to free it in a few moments, this can
+ * XXX: hardly be said to be optimal. To what extent we can sleep in
+ * XXX: disk_create() and disk_destroy() is currently undefined (but generally
+ * XXX: undesirable) so any solution seems to involve an intrusive decision.
+ */
+
+/*
+ * Event-queue half of disk_destroy(): wither the geom under the
+ * topology lock.
+ */
+static void
+disk_destroy_event(void *ptr, int flag)
+{
+	struct g_geom *gp;
+
+	g_topology_assert();
+	gp = ptr;
+	g_wither_geom(gp, ENXIO);
+}
+
+/*
+ * Driver-facing entry point: unregister a disk.  Cancels a pending
+ * create event (tagged with dp) and withers the geom asynchronously.
+ */
+void
+disk_destroy(struct disk *dp)
+{
+	struct g_geom *gp;
+
+	g_cancel_event(dp);
+	gp = dp->d_geom;
+	if (gp == NULL)
+		return;
+	/* Detach the softc first; g_disk_start() checks for NULL. */
+	gp->softc = NULL;
+	devstat_remove_entry(dp->d_devstat);
+	g_post_event(disk_destroy_event, gp, M_WAITOK, NULL, NULL);
+}
+
+/*
+ * Event-queue worker for the kern.disks sysctl: collect all DISK geom
+ * names into the supplied sbuf, space separated.
+ */
+static void
+g_kern_disks(void *p, int flag __unused)
+{
+	struct g_geom *gp;
+	struct sbuf *sb;
+	const char *sep;
+
+	g_topology_assert();
+	sb = p;
+	sep = "";
+	LIST_FOREACH(gp, &g_disk_class.geom, geom) {
+		sbuf_printf(sb, "%s%s", sep, gp->name);
+		sep = " ";
+	}
+	sbuf_finish(sb);
+}
+
+/*
+ * kern.disks sysctl handler: return the names of all disks.
+ */
+static int
+sysctl_disks(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	struct sbuf *sb;
+
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	/* sbuf_new() can fail; the old code would have crashed. */
+	if (sb == NULL)
+		return (ENOMEM);
+	g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL);
+	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+	return (error);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NOLOCK, 0, 0,
+ sysctl_disks, "A", "names of available disks");
+
diff --git a/sys/geom/geom_disk.h b/sys/geom/geom_disk.h
new file mode 100644
index 0000000..712e871
--- /dev/null
+++ b/sys/geom/geom_disk.h
@@ -0,0 +1,97 @@
+/*-
+ * Copyright (c) 2003 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_GEOM_DISK_H_
+#define _GEOM_GEOM_DISK_H_
+
+#ifdef _KERNEL
+
+#include <sys/queue.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+
+typedef int disk_open_t(struct disk *);
+typedef int disk_close_t(struct disk *);
+typedef void disk_strategy_t(struct bio *bp);
+typedef int disk_ioctl_t(struct disk *, u_long cmd, void *data,
+ int fflag, struct thread *td);
+ /* NB: disk_ioctl_t SHALL be cast'able to d_ioctl_t */
+
+struct g_geom;
+struct devstat;
+
+/*
+ * The interface between a disk driver and geom_disk.c.  The driver
+ * owns the structure; geom_disk.c uses the first section privately.
+ */
+struct disk {
+	/* Fields which are private to geom_disk */
+	struct g_geom		*d_geom;	/* set by g_disk_create() */
+	struct devstat		*d_devstat;	/* set by disk_create() */
+
+	/* Shared fields */
+	u_int			d_flags;	/* DISKFLAG_* below */
+	const char		*d_name;	/* driver name, e.g. "ad" */
+	u_int			d_unit;
+	struct bio_queue_head	*d_queue;
+	struct mtx		*d_lock;
+
+	/* Disk methods */
+	disk_open_t		*d_open;	/* optional */
+	disk_close_t		*d_close;	/* optional */
+	disk_strategy_t		*d_strategy;	/* mandatory */
+	disk_ioctl_t		*d_ioctl;	/* optional */
+	dumper_t		*d_dump;	/* optional, for crash dumps */
+
+	/* Info fields from driver to geom_disk.c. Valid when open */
+	u_int			d_sectorsize;
+	off_t			d_mediasize;	/* in bytes */
+	u_int			d_fwsectors;	/* firmware geometry */
+	u_int			d_fwheads;
+	u_int			d_maxsize;	/* max bytes per request */
+	u_int			d_stripeoffset;
+	u_int			d_stripesize;
+
+	/* Fields private to the driver */
+	void			*d_drv1;
+};
+
+#define DISKFLAG_NOGIANT 0x1
+#define DISKFLAG_OPEN 0x2
+#define DISKFLAG_CANDELETE 0x4
+
+void disk_create(int unit, struct disk *disk, int flags, void *unused, void *unused2);
+void disk_destroy(struct disk *disk);
+
+
+#endif /* _KERNEL */
+#endif /* _GEOM_GEOM_DISK_H_ */
diff --git a/sys/geom/geom_dump.c b/sys/geom/geom_dump.c
new file mode 100644
index 0000000..869d7c4
--- /dev/null
+++ b/sys/geom/geom_dump.c
@@ -0,0 +1,306 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <machine/stdarg.h>
+
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+
+
+/*
+ * Emitters for the graphviz "dot" dump of the GEOM mesh: one node per
+ * geom/provider/consumer, with edges along the attachment points.
+ * Node names are "z%p" (the object's address), so they are unique.
+ */
+static void
+g_confdot_consumer(struct sbuf *sb, struct g_consumer *cp)
+{
+
+	sbuf_printf(sb, "z%p [label=\"r%dw%de%d\"];\n",
+	    cp, cp->acr, cp->acw, cp->ace);
+	if (cp->provider)
+		sbuf_printf(sb, "z%p -> z%p;\n", cp, cp->provider);
+}
+
+static void
+g_confdot_provider(struct sbuf *sb, struct g_provider *pp)
+{
+
+	sbuf_printf(sb, "z%p [shape=hexagon,label=\"%s\\nr%dw%de%d\\nerr#%d\"];\n",
+	    pp, pp->name, pp->acr, pp->acw, pp->ace, pp->error);
+}
+
+/* Emit a geom node plus its consumers and providers and the linking edges. */
+static void
+g_confdot_geom(struct sbuf *sb, struct g_geom *gp)
+{
+	struct g_consumer *cp;
+	struct g_provider *pp;
+
+	sbuf_printf(sb, "z%p [shape=box,label=\"%s\\n%s\\nr#%d\"];\n",
+	    gp, gp->class->name, gp->name, gp->rank);
+	LIST_FOREACH(cp, &gp->consumer, consumer) {
+		g_confdot_consumer(sb, cp);
+		sbuf_printf(sb, "z%p -> z%p;\n", gp, cp);
+	}
+
+	LIST_FOREACH(pp, &gp->provider, provider) {
+		g_confdot_provider(sb, pp);
+		sbuf_printf(sb, "z%p -> z%p;\n", pp, gp);
+	}
+}
+
+static void
+g_confdot_class(struct sbuf *sb, struct g_class *mp)
+{
+	struct g_geom *gp;
+
+	LIST_FOREACH(gp, &mp->geom, geom)
+		g_confdot_geom(sb, gp);
+}
+
+/*
+ * Event handler: render the entire mesh as a dot digraph into the sbuf
+ * passed as "p".  Runs under the topology lock (asserted) and finishes
+ * the sbuf before returning.
+ */
+void
+g_confdot(void *p, int flag )
+{
+	struct g_class *mp;
+	struct sbuf *sb;
+
+	KASSERT(flag != EV_CANCEL, ("g_confdot was cancelled"));
+	sb = p;
+	g_topology_assert();
+	sbuf_printf(sb, "digraph geom {\n");
+	LIST_FOREACH(mp, &g_classes, class)
+		g_confdot_class(sb, mp);
+	sbuf_printf(sb, "};\n");
+	sbuf_finish(sb);
+}
+
+/*
+ * Emit one line per provider of gp ("level class provider mediasize
+ * sectorsize" plus optional class-specific data), then recurse into
+ * every geom consuming from those providers with level + 1.
+ */
+static void
+g_conftxt_geom(struct sbuf *sb, struct g_geom *gp, int level)
+{
+	struct g_provider *pp;
+	struct g_consumer *cp;
+
+	LIST_FOREACH(pp, &gp->provider, provider) {
+		sbuf_printf(sb, "%d %s %s %ju %u", level, gp->class->name,
+		    pp->name, (uintmax_t)pp->mediasize, pp->sectorsize);
+		if (gp->dumpconf != NULL)
+			gp->dumpconf(sb, NULL, gp, NULL, pp);
+		sbuf_printf(sb, "\n");
+		LIST_FOREACH(cp, &pp->consumers, consumers)
+			g_conftxt_geom(sb, cp->geom, level + 1);
+	}
+}
+
+static void
+g_conftxt_class(struct sbuf *sb, struct g_class *mp)
+{
+	struct g_geom *gp;
+
+	LIST_FOREACH(gp, &mp->geom, geom)
+		g_conftxt_geom(sb, gp, 0);
+}
+
+/*
+ * Event handler: dump a plain-text topology tree rooted at the "DISK"
+ * class only.  LIST_FOREACH leaves mp NULL if no match is found, which
+ * the guard below relies on.
+ */
+void
+g_conftxt(void *p, int flag)
+{
+	struct g_class *mp;
+	struct sbuf *sb;
+
+	KASSERT(flag != EV_CANCEL, ("g_conftxt was cancelled"));
+	sb = p;
+	g_topology_assert();
+	LIST_FOREACH(mp, &g_classes, class)
+		if (!strcmp(mp->name, "DISK"))
+			break;
+	if (mp != NULL)
+		g_conftxt_class(sb, mp);
+	sbuf_finish(sb);
+}
+
+
+/* Emit one <consumer> XML element, including class-specific config. */
+static void
+g_conf_consumer(struct sbuf *sb, struct g_consumer *cp)
+{
+
+	sbuf_printf(sb, "\t<consumer id=\"%p\">\n", cp);
+	sbuf_printf(sb, "\t <geom ref=\"%p\"/>\n", cp->geom);
+	if (cp->provider != NULL)
+		sbuf_printf(sb, "\t <provider ref=\"%p\"/>\n", cp->provider);
+	sbuf_printf(sb, "\t <mode>r%dw%de%d</mode>\n",
+	    cp->acr, cp->acw, cp->ace);
+	if (cp->geom->dumpconf != NULL) {
+		sbuf_printf(sb, "\t <config>\n");
+		cp->geom->dumpconf(sb, "\t ", cp->geom, cp, NULL);
+		sbuf_printf(sb, "\t </config>\n");
+	}
+	sbuf_printf(sb, "\t</consumer>\n");
+}
+
+/* Emit one <provider> XML element, including class-specific config. */
+static void
+g_conf_provider(struct sbuf *sb, struct g_provider *pp)
+{
+
+	sbuf_printf(sb, "\t<provider id=\"%p\">\n", pp);
+	sbuf_printf(sb, "\t <geom ref=\"%p\"/>\n", pp->geom);
+	sbuf_printf(sb, "\t <mode>r%dw%de%d</mode>\n",
+	    pp->acr, pp->acw, pp->ace);
+	sbuf_printf(sb, "\t <name>%s</name>\n", pp->name);
+	sbuf_printf(sb, "\t <mediasize>%jd</mediasize>\n",
+	    (intmax_t)pp->mediasize);
+	sbuf_printf(sb, "\t <sectorsize>%u</sectorsize>\n", pp->sectorsize);
+	if (pp->geom->dumpconf != NULL) {
+		sbuf_printf(sb, "\t <config>\n");
+		pp->geom->dumpconf(sb, "\t ", pp->geom, NULL, pp);
+		sbuf_printf(sb, "\t </config>\n");
+	}
+	sbuf_printf(sb, "\t</provider>\n");
+}
+
+
+/*
+ * Emit one <geom> element.  A non-NULL pp/cp restricts output to that
+ * single provider/consumer; NULL means "all of them".
+ */
+static void
+g_conf_geom(struct sbuf *sb, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp)
+{
+	struct g_consumer *cp2;
+	struct g_provider *pp2;
+
+	sbuf_printf(sb, " <geom id=\"%p\">\n", gp);
+	sbuf_printf(sb, " <class ref=\"%p\"/>\n", gp->class);
+	sbuf_printf(sb, " <name>%s</name>\n", gp->name);
+	sbuf_printf(sb, " <rank>%d</rank>\n", gp->rank);
+	if (gp->dumpconf != NULL) {
+		sbuf_printf(sb, " <config>\n");
+		gp->dumpconf(sb, "\t", gp, NULL, NULL);
+		sbuf_printf(sb, " </config>\n");
+	}
+	LIST_FOREACH(cp2, &gp->consumer, consumer) {
+		if (cp != NULL && cp != cp2)
+			continue;
+		g_conf_consumer(sb, cp2);
+	}
+
+	LIST_FOREACH(pp2, &gp->provider, provider) {
+		if (pp != NULL && pp != pp2)
+			continue;
+		g_conf_provider(sb, pp2);
+	}
+	sbuf_printf(sb, " </geom>\n");
+}
+
+/* Emit one <class> element; gp/pp/cp act as optional filters as above. */
+static void
+g_conf_class(struct sbuf *sb, struct g_class *mp, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp)
+{
+	struct g_geom *gp2;
+
+	sbuf_printf(sb, " <class id=\"%p\">\n", mp);
+	sbuf_printf(sb, " <name>%s</name>\n", mp->name);
+	LIST_FOREACH(gp2, &mp->geom, geom) {
+		if (gp != NULL && gp != gp2)
+			continue;
+		g_conf_geom(sb, gp2, pp, cp);
+	}
+	sbuf_printf(sb, " </class>\n");
+}
+
+/*
+ * Dump the mesh as XML into sb, optionally restricted to a single
+ * class/geom/provider/consumer (any NULL argument means "all").
+ * Must be called with the topology lock held; finishes the sbuf.
+ */
+void
+g_conf_specific(struct sbuf *sb, struct g_class *mp, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp)
+{
+	struct g_class *mp2;
+
+	g_topology_assert();
+	sbuf_printf(sb, "<mesh>\n");
+	LIST_FOREACH(mp2, &g_classes, class) {
+		if (mp != NULL && mp != mp2)
+			continue;
+		g_conf_class(sb, mp2, gp, pp, cp);
+	}
+	sbuf_printf(sb, "</mesh>\n");
+	sbuf_finish(sb);
+}
+
+/* Event handler: dump the complete, unfiltered mesh as XML. */
+void
+g_confxml(void *p, int flag)
+{
+
+	KASSERT(flag != EV_CANCEL, ("g_confxml was cancelled"));
+	g_topology_assert();
+	g_conf_specific(p, NULL, NULL, NULL, NULL);
+}
+
+/*
+ * Debug tracing: print the formatted message (with a trailing newline)
+ * iff any of the G_T_* bits in "level" are set in g_debugflags.  Note
+ * that despite the name, "level" is a bitmask, not a threshold.  Also
+ * runs g_sanity() on every call, even when tracing is disabled.
+ */
+void
+g_trace(int level, const char *fmt, ...)
+{
+	va_list ap;
+
+	g_sanity(NULL);
+	if (!(g_debugflags & level))
+		return;
+	va_start(ap, fmt);
+	vprintf(fmt, ap);
+	va_end(ap);
+	printf("\n");
+}
+
+/*
+ * Classic hex+ASCII dump to the console: 16 bytes per row, offset in
+ * the left column, printable ASCII (or '.') between '|' on the right.
+ */
+void
+g_hexdump(void *ptr, int length)
+{
+	int i, j, k;
+	unsigned char *cp;
+
+	cp = ptr;
+	for (i = 0; i < length; i+= 16) {
+		printf("%04x ", i);
+		for (j = 0; j < 16; j++) {
+			k = i + j;
+			if (k < length)
+				printf(" %02x", cp[k]);
+			else
+				printf(" ");	/* pad a short final row */
+		}
+		printf(" |");
+		for (j = 0; j < 16; j++) {
+			k = i + j;
+			if (k >= length)
+				printf(" ");
+			else if (cp[k] >= ' ' && cp[k] <= '~')
+				printf("%c", cp[k]);
+			else
+				printf(".");	/* non-printable byte */
+		}
+		printf("|\n");
+	}
+}
+
diff --git a/sys/geom/geom_event.c b/sys/geom/geom_event.c
new file mode 100644
index 0000000..f180c43
--- /dev/null
+++ b/sys/geom/geom_event.c
@@ -0,0 +1,324 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * XXX: How do we in general know that objects referenced in events
+ * have not been destroyed before we get around to handle the event ?
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <machine/stdarg.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+
+TAILQ_HEAD(event_tailq_head, g_event);
+
+/*
+ * Queued events, a count of them (for g_waitidle), and the "doorstep"
+ * of providers awaiting orphan processing.  g_eventlock protects both
+ * queues; g_eventstall lets g_stall_events() hold off the event thread.
+ */
+static struct event_tailq_head g_events = TAILQ_HEAD_INITIALIZER(g_events);
+static u_int g_pending_events;
+static TAILQ_HEAD(,g_provider) g_doorstep = TAILQ_HEAD_INITIALIZER(g_doorstep);
+static struct mtx g_eventlock;
+static struct sx g_eventstall;
+
+/* Max object references recorded per event (for g_cancel_event()). */
+#define G_N_EVENTREFS 20
+
+struct g_event {
+	TAILQ_ENTRY(g_event) events;
+	g_event_t *func;		/* handler to run */
+	void *arg;			/* its argument */
+	int flag;			/* malloc M_* flags plus EV_* bits */
+	void *ref[G_N_EVENTREFS];	/* NULL-terminated reference list */
+};
+
+/* EV_* bits live above the malloc flag bits they share "flag" with. */
+#define EV_DONE		0x80000
+#define EV_WAKEUP	0x40000
+#define EV_CANCELED	0x20000
+
+/*
+ * Poll until the event queue drains.  one_event() wakes this channel
+ * when g_pending_events reaches zero; the hz/5 timeout guards against
+ * a missed wakeup.  NOTE(review): g_pending_events is read without
+ * g_eventlock here — presumably acceptable as a heuristic wait; confirm.
+ */
+void
+g_waitidle(void)
+{
+
+	while (g_pending_events)
+		tsleep(&g_pending_events, PPAUSE, "g_waitidle", hz/5);
+}
+
+/* Block the event thread (it takes g_eventstall around each event). */
+void
+g_stall_events(void)
+{
+
+	sx_xlock(&g_eventstall);
+}
+
+/* Let the event thread run again after g_stall_events(). */
+void
+g_release_events(void)
+{
+
+	sx_xunlock(&g_eventstall);
+}
+
+/*
+ * Mark a provider dead: record the (mandatory, non-zero) error, queue
+ * the provider on the doorstep and kick the event thread, which will
+ * call g_orphan_register() for it.  Only takes g_eventlock, so this is
+ * safe to call from drivers outside the topology lock.
+ */
+void
+g_orphan_provider(struct g_provider *pp, int error)
+{
+
+	g_trace(G_T_TOPOLOGY, "g_orphan_provider(%p(%s), %d)",
+	    pp, pp->name, error);
+	KASSERT(error != 0,
+	    ("g_orphan_provider(%p(%s), 0) error must be non-zero\n",
+	     pp, pp->name));
+	pp->error = error;
+	mtx_lock(&g_eventlock);
+	TAILQ_INSERT_TAIL(&g_doorstep, pp, orphan);
+	mtx_unlock(&g_eventlock);
+	wakeup(&g_wait_event);
+}
+
+/*
+ * This function is called once on each provider which the event handler
+ * finds on its g_doorstep.
+ */
+
+static void
+g_orphan_register(struct g_provider *pp)
+{
+	struct g_consumer *cp, *cp2;
+
+	g_trace(G_T_TOPOLOGY, "g_orphan_register(%s)", pp->name);
+	g_topology_assert();
+
+	/*
+	 * Tell all consumers the bad news.
+	 * Don't be surprised if they self-destruct.
+	 */
+	cp = LIST_FIRST(&pp->consumers);
+	while (cp != NULL) {
+		/* Fetch the successor first: orphan() may destroy cp. */
+		cp2 = LIST_NEXT(cp, consumers);
+		KASSERT(cp->geom->orphan != NULL,
+		    ("geom %s has no orphan, class %s",
+		    cp->geom->name, cp->geom->class->name));
+		cp->geom->orphan(cp);
+		cp = cp2;
+	}
+#ifdef notyet
+	cp = LIST_FIRST(&pp->consumers);
+	if (cp != NULL)
+		return;
+	if (pp->geom->flags & G_GEOM_WITHER)
+		g_destroy_provider(pp);
+#endif
+}
+
+/*
+ * One iteration of the event thread's work: first drain the doorstep
+ * of orphaned providers, then run a single queued event.  Returns 0
+ * when there was no event to run, 1 after running one.  All handler
+ * work happens under both the topology lock and g_eventstall, so
+ * g_stall_events() can fence the whole thing off.
+ */
+static int
+one_event(void)
+{
+	struct g_event *ep;
+	struct g_provider *pp;
+
+	sx_xlock(&g_eventstall);
+	g_topology_lock();
+	for (;;) {
+		/* Pop providers off the doorstep one at a time. */
+		mtx_lock(&g_eventlock);
+		pp = TAILQ_FIRST(&g_doorstep);
+		if (pp != NULL)
+			TAILQ_REMOVE(&g_doorstep, pp, orphan);
+		mtx_unlock(&g_eventlock);
+		if (pp == NULL)
+			break;
+		g_orphan_register(pp);
+	}
+	mtx_lock(&g_eventlock);
+	ep = TAILQ_FIRST(&g_events);
+	if (ep == NULL) {
+		mtx_unlock(&g_eventlock);
+		g_topology_unlock();
+		sx_xunlock(&g_eventstall);
+		return (0);
+	}
+	TAILQ_REMOVE(&g_events, ep, events);
+	mtx_unlock(&g_eventlock);
+	g_topology_assert();
+	ep->func(ep->arg, 0);
+	g_topology_assert();
+	if (ep->flag & EV_WAKEUP) {
+		/* A g_waitfor_event() caller owns and frees ep. */
+		ep->flag |= EV_DONE;
+		wakeup(ep);
+	} else {
+		g_free(ep);
+	}
+	g_pending_events--;
+	if (g_pending_events == 0)
+		wakeup(&g_pending_events);	/* release g_waitidle() */
+	g_topology_unlock();
+	sx_xunlock(&g_eventstall);
+	return (1);
+}
+
+/* Run queued events until both the doorstep and event queue are empty. */
+void
+g_run_events()
+{
+
+	while (one_event())
+		;
+}
+
+/*
+ * Cancel all queued work referring to "ref": remove a pending orphan
+ * of that provider and run, with EV_CANCEL, every event whose ref[]
+ * array mentions it.  Called before destroying an object that queued
+ * events may still point at.  NOTE(review): ep->func() is invoked with
+ * g_eventlock held — confirm all handlers tolerate running under it.
+ */
+void
+g_cancel_event(void *ref)
+{
+	struct g_event *ep, *epn;
+	struct g_provider *pp;
+	u_int n;
+
+	mtx_lock(&g_eventlock);
+	TAILQ_FOREACH(pp, &g_doorstep, orphan) {
+		if (pp != ref)
+			continue;
+		TAILQ_REMOVE(&g_doorstep, pp, orphan);
+		break;
+	}
+	for (ep = TAILQ_FIRST(&g_events); ep != NULL; ep = epn) {
+		/* Save the successor: ep may be freed below. */
+		epn = TAILQ_NEXT(ep, events);
+		for (n = 0; n < G_N_EVENTREFS; n++) {
+			if (ep->ref[n] == NULL)
+				break;	/* ref[] is NULL-terminated */
+			if (ep->ref[n] == ref) {
+				TAILQ_REMOVE(&g_events, ep, events);
+				ep->func(ep->arg, EV_CANCEL);
+				if (ep->flag & EV_WAKEUP) {
+					ep->flag |= EV_DONE;
+					ep->flag |= EV_CANCELED;
+					wakeup(ep);
+				} else {
+					g_free(ep);
+				}
+				break;
+			}
+		}
+	}
+	mtx_unlock(&g_eventlock);
+}
+
+/*
+ * Common backend for g_post_event() and g_waitfor_event(): allocate an
+ * event, record the NULL-terminated varargs list of object references
+ * (used by g_cancel_event() for matching), queue the event, and kick
+ * the event thread.  Returns ENOMEM only when called with M_NOWAIT.
+ */
+static int
+g_post_event_x(g_event_t *func, void *arg, int flag, struct g_event **epp, va_list ap)
+{
+	struct g_event *ep;
+	void *p;
+	u_int n;
+
+	g_trace(G_T_TOPOLOGY, "g_post_event_x(%p, %p, %d", func, arg, flag);
+	ep = g_malloc(sizeof *ep, flag | M_ZERO);
+	if (ep == NULL)
+		return (ENOMEM);
+	ep->flag = flag;
+	for (n = 0; n < G_N_EVENTREFS; n++) {
+		p = va_arg(ap, void *);
+		if (p == NULL)
+			break;
+		g_trace(G_T_TOPOLOGY, " ref %p", p);
+		/*
+		 * The loop increment already advances n.  The previous
+		 * "ep->ref[n++] = p" double-incremented, leaving NULL
+		 * holes in ref[], so every other reference was dropped
+		 * and g_cancel_event() could miss it.
+		 */
+		ep->ref[n] = p;
+	}
+	KASSERT(p == NULL, ("Too many references to event"));
+	ep->func = func;
+	ep->arg = arg;
+	mtx_lock(&g_eventlock);
+	g_pending_events++;
+	TAILQ_INSERT_TAIL(&g_events, ep, events);
+	mtx_unlock(&g_eventlock);
+	wakeup(&g_wait_event);
+	if (epp != NULL)
+		*epp = ep;
+	return (0);
+}
+
+/*
+ * Fire-and-forget event posting.  "flag" must be exactly M_WAITOK or
+ * M_NOWAIT; trailing varargs are a NULL-terminated reference list.
+ * Returns 0 or ENOMEM (M_NOWAIT only).
+ */
+int
+g_post_event(g_event_t *func, void *arg, int flag, ...)
+{
+	va_list ap;
+	int i;
+
+	KASSERT(flag == M_WAITOK || flag == M_NOWAIT,
+	    ("Wrong flag to g_post_event"));
+	va_start(ap, flag);
+	i = g_post_event_x(func, arg, flag, NULL, ap);
+	va_end(ap);
+	return (i);
+}
+
+
+/*
+ * XXX: It might actually be useful to call this function with topology held.
+ * XXX: This would ensure that the event gets created before anything else
+ * XXX: changes. At present all users have a handle on things in some other
+ * XXX: way, so this remains an XXX for now.
+ */
+
+/*
+ * Post an event and sleep until it has run (EV_DONE) or been cancelled
+ * (EV_CANCELED -> EAGAIN).  The 1-second tsleep timeout is a guard
+ * against missed wakeups; the EV_DONE flag is what actually ends the
+ * wait.  This caller, not the event thread, frees the event.
+ */
+int
+g_waitfor_event(g_event_t *func, void *arg, int flag, ...)
+{
+	va_list ap;
+	struct g_event *ep;
+	int error;
+
+	/* g_topology_assert_not(); */
+	KASSERT(flag == M_WAITOK || flag == M_NOWAIT,
+	    ("Wrong flag to g_post_event"));
+	va_start(ap, flag);
+	error = g_post_event_x(func, arg, flag | EV_WAKEUP, &ep, ap);
+	va_end(ap);
+	if (error)
+		return (error);
+	do
+		tsleep(ep, PRIBIO, "g_waitfor_event", hz);
+	while (!(ep->flag & EV_DONE));
+	if (ep->flag & EV_CANCELED)
+		error = EAGAIN;
+	g_free(ep);
+	return (error);
+}
+
+/* Initialize the event subsystem's locks; called once at GEOM startup. */
+void
+g_event_init()
+{
+
+	mtx_init(&g_eventlock, "GEOM orphanage", NULL, MTX_DEF);
+	sx_init(&g_eventstall, "GEOM event stalling");
+}
diff --git a/sys/geom/geom_fox.c b/sys/geom/geom_fox.c
new file mode 100644
index 0000000..295840f
--- /dev/null
+++ b/sys/geom/geom_fox.c
@@ -0,0 +1,468 @@
+/*-
+ * Copyright (c) 2003 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * This is a GEOM module for handling path selection for multi-path
+ * storage devices. It is named "fox" because it, like they, prefer
+ * to have multiple exits to choose from.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/bio.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/libkern.h>
+#include <sys/endian.h>
+#include <sys/md5.h>
+#include <sys/errno.h>
+#include <geom/geom.h>
+
+#define FOX_CLASS_NAME "FOX"
+/* On-disk label: sector 0 starts with this magic, set id in bytes 16..31. */
+#define FOX_MAGIC "GEOM::FOX"
+
+/*
+ * Per-fox state.  "lock" protects queue, path and waiting; the access
+ * counts and path switching happen under the topology lock.
+ */
+struct g_fox_softc {
+	off_t mediasize;		/* provider size minus label sector */
+	u_int sectorsize;
+	TAILQ_HEAD(, bio) queue;	/* bios parked during path switch */
+	struct mtx lock;
+	u_char magic[16];		/* set id from label bytes 16..31 */
+	struct g_consumer *path;	/* currently active path, or NULL */
+	struct g_consumer *opath;	/* previous (failed) path */
+	int waiting;			/* non-zero: reselect event pending */
+	int cr, cw, ce;			/* access counts held on behalf of provider */
+};
+
+/*
+ * This function is called whenever we need to select a new path.
+ */
+static void
+g_fox_select_path(void *arg, int flag)
+{
+	struct g_geom *gp;
+	struct g_fox_softc *sc;
+	struct g_consumer *cp1;
+	struct bio *bp;
+	int error;
+
+	g_topology_assert();
+	if (flag == EV_CANCEL)
+		return;
+	gp = arg;
+	sc = gp->softc;
+
+	if (sc->opath != NULL) {
+		/*
+		 * First, close the old path entirely.
+		 */
+		printf("Closing old path (%s) on fox (%s)\n",
+		    sc->opath->provider->name, gp->name);
+
+		/* Candidate for the next path: the one after the old. */
+		cp1 = LIST_NEXT(sc->opath, consumer);
+
+		error = g_access_rel(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1));
+		KASSERT(error == 0, ("Failed close of old path %d", error));
+
+		/*
+		 * The attempt to reopen it with a exclusive count
+		 */
+		error = g_access_rel(sc->opath, 0, 0, 1);
+		if (error) {
+			/*
+			 * Ok, ditch this consumer, we can't use it.
+			 */
+			printf("Drop old path (%s) on fox (%s)\n",
+			    sc->opath->provider->name, gp->name);
+			g_detach(sc->opath);
+			g_destroy_consumer(sc->opath);
+			if (LIST_EMPTY(&gp->consumer)) {
+				/* No consumers left: fail all parked bios. */
+				g_wither_geom(gp, ENXIO);
+				for (;;) {
+					bp = TAILQ_FIRST(&sc->queue);
+					if (bp == NULL)
+						break;
+					TAILQ_REMOVE(&sc->queue, bp, bio_queue);
+					bp->bio_error = ENXIO;
+					g_std_done(bp);
+				}
+				return;
+			}
+		} else {
+			printf("Got e-bit on old path (%s) on fox (%s)\n",
+			    sc->opath->provider->name, gp->name);
+		}
+		sc->opath = NULL;
+	} else {
+		cp1 = LIST_FIRST(&gp->consumer);
+	}
+	/* Wrap around to the first consumer if we ran off the list. */
+	if (cp1 == NULL)
+		cp1 = LIST_FIRST(&gp->consumer);
+	printf("Open new path (%s) on fox (%s)\n",
+	    cp1->provider->name, gp->name);
+	error = g_access_rel(cp1, sc->cr, sc->cw, sc->ce);
+	if (error) {
+		/*
+		 * If we failed, we take another trip through here
+		 */
+		printf("Open new path (%s) on fox (%s) failed, reselect.\n",
+		    cp1->provider->name, gp->name);
+		sc->opath = cp1;
+		g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
+	} else {
+		printf("Open new path (%s) on fox (%s) succeeded\n",
+		    cp1->provider->name, gp->name);
+		/* New path is live: flush the parked bios to it. */
+		mtx_lock(&sc->lock);
+		sc->path = cp1;
+		sc->waiting = 0;
+		for (;;) {
+			bp = TAILQ_FIRST(&sc->queue);
+			if (bp == NULL)
+				break;
+			TAILQ_REMOVE(&sc->queue, bp, bio_queue);
+			g_io_request(bp, sc->path);
+		}
+		mtx_unlock(&sc->lock);
+	}
+}
+
+/*
+ * Consumer orphan/spoil handler: a path went away.  Drop our access
+ * counts, destroy the consumer and, if it was the active path,
+ * schedule selection of a new one.  When the last path disappears
+ * the softc is freed and the whole geom is withered.
+ */
+static void
+g_fox_orphan(struct g_consumer *cp)
+{
+	struct g_geom *gp;
+	struct g_fox_softc *sc;
+	int error, mark;
+
+	g_topology_assert();
+	gp = cp->geom;
+	sc = gp->softc;
+	printf("Removing path (%s) from fox (%s)\n",
+	    cp->provider->name, gp->name);
+	mtx_lock(&sc->lock);
+	if (cp == sc->path) {
+		sc->opath = NULL;
+		sc->path = NULL;
+		sc->waiting = 1;
+		mark = 1;
+	} else {
+		mark = 0;
+	}
+	mtx_unlock(&sc->lock);
+
+	g_access_rel(cp, -cp->acr, -cp->acw, -cp->ace);
+	error = cp->provider->error;
+	g_detach(cp);
+	g_destroy_consumer(cp);
+	if (!LIST_EMPTY(&gp->consumer)) {
+		if (mark)
+			g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
+		return;
+	}
+
+	/*
+	 * Last path gone: free the softc.  The original cleared
+	 * gp->softc first and then called g_free(gp->softc), which
+	 * freed NULL and leaked the softc.
+	 */
+	mtx_destroy(&sc->lock);
+	g_free(sc);
+	gp->softc = NULL;
+	g_wither_geom(gp, ENXIO);
+}
+
+/*
+ * I/O completion handler.  Success completes normally; a failure on a
+ * stale path is simply reissued on the current one, while a failure on
+ * the active path parks the bio and schedules a path reselection.
+ */
+static void
+g_fox_done(struct bio *bp)
+{
+	struct g_geom *gp;
+	struct g_fox_softc *sc;
+	int error;
+
+	if (bp->bio_error == 0) {
+		g_std_done(bp);
+		return;
+	}
+	gp = bp->bio_from->geom;
+	sc = gp->softc;
+	if (bp->bio_from != sc->path) {
+		/*
+		 * Failed on an old path: retry on the active one.
+		 * NOTE(review): sc->path could be NULL here if a
+		 * reselection is already in flight — confirm.
+		 */
+		g_io_request(bp, sc->path);
+		return;
+	}
+	mtx_lock(&sc->lock);
+	sc->opath = sc->path;
+	sc->path = NULL;
+	error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL);
+	if (error) {
+		/* Couldn't schedule a reselect; fail the bio. */
+		bp->bio_error = ENOMEM;
+		g_std_done(bp);
+	} else {
+		sc->waiting = 1;
+		TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue);
+	}
+	mtx_unlock(&sc->lock);
+}
+
+/*
+ * Start routine: clone read/write/delete bios onto the active path,
+ * shifting the offset by one sector to skip the on-disk fox label.
+ * If no path is active (or earlier bios are already parked), queue the
+ * clone and make sure a path-selection event is pending.
+ */
+static void
+g_fox_start(struct bio *bp)
+{
+	struct g_geom *gp;
+	struct bio *bp2;
+	struct g_fox_softc *sc;
+	int error;
+
+	gp = bp->bio_to->geom;
+	sc = gp->softc;
+	if (sc == NULL) {
+		g_io_deliver(bp, ENXIO);
+		return;
+	}
+	switch(bp->bio_cmd) {
+	case BIO_READ:
+	case BIO_WRITE:
+	case BIO_DELETE:
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			g_io_deliver(bp, ENOMEM);
+			break;
+		}
+		/* Skip the label sector at the front of the provider. */
+		bp2->bio_offset += sc->sectorsize;
+		bp2->bio_done = g_fox_done;
+		mtx_lock(&sc->lock);
+		if (sc->path == NULL || !TAILQ_EMPTY(&sc->queue)) {
+			if (sc->waiting == 0) {
+				error = g_post_event(g_fox_select_path, gp,
+				    M_NOWAIT, gp, NULL);
+				if (error) {
+					g_destroy_bio(bp2);
+					bp2 = NULL;
+					g_io_deliver(bp, error);
+				} else {
+					sc->waiting = 1;
+				}
+			}
+			if (bp2 != NULL)
+				TAILQ_INSERT_TAIL(&sc->queue, bp2,
+				    bio_queue);
+		} else {
+			g_io_request(bp2, sc->path);
+		}
+		mtx_unlock(&sc->lock);
+		break;
+	default:
+		g_io_deliver(bp, EOPNOTSUPP);
+		break;
+	}
+	return;
+}
+
+/*
+ * Access routine.  On first open every path gets an exclusive bit so
+ * nobody else can write behind our back; the requested counts are then
+ * applied only to the active path.  On last close the e-bits come off
+ * again.  The softc mirrors the totals in cr/cw/ce so a path switch
+ * can re-apply them (see g_fox_select_path()).
+ */
+static int
+g_fox_access(struct g_provider *pp, int dr, int dw, int de)
+{
+	struct g_geom *gp;
+	struct g_fox_softc *sc;
+	struct g_consumer *cp1;
+	int error;
+
+	g_topology_assert();
+	gp = pp->geom;
+	sc = gp->softc;
+	if (sc == NULL)
+		return (ENXIO);
+
+	if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
+		/*
+		 * First open, open all consumers with an exclusive bit
+		 */
+		error = 0;
+		LIST_FOREACH(cp1, &gp->consumer, consumer) {
+			error = g_access_rel(cp1, 0, 0, 1);
+			if (error) {
+				printf("FOX: access(%s,0,0,1) = %d\n",
+				    cp1->provider->name, error);
+				break;
+			}
+		}
+		if (error) {
+			/* Roll back the e-bits we did manage to get. */
+			LIST_FOREACH(cp1, &gp->consumer, consumer) {
+				if (cp1->ace)
+					g_access_rel(cp1, 0, 0, -1);
+			}
+			return (error);
+		}
+	}
+	if (sc->path == NULL)
+		g_fox_select_path(gp, 0);
+	if (sc->path == NULL)
+		error = ENXIO;
+	else
+		error = g_access_rel(sc->path, dr, dw, de);
+	if (error == 0) {
+		sc->cr += dr;
+		sc->cw += dw;
+		sc->ce += de;
+		if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
+			/*
+			 * Last close, remove e-bit on all consumers
+			 */
+			LIST_FOREACH(cp1, &gp->consumer, consumer)
+				g_access_rel(cp1, 0, 0, -1);
+		}
+	}
+	return (error);
+}
+
+/*
+ * Taste function: recognize a provider whose first sector begins with
+ * FOX_MAGIC.  Bytes 16..31 of that sector identify the multipath set;
+ * a match against an existing fox adds this provider as an extra path,
+ * otherwise a new fox geom is created.
+ */
+static struct g_geom *
+g_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+	struct g_geom *gp, *gp2;
+	struct g_provider *pp2;
+	struct g_consumer *cp, *cp2;
+	struct g_fox_softc *sc, *sc2;
+	int error;
+	u_int sectorsize;
+	u_char *buf;
+
+	g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name);
+	g_topology_assert();
+	if (!strcmp(pp->geom->class->name, mp->name))
+		return (NULL);
+	gp = g_new_geomf(mp, "%s.fox", pp->name);
+	gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO);
+	sc = gp->softc;
+
+	gp->start = g_fox_start;
+	gp->spoiled = g_fox_orphan;
+	gp->orphan = g_fox_orphan;
+	gp->access= g_fox_access;
+	cp = g_new_consumer(gp);
+	g_attach(cp, pp);
+	error = g_access_rel(cp, 1, 0, 0);
+	if (error) {
+		g_free(sc);
+		g_detach(cp);
+		g_destroy_consumer(cp);
+		g_destroy_geom(gp);
+		return(NULL);
+	}
+	do {
+		sectorsize = cp->provider->sectorsize;
+		g_topology_unlock();
+		buf = g_read_data(cp, 0, sectorsize, &error);
+		g_topology_lock();
+		if (buf == NULL || error != 0)
+			break;
+		if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC)))
+			break;
+
+		/*
+		 * First we need to see if this a new path for an existing fox.
+		 */
+		LIST_FOREACH(gp2, &mp->geom, geom) {
+			sc2 = gp2->softc;
+			/*
+			 * Was "if (sc == NULL)": sc is our own freshly
+			 * allocated softc and can never be NULL, so a
+			 * candidate geom with a NULL softc would have
+			 * been dereferenced just below.  Test sc2.
+			 */
+			if (sc2 == NULL)
+				continue;
+			if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic))
+				continue;
+			break;
+		}
+		if (gp2 != NULL) {
+			/*
+			 * It was. Create a new consumer for that fox,
+			 * attach it, and if the fox is open, open this
+			 * path with an exclusive count of one.
+			 */
+			printf("Adding path (%s) to fox (%s)\n",
+			    pp->name, gp2->name);
+			cp2 = g_new_consumer(gp2);
+			g_attach(cp2, pp);
+			pp2 = LIST_FIRST(&gp2->provider);
+			if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) {
+				error = g_access_rel(cp2, 0, 0, 1);
+				if (error) {
+					/*
+					 * This is bad, or more likely,
+					 * the user is doing something stupid
+					 */
+					printf(
+	"WARNING: New path (%s) to fox(%s) not added: %s\n%s",
+					    cp->provider->name, gp2->name,
+					    "Could not get exclusive bit.",
+					    "WARNING: This indicates a risk of data inconsistency."
+					);
+					g_detach(cp2);
+					g_destroy_consumer(cp2);
+				}
+			}
+			break;
+		}
+		printf("Creating new fox (%s)\n", pp->name);
+		sc->path = cp;
+		memcpy(sc->magic, buf + 16, sizeof sc->magic);
+		pp2 = g_new_providerf(gp, "%s", gp->name);
+		/* Data lives behind the label sector, hence the offsets. */
+		pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize;
+		pp2->sectorsize = sc->sectorsize = pp->sectorsize;
+printf("fox %s lock %p\n", gp->name, &sc->lock);
+
+		mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF);
+		TAILQ_INIT(&sc->queue);
+		g_error_provider(pp2, 0);
+	} while (0);
+	if (buf != NULL)
+		g_free(buf);
+	g_access_rel(cp, -1, 0, 0);
+
+	if (!LIST_EMPTY(&gp->provider))
+		return (gp);
+
+	/* Not ours (or joined an existing fox): dismantle the probe geom. */
+	g_free(gp->softc);
+	g_detach(cp);
+	g_destroy_consumer(cp);
+	g_destroy_geom(gp);
+	return (NULL);
+}
+
+/*
+ * "destroy geom" control request: release the softc and wither the geom.
+ */
+static int
+g_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
+{
+	struct g_fox_softc *sc;
+
+	g_topology_assert();
+	sc = gp->softc;
+	gp->softc = NULL;
+	mtx_destroy(&sc->lock);
+	/*
+	 * Free the saved pointer.  The original called
+	 * g_free(gp->softc) after clearing it, i.e. g_free(NULL),
+	 * leaking the softc.
+	 */
+	g_free(sc);
+	g_wither_geom(gp, ENXIO);
+	return (0);
+}
+
+/* Class registration: FOX provides no config method, only taste/destroy. */
+static struct g_class g_fox_class	= {
+	.name = FOX_CLASS_NAME,
+	.taste = g_fox_taste,
+	.destroy_geom = g_fox_destroy_geom,
+};
+
+DECLARE_GEOM_CLASS(g_fox_class, g_fox);
diff --git a/sys/geom/geom_gpt.c b/sys/geom/geom_gpt.c
new file mode 100644
index 0000000..52951c4
--- /dev/null
+++ b/sys/geom/geom_gpt.c
@@ -0,0 +1,227 @@
+/*-
+ * Copyright (c) 2002 Marcel Moolenaar
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <sys/endian.h>
+#include <sys/sbuf.h>
+#include <sys/uuid.h>
+#include <sys/gpt.h>
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+
+/* Sanity-check the on-disk layout of the GPT structures at compile time. */
+CTASSERT(offsetof(struct gpt_hdr, padding) == 92);
+CTASSERT(sizeof(struct gpt_ent) == 128);
+
+/*
+ * XXX: GEOM is not dynamic enough. We are forced to use a compile-time
+ * limit. The minimum number of partitions (128) as required by EFI is
+ * most of the time just a waste of space.
+ */
+#define GPT_MAX_SLICES 128
+
+struct g_gpt_softc {
+	/* Copy of the raw table entry for each configured slice. */
+	struct gpt_ent *part[GPT_MAX_SLICES];
+};
+
+/*
+ * Validate a candidate GPT header: signature match plus CRC32 over
+ * hdr_size bytes.  The self-CRC field is zeroed for the computation
+ * and restored afterwards, so hdr is only transiently modified.
+ * Returns 1 if valid, 0 otherwise.
+ */
+static int
+is_gpt_hdr(struct gpt_hdr *hdr)
+{
+	uint32_t crc;
+
+	if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof(hdr->hdr_sig)))
+		return (0);
+	crc = hdr->hdr_crc_self;
+	hdr->hdr_crc_self = 0;
+	if (crc32(hdr, hdr->hdr_size) != crc)
+		return (0);
+	hdr->hdr_crc_self = crc;
+	/* We're happy... */
+	return (1);
+}
+
+/* Slice start hook: nothing special to do, let g_slice handle the bio. */
+static int
+g_gpt_start(struct bio *bp)
+{
+
+	return (0);
+}
+
+/*
+ * dumpconf hook: emit the standard slice config plus, per provider,
+ * the partition-type UUID (XML <type> element, or " ty " in text mode).
+ */
+static void
+g_gpt_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
+    struct g_consumer *cp, struct g_provider *pp)
+{
+	struct g_slicer *gsp = gp->softc;
+	struct g_gpt_softc *gs = gsp->softc;
+	struct uuid *uuid;
+
+	g_slice_dumpconf(sb, indent, gp, cp, pp);
+
+	if (pp != NULL) {
+		uuid = &gs->part[pp->index]->ent_type;
+		if (indent != NULL)
+			sbuf_printf(sb, "%s<type>", indent);
+		else
+			sbuf_printf(sb, " ty ");
+		sbuf_printf_uuid(sb, uuid);
+		if (indent != NULL)
+			sbuf_printf(sb, "</type>\n");
+	}
+}
+
+/*
+ * Taste function: look for an EFI GPT header in LBA 1 of the provider
+ * and, if valid, configure one slice per used partition-table entry.
+ * FreeBSD-typed partitions get "s" names, everything else "p".
+ */
+static struct g_geom *
+g_gpt_taste(struct g_class *mp, struct g_provider *pp, int insist)
+{
+	struct g_consumer *cp;
+	struct g_geom *gp;
+	struct g_gpt_softc *gs;
+	u_char *buf, *mbr;
+	struct gpt_ent *ent;
+	struct gpt_hdr *hdr;
+	u_int i, secsz, tblsz;
+	int error, ps;
+
+	g_trace(G_T_TOPOLOGY, "g_gpt_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+
+	/*
+	 * XXX: I don't like to hardcode a maximum number of slices, since
+	 * it's wasting space most of the time and insufficient any time.
+	 * It's easier for now...
+	 */
+	gp = g_slice_new(mp, GPT_MAX_SLICES, pp, &cp, &gs, sizeof(*gs),
+	    g_gpt_start);
+	if (gp == NULL)
+		return (NULL);
+
+	g_topology_unlock();
+	gp->dumpconf = g_gpt_dumpconf;
+
+	do {
+
+		mbr = NULL;
+
+		if (gp->rank != 2 && insist == 0)
+			break;
+
+		secsz = cp->provider->sectorsize;
+		if (secsz < 512)
+			break;
+
+		/* XXX: we need to get the media size as well. */
+
+		/* Read both the MBR sector and the GPT sector. */
+		mbr = g_read_data(cp, 0, 2 * secsz, &error);
+		if (mbr == NULL || error != 0)
+			break;
+#if 0
+	/*
+	 * XXX: we should ignore the GPT if there's a MBR and the MBR is
+	 * not a PMBR (Protective MBR). I believe this is what the EFI
+	 * spec is going to say eventually (this is hearsay :-)
+	 * Currently EFI (version 1.02) accepts and uses the GPT even
+	 * though there's a valid MBR. We do this too, because it allows
+	 * us to test this code without first nuking the only partitioning
+	 * scheme we grok until this is working.
+	 */
+	if (!is_pmbr((void*)mbr))
+		goto out;
+#endif
+
+		hdr = (void*)(mbr + secsz);
+
+		/*
+		 * XXX: if we don't have a GPT header at LBA 1, we should check if
+		 * there's a backup GPT at the end of the medium. If we have a valid
+		 * backup GPT, we should restore the primary GPT and claim this lunch.
+		 */
+		if (!is_gpt_hdr(hdr))
+			break;
+
+		/* Table size rounded up to whole sectors. */
+		tblsz = (hdr->hdr_entries * hdr->hdr_entsz + secsz - 1) &
+		    ~(secsz - 1);
+		buf = g_read_data(cp, hdr->hdr_lba_table * secsz, tblsz, &error);
+		/*
+		 * The original dereferenced buf without checking the
+		 * read result; a failed read would have dereferenced a
+		 * NULL pointer below.
+		 */
+		if (buf == NULL || error != 0)
+			break;
+		for (i = 0; i < hdr->hdr_entries; i++) {
+			struct uuid unused = GPT_ENT_TYPE_UNUSED;
+			struct uuid freebsd = GPT_ENT_TYPE_FREEBSD;
+			struct uuid tmp;
+			if (i >= GPT_MAX_SLICES)
+				break;
+			ent = (void*)(buf + i * hdr->hdr_entsz);
+			le_uuid_dec(&ent->ent_type, &tmp);
+			if (!memcmp(&tmp, &unused, sizeof(unused)))
+				continue;
+			/* XXX: This memory leaks */
+			gs->part[i] = g_malloc(hdr->hdr_entsz, M_WAITOK);
+			if (gs->part[i] == NULL)
+				break;
+			bcopy(ent, gs->part[i], hdr->hdr_entsz);
+			ps = (!memcmp(&tmp, &freebsd, sizeof(freebsd)))
+			    ? 's' : 'p';
+			g_topology_lock();
+			(void)g_slice_config(gp, i, G_SLICE_CONFIG_SET,
+			    ent->ent_lba_start * secsz,
+			    (1 + ent->ent_lba_end - ent->ent_lba_start) * secsz,
+			    secsz,
+			    "%s%c%d", gp->name, ps, i + 1);
+			g_topology_unlock();
+		}
+		g_free(buf);
+
+	} while (0);
+
+	if (mbr != NULL)
+		g_free(mbr);
+
+	g_topology_lock();
+	g_access_rel(cp, -1, 0, 0);
+	if (LIST_EMPTY(&gp->provider)) {
+		/* Nothing configured: undo the slice geom. */
+		g_slice_spoiled(cp);
+		return (NULL);
+	}
+	return (gp);
+}
+
+/* Class registration: GPT only tastes; g_slice supplies the rest. */
+static struct g_class g_gpt_class = {
+	.name = "GPT",
+	.taste = g_gpt_taste,
+};
+
+DECLARE_GEOM_CLASS(g_gpt_class, g_gpt);
diff --git a/sys/geom/geom_int.h b/sys/geom/geom_int.h
new file mode 100644
index 0000000..952b6c6
--- /dev/null
+++ b/sys/geom/geom_int.h
@@ -0,0 +1,88 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+LIST_HEAD(class_list_head, g_class);
+TAILQ_HEAD(g_tailq_head, g_geom);
+
+extern int g_collectstats;
+extern int g_debugflags;
+/*
+ * 1 G_T_TOPOLOGY
+ * 2 G_T_BIO
+ * 4 G_T_ACCESS
+ * 8 Enable sanity checks
+ * 16 Allow footshooting on rank#1 providers
+ * 32 G_T_DETAILS
+ */
+#define G_F_DISKIOCTL 64
+#define G_F_CTLDUMP 128
+
+/*
+ * We actually have a number of drivers sharing the same major number
+ * so we coordinate the major/minor usage here
+ */
+#define GEOM_MAJOR 4
+#define GEOM_MINOR_STATS 0
+#define GEOM_MINOR_PROVIDERS 10
+
+/* geom_dump.c */
+void g_confxml(void *, int flag);
+void g_conf_specific(struct sbuf *sb, struct g_class *mp, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp);
+void g_confdot(void *, int flag);
+void g_conftxt(void *, int flag);
+
+/* geom_event.c */
+void g_event_init(void);
+void g_run_events(void);
+void g_stall_events(void);
+void g_release_events(void);
+
+/* geom_subr.c */
+extern struct class_list_head g_classes;
+extern char *g_wait_event, *g_wait_sim, *g_wait_up, *g_wait_down;
+
+/* geom_io.c */
+void g_io_init(void);
+void g_io_schedule_down(struct thread *tp);
+void g_io_schedule_up(struct thread *tp);
+
+/* geom_kern.c / geom_kernsim.c */
+void g_init(void);
+extern int g_shutdown;
+
+/* geom_ctl.c */
+void g_ctl_init(void);
diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c
new file mode 100644
index 0000000..4bed6f6
--- /dev/null
+++ b/sys/geom/geom_io.c
@@ -0,0 +1,416 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+
+#include <sys/errno.h>
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+#include <sys/devicestat.h>
+
+#include <vm/uma.h>
+
+static struct g_bioq g_bio_run_down;
+static struct g_bioq g_bio_run_up;
+
+static u_int pace;
+static uma_zone_t biozone;
+
+#include <machine/atomic.h>
+
+/* Trivial lock/unlock wrappers for a bio queue's mutex. */
+static void
+g_bioq_lock(struct g_bioq *bq)
+{
+
+	mtx_lock(&bq->bio_queue_lock);
+}
+
+static void
+g_bioq_unlock(struct g_bioq *bq)
+{
+
+	mtx_unlock(&bq->bio_queue_lock);
+}
+
+#if 0
+/* Currently unused; kept for symmetry with g_bioq_init(). */
+static void
+g_bioq_destroy(struct g_bioq *bq)
+{
+
+	mtx_destroy(&bq->bio_queue_lock);
+}
+#endif
+
+/* Initialize an empty bio queue and its mutex. */
+static void
+g_bioq_init(struct g_bioq *bq)
+{
+
+	TAILQ_INIT(&bq->bio_queue);
+	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
+}
+
+/*
+ * Dequeue and return the head of the queue, or NULL if it is empty.
+ * Caller must hold the queue lock.
+ */
+static struct bio *
+g_bioq_first(struct g_bioq *bq)
+{
+	struct bio *bp;
+
+	bp = TAILQ_FIRST(&bq->bio_queue);
+	if (bp != NULL) {
+		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
+		bq->bio_queue_length--;
+	}
+	return (bp);
+}
+
+/* Append a bio to the tail of the queue, taking the queue lock itself. */
+static void
+g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq)
+{
+
+	g_bioq_lock(rq);
+	TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue);
+	rq->bio_queue_length++;
+	g_bioq_unlock(rq);
+}
+
+/*
+ * Allocate a zeroed bio from the bio zone.  The allocation is
+ * M_NOWAIT, so this can return NULL; callers must check.
+ */
+struct bio *
+g_new_bio(void)
+{
+	struct bio *bp;
+
+	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
+	return (bp);
+}
+
+/* Return a bio to the zone. */
+void
+g_destroy_bio(struct bio *bp)
+{
+
+	uma_zfree(biozone, bp);
+}
+
+/*
+ * Clone a bio for passing a request one level down the stack.  The
+ * clone shares the parent's data buffer and copies the cmd, length,
+ * offset and attribute fields; the parent's child count is bumped.
+ * Returns NULL on allocation failure (M_NOWAIT).
+ */
+struct bio *
+g_clone_bio(struct bio *bp)
+{
+	struct bio *bp2;
+
+	bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
+	if (bp2 != NULL) {
+		bp2->bio_parent = bp;
+		bp2->bio_cmd = bp->bio_cmd;
+		bp2->bio_length = bp->bio_length;
+		bp2->bio_offset = bp->bio_offset;
+		bp2->bio_data = bp->bio_data;
+		bp2->bio_attribute = bp->bio_attribute;
+		bp->bio_children++;
+	}
+	return(bp2);
+}
+
+/* Initialize the up/down bio queues and the bio allocation zone. */
+void
+g_io_init()
+{
+
+	g_bioq_init(&g_bio_run_down);
+	g_bioq_init(&g_bio_run_up);
+	biozone = uma_zcreate("g_bio", sizeof (struct bio),
+	    NULL, NULL,
+	    NULL, NULL,
+	    0, 0);
+}
+
+/*
+ * Issue a synchronous BIO_GETATTR request for attribute "attr" on the
+ * provider cp is attached to.  On entry *len is the size of the buffer
+ * at ptr; on success it is updated to the number of bytes completed.
+ * Returns 0 or an errno value.
+ */
+int
+g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
+{
+	struct bio *bp;
+	int error;
+
+	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
+	bp = g_new_bio();
+	/* g_new_bio() allocates M_NOWAIT and can fail; don't deref NULL. */
+	if (bp == NULL)
+		return (ENOMEM);
+	bp->bio_cmd = BIO_GETATTR;
+	bp->bio_done = NULL;
+	bp->bio_attribute = attr;
+	bp->bio_length = *len;
+	bp->bio_data = ptr;
+	g_io_request(bp, cp);
+	error = biowait(bp, "ggetattr");
+	*len = bp->bio_completed;
+	g_destroy_bio(bp);
+	return (error);
+}
+
+/*
+ * Sanity-check a bio before it is handed to a geom's start method.
+ * Returns 0 if the request is acceptable, or an errno to fail it with.
+ */
+static int
+g_io_check(struct bio *bp)
+{
+	struct g_consumer *cp;
+	struct g_provider *pp;
+
+	cp = bp->bio_from;
+	pp = bp->bio_to;
+
+	/* Fail if access counters don't allow the operation */
+	switch(bp->bio_cmd) {
+	case BIO_READ:
+	case BIO_GETATTR:
+		if (cp->acr == 0)
+			return (EPERM);
+		break;
+	case BIO_WRITE:
+	case BIO_DELETE:
+		if (cp->acw == 0)
+			return (EPERM);
+		break;
+	default:
+		return (EPERM);
+	}
+	/* if provider is marked for error, don't disturb. */
+	if (pp->error)
+		return (pp->error);
+
+	switch(bp->bio_cmd) {
+	case BIO_READ:
+	case BIO_WRITE:
+	case BIO_DELETE:
+		/* Reject I/O not on sector boundary */
+		if (bp->bio_offset % pp->sectorsize)
+			return (EINVAL);
+		/* Reject I/O not integral sector long */
+		if (bp->bio_length % pp->sectorsize)
+			return (EINVAL);
+		/* Reject requests past the end of media. */
+		if (bp->bio_offset > pp->mediasize)
+			return (EIO);
+		break;
+	default:
+		break;
+	}
+	return (0);
+}
+
+/*
+ * Hand a bio to the down-going I/O path.  Only routing fields and
+ * statistics are updated here; validation happens later in
+ * g_io_check() when the g_down thread picks the request up.
+ */
+void
+g_io_request(struct bio *bp, struct g_consumer *cp)
+{
+	struct g_provider *pp;
+
+	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
+	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
+	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
+	/* Don't dereference cp before the NULL assertion above has run. */
+	pp = cp->provider;
+	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
+
+	bp->bio_from = cp;
+	bp->bio_to = pp;
+	bp->bio_error = 0;
+	bp->bio_completed = 0;
+
+	if (g_collectstats) {
+		devstat_start_transaction_bio(cp->stat, bp);
+		devstat_start_transaction_bio(pp->stat, bp);
+	}
+	cp->nstart++;
+	pp->nstart++;
+
+	/* Pass it on down. */
+	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
+	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
+	g_bioq_enqueue_tail(bp, &g_bio_run_down);
+	wakeup(&g_wait_down);
+}
+
+/*
+ * Complete a bio and hand it to the up-going path.  ENOMEM is special:
+ * the request is resubmitted and the "pace" counter throttles the down
+ * path instead of failing the bio.
+ */
+void
+g_io_deliver(struct bio *bp, int error)
+{
+	struct g_consumer *cp;
+	struct g_provider *pp;
+
+	cp = bp->bio_from;
+	pp = bp->bio_to;
+	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
+	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
+	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
+	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
+
+	g_trace(G_T_BIO,
+"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
+	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
+	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
+
+	bp->bio_bcount = bp->bio_length;
+	if (g_collectstats) {
+		bp->bio_resid = bp->bio_bcount - bp->bio_completed;
+		devstat_end_transaction_bio(cp->stat, bp);
+		devstat_end_transaction_bio(pp->stat, bp);
+	}
+	cp->nend++;
+	pp->nend++;
+
+	/* Retry ENOMEM rather than failing; slow the down path a bit. */
+	if (error == ENOMEM) {
+		if (bootverbose)
+			printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
+		g_io_request(bp, cp);
+		pace++;
+		return;
+	}
+	bp->bio_error = error;
+	g_bioq_enqueue_tail(bp, &g_bio_run_up);
+	wakeup(&g_wait_up);
+}
+
+/*
+ * Main loop of the g_down thread: pull bios off the down queue,
+ * validate them with g_io_check(), trim them to the provider's media
+ * size and hand them to the destination geom's start method.
+ * Never returns.
+ */
+void
+g_io_schedule_down(struct thread *tp __unused)
+{
+	struct bio *bp;
+	off_t excess;
+	int error;
+	struct mtx mymutex;
+
+	/* Held across start() to catch code that sleeps in the down path. */
+	bzero(&mymutex, sizeof mymutex);
+	mtx_init(&mymutex, "g_xdown", MTX_DEF, 0);
+
+	for(;;) {
+		g_bioq_lock(&g_bio_run_down);
+		bp = g_bioq_first(&g_bio_run_down);
+		if (bp == NULL) {
+			/* PDROP releases the queue lock while we sleep. */
+			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
+			    PRIBIO | PDROP, "-", hz/10);
+			continue;
+		}
+		g_bioq_unlock(&g_bio_run_down);
+		if (pace > 0) {
+			/*
+			 * Throttle after an ENOMEM completion: sleep on a
+			 * local address nobody wakes, i.e. a plain delay.
+			 */
+			msleep(&error, NULL, PRIBIO, "g_down", hz/10);
+			pace--;
+		}
+		error = g_io_check(bp);
+		if (error) {
+			g_io_deliver(bp, error);
+			continue;
+		}
+		switch (bp->bio_cmd) {
+		case BIO_READ:
+		case BIO_WRITE:
+		case BIO_DELETE:
+			/* Truncate requests to the end of providers media. */
+			excess = bp->bio_offset + bp->bio_length;
+			if (excess > bp->bio_to->mediasize) {
+				excess -= bp->bio_to->mediasize;
+				bp->bio_length -= excess;
+			}
+			/* Deliver zero length transfers right here. */
+			if (bp->bio_length == 0) {
+				g_io_deliver(bp, 0);
+				continue;
+			}
+			break;
+		default:
+			break;
+		}
+		mtx_lock(&mymutex);
+		bp->bio_to->geom->start(bp);
+		mtx_unlock(&mymutex);
+	}
+}
+
+/*
+ * Main loop of the g_up thread: drain the up queue, invoking
+ * biodone() on each completed bio.  Never returns.
+ */
+void
+g_io_schedule_up(struct thread *tp __unused)
+{
+	struct bio *bp;
+	struct mtx mymutex;
+
+	/* Held across biodone() to catch code that sleeps in the up path. */
+	bzero(&mymutex, sizeof mymutex);
+	mtx_init(&mymutex, "g_xup", MTX_DEF, 0);
+	for(;;) {
+		g_bioq_lock(&g_bio_run_up);
+		bp = g_bioq_first(&g_bio_run_up);
+		if (bp != NULL) {
+			g_bioq_unlock(&g_bio_run_up);
+			mtx_lock(&mymutex);
+			biodone(bp);
+			mtx_unlock(&mymutex);
+			continue;
+		}
+		/* PDROP releases the queue lock while we sleep. */
+		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
+		    PRIBIO | PDROP, "-", hz/10);
+	}
+}
+
+/*
+ * Synchronously read "length" bytes at "offset" from the provider cp
+ * is attached to.  Returns a g_malloc()ed buffer the caller must
+ * g_free(), or NULL on failure.  If "error" is non-NULL the errno
+ * value is stored through it.
+ */
+void *
+g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
+{
+	struct bio *bp;
+	void *ptr;
+	int errorc;
+
+	bp = g_new_bio();
+	/* g_new_bio() allocates M_NOWAIT and can fail; don't deref NULL. */
+	if (bp == NULL) {
+		if (error != NULL)
+			*error = ENOMEM;
+		return (NULL);
+	}
+	bp->bio_cmd = BIO_READ;
+	bp->bio_done = NULL;
+	bp->bio_offset = offset;
+	bp->bio_length = length;
+	ptr = g_malloc(length, M_WAITOK);
+	bp->bio_data = ptr;
+	g_io_request(bp, cp);
+	errorc = biowait(bp, "gread");
+	if (error != NULL)
+		*error = errorc;
+	g_destroy_bio(bp);
+	if (errorc) {
+		g_free(ptr);
+		ptr = NULL;
+	}
+	return (ptr);
+}
+
+/*
+ * Synchronously write "length" bytes from "ptr" at "offset" to the
+ * provider cp is attached to.  Returns 0 or an errno value.
+ */
+int
+g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
+{
+	struct bio *bp;
+	int error;
+
+	bp = g_new_bio();
+	/* g_new_bio() allocates M_NOWAIT and can fail; don't deref NULL. */
+	if (bp == NULL)
+		return (ENOMEM);
+	bp->bio_cmd = BIO_WRITE;
+	bp->bio_done = NULL;
+	bp->bio_offset = offset;
+	bp->bio_length = length;
+	bp->bio_data = ptr;
+	g_io_request(bp, cp);
+	error = biowait(bp, "gwrite");
+	g_destroy_bio(bp);
+	return (error);
+}
diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c
new file mode 100644
index 0000000..9492241
--- /dev/null
+++ b/sys/geom/geom_kern.c
@@ -0,0 +1,241 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/eventhandler.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/sbuf.h>
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+
+MALLOC_DEFINE(M_GEOM, "GEOM", "Geom data structures");
+
+struct sx topology_lock;
+
+static struct proc *g_up_proc;
+
+int g_debugflags;
+int g_collectstats = 1;
+int g_shutdown;
+
+/*
+ * G_UP and G_DOWN are the two threads which push I/O through the
+ * stack.
+ *
+ * Things are processed in FIFO order, but these threads could be
+ * part of I/O prioritization by deciding which bios/bioqs to service
+ * in what order.
+ *
+ * We have only one thread in each direction; it is believed that this
+ * will be enough until a very non-trivial workload appears in the
+ * UP/DOWN path, but more than one can actually be run without problems.
+ *
+ * Holding the "mymutex" is a debugging feature: it prevents people
+ * from sleeping in the UP/DOWN I/O path by mistake or design (doing
+ * so almost invariably results in deadlocks since it stalls all I/O
+ * processing in the given direction).
+ */
+
+/*
+ * Body of the "g_up" kernel process: runs g_io_schedule_up() forever
+ * at PRIBIO base priority, outside Giant.
+ */
+static void
+g_up_procbody(void)
+{
+	struct proc *p = g_up_proc;
+	struct thread *tp = FIRST_THREAD_IN_PROC(p);
+
+	mtx_assert(&Giant, MA_NOTOWNED);
+	tp->td_base_pri = PRIBIO;
+	for(;;) {
+		g_io_schedule_up(tp);
+	}
+}
+
+/* Descriptor used by kproc_start() in g_init() to create "g_up". */
+struct kproc_desc g_up_kp = {
+	"g_up",
+	g_up_procbody,
+	&g_up_proc,
+};
+
+static struct proc *g_down_proc;
+
+/*
+ * Body of the "g_down" kernel process: runs g_io_schedule_down()
+ * forever at PRIBIO base priority, outside Giant.
+ */
+static void
+g_down_procbody(void)
+{
+	struct proc *p = g_down_proc;
+	struct thread *tp = FIRST_THREAD_IN_PROC(p);
+
+	mtx_assert(&Giant, MA_NOTOWNED);
+	tp->td_base_pri = PRIBIO;
+	for(;;) {
+		g_io_schedule_down(tp);
+	}
+}
+
+/* Descriptor used by kproc_start() in g_init() to create "g_down". */
+struct kproc_desc g_down_kp = {
+	"g_down",
+	g_down_procbody,
+	&g_down_proc,
+};
+
+static struct proc *g_event_proc;
+
+/*
+ * Body of the "g_event" kernel process: run pending events, then
+ * sleep on g_wait_event, polling at least every hz/10 ticks.
+ */
+static void
+g_event_procbody(void)
+{
+	struct proc *p = g_event_proc;
+	struct thread *tp = FIRST_THREAD_IN_PROC(p);
+
+	mtx_assert(&Giant, MA_NOTOWNED);
+	tp->td_base_pri = PRIBIO;
+	for(;;) {
+		g_run_events();
+		tsleep(&g_wait_event, PRIBIO, "-", hz/10);
+	}
+}
+
+static struct kproc_desc g_event_kp = {
+	"g_event",
+	g_event_procbody,
+	&g_event_proc,
+};
+
+/* shutdown_pre_sync hook: flag that a system shutdown is in progress. */
+static void
+geom_shutdown(void *foo __unused)
+{
+
+	g_shutdown = 1;
+}
+
+/*
+ * Bring GEOM to life: set up the topology lock, the I/O queues, the
+ * event machinery and the control interface, then start the g_event,
+ * g_up and g_down kernel processes and hook the shutdown event.
+ */
+void
+g_init(void)
+{
+
+	g_trace(G_T_TOPOLOGY, "g_ignition");
+	sx_init(&topology_lock, "GEOM topology");
+	g_io_init();
+	g_event_init();
+	g_ctl_init();
+	mtx_lock(&Giant);
+	kproc_start(&g_event_kp);
+	kproc_start(&g_up_kp);
+	kproc_start(&g_down_kp);
+	mtx_unlock(&Giant);
+	EVENTHANDLER_REGISTER(shutdown_pre_sync, geom_shutdown, NULL,
+	    SHUTDOWN_PRI_FIRST);
+}
+
+/*
+ * sysctl handler: dump the GEOM configuration as plain text.  The
+ * dump is produced via g_waitfor_event() so g_conftxt() can run in a
+ * context where taking the topology lock is safe.
+ * NOTE(review): the sbuf_new() return value is not checked, and no
+ * explicit sbuf_finish() is visible here — presumably the dump
+ * callback finishes the sbuf; verify before relying on this.
+ */
+static int
+sysctl_kern_geom_conftxt(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	struct sbuf *sb;
+
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	sbuf_clear(sb);
+	g_waitfor_event(g_conftxt, sb, M_WAITOK, NULL);
+	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+	return error;
+}
+
+/* sysctl handler: dump the GEOM configuration in graphviz dot format. */
+static int
+sysctl_kern_geom_confdot(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	struct sbuf *sb;
+
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	sbuf_clear(sb);
+	g_waitfor_event(g_confdot, sb, M_WAITOK, NULL);
+	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+	return error;
+}
+
+/* sysctl handler: dump the GEOM configuration as XML. */
+static int
+sysctl_kern_geom_confxml(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	struct sbuf *sb;
+
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	sbuf_clear(sb);
+	g_waitfor_event(g_confxml, sb, M_WAITOK, NULL);
+	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+	return error;
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, geom, CTLFLAG_RW, 0, "GEOMetry management");
+
+SYSCTL_PROC(_kern_geom, OID_AUTO, confxml, CTLTYPE_STRING|CTLFLAG_RD,
+ 0, 0, sysctl_kern_geom_confxml, "",
+ "Dump the GEOM config in XML");
+
+SYSCTL_PROC(_kern_geom, OID_AUTO, confdot, CTLTYPE_STRING|CTLFLAG_RD,
+ 0, 0, sysctl_kern_geom_confdot, "",
+ "Dump the GEOM config in dot");
+
+SYSCTL_PROC(_kern_geom, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RD,
+ 0, 0, sysctl_kern_geom_conftxt, "",
+ "Dump the GEOM config in txt");
+
+SYSCTL_INT(_kern_geom, OID_AUTO, debugflags, CTLFLAG_RW,
+ &g_debugflags, 0, "");
+
+SYSCTL_INT(_kern_geom, OID_AUTO, collectstats, CTLFLAG_RW,
+ &g_collectstats, 0, "");
+
+SYSCTL_INT(_debug_sizeof, OID_AUTO, g_class, CTLFLAG_RD,
+ 0, sizeof(struct g_class), "");
+SYSCTL_INT(_debug_sizeof, OID_AUTO, g_geom, CTLFLAG_RD,
+ 0, sizeof(struct g_geom), "");
+SYSCTL_INT(_debug_sizeof, OID_AUTO, g_provider, CTLFLAG_RD,
+ 0, sizeof(struct g_provider), "");
+SYSCTL_INT(_debug_sizeof, OID_AUTO, g_consumer, CTLFLAG_RD,
+ 0, sizeof(struct g_consumer), "");
+SYSCTL_INT(_debug_sizeof, OID_AUTO, g_bioq, CTLFLAG_RD,
+ 0, sizeof(struct g_bioq), "");
diff --git a/sys/geom/geom_mbr.c b/sys/geom/geom_mbr.c
new file mode 100644
index 0000000..3abcf76
--- /dev/null
+++ b/sys/geom/geom_mbr.c
@@ -0,0 +1,451 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/endian.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <sys/diskmbr.h>
+#include <sys/sbuf.h>
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+
+#define MBR_CLASS_NAME "MBR"
+#define MBREXT_CLASS_NAME "MBREXT"
+
+/*
+ * Two well-known dummy partition tables, each with a single
+ * DOSPTYP_386BSD entry (differing only in the ending head, 255 vs
+ * 254).  g_mbr_modify() refuses to write a table matching either.
+ */
+static struct dos_partition historical_bogus_partition_table[NDOSPART] = {
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+	{ 0x80, 0, 1, 0, DOSPTYP_386BSD, 255, 255, 255, 0, 50000, },
+};
+
+static struct dos_partition historical_bogus_partition_table_fixed[NDOSPART] = {
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+	{ 0x80, 0, 1, 0, DOSPTYP_386BSD, 254, 255, 255, 0, 50000, },
+};
+
+/* Print one decoded dos_partition entry (bootverbose diagnostics). */
+static void
+g_mbr_print(int i, struct dos_partition *dp)
+{
+
+	printf("[%d] f:%02x typ:%d", i, dp->dp_flag, dp->dp_typ);
+	printf("    s(CHS):%d/%d/%d", DPCYL(dp->dp_scyl, dp->dp_ssect),
+	    dp->dp_shd, DPSECT(dp->dp_ssect));
+	printf("    e(CHS):%d/%d/%d", DPCYL(dp->dp_ecyl, dp->dp_esect),
+	    dp->dp_ehd, DPSECT(dp->dp_esect));
+	printf("    s:%d l:%d\n", dp->dp_start, dp->dp_size);
+}
+
+/* Per-geom state for the MBR slicer. */
+struct g_mbr_softc {
+	int type [NDOSPART];	/* partition type of each slice */
+	u_int sectorsize;	/* sector size of the parent provider */
+	u_char sec0[512];	/* copy of the last accepted MBR sector */
+};
+
+/*
+ * Validate and install the MBR partition table found in the 512 byte
+ * sector image sec0, (re)configuring the four slices accordingly and
+ * caching the sector in the softc.  Returns 0 on success or an errno
+ * (EBUSY for unacceptable tables).  Topology lock must be held.
+ */
+static int
+g_mbr_modify(struct g_geom *gp, struct g_mbr_softc *ms, u_char *sec0)
+{
+	int i, error;
+	off_t l[NDOSPART];
+	struct dos_partition ndp[NDOSPART], *dp;
+
+	g_topology_assert();
+
+	/*
+	 * The sector must carry the 0x55aa boot signature.  Reject it
+	 * if EITHER byte is wrong — hence ||; the previous && test
+	 * accepted sectors with only one correct signature byte.
+	 */
+	if (sec0[0x1fe] != 0x55 || sec0[0x1ff] != 0xaa)
+		return (EBUSY);
+
+	dp = ndp;
+	for (i = 0; i < NDOSPART; i++) {
+		dos_partition_dec(
+		    sec0 + DOSPARTOFF + i * sizeof(struct dos_partition),
+		    dp + i);
+		if (bootverbose)
+			g_mbr_print(i, dp + i);
+	}
+	if ((!bcmp(dp, historical_bogus_partition_table,
+	    sizeof historical_bogus_partition_table)) ||
+	    (!bcmp(dp, historical_bogus_partition_table_fixed,
+	    sizeof historical_bogus_partition_table_fixed))) {
+		/*
+		 * We will not allow people to write these from "the inside",
+		 * Since properly selfdestructing takes too much code.  If
+		 * people really want to do this, they cannot have any
+		 * providers of this geom open, and in that case they can just
+		 * as easily overwrite the MBR in the parent device.
+		 */
+		return(EBUSY);
+	}
+	for (i = 0; i < NDOSPART; i++) {
+		/*
+		 * A Protective MBR (PMBR) has a single partition of
+		 * type 0xEE spanning the whole disk. Such a MBR
+		 * protects a GPT on the disk from MBR tools that
+		 * don't know anything about GPT. We're interpreting
+		 * it a bit more loosely: any partition of type 0xEE
+		 * is to be skipped as it doesn't contain any data
+		 * that we should care about. We still allow other
+		 * partitions to be present in the MBR. A PMBR will
+		 * be handled correctly anyway.
+		 */
+		if (dp[i].dp_typ == DOSPTYP_PMBR)
+			l[i] = 0;
+		else if (dp[i].dp_flag != 0 && dp[i].dp_flag != 0x80)
+			l[i] = 0;
+		else if (dp[i].dp_typ == 0)
+			l[i] = 0;
+		else
+			l[i] = (off_t)dp[i].dp_size * ms->sectorsize;
+		error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
+		    (off_t)dp[i].dp_start * ms->sectorsize, l[i],
+		    ms->sectorsize, "%ss%d", gp->name, 1 + i);
+		if (error)
+			return (error);
+	}
+	/* All four slices passed the check; now install them for real. */
+	for (i = 0; i < NDOSPART; i++) {
+		ms->type[i] = dp[i].dp_typ;
+		g_slice_config(gp, i, G_SLICE_CONFIG_SET,
+		    (off_t)dp[i].dp_start * ms->sectorsize, l[i],
+		    ms->sectorsize, "%ss%d", gp->name, 1 + i);
+	}
+	bcopy(sec0, ms->sec0, 512);
+	return (0);
+}
+
+/*
+ * Event-thread continuation of a DIOCSMBR ioctl (queued by
+ * g_mbr_start()): validate the new MBR with g_mbr_modify() and, if
+ * acceptable, write it back to sector 0 of the parent provider.
+ * Completes the bio in all cases.
+ */
+static void
+g_mbr_ioctl(void *arg, int flag)
+{
+	struct bio *bp;
+	struct g_geom *gp;
+	struct g_slicer *gsp;
+	struct g_mbr_softc *ms;
+	struct g_ioctl *gio;
+	struct g_consumer *cp;
+	u_char *sec0;
+	int error;
+
+	bp = arg;
+	/* The event was cancelled; fail the pending bio. */
+	if (flag == EV_CANCEL) {
+		g_io_deliver(bp, ENXIO);
+		return;
+	}
+	gp = bp->bio_to->geom;
+	gsp = gp->softc;
+	ms = gsp->softc;
+	gio = (struct g_ioctl *)bp->bio_data;
+
+	/* The disklabel to set is the ioctl argument. */
+	sec0 = gio->data;
+
+	error = g_mbr_modify(gp, ms, sec0);
+	if (error) {
+		g_io_deliver(bp, error);
+		return;
+	}
+	cp = LIST_FIRST(&gp->consumer);
+	error = g_write_data(cp, 0, sec0, 512);
+	g_io_deliver(bp, error);
+}
+
+
+/*
+ * Slice start hook: answer MBR::type and MBR::offset BIO_GETATTR
+ * queries directly, and reroute DIOCSMBR ioctls to the event thread.
+ * Returns non-zero if the bio was (or will be) handled here.
+ */
+static int
+g_mbr_start(struct bio *bp)
+{
+	struct g_provider *pp;
+	struct g_geom *gp;
+	struct g_mbr_softc *mp;
+	struct g_slicer *gsp;
+	struct g_ioctl *gio;
+	int idx, error;
+
+	pp = bp->bio_to;
+	idx = pp->index;
+	gp = pp->geom;
+	gsp = gp->softc;
+	mp = gsp->softc;
+	if (bp->bio_cmd == BIO_GETATTR) {
+		if (g_handleattr_int(bp, "MBR::type", mp->type[idx]))
+			return (1);
+		if (g_handleattr_off_t(bp, "MBR::offset",
+		    gsp->slices[idx].offset))
+			return (1);
+	}
+
+	/* We only handle ioctl(2) requests of the right format. */
+	if (strcmp(bp->bio_attribute, "GEOM::ioctl"))
+		return (0);
+	else if (bp->bio_length != sizeof(*gio))
+		return (0);
+
+	/* Get hold of the ioctl parameters. */
+	gio = (struct g_ioctl *)bp->bio_data;
+
+	switch (gio->cmd) {
+	case DIOCSMBR:
+		/*
+		 * These we cannot do without the topology lock and some
+		 * some I/O requests.  Ask the event-handler to schedule
+		 * us in a less restricted environment.
+		 */
+		error = g_post_event(g_mbr_ioctl, bp, M_NOWAIT, gp, NULL);
+		if (error)
+			g_io_deliver(bp, error);
+		/*
+		 * We must return non-zero to indicate that we will deal
+		 * with this bio, even though we have not done so yet.
+		 */
+		return (1);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Config dumper: emit the generic slice info plus each provider's
+ * partition type (terse one-liner when indent is NULL, XML otherwise).
+ */
+static void
+g_mbr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp)
+{
+	struct g_mbr_softc *mp;
+	struct g_slicer *gsp;
+
+	gsp = gp->softc;
+	mp = gsp->softc;
+	g_slice_dumpconf(sb, indent, gp, cp, pp);
+	if (pp != NULL) {
+		if (indent == NULL)
+			sbuf_printf(sb, " ty %d", mp->type[pp->index]);
+		else
+			sbuf_printf(sb, "%s<type>%d</type>\n", indent,
+			    mp->type[pp->index]);
+	}
+}
+
+/*
+ * Taste a provider for an MBR: create a slicer geom with NDOSPART
+ * slices, read sector 0 and let g_mbr_modify() install the table.
+ * Returns the new geom, or NULL if nothing usable was found.
+ */
+static struct g_geom *
+g_mbr_taste(struct g_class *mp, struct g_provider *pp, int insist)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	int error;
+	struct g_mbr_softc *ms;
+	u_int fwsectors, sectorsize;
+	u_char *buf;
+
+	g_trace(G_T_TOPOLOGY, "mbr_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+	gp = g_slice_new(mp, NDOSPART, pp, &cp, &ms, sizeof *ms, g_mbr_start);
+	if (gp == NULL)
+		return (NULL);
+	g_topology_unlock();
+	gp->dumpconf = g_mbr_dumpconf;
+	do {
+		/* Unless insisted, only taste directly below the disk. */
+		if (gp->rank != 2 && insist == 0)
+			break;
+		/* NOTE(review): fwsectors is fetched but never used below. */
+		error = g_getattr("GEOM::fwsectors", cp, &fwsectors);
+		if (error)
+			fwsectors = 17;
+		sectorsize = cp->provider->sectorsize;
+		if (sectorsize < 512)
+			break;
+		ms->sectorsize = sectorsize;
+		buf = g_read_data(cp, 0, sectorsize, &error);
+		if (buf == NULL || error != 0)
+			break;
+		g_topology_lock();
+		g_mbr_modify(gp, ms, buf);
+		g_topology_unlock();
+		g_free(buf);
+		break;
+	} while (0);
+	g_topology_lock();
+	/* Drop the read access g_slice_new() took for tasting. */
+	g_access_rel(cp, -1, 0, 0);
+	if (LIST_EMPTY(&gp->provider)) {
+		/* No slices were configured; undo the geom. */
+		g_slice_spoiled(cp);
+		return (NULL);
+	}
+	return (gp);
+}
+
+/* GEOM class glue for the MBR slicer. */
+static struct g_class g_mbr_class = {
+	.name = MBR_CLASS_NAME,
+	.taste = g_mbr_taste,
+};
+
+DECLARE_GEOM_CLASS(g_mbr_class, g_mbr);
+
+/* Up to 32 logical partitions in an extended-partition chain. */
+#define NDOSEXTPART		32
+struct g_mbrext_softc {
+	int type [NDOSEXTPART];	/* partition type of each logical slice */
+};
+
+/*
+ * Slice start hook for extended partitions: answer MBR::type
+ * BIO_GETATTR queries; everything else is left to the slicer.
+ */
+static int
+g_mbrext_start(struct bio *bp)
+{
+	struct g_provider *pp;
+	struct g_geom *gp;
+	struct g_mbrext_softc *mp;
+	struct g_slicer *gsp;
+	int idx;
+
+	pp = bp->bio_to;
+	idx = pp->index;
+	gp = pp->geom;
+	gsp = gp->softc;
+	mp = gsp->softc;
+	if (bp->bio_cmd == BIO_GETATTR) {
+		if (g_handleattr_int(bp, "MBR::type", mp->type[idx]))
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Config dumper: emit the generic slice info plus each provider's
+ * partition type (terse one-liner when indent is NULL, XML otherwise).
+ */
+static void
+g_mbrext_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp)
+{
+	struct g_mbrext_softc *mp;
+	struct g_slicer *gsp;
+
+	g_slice_dumpconf(sb, indent, gp, cp, pp);
+	gsp = gp->softc;
+	mp = gsp->softc;
+	if (pp != NULL) {
+		if (indent == NULL)
+			sbuf_printf(sb, " ty %d", mp->type[pp->index]);
+		else
+			sbuf_printf(sb, "%s<type>%d</type>\n", indent,
+			    mp->type[pp->index]);
+	}
+}
+
+/*
+ * Taste an MBR slice for an extended-partition (DOSPTYP_EXT/EXTLBA)
+ * chain.  Walk the linked list of extended boot records, creating one
+ * slice per logical partition, named after the parent with its
+ * trailing slice digit stripped, numbered from 5 upwards.
+ */
+static struct g_geom *
+g_mbrext_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	int error, i, slice;
+	struct g_mbrext_softc *ms;
+	off_t off;
+	u_char *buf;
+	struct dos_partition dp[4];
+	u_int fwsectors, sectorsize;
+
+	g_trace(G_T_TOPOLOGY, "g_mbrext_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+	/* Only taste providers published by the MBR class. */
+	if (strcmp(pp->geom->class->name, MBR_CLASS_NAME))
+		return (NULL);
+	gp = g_slice_new(mp, NDOSEXTPART, pp, &cp, &ms, sizeof *ms,
+	    g_mbrext_start);
+	if (gp == NULL)
+		return (NULL);
+	g_topology_unlock();
+	gp->dumpconf = g_mbrext_dumpconf;
+	off = 0;
+	slice = 0;
+	do {
+		error = g_getattr("MBR::type", cp, &i);
+		if (error || (i != DOSPTYP_EXT && i != DOSPTYP_EXTLBA))
+			break;
+		/* NOTE(review): fwsectors is fetched but never used below. */
+		error = g_getattr("GEOM::fwsectors", cp, &fwsectors);
+		if (error)
+			fwsectors = 17;
+		sectorsize = cp->provider->sectorsize;
+		if (sectorsize != 512)
+			break;
+		for (;;) {
+			buf = g_read_data(cp, off, sectorsize, &error);
+			if (buf == NULL || error != 0)
+				break;
+			/*
+			 * Require the 0x55aa signature; reject the EBR if
+			 * EITHER byte is wrong — hence ||, the previous &&
+			 * test only rejected when both bytes were wrong.
+			 */
+			if (buf[0x1fe] != 0x55 || buf[0x1ff] != 0xaa) {
+				g_free(buf);
+				break;
+			}
+			for (i = 0; i < NDOSPART; i++) 
+				dos_partition_dec(
+				    buf + DOSPARTOFF +
+				    i * sizeof(struct dos_partition), dp + i);
+			g_free(buf);
+			if (bootverbose) {
+				printf("MBREXT Slice %d on %s:\n",
+				    slice + 5, gp->name);
+				g_mbr_print(0, dp);
+				g_mbr_print(1, dp + 1);
+			}
+			/* Entry 0 describes the logical partition itself. */
+			if ((dp[0].dp_flag & 0x7f) == 0 &&
+			     dp[0].dp_size != 0 && dp[0].dp_typ != 0) {
+				g_topology_lock();
+				g_slice_config(gp, slice, G_SLICE_CONFIG_SET,
+				    (((off_t)dp[0].dp_start) << 9ULL) + off,
+				    ((off_t)dp[0].dp_size) << 9ULL,
+				    sectorsize,
+				    "%*.*s%d",
+				    /* %*.*s takes int, not size_t */
+				    (int)strlen(gp->name) - 1,
+				    (int)strlen(gp->name) - 1,
+				    gp->name,
+				    slice + 5);
+				g_topology_unlock();
+				ms->type[slice] = dp[0].dp_typ;
+				slice++;
+			}
+			/* Entry 1, if valid, links to the next EBR. */
+			if (dp[1].dp_flag != 0)
+				break;
+			if (dp[1].dp_typ != DOSPTYP_EXT)
+				break;
+			if (dp[1].dp_size == 0)
+				break;
+			/*
+			 * NOTE(review): the link entry's dp_start is used
+			 * as an absolute offset here; most EBR chains make
+			 * it relative to the extended partition start —
+			 * verify against on-disk layouts before changing.
+			 */
+			off = ((off_t)dp[1].dp_start) << 9ULL;
+		}
+		break;
+	} while (0);
+	g_topology_lock();
+	/* Drop the read access g_slice_new() took for tasting. */
+	g_access_rel(cp, -1, 0, 0);
+	if (LIST_EMPTY(&gp->provider)) {
+		g_slice_spoiled(cp);
+		return (NULL);
+	}
+	return (gp);
+}
+
+
+/* GEOM class glue for the extended-partition (MBREXT) slicer. */
+static struct g_class g_mbrext_class = {
+	.name = MBREXT_CLASS_NAME,
+	.taste = g_mbrext_taste,
+};
+
+DECLARE_GEOM_CLASS(g_mbrext_class, g_mbrext);
diff --git a/sys/geom/geom_mbr_enc.c b/sys/geom/geom_mbr_enc.c
new file mode 100644
index 0000000..da5f997
--- /dev/null
+++ b/sys/geom/geom_mbr_enc.c
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 2003 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Functions to encode or decode struct dos_partition into a bytestream
+ * of correct endianess and packing. These functions do no validation
+ * or sanity checking, they only pack/unpack the fields correctly.
+ *
+ * NB! This file must be usable both in kernel and userland.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/diskmbr.h>
+#include <sys/endian.h>
+
+/*
+ * Decode one 16-byte on-disk MBR partition table entry (little endian,
+ * packed) into struct dos_partition.  No validation or sanity checking
+ * is performed; the bytes are only unpacked into the proper fields.
+ */
+void
+dos_partition_dec(void const *pp, struct dos_partition *d)
+{
+	unsigned char const *buf = pp;
+
+	/* 32-bit little-endian LBA start and size. */
+	d->dp_start = le32dec(buf + 8);
+	d->dp_size = le32dec(buf + 12);
+	/* Single-byte flag, CHS and type fields. */
+	d->dp_flag = buf[0];
+	d->dp_shd = buf[1];
+	d->dp_ssect = buf[2];
+	d->dp_scyl = buf[3];
+	d->dp_typ = buf[4];
+	d->dp_ehd = buf[5];
+	d->dp_esect = buf[6];
+	d->dp_ecyl = buf[7];
+}
+
+/*
+ * Encode struct dos_partition into its 16-byte on-disk representation
+ * (little endian, packed).  The exact inverse of dos_partition_dec();
+ * no validation is performed.
+ */
+void
+dos_partition_enc(void *pp, struct dos_partition *d)
+{
+	unsigned char *buf = pp;
+
+	/* 32-bit little-endian LBA start and size. */
+	le32enc(buf + 8, d->dp_start);
+	le32enc(buf + 12, d->dp_size);
+	/* Single-byte flag, CHS and type fields. */
+	buf[0] = d->dp_flag;
+	buf[1] = d->dp_shd;
+	buf[2] = d->dp_ssect;
+	buf[3] = d->dp_scyl;
+	buf[4] = d->dp_typ;
+	buf[5] = d->dp_ehd;
+	buf[6] = d->dp_esect;
+	buf[7] = d->dp_ecyl;
+}
diff --git a/sys/geom/geom_mirror.c b/sys/geom/geom_mirror.c
new file mode 100644
index 0000000..98111c5
--- /dev/null
+++ b/sys/geom/geom_mirror.c
@@ -0,0 +1,237 @@
+/*-
+ * Copyright (c) 2003 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/bio.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/libkern.h>
+#include <sys/endian.h>
+#include <sys/md5.h>
+#include <sys/errno.h>
+#include <geom/geom.h>
+
+/* Signature expected at the start of the metadata sector of each leg. */
+#define MIRROR_MAGIC "GEOM::MIRROR"
+
+struct g_mirror_softc {
+	off_t mediasize;	/* size exported upwards (media minus one sector) */
+	u_int sectorsize;	/* sector size of the underlying provider */
+	u_char magic[16];	/* per-mirror id read from metadata offset 16 */
+};
+
+
+/*
+ * Attach provider pp as an additional leg of the mirror geom gp.
+ * Must be called with the topology lock held.  Always reports success;
+ * the int return exists for symmetry with other configuration calls.
+ */
+static int
+g_mirror_add(struct g_geom *gp, struct g_provider *pp)
+{
+	struct g_consumer *leg;
+
+	g_trace(G_T_TOPOLOGY, "g_mirror_add(%s, %s)", gp->name, pp->name);
+	g_topology_assert();
+	leg = g_new_consumer(gp);
+	g_attach(leg, pp);
+	return (0);
+}
+
+/*
+ * Orphan/spoil handler: drop our access counts on the departing leg and
+ * detach it.  When the last leg is gone, free the softc and wither the
+ * whole geom with the provider's error code.
+ */
+static void
+g_mirror_orphan(struct g_consumer *cp)
+{
+	struct g_geom *gp;
+	int error;
+
+	g_topology_assert();
+	gp = cp->geom;
+	/* Release whatever access counts we still hold on this leg. */
+	g_access_rel(cp, -cp->acr, -cp->acw, -cp->ace);
+	/* Remember why the provider went away before detaching from it. */
+	error = cp->provider->error;
+	g_detach(cp);
+	g_destroy_consumer(cp);
+	/* Other legs remain: the mirror stays up. */
+	if (!LIST_EMPTY(&gp->consumer))
+		return;
+	g_free(gp->softc);
+	g_wither_geom(gp, error);
+}
+
+/*
+ * Completion handler for writes/deletes.  The same bio is reissued to
+ * each mirror leg in turn; once the consumer that just completed it has
+ * no successor, the original bio is finished via g_std_done().
+ *
+ * Cleanup: the original declared and loaded gp/softc locals that were
+ * never used; they are removed here.
+ */
+static void
+g_mirror_done(struct bio *bp)
+{
+	struct g_consumer *cp;
+
+	/* The leg after the one that just completed this bio, if any. */
+	cp = LIST_NEXT(bp->bio_from, consumer);
+	if (cp == NULL)
+		g_std_done(bp);		/* last leg: complete the request */
+	else
+		g_io_request(bp, cp);	/* pass the bio on to the next leg */
+}
+
+/*
+ * I/O dispatcher for the mirror provider.
+ *
+ * Reads are served from the first leg only.  Writes and deletes are
+ * cloned once and then chained from leg to leg by g_mirror_done().
+ * All downstream offsets are shifted by one sector to skip the
+ * metadata sector at the front of each leg.
+ *
+ * NOTE(review): the return value of g_clone_bio() is not checked here,
+ * unlike in g_slice_start() — confirm it cannot fail in this path.
+ */
+static void
+g_mirror_start(struct bio *bp)
+{
+	struct g_geom *gp;
+	struct bio *bp2;
+	struct g_mirror_softc *sc;
+
+	gp = bp->bio_to->geom;
+	sc = gp->softc;
+	switch(bp->bio_cmd) {
+	case BIO_READ:
+		bp2 = g_clone_bio(bp);
+		/* Skip the metadata sector at offset 0. */
+		bp2->bio_offset += sc->sectorsize;
+		bp2->bio_done = g_std_done;
+		g_io_request(bp2, LIST_FIRST(&gp->consumer));
+		return;
+	case BIO_WRITE:
+	case BIO_DELETE:
+		bp2 = g_clone_bio(bp);
+		bp2->bio_offset += sc->sectorsize;
+		/* g_mirror_done() forwards the bio to the remaining legs. */
+		bp2->bio_done = g_mirror_done;
+		g_io_request(bp2, LIST_FIRST(&gp->consumer));
+		return;
+	default:
+		g_io_deliver(bp, EOPNOTSUPP);
+		return;
+	}
+}
+
+/*
+ * Access method: propagate an access-count change to every leg of the
+ * mirror.  The exclusive count is raised by the read and write deltas
+ * as well, so a leg can never be opened around us.  On failure the
+ * legs already adjusted are rolled back and the error is returned.
+ * With no legs attached at all, ENXIO is reported.
+ */
+static int
+g_mirror_access(struct g_provider *pp, int dr, int dw, int de)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp, *done;
+	int error;
+
+	de += dr + dw;
+
+	gp = pp->geom;
+	error = ENXIO;
+	LIST_FOREACH(cp, &gp->consumer, consumer) {
+		error = g_access_rel(cp, dr, dw, de);
+		if (error == 0)
+			continue;
+		/* Undo the legs we already adjusted, then bail out. */
+		LIST_FOREACH(done, &gp->consumer, consumer) {
+			if (done == cp)
+				break;
+			g_access_rel(done, -dr, -dw, -de);
+		}
+		return (error);
+	}
+	return (error);
+}
+
+/*
+ * Taste method: examine provider pp for mirror metadata.
+ *
+ * Sector 0 of a leg must start with MIRROR_MAGIC; bytes 16..31 hold a
+ * per-mirror id.  If another geom of this class already carries the
+ * same id, pp is added to it as an extra leg; otherwise a new mirror
+ * geom and provider (one sector smaller than pp) are created.
+ *
+ * The topology lock is dropped around the g_read_data() call and
+ * retaken for all topology changes.
+ */
+static struct g_geom *
+g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+	struct g_geom *gp, *gp2;
+	struct g_provider *pp2;
+	struct g_consumer *cp;
+	struct g_mirror_softc *sc;
+	int error;
+	u_int sectorsize;
+	u_char *buf;
+
+	g_trace(G_T_TOPOLOGY, "mirror_taste(%s, %s)", mp->name, pp->name);
+	g_topology_assert();
+	gp = g_new_geomf(mp, "%s.mirror", pp->name);
+
+	gp->start = g_mirror_start;
+	gp->spoiled = g_mirror_orphan;
+	gp->orphan = g_mirror_orphan;
+	gp->access= g_mirror_access;
+	cp = g_new_consumer(gp);
+	g_attach(cp, pp);
+	/* We need read access to probe the metadata sector. */
+	error = g_access_rel(cp, 1, 0, 0);
+	if (error) {
+		g_detach(cp);
+		g_destroy_consumer(cp);
+		g_destroy_geom(gp);
+		return(NULL);
+	}
+	g_topology_unlock();
+	do {
+		sectorsize = cp->provider->sectorsize;
+		buf = g_read_data(cp, 0, sectorsize, &error);
+		if (buf == NULL || error != 0)
+			break;
+		if (memcmp(buf, MIRROR_MAGIC, strlen(MIRROR_MAGIC)))
+			break;
+		/* Look for an existing mirror with the same id. */
+		LIST_FOREACH(gp2, &mp->geom, geom) {
+			sc = gp2->softc;
+			if (sc == NULL)
+				continue;
+			if (memcmp(buf + 16, sc->magic, sizeof sc->magic))
+				continue;
+			break;
+		}
+		/* We found somebody else */
+		if (gp2 != NULL) {
+			g_topology_lock();
+			g_mirror_add(gp2, pp);
+			g_topology_unlock();
+			break;
+		}
+		/* First leg seen: create the mirror geom proper. */
+		gp->softc = g_malloc(sizeof(struct g_mirror_softc), M_WAITOK);
+		sc = gp->softc;
+		memcpy(sc->magic, buf + 16, sizeof sc->magic);
+		g_topology_lock();
+		pp2 = g_new_providerf(gp, "%s", gp->name);
+		/* Export one sector less, hiding the metadata sector. */
+		pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize;
+		pp2->sectorsize = sc->sectorsize = pp->sectorsize;
+		g_error_provider(pp2, 0);
+		g_topology_unlock();
+	} while (0);
+	g_topology_lock();
+	if (buf != NULL)
+		g_free(buf);
+	g_access_rel(cp, -1, 0, 0);
+	/* Keep the geom only if we became the mirror head ourselves. */
+	if (gp->softc != NULL)
+		return (gp);
+	g_detach(cp);
+	g_destroy_consumer(cp);
+	g_destroy_geom(gp);
+	return (NULL);
+}
+
+#define MIRROR_CLASS_NAME "MIRROR"
+
+/* GEOM class glue: hook the mirror taste method into the framework. */
+static struct g_class g_mirror_class = {
+	.name = MIRROR_CLASS_NAME,
+	.taste = g_mirror_taste,
+};
+
+DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
diff --git a/sys/geom/geom_pc98.c b/sys/geom/geom_pc98.c
new file mode 100644
index 0000000..b6d2c21
--- /dev/null
+++ b/sys/geom/geom_pc98.c
@@ -0,0 +1,319 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <sys/diskpc98.h>
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+
+#define PC98_CLASS_NAME "PC98"
+
+struct g_pc98_softc {
+	u_int fwsectors, fwheads, sectorsize;	/* geometry for C/H/S math */
+	int type[NDOSPART];	/* (sid << 8) | mid for each slice */
+	u_char sec[8192];	/* copy of the raw partition table sectors */
+};
+
+/*
+ * Dump one PC98 partition table entry to the console: a hexdump of the
+ * raw entry followed by a decoded one-line summary.  Used only under
+ * bootverbose for diagnostics.
+ */
+static void
+g_pc98_print(int i, struct pc98_partition *dp)
+{
+	char name[17];
+
+	/* dp_name is not NUL terminated on disk; make a terminated copy. */
+	strncpy(name, dp->dp_name, 16);
+	name[16] = '\0';
+
+	g_hexdump(dp, sizeof(dp[0]));
+	printf("[%d] mid:%d(0x%x) sid:%d(0x%x) s:%d/%d/%d e:%d/%d/%d sname:%s\n",
+	    i, dp->dp_mid, dp->dp_mid, dp->dp_sid, dp->dp_sid,
+	    dp->dp_scyl, dp->dp_shd, dp->dp_ssect,
+	    dp->dp_ecyl, dp->dp_ehd, dp->dp_esect, name);
+}
+
+/*
+ * Validate the 8 KB partition-table image in 'sec' and (re)configure the
+ * slices of gp accordingly.  PC98 partitions are cylinder granular, so
+ * offsets/lengths are computed from the firmware geometry in ms.
+ * Returns 0 on success or EBUSY/an error from g_slice_config() without
+ * having changed any slice.  Called with the topology lock held.
+ */
+static int
+g_pc98_modify(struct g_geom *gp, struct g_pc98_softc *ms, u_char *sec)
+{
+	int i, error;
+	off_t s[NDOSPART], l[NDOSPART];
+	struct pc98_partition dp[NDOSPART];
+
+	g_topology_assert();
+
+	/* Standard boot-signature check at the end of the first sector. */
+	if (sec[0x1fe] != 0x55 || sec[0x1ff] != 0xaa)
+		return (EBUSY);
+
+#if 0
+	/*
+	 * XXX: Some sources indicate this is a magic sequence, but appearantly
+	 * XXX: it is not universal. Documentation would be wonderful to have.
+	 */
+	if (sec[4] != 'I' || sec[5] != 'P' || sec[6] != 'L' || sec[7] != '1')
+		return (EBUSY);
+#endif
+
+	/* The partition table proper lives in the second 512-byte sector. */
+	for (i = 0; i < NDOSPART; i++)
+		pc98_partition_dec(
+			sec + 512 + i * sizeof(struct pc98_partition), &dp[i]);
+
+	/* First pass: compute byte offsets/lengths and CHECK them all. */
+	for (i = 0; i < NDOSPART; i++) {
+		/* If start and end are identical it's bogus */
+		if (dp[i].dp_ssect == dp[i].dp_esect &&
+		    dp[i].dp_shd == dp[i].dp_ehd &&
+		    dp[i].dp_scyl == dp[i].dp_ecyl)
+			s[i] = l[i] = 0;
+		else if (dp[i].dp_ecyl == 0)
+			s[i] = l[i] = 0;
+		else {
+			/* Cylinder granular: start/length from cyl numbers. */
+			s[i] = (off_t)dp[i].dp_scyl *
+			    ms->fwsectors * ms->fwheads * ms->sectorsize;
+			l[i] = (off_t)(dp[i].dp_ecyl - dp[i].dp_scyl + 1) *
+			    ms->fwsectors * ms->fwheads * ms->sectorsize;
+		}
+		if (bootverbose) {
+			printf("PC98 Slice %d on %s:\n", i + 1, gp->name);
+			g_pc98_print(i, dp + i);
+		}
+		if (s[i] < 0 || l[i] < 0)
+			error = EBUSY;
+		else
+			error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
+			    s[i], l[i], ms->sectorsize,
+			    "%ss%d", gp->name, i + 1);
+		if (error)
+			return (error);
+	}
+
+	/* Second pass: all entries checked out, commit the configuration. */
+	for (i = 0; i < NDOSPART; i++) {
+		ms->type[i] = (dp[i].dp_sid << 8) | dp[i].dp_mid;
+		g_slice_config(gp, i, G_SLICE_CONFIG_SET, s[i], l[i],
+		    ms->sectorsize, "%ss%d", gp->name, i + 1);
+	}
+
+	/* Keep a copy of the raw table for dumpconf and ioctl use. */
+	bcopy(sec, ms->sec, sizeof (ms->sec));
+
+	return (0);
+}
+
+/*
+ * Event-handler half of the DIOCSPC98 ioctl: runs with the topology
+ * lock available (scheduled from g_pc98_start()), validates and applies
+ * the new partition table, then writes it back to the disk.  The bio
+ * is completed with the outcome in all cases.
+ */
+static void
+g_pc98_ioctl(void *arg, int flag)
+{
+	struct bio *bp;
+	struct g_geom *gp;
+	struct g_slicer *gsp;
+	struct g_pc98_softc *ms;
+	struct g_ioctl *gio;
+	struct g_consumer *cp;
+	u_char *sec;
+	int error;
+
+	bp = arg;
+	/* The event queue is being drained: fail the request. */
+	if (flag == EV_CANCEL) {
+		g_io_deliver(bp, ENXIO);
+		return;
+	}
+	gp = bp->bio_to->geom;
+	gsp = gp->softc;
+	ms = gsp->softc;
+	gio = (struct g_ioctl *)bp->bio_data;
+
+	/* The disklabel to set is the ioctl argument. */
+	sec = gio->data;
+
+	/* Reconfigure the slices first; refuse to write a bad table. */
+	error = g_pc98_modify(gp, ms, sec);
+	if (error) {
+		g_io_deliver(bp, error);
+		return;
+	}
+	cp = LIST_FIRST(&gp->consumer);
+	/* Persist the full 8 KB table image to the underlying provider. */
+	error = g_write_data(cp, 0, sec, 8192);
+	g_io_deliver(bp, error);
+}
+
+/*
+ * Slicer start hook: answer PC98-specific BIO_GETATTR queries and
+ * intercept the DIOCSPC98 ioctl.  Returns non-zero when the bio has
+ * been (or will be) dealt with here, 0 to let generic slice code
+ * continue processing it.
+ */
+static int
+g_pc98_start(struct bio *bp)
+{
+	struct g_provider *pp;
+	struct g_geom *gp;
+	struct g_pc98_softc *mp;
+	struct g_slicer *gsp;
+	struct g_ioctl *gio;
+	int idx, error;
+
+	pp = bp->bio_to;
+	idx = pp->index;
+	gp = pp->geom;
+	gsp = gp->softc;
+	mp = gsp->softc;
+	if (bp->bio_cmd == BIO_GETATTR) {
+		if (g_handleattr_int(bp, "PC98::type", mp->type[idx]))
+			return (1);
+		if (g_handleattr_off_t(bp, "PC98::offset",
+		    gsp->slices[idx].offset))
+			return (1);
+	}
+
+	/* We only handle ioctl(2) requests of the right format. */
+	if (strcmp(bp->bio_attribute, "GEOM::ioctl"))
+		return (0);
+	else if (bp->bio_length != sizeof(*gio))
+		return (0);
+	/* Get hold of the ioctl parameters. */
+	gio = (struct g_ioctl *)bp->bio_data;
+
+	switch (gio->cmd) {
+	case DIOCSPC98:
+		/*
+		 * These we cannot do without the topology lock and some
+		 * some I/O requests. Ask the event-handler to schedule
+		 * us in a less restricted environment.
+		 */
+		error = g_post_event(g_pc98_ioctl, bp, M_NOWAIT, gp, NULL);
+		if (error)
+			g_io_deliver(bp, error);
+		/*
+		 * We must return non-zero to indicate that we will deal
+		 * with this bio, even though we have not done so yet.
+		 */
+		return (1);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Dumpconf method: emit per-slice PC98 details (type, slice name) in
+ * addition to the generic slice information.  indent == NULL selects
+ * the terse .conftxt format, otherwise XML is produced.
+ */
+static void
+g_pc98_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
+    struct g_consumer *cp __unused, struct g_provider *pp)
+{
+	struct g_pc98_softc *mp;
+	struct g_slicer *gsp;
+	struct pc98_partition dp;
+	char sname[17];
+
+	gsp = gp->softc;
+	mp = gsp->softc;
+	g_slice_dumpconf(sb, indent, gp, cp, pp);
+	if (pp != NULL) {
+		/* Re-decode this slice's entry from the cached raw table. */
+		pc98_partition_dec(
+			mp->sec + 512 +
+			pp->index * sizeof(struct pc98_partition), &dp);
+		/* dp_name is not NUL terminated; copy and terminate it. */
+		strncpy(sname, dp.dp_name, 16);
+		sname[16] = '\0';
+		if (indent == NULL) {
+			sbuf_printf(sb, " ty %d", mp->type[pp->index]);
+			sbuf_printf(sb, " sn %s", sname);
+		} else {
+			sbuf_printf(sb, "%s<type>%d</type>\n", indent,
+			    mp->type[pp->index]);
+			sbuf_printf(sb, "%s<sname>%s</sname>\n", indent,
+			    sname);
+		}
+	}
+}
+
+/*
+ * Taste method: probe provider pp for a PC98 partition table and, if
+ * found, create a slicer geom exposing its slices.  Declines providers
+ * of our own class (no recursive slicing), non-rank-2 providers in
+ * normal tasting, and sector sizes below 512.  The topology lock is
+ * dropped around the metadata read.
+ */
+static struct g_geom *
+g_pc98_taste(struct g_class *mp, struct g_provider *pp, int flags)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	int error;
+	struct g_pc98_softc *ms;
+	u_int fwsectors, fwheads, sectorsize;
+	u_char *buf;
+
+	g_trace(G_T_TOPOLOGY, "g_pc98_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+	if (flags == G_TF_NORMAL &&
+	    !strcmp(pp->geom->class->name, PC98_CLASS_NAME))
+		return (NULL);
+	gp = g_slice_new(mp, NDOSPART, pp, &cp, &ms, sizeof *ms, g_pc98_start);
+	if (gp == NULL)
+		return (NULL);
+	g_topology_unlock();
+	gp->dumpconf = g_pc98_dumpconf;
+	do {
+		if (gp->rank != 2 && flags == G_TF_NORMAL)
+			break;
+		/* Fall back to a guessed geometry if the disk has none. */
+		error = g_getattr("GEOM::fwsectors", cp, &fwsectors);
+		if (error || fwsectors == 0) {
+			fwsectors = 17;
+			if (bootverbose)
+				printf("g_pc98_taste: guessing %d sectors\n",
+				    fwsectors);
+		}
+		error = g_getattr("GEOM::fwheads", cp, &fwheads);
+		if (error || fwheads == 0) {
+			fwheads = 8;
+			if (bootverbose)
+				printf("g_pc98_taste: guessing %d heads\n",
+				    fwheads);
+		}
+		sectorsize = cp->provider->sectorsize;
+		if (sectorsize < 512)
+			break;
+		/* The boot block plus partition table span the first 8 KB. */
+		buf = g_read_data(cp, 0, 8192, &error);
+		if (buf == NULL || error != 0)
+			break;
+		ms->fwsectors = fwsectors;
+		ms->fwheads = fwheads;
+		ms->sectorsize = sectorsize;
+		g_topology_lock();
+		g_pc98_modify(gp, ms, buf);
+		g_topology_unlock();
+		g_free(buf);
+		break;
+	} while (0);
+	g_topology_lock();
+	g_access_rel(cp, -1, 0, 0);
+	/* No slices configured: let the slicer self-destruct. */
+	if (LIST_EMPTY(&gp->provider)) {
+		g_slice_spoiled(cp);
+		return (NULL);
+	}
+	return (gp);
+}
+
+/* GEOM class glue: register the PC98 slicer's taste method. */
+static struct g_class g_pc98_class = {
+	.name = PC98_CLASS_NAME,
+	.taste = g_pc98_taste,
+};
+
+DECLARE_GEOM_CLASS(g_pc98_class, g_pc98);
diff --git a/sys/geom/geom_pc98_enc.c b/sys/geom/geom_pc98_enc.c
new file mode 100644
index 0000000..04de220
--- /dev/null
+++ b/sys/geom/geom_pc98_enc.c
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 2003 TAKAHASHI Yoshihiro
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/diskpc98.h>
+#include <sys/endian.h>
+
+void
+pc98_partition_dec(void const *pp, struct pc98_partition *d)
+{
+ unsigned char const *ptr = pp;
+ int i;
+
+ d->dp_mid = ptr[0];
+ d->dp_sid = ptr[1];
+ d->dp_dum1 = ptr[2];
+ d->dp_dum2 = ptr[3];
+ d->dp_ipl_sct = ptr[4];
+ d->dp_ipl_head = ptr[5];
+ d->dp_ipl_cyl = le16dec(ptr + 6);
+ d->dp_ssect = ptr[8];
+ d->dp_shd = ptr[9];
+ d->dp_scyl = le16dec(ptr + 10);
+ d->dp_esect = ptr[12];
+ d->dp_ehd = ptr[13];
+ d->dp_ecyl = le16dec(ptr + 14);
+ for (i = 0; i < sizeof (d->dp_name); i++)
+ d->dp_name[i] = ptr[16 + i];
+}
+
+void
+pc98_partition_enc(void *pp, struct pc98_partition *d)
+{
+ unsigned char *ptr = pp;
+ int i;
+
+ ptr[0] = d->dp_mid;
+ ptr[1] = d->dp_sid;
+ ptr[2] = d->dp_dum1;
+ ptr[3] = d->dp_dum2;
+ ptr[4] = d->dp_ipl_sct;
+ ptr[5] = d->dp_ipl_head;
+ le16enc(ptr + 6, d->dp_ipl_cyl);
+ ptr[8] = d->dp_ssect;
+ ptr[9] = d->dp_shd;
+ le16enc(ptr + 10, d->dp_scyl);
+ ptr[12] = d->dp_esect;
+ ptr[13] = d->dp_ehd;
+ le16enc(ptr + 14, d->dp_ecyl);
+ for (i = 0; i < sizeof (d->dp_name); i++)
+ ptr[16 + i] = d->dp_name[i];
+}
diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c
new file mode 100644
index 0000000..64c000e
--- /dev/null
+++ b/sys/geom/geom_slice.c
@@ -0,0 +1,488 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/errno.h>
+#include <sys/sbuf.h>
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+#include <machine/stdarg.h>
+
+static g_orphan_t g_slice_orphan;
+static g_access_t g_slice_access;
+static g_start_t g_slice_start;
+
+/*
+ * Allocate a g_slicer carrying 'nslice' slice table entries and a
+ * method-private softc of 'scsize' bytes.  All memory is zeroed, and
+ * the allocations sleep until they succeed (M_WAITOK), so the result
+ * is never NULL.
+ */
+static struct g_slicer *
+g_slice_alloc(unsigned nslice, unsigned scsize)
+{
+	struct g_slicer *slicer;
+
+	slicer = g_malloc(sizeof *slicer, M_WAITOK | M_ZERO);
+	slicer->slices = g_malloc(nslice * sizeof(struct g_slice),
+	    M_WAITOK | M_ZERO);
+	slicer->softc = g_malloc(scsize, M_WAITOK | M_ZERO);
+	slicer->nslice = nslice;
+	return (slicer);
+}
+
+/*
+ * Release a g_slicer and all of its sub-allocations (slice table,
+ * hotspot table, method softc).
+ *
+ * Robustness fix: tolerate a NULL argument.  g_slice_spoiled() clears
+ * gp->softc before withering the geom, so g_slice_orphan() can hand us
+ * NULL if both paths fire; previously that dereferenced NULL via
+ * gsp->slices.
+ */
+static void
+g_slice_free(struct g_slicer *gsp)
+{
+
+	if (gsp == NULL)
+		return;
+	g_free(gsp->slices);
+	if (gsp->hotspot != NULL)
+		g_free(gsp->hotspot);
+	g_free(gsp->softc);
+	g_free(gsp);
+}
+
+/*
+ * Access method for slice providers.
+ *
+ * Enforces mutual exclusion between overlapping slices: opening one
+ * slice for write is refused while an overlapping slice is held
+ * exclusively, and vice versa.  The slicer also holds one extra
+ * exclusive count on its consumer for the duration of the first-open
+ * to last-close window, preventing anybody from writing the parent
+ * provider underneath us.
+ */
+static int
+g_slice_access(struct g_provider *pp, int dr, int dw, int de)
+{
+	int error;
+	u_int u;
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_provider *pp2;
+	struct g_slicer *gsp;
+	struct g_slice *gsl, *gsl2;
+
+	gp = pp->geom;
+	cp = LIST_FIRST(&gp->consumer);
+	KASSERT (cp != NULL, ("g_slice_access but no consumer"));
+	gsp = gp->softc;
+	gsl = &gsp->slices[pp->index];
+	/* Scan every other configured slice for an overlap conflict. */
+	for (u = 0; u < gsp->nslice; u++) {
+		gsl2 = &gsp->slices[u];
+		if (gsl2->length == 0)
+			continue;
+		if (u == pp->index)
+			continue;
+		if (gsl->offset + gsl->length <= gsl2->offset)
+			continue;
+		if (gsl2->offset + gsl2->length <= gsl->offset)
+			continue;
+		/* overlap */
+		pp2 = gsl2->provider;
+		if ((pp->acw + dw) > 0 && pp2->ace > 0)
+			return (EPERM);
+		if ((pp->ace + de) > 0 && pp2->acw > 0)
+			return (EPERM);
+	}
+	/* On first open, grab an extra "exclusive" bit */
+	if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0)
+		de++;
+	/* ... and let go of it on last close */
+	if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1)
+		de--;
+	error = g_access_rel(cp, dr, dw, de);
+	return (error);
+}
+
+/*
+ * XXX: It should be possible to specify here if we should finish all of the
+ * XXX: bio, or only the non-hot bits. This would get messy if there were
+ * XXX: two hot spots in the same bio, so for now we simply finish off the
+ * XXX: entire bio. Modifying hot data on the way to disk is frowned on
+ * XXX: so making that considerably harder is not a bad idea anyway.
+ */
+/*
+ * Complete a bio that a hotspot handler has finished inspecting by
+ * forwarding it downstream as a normal slice I/O: clone it, clip its
+ * length to the slice boundary, translate the offset into parent
+ * coordinates and issue it to our consumer.  (See the XXX note above
+ * for why the whole bio is always finished.)
+ */
+void
+g_slice_finish_hot(struct bio *bp)
+{
+	struct bio *bp2;
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_slicer *gsp;
+	struct g_slice *gsl;
+	int idx;
+
+	KASSERT(bp->bio_to != NULL,
+	    ("NULL bio_to in g_slice_finish_hot(%p)", bp));
+	KASSERT(bp->bio_from != NULL,
+	    ("NULL bio_from in g_slice_finish_hot(%p)", bp));
+	gp = bp->bio_to->geom;
+	gsp = gp->softc;
+	cp = LIST_FIRST(&gp->consumer);
+	KASSERT(cp != NULL, ("NULL consumer in g_slice_finish_hot(%p)", bp));
+	idx = bp->bio_to->index;
+	gsl = &gsp->slices[idx];
+
+	bp2 = g_clone_bio(bp);
+	if (bp2 == NULL) {
+		g_io_deliver(bp, ENOMEM);
+		return;
+	}
+	/* Clip the request so it does not run past the end of the slice. */
+	if (bp2->bio_offset + bp2->bio_length > gsl->length)
+		bp2->bio_length = gsl->length - bp2->bio_offset;
+	bp2->bio_done = g_std_done;
+	/* Translate from slice-relative to parent-provider offset. */
+	bp2->bio_offset += gsl->offset;
+	g_io_request(bp2, cp);
+	return;
+}
+
+/*
+ * I/O dispatcher for all slice providers.
+ *
+ * Data requests are bounds-checked against the slice, tested against
+ * the configured hotspots (which may deny, redirect or defer them),
+ * then cloned, clipped and re-offset for the parent provider.
+ * BIO_GETATTR is first offered to the slicer method's own start hook,
+ * with special handling to rebase GEOM::kerneldump parameters.
+ */
+static void
+g_slice_start(struct bio *bp)
+{
+	struct bio *bp2;
+	struct g_provider *pp;
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_slicer *gsp;
+	struct g_slice *gsl;
+	struct g_slice_hot *ghp;
+	int idx, error;
+	u_int m_index;
+	off_t t;
+
+	pp = bp->bio_to;
+	gp = pp->geom;
+	gsp = gp->softc;
+	cp = LIST_FIRST(&gp->consumer);
+	idx = pp->index;
+	gsl = &gsp->slices[idx];
+	switch(bp->bio_cmd) {
+	case BIO_READ:
+	case BIO_WRITE:
+	case BIO_DELETE:
+		if (bp->bio_offset > gsl->length) {
+			g_io_deliver(bp, EINVAL); /* XXX: EWHAT ? */
+			return;
+		}
+		/*
+		 * Check if we collide with any hot spaces, and call the
+		 * method once if so.
+		 */
+		t = bp->bio_offset + gsl->offset;
+		for (m_index = 0; m_index < gsp->nhotspot; m_index++) {
+			ghp = &gsp->hotspot[m_index];
+			if (t >= ghp->offset + ghp->length)
+				continue;
+			if (t + bp->bio_length <= ghp->offset)
+				continue;
+			/*
+			 * NOTE: idx is reused below to hold the hotspot
+			 * action; the slice pointer gsl computed above is
+			 * what the data path uses from here on.
+			 */
+			switch(bp->bio_cmd) {
+			case BIO_READ: idx = ghp->ract; break;
+			case BIO_WRITE: idx = ghp->wact; break;
+			case BIO_DELETE: idx = ghp->dact; break;
+			}
+			switch(idx) {
+			case G_SLICE_HOT_ALLOW:
+				/* Fall out and continue normal processing */
+				continue;
+			case G_SLICE_HOT_DENY:
+				g_io_deliver(bp, EROFS);
+				return;
+			case G_SLICE_HOT_START:
+				/* Hand the bio to the slicer's start hook. */
+				error = gsp->start(bp);
+				if (error && error != EJUSTRETURN)
+					g_io_deliver(bp, error);
+				return;
+			case G_SLICE_HOT_CALL:
+				/* Defer to the hot handler via the event queue. */
+				error = g_post_event(gsp->hot, bp, M_NOWAIT,
+				    gp, NULL);
+				if (error)
+					g_io_deliver(bp, error);
+				return;
+			}
+			break;
+		}
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			g_io_deliver(bp, ENOMEM);
+			return;
+		}
+		/* Clip to the slice end, then rebase onto the parent. */
+		if (bp2->bio_offset + bp2->bio_length > gsl->length)
+			bp2->bio_length = gsl->length - bp2->bio_offset;
+		bp2->bio_done = g_std_done;
+		bp2->bio_offset += gsl->offset;
+		g_io_request(bp2, cp);
+		return;
+	case BIO_GETATTR:
+		/* Give the real method a chance to override */
+		if (gsp->start != NULL && gsp->start(bp))
+			return;
+		if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) {
+			struct g_kerneldump *gkd;
+
+			/* Rebase the dump window into parent coordinates. */
+			gkd = (struct g_kerneldump *)bp->bio_data;
+			gkd->offset += gsp->slices[idx].offset;
+			if (gkd->length > gsp->slices[idx].length)
+				gkd->length = gsp->slices[idx].length;
+			/* now, pass it on downwards... */
+		}
+		bp2 = g_clone_bio(bp);
+		if (bp2 == NULL) {
+			g_io_deliver(bp, ENOMEM);
+			return;
+		}
+		bp2->bio_done = g_std_done;
+		g_io_request(bp2, cp);
+		break;
+	default:
+		g_io_deliver(bp, EOPNOTSUPP);
+		return;
+	}
+}
+
+/*
+ * Dumpconf method shared by all slicers: emit the slice index, offset
+ * and length for provider pp.  indent == NULL selects the terse
+ * .conftxt format; otherwise XML elements are produced.
+ *
+ * Fix: the terse branch used to dereference pp unconditionally while
+ * the XML branch guarded against pp == NULL (geom-level dumps); both
+ * branches now skip provider data when no provider is given.
+ */
+void
+g_slice_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
+{
+	struct g_slicer *gsp;
+
+	gsp = gp->softc;
+	if (indent == NULL) {
+		if (pp == NULL)
+			return;
+		sbuf_printf(sb, " i %u", pp->index);
+		sbuf_printf(sb, " o %ju",
+		    (uintmax_t)gsp->slices[pp->index].offset);
+		return;
+	}
+	if (pp != NULL) {
+		sbuf_printf(sb, "%s<index>%u</index>\n", indent, pp->index);
+		sbuf_printf(sb, "%s<length>%ju</length>\n",
+		    indent, (uintmax_t)gsp->slices[pp->index].length);
+		sbuf_printf(sb, "%s<seclength>%ju</seclength>\n", indent,
+		    (uintmax_t)gsp->slices[pp->index].length / 512);
+		sbuf_printf(sb, "%s<offset>%ju</offset>\n", indent,
+		    (uintmax_t)gsp->slices[pp->index].offset);
+		sbuf_printf(sb, "%s<secoffset>%ju</secoffset>\n", indent,
+		    (uintmax_t)gsp->slices[pp->index].offset / 512);
+	}
+}
+
+/*
+ * (Re)configure slice 'idx' of the slicer geom gp.
+ *
+ * 'how' selects the operation: CHECK only validates against current
+ * opens, SET applies the change (creating, resizing or destroying the
+ * slice provider as needed), FORCE applies even while the slice is
+ * open.  A length of 0 deconfigures the slice.  The provider name is
+ * built from the printf-style 'fmt' and following arguments.
+ * Returns 0 or EINVAL/EBUSY.  Called with the topology lock held.
+ */
+int
+g_slice_config(struct g_geom *gp, u_int idx, int how, off_t offset, off_t length, u_int sectorsize, const char *fmt, ...)
+{
+	struct g_provider *pp, *pp2;
+	struct g_slicer *gsp;
+	struct g_slice *gsl;
+	va_list ap;
+	struct sbuf *sb;
+	int acc;
+
+	g_trace(G_T_TOPOLOGY, "g_slice_config(%s, %d, %d)",
+	     gp->name, idx, how);
+	g_topology_assert();
+	gsp = gp->softc;
+	if (idx >= gsp->nslice)
+		return(EINVAL);
+	gsl = &gsp->slices[idx];
+	pp = gsl->provider;
+	if (pp != NULL)
+		acc = pp->acr + pp->acw + pp->ace;
+	else
+		acc = 0;
+	/* An open slice may only grow in place, unless forced. */
+	if (acc != 0 && how != G_SLICE_CONFIG_FORCE) {
+		if (length < gsl->length)
+			return(EBUSY);
+		if (offset != gsl->offset)
+			return(EBUSY);
+	}
+	/* XXX: check offset + length <= MEDIASIZE */
+	if (how == G_SLICE_CONFIG_CHECK)
+		return (0);
+	gsl->length = length;
+	gsl->offset = offset;
+	gsl->sectorsize = sectorsize;
+	if (length == 0) {
+		/* Deconfigure: orphan the provider if one exists. */
+		if (pp == NULL)
+			return (0);
+		if (bootverbose)
+			printf("GEOM: Deconfigure %s\n", pp->name);
+		g_orphan_provider(pp, ENXIO);
+		gsl->provider = NULL;
+		gsp->nprovider--;
+		return (0);
+	}
+	if (pp != NULL) {
+		/* Existing provider: just update its size. */
+		if (bootverbose)
+			printf("GEOM: Reconfigure %s, start %jd length %jd end %jd\n",
+			    pp->name, (intmax_t)offset, (intmax_t)length,
+			    (intmax_t)(offset + length - 1));
+		pp->mediasize = gsl->length;
+		return (0);
+	}
+	/* New slice: format the provider name and create it. */
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	va_start(ap, fmt);
+	sbuf_vprintf(sb, fmt, ap);
+	va_end(ap);
+	sbuf_finish(sb);
+	pp = g_new_providerf(gp, sbuf_data(sb));
+	pp2 = LIST_FIRST(&gp->consumer)->provider;
+	/* Inherit CANDELETE and the (rebased) stripe layout of the parent. */
+	pp->flags = pp2->flags & G_PF_CANDELETE;
+	if (pp2->stripesize > 0) {
+		pp->stripesize = pp2->stripesize;
+		pp->stripeoffset = (pp2->stripeoffset + offset) % pp->stripesize;
+	}
+	if (bootverbose)
+		printf("GEOM: Configure %s, start %jd length %jd end %jd\n",
+		    pp->name, (intmax_t)offset, (intmax_t)length,
+		    (intmax_t)(offset + length - 1));
+	pp->index = idx;
+	pp->mediasize = gsl->length;
+	pp->sectorsize = gsl->sectorsize;
+	gsl->provider = pp;
+	gsp->nprovider++;
+	g_error_provider(pp, 0);
+	sbuf_delete(sb);
+	return(0);
+}
+
+/*
+ * Configure "hotspots". A hotspot is a piece of the parent device which
+ * this particular slicer cares about for some reason. Typically because
+ * it contains meta-data used to configure the slicer.
+ * A hotspot is identified by its index number. The offset and length are
+ * relative to the parent device, and the three "?act" fields specify
+ * what action to take on BIO_READ, BIO_DELETE and BIO_WRITE.
+ *
+ * XXX: There may be a race relative to g_slice_start() here, if an existing
+ * XXX: hotspot is changed wile I/O is happening. Should this become a problem
+ * XXX: we can protect the hotspot stuff with a mutex.
+ */
+
+/*
+ * Configure hotspot 'idx': a range of the parent device (offset/length
+ * in parent coordinates) whose read/delete/write actions are given by
+ * ract/dact/wact.  The hotspot table is grown on demand.
+ * Always returns 0.  Called with the topology lock held.
+ *
+ * Fix: the old-table free was guarded by "gsp->hotspot != NULL" tested
+ * AFTER gsp->hotspot had been overwritten with the new allocation, so
+ * the test was always true and g_free() could be handed a NULL pointer
+ * on the very first growth.  The saved old pointer is tested instead.
+ */
+int
+g_slice_conf_hot(struct g_geom *gp, u_int idx, off_t offset, off_t length, int ract, int dact, int wact)
+{
+	struct g_slicer *gsp;
+	struct g_slice_hot *gsl, *gsl2;
+
+	g_trace(G_T_TOPOLOGY, "g_slice_conf_hot(%s, idx: %d, off: %jd, len: %jd)",
+	    gp->name, idx, (intmax_t)offset, (intmax_t)length);
+	g_topology_assert();
+	gsp = gp->softc;
+	gsl = gsp->hotspot;
+	if (idx >= gsp->nhotspot) {
+		/* Grow the table to idx + 1 zeroed entries. */
+		gsl2 = g_malloc((idx + 1) * sizeof *gsl2, M_WAITOK | M_ZERO);
+		if (gsl != NULL) {
+			bcopy(gsl, gsl2, gsp->nhotspot * sizeof *gsl2);
+			g_free(gsl);
+		}
+		gsp->hotspot = gsl2;
+		gsl = gsl2;
+		gsp->nhotspot = idx + 1;
+	}
+	gsl[idx].offset = offset;
+	gsl[idx].length = length;
+	KASSERT(!((ract | dact | wact) & G_SLICE_HOT_START)
+	    || gsp->start != NULL, ("G_SLICE_HOT_START but no slice->start"));
+	gsl[idx].ract = ract;
+	gsl[idx].dact = dact;
+	gsl[idx].wact = wact;
+	return (0);
+}
+
+/*
+ * Tear a slicer down: detach its softc (so concurrent paths see it is
+ * going away), free all slicer state and wither the geom with ENXIO.
+ * Called with the topology lock held.
+ */
+void
+g_slice_spoiled(struct g_consumer *cp)
+{
+	struct g_geom *gp;
+	struct g_slicer *gsp;
+
+	g_topology_assert();
+	gp = cp->geom;
+	g_trace(G_T_TOPOLOGY, "g_slice_spoiled(%p/%s)", cp, gp->name);
+	gsp = gp->softc;
+	/* Clear the softc first so nobody uses it after the free below. */
+	gp->softc = NULL;
+	g_slice_free(gsp);
+	g_wither_geom(gp, ENXIO);
+}
+
+/*
+ * Default destroy_geom method for slicer classes: tear the slicer down
+ * through the spoil path.  Always reports success.
+ */
+int
+g_slice_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
+{
+
+	g_slice_spoiled(LIST_FIRST(&gp->consumer));
+	return (0);
+}
+
+/*
+ * Create a new slicer geom on top of provider pp with room for
+ * 'slices' slices and an 'extra'-byte method softc.  On success the
+ * new consumer (opened with one read count) is returned via *cpp and
+ * a pointer to the softc via *extrap; the slicer method's start hook
+ * is 'start'.  Returns NULL if the provider cannot be attached or
+ * opened for reading.  Called with the topology lock held.
+ *
+ * NOTE(review): on the error path the gsp allocated by g_slice_alloc()
+ * appears to be left to g_wither_geom() via gp->softc — confirm the
+ * wither path frees it, otherwise this leaks the slicer state.
+ */
+struct g_geom *
+g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_consumer **cpp, void *extrap, int extra, g_slice_start_t *start)
+{
+	struct g_geom *gp;
+	struct g_slicer *gsp;
+	struct g_consumer *cp;
+	void **vp;
+	int error;
+
+	g_topology_assert();
+	vp = (void **)extrap;
+	gp = g_new_geomf(mp, "%s", pp->name);
+	gsp = g_slice_alloc(slices, extra);
+	gsp->start = start;
+	gp->access = g_slice_access;
+	gp->orphan = g_slice_orphan;
+	gp->softc = gsp;
+	gp->start = g_slice_start;
+	gp->spoiled = g_slice_spoiled;
+	gp->dumpconf = g_slice_dumpconf;
+	/* Give the class a destroy method if it did not bring its own. */
+	if (gp->class->destroy_geom == NULL)
+		gp->class->destroy_geom = g_slice_destroy_geom;
+	cp = g_new_consumer(gp);
+	error = g_attach(cp, pp);
+	if (error == 0)
+		error = g_access_rel(cp, 1, 0, 0);
+	if (error) {
+		g_wither_geom(gp, ENXIO);
+		return (NULL);
+	}
+	*vp = gsp->softc;
+	*cpp = cp;
+	return (gp);
+}
+
+/*
+ * Orphan handler: the underlying provider has gone away, so free the
+ * slicer state and wither the geom with the provider's error code.
+ */
+static void
+g_slice_orphan(struct g_consumer *cp)
+{
+
+	g_trace(G_T_TOPOLOGY, "g_slice_orphan(%p/%s)", cp, cp->provider->name);
+	g_topology_assert();
+	KASSERT(cp->provider->error != 0,
+		("g_slice_orphan with error == 0"));
+
+	/* XXX: Not good enough we leak the softc and its suballocations */
+	g_slice_free(cp->geom->softc);
+	g_wither_geom(cp->geom, cp->provider->error);
+}
diff --git a/sys/geom/geom_slice.h b/sys/geom/geom_slice.h
new file mode 100644
index 0000000..4003c8f
--- /dev/null
+++ b/sys/geom/geom_slice.h
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_GEOM_SLICE_H_
+#define _GEOM_GEOM_SLICE_H_
+
+/*
+ * One slice: a contiguous window into the underlying provider,
+ * exported as a provider of its own.
+ */
+struct g_slice {
+	off_t offset;			/* Byte offset into the parent. */
+	off_t length;			/* Byte length of the slice. */
+	u_int sectorsize;
+	struct g_provider *provider;	/* Provider exported for this slice. */
+};
+
+/*
+ * A "hot spot": a byte range of the parent where I/O gets special
+ * treatment.  The three action fields presumably select the treatment
+ * for read, delete and write requests respectively (G_SLICE_HOT_*) --
+ * confirm against geom_slice.c.
+ */
+struct g_slice_hot {
+	off_t offset;
+	off_t length;
+	int ract;
+	int dact;
+	int wact;
+};
+
+/* Optional per-class I/O intercept hook; see struct g_slicer below. */
+typedef int g_slice_start_t (struct bio *bp);
+
+/* Per-geom slicing state, hung off gp->softc by g_slice_new(). */
+struct g_slicer {
+	u_int nslice;			/* Number of slice slots. */
+	u_int nprovider;		/* Slots with a live provider. */
+	struct g_slice *slices;
+
+	u_int nhotspot;			/* Number of hot spots. */
+	struct g_slice_hot *hotspot;
+
+	void *softc;			/* Method-private data. */
+	g_slice_start_t *start;		/* Optional I/O intercept hook. */
+	g_event_t *hot;			/* Handler for G_SLICE_HOT_CALL hits. */
+};
+
+g_dumpconf_t g_slice_dumpconf;
+int g_slice_config(struct g_geom *gp, u_int idx, int how, off_t offset, off_t length, u_int sectorsize, const char *fmt, ...);
+void g_slice_spoiled(struct g_consumer *cp);
+/* "how" argument for g_slice_config(). */
+#define G_SLICE_CONFIG_CHECK 0
+#define G_SLICE_CONFIG_SET 1
+#define G_SLICE_CONFIG_FORCE 2
+struct g_geom * g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_consumer **cpp, void *extrap, int extra, g_slice_start_t *start);
+
+int g_slice_conf_hot(struct g_geom *gp, u_int idx, off_t offset, off_t length, int ract, int dact, int wact);
+/* Hot-spot actions for g_slice_conf_hot(). */
+#define G_SLICE_HOT_ALLOW 1
+#define G_SLICE_HOT_DENY 2
+#define G_SLICE_HOT_START 4
+#define G_SLICE_HOT_CALL 8
+
+int g_slice_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp);
+
+void g_slice_finish_hot(struct bio *bp);
+
+#endif /* _GEOM_GEOM_SLICE_H_ */
diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c
new file mode 100644
index 0000000..98b8f8f
--- /dev/null
+++ b/sys/geom/geom_subr.c
@@ -0,0 +1,809 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/devicestat.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/errno.h>
+#include <sys/sbuf.h>
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+#include <machine/stdarg.h>
+
+/* All loaded GEOM classes. */
+struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes);
+/* All geoms, kept sorted by increasing rank; see redo_rank(). */
+static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
+char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
+
+
+/* Argument block passed from g_modevent() to the load/unload events. */
+struct g_hh00 {
+	struct g_class *mp;
+	int error;		/* Out: result of the unload attempt. */
+};
+
+/*
+ * This event offers a new class a chance to taste all preexisting providers.
+ */
+static void
+g_load_class(void *arg, int flag)
+{
+	struct g_hh00 *hh;
+	struct g_class *mp2, *mp;
+	struct g_geom *gp;
+	struct g_provider *pp;
+
+	g_topology_assert();
+	if (flag == EV_CANCEL)	/* XXX: can't happen ? */
+		return;
+	if (g_shutdown)
+		return;
+
+	/* The argument block was allocated by g_modevent(); free it here. */
+	hh = arg;
+	mp = hh->mp;
+	g_free(hh);
+	g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name);
+	/* Refuse double-loads, both by pointer and by name. */
+	LIST_FOREACH(mp2, &g_classes, class) {
+		KASSERT(mp2 != mp,
+		    ("The GEOM class %s already loaded", mp2->name));
+		KASSERT(strcmp(mp2->name, mp->name) != 0,
+		    ("A GEOM class named %s is already loaded", mp2->name));
+	}
+
+	if (mp->init != NULL)
+		mp->init(mp);
+	LIST_INIT(&mp->geom);
+	LIST_INSERT_HEAD(&g_classes, mp, class);
+	if (mp->taste == NULL)
+		return;
+	/* Offer every provider of every other class for tasting. */
+	LIST_FOREACH(mp2, &g_classes, class) {
+		if (mp == mp2)
+			continue;
+		LIST_FOREACH(gp, &mp2->geom, geom) {
+			LIST_FOREACH(pp, &gp->provider, provider) {
+				mp->taste(mp, pp, 0);
+				g_topology_assert();
+			}
+		}
+	}
+}
+
+/*
+ * Event handler for class unload: refuse if anything in the class is
+ * open, then ask the class to destroy each of its geoms.  The outcome
+ * is reported back through hh->error.
+ */
+static void
+g_unload_class(void *arg, int flag)
+{
+	struct g_hh00 *hh;
+	struct g_class *mp;
+	struct g_geom *gp;
+	struct g_provider *pp;
+	struct g_consumer *cp;
+	int error;
+
+	g_topology_assert();
+	hh = arg;
+	mp = hh->mp;
+	g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name);
+	if (mp->destroy_geom == NULL) {
+		hh->error = EOPNOTSUPP;
+		return;
+	}
+
+	/* We refuse to unload if anything is open */
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		LIST_FOREACH(pp, &gp->provider, provider)
+			if (pp->acr || pp->acw || pp->ace) {
+				hh->error = EBUSY;
+				return;
+			}
+		LIST_FOREACH(cp, &gp->consumer, consumer)
+			if (cp->acr || cp->acw || cp->ace) {
+				hh->error = EBUSY;
+				return;
+			}
+	}
+
+	/* Bar new entries */
+	mp->taste = NULL;
+	mp->config = NULL;
+
+	error = 0;
+	/*
+	 * NOTE(review): destroy_geom may unlink "gp" from mp->geom while
+	 * we iterate with LIST_FOREACH; this looks unsafe if destruction
+	 * is synchronous -- verify that geoms only wither (deferred).
+	 */
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		error = mp->destroy_geom(NULL, mp, gp);
+		if (error != 0)
+			break;
+	}
+	if (error == 0) {
+		LIST_REMOVE(mp, class);
+		if (mp->fini != NULL)
+			mp->fini(mp);
+	}
+	hh->error = error;
+	return;
+}
+
+/*
+ * Module event glue for GEOM classes: posts a load or unload event to
+ * the GEOM event thread.  The very first call fires up GEOM itself.
+ */
+int
+g_modevent(module_t mod, int type, void *data)
+{
+	struct g_hh00 *hh;
+	int error;
+	static int g_ignition;
+
+	if (!g_ignition) {
+		g_ignition++;
+		g_init();
+	}
+	hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
+	hh->mp = data;
+	error = EOPNOTSUPP;
+	switch (type) {
+	case MOD_LOAD:
+		g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", hh->mp->name);
+		/* g_load_class() frees hh when the event runs. */
+		g_post_event(g_load_class, hh, M_WAITOK, NULL);
+		error = 0;
+		break;
+	case MOD_UNLOAD:
+		g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", hh->mp->name);
+		error = g_waitfor_event(g_unload_class, hh, M_WAITOK, NULL);
+		if (error == 0)
+			error = hh->error;
+		g_waitidle();
+		KASSERT(LIST_EMPTY(&hh->mp->geom),
+		    ("Unloaded class (%s) still has geom", hh->mp->name));
+		g_free(hh);
+		break;
+	default:
+		/* Unknown event type: don't leak the argument block. */
+		g_free(hh);
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Allocate a new geom in class "mp"; the geom's name is printf(9)-
+ * formatted from the remaining arguments.
+ */
+struct g_geom *
+g_new_geomf(struct g_class *mp, const char *fmt, ...)
+{
+	struct sbuf *sb;
+	struct g_geom *gp;
+	va_list ap;
+
+	g_topology_assert();
+
+	/* Format the name first. */
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	va_start(ap, fmt);
+	sbuf_vprintf(sb, fmt, ap);
+	va_end(ap);
+	sbuf_finish(sb);
+
+	gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO);
+	gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
+	strcpy(gp->name, sbuf_data(sb));
+	sbuf_delete(sb);
+	gp->class = mp;
+	gp->rank = 1;
+	LIST_INIT(&gp->consumer);
+	LIST_INIT(&gp->provider);
+	LIST_INSERT_HEAD(&mp->geom, gp, geom);
+	TAILQ_INSERT_HEAD(&geoms, gp, geoms);
+	return (gp);
+}
+
+/*
+ * Free a geom which has neither consumers nor providers left.
+ */
+void
+g_destroy_geom(struct g_geom *gp)
+{
+
+	g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
+	g_topology_assert();
+	KASSERT(LIST_EMPTY(&gp->consumer),
+	    ("g_destroy_geom(%s) with consumer(s) [%p]",
+	    gp->name, LIST_FIRST(&gp->consumer)));
+	KASSERT(LIST_EMPTY(&gp->provider),
+	    ("g_destroy_geom(%s) with provider(s) [%p]",
+	    gp->name, LIST_FIRST(&gp->provider)));
+	g_cancel_event(gp);
+	LIST_REMOVE(gp, geom);
+	TAILQ_REMOVE(&geoms, gp, geoms);
+	g_free(gp->name);
+	g_free(gp);
+}
+
+/*
+ * This function is called (repeatedly) until the geom has withered away.
+ */
+void
+g_wither_geom(struct g_geom *gp, int error)
+{
+	struct g_provider *pp, *pp2;
+	struct g_consumer *cp, *cp2;
+	static int once_is_enough;
+
+	/*
+	 * g_destroy_provider()/g_destroy_consumer() below call back into
+	 * this function; the static guard turns those inner calls into
+	 * no-ops so only the outermost invocation walks the lists.
+	 */
+	if (once_is_enough)
+		return;
+	once_is_enough = 1;
+	g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name);
+	g_topology_assert();
+	/* First time through: flag the geom and orphan all its providers. */
+	if (!(gp->flags & G_GEOM_WITHER)) {
+		gp->flags |= G_GEOM_WITHER;
+		LIST_FOREACH(pp, &gp->provider, provider)
+			g_orphan_provider(pp, error);
+	}
+	/* Reap providers nobody is attached to any more. */
+	for (pp = LIST_FIRST(&gp->provider); pp != NULL; pp = pp2) {
+		pp2 = LIST_NEXT(pp, provider);
+		if (!LIST_EMPTY(&pp->consumers))
+			continue;
+		g_destroy_provider(pp);
+	}
+	/* Reap consumers which are closed. */
+	for (cp = LIST_FIRST(&gp->consumer); cp != NULL; cp = cp2) {
+		cp2 = LIST_NEXT(cp, consumer);
+		if (cp->acr || cp->acw || cp->ace)
+			continue;
+		g_detach(cp);
+		g_destroy_consumer(cp);
+	}
+	/* Nothing left at all: the geom itself can go. */
+	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
+		g_destroy_geom(gp);
+	once_is_enough = 0;
+}
+
+/*
+ * Allocate a consumer hanging off "gp" and register it with devstat.
+ * The geom must have an orphan method, or it could never learn that
+ * the provider it attaches to went away.
+ */
+struct g_consumer *
+g_new_consumer(struct g_geom *gp)
+{
+	struct g_consumer *ncp;
+
+	g_topology_assert();
+	KASSERT(gp->orphan != NULL,
+	    ("g_new_consumer on geom(%s) (class %s) without orphan",
+	    gp->name, gp->class->name));
+
+	ncp = g_malloc(sizeof *ncp, M_WAITOK | M_ZERO);
+	ncp->geom = gp;
+	ncp->stat = devstat_new_entry(ncp, -1, 0, DEVSTAT_ALL_SUPPORTED,
+	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
+	LIST_INSERT_HEAD(&gp->consumer, ncp, consumer);
+	return (ncp);
+}
+
+/*
+ * Dispose of a detached, closed consumer; nudge its geom further along
+ * if the geom was already withering.
+ */
+void
+g_destroy_consumer(struct g_consumer *cp)
+{
+	struct g_geom *parent;
+
+	g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
+	g_topology_assert();
+	KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
+	KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
+	KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
+	KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
+	g_cancel_event(cp);
+	parent = cp->geom;
+	LIST_REMOVE(cp, consumer);
+	devstat_remove_entry(cp->stat);
+	g_free(cp);
+	if (parent->flags & G_GEOM_WITHER)
+		g_wither_geom(parent, 0);
+}
+
+/*
+ * Event handler: let every class which does not already sit on top of
+ * this provider have a taste of it.
+ */
+static void
+g_new_provider_event(void *arg, int flag)
+{
+	struct g_class *mp;
+	struct g_provider *pp;
+	struct g_consumer *cp;
+	int interested;
+
+	g_topology_assert();
+	if (flag == EV_CANCEL)
+		return;
+	if (g_shutdown)
+		return;
+	pp = arg;
+	LIST_FOREACH(mp, &g_classes, class) {
+		if (mp->taste == NULL)
+			continue;
+		/* Skip classes with a geom already on this provider. */
+		interested = 1;
+		LIST_FOREACH(cp, &pp->consumers, consumers)
+			if (cp->geom->class == mp)
+				interested = 0;
+		if (!interested)
+			continue;
+		mp->taste(mp, pp, 0);
+		g_topology_assert();
+	}
+}
+
+
+/*
+ * Create a provider on "gp"; the name is printf(9)-formatted from the
+ * remaining arguments and stored in the same allocation as the
+ * provider itself.  A taste event is posted for the new provider.
+ */
+struct g_provider *
+g_new_providerf(struct g_geom *gp, const char *fmt, ...)
+{
+	struct g_provider *pp;
+	struct sbuf *sb;
+	va_list ap;
+
+	g_topology_assert();
+	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
+	va_start(ap, fmt);
+	sbuf_vprintf(sb, fmt, ap);
+	va_end(ap);
+	sbuf_finish(sb);
+	/* One allocation holds both the provider and its name. */
+	pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
+	pp->name = (char *)(pp + 1);
+	strcpy(pp->name, sbuf_data(sb));
+	sbuf_delete(sb);
+	LIST_INIT(&pp->consumers);
+	pp->error = ENXIO;	/* Unusable until g_error_provider(pp, 0). */
+	pp->geom = gp;
+	pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED,
+	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
+	LIST_INSERT_HEAD(&gp->provider, pp, provider);
+	g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL);
+	return (pp);
+}
+
+/*
+ * Set the error status of a provider.  While the error is non-zero,
+ * g_access_rel() refuses to open it further; pass 0 to make the
+ * provider usable.
+ */
+void
+g_error_provider(struct g_provider *pp, int error)
+{
+
+	pp->error = error;
+}
+
+/*
+ * Look a provider up by name; returns NULL if no such provider exists.
+ */
+struct g_provider *
+g_provider_by_name(char const *arg)
+{
+	struct g_class *mp;
+	struct g_geom *gp;
+	struct g_provider *pp;
+
+	/* Walk every provider of every geom of every class. */
+	LIST_FOREACH(mp, &g_classes, class)
+		LIST_FOREACH(gp, &mp->geom, geom)
+			LIST_FOREACH(pp, &gp->provider, provider)
+				if (strcmp(arg, pp->name) == 0)
+					return (pp);
+	return (NULL);
+}
+
+/*
+ * Dispose of an unreferenced, closed provider; nudge its geom further
+ * along if the geom was already withering.
+ */
+void
+g_destroy_provider(struct g_provider *pp)
+{
+	struct g_geom *gp;
+
+	g_topology_assert();
+	KASSERT(LIST_EMPTY(&pp->consumers),
+	    ("g_destroy_provider but attached"));
+	KASSERT (pp->acr == 0, ("g_destroy_provider with acr"));
+	KASSERT (pp->acw == 0, ("g_destroy_provider with acw"));
+	KASSERT (pp->ace == 0, ("g_destroy_provider with ace"));
+	g_cancel_event(pp);
+	LIST_REMOVE(pp, provider);
+	gp = pp->geom;
+	devstat_remove_entry(pp->stat);
+	g_free(pp);
+	if ((gp->flags & G_GEOM_WITHER))
+		g_wither_geom(gp, 0);
+}
+
+/*
+ * We keep the "geoms" list sorted by topological order (== increasing
+ * numerical rank) at all times.
+ * When an attach is done, the attaching geom's rank is invalidated
+ * and it is moved to the tail of the list.
+ * All geoms later in the sequence have their ranks reevaluated in
+ * sequence.  If we cannot assign a rank to a geom because its
+ * prerequisites do not have rank, we move that element to the tail
+ * of the sequence with invalid rank as well.
+ * At some point we encounter our original geom and if we still fail
+ * to assign it a rank, there must be a loop and we fail back to
+ * g_attach() which detaches again and calls redo_rank again
+ * to fix up the damage.
+ * It would be much simpler code-wise to do it recursively, but we
+ * can't risk that on the kernel stack.
+ */
+
+/*
+ * Recompute ranks after "gp" changed its attachments; see the block
+ * comment above for the algorithm.  Returns ELOOP if the topology
+ * contains a cycle, 0 otherwise.
+ */
+static int
+redo_rank(struct g_geom *gp)
+{
+	struct g_consumer *cp;
+	struct g_geom *gp1, *gp2;
+	int n, m;
+
+	g_topology_assert();
+
+	/* Invalidate this geoms rank and move it to the tail */
+	gp1 = TAILQ_NEXT(gp, geoms);
+	if (gp1 != NULL) {
+		gp->rank = 0;
+		TAILQ_REMOVE(&geoms, gp, geoms);
+		TAILQ_INSERT_TAIL(&geoms, gp, geoms);
+	} else {
+		gp1 = gp;
+	}
+
+	/* re-rank the rest of the sequence */
+	for (; gp1 != NULL; gp1 = gp2) {
+		gp1->rank = 0;
+		m = 1;
+		/* A geom ranks one above its highest-ranked provider geom. */
+		LIST_FOREACH(cp, &gp1->consumer, consumer) {
+			if (cp->provider == NULL)
+				continue;
+			n = cp->provider->geom->rank;
+			if (n == 0) {
+				/* The provider's geom is not ranked yet. */
+				m = 0;
+				break;
+			} else if (n >= m)
+				m = n + 1;
+		}
+		gp1->rank = m;
+		gp2 = TAILQ_NEXT(gp1, geoms);
+
+		/* got a rank, moving on */
+		if (m != 0)
+			continue;
+
+		/* no rank to original geom means loop */
+		if (gp == gp1)
+			return (ELOOP);
+
+		/* no rank, put it at the end move on */
+		TAILQ_REMOVE(&geoms, gp1, geoms);
+		TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
+	}
+	return (0);
+}
+
+/*
+ * Attach consumer "cp" to provider "pp".  Fails with ELOOP if doing so
+ * would create a cycle in the topology; in that case the attachment is
+ * backed out before returning.
+ */
+int
+g_attach(struct g_consumer *cp, struct g_provider *pp)
+{
+	int error;
+
+	g_topology_assert();
+	KASSERT(cp->provider == NULL, ("attach but attached"));
+	cp->provider = pp;
+	LIST_INSERT_HEAD(&pp->consumers, cp, consumers);
+	error = redo_rank(cp->geom);
+	if (error == 0)
+		return (0);
+	/* A topology loop was created: undo the attachment again. */
+	LIST_REMOVE(cp, consumers);
+	cp->provider = NULL;
+	redo_rank(cp->geom);
+	return (error);
+}
+
+/*
+ * Detach consumer "cp" from its provider.  The consumer must be closed
+ * and idle.  If the provider's geom was withering, give it another
+ * nudge now that one more consumer is gone.
+ */
+void
+g_detach(struct g_consumer *cp)
+{
+	struct g_provider *pp;
+
+	g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp);
+	/* 0xd0d0d0d0 is presumably a freed-memory fill pattern -- verify. */
+	KASSERT(cp != (void*)0xd0d0d0d0, ("ARGH!"));
+	g_topology_assert();
+	KASSERT(cp->provider != NULL, ("detach but not attached"));
+	KASSERT(cp->acr == 0, ("detach but nonzero acr"));
+	KASSERT(cp->acw == 0, ("detach but nonzero acw"));
+	KASSERT(cp->ace == 0, ("detach but nonzero ace"));
+	KASSERT(cp->nstart == cp->nend,
+	    ("detach with active requests"));
+	pp = cp->provider;
+	LIST_REMOVE(cp, consumers);
+	cp->provider = NULL;
+	if (pp->geom->flags & G_GEOM_WITHER)
+		g_wither_geom(pp->geom, 0);
+	redo_rank(cp->geom);
+}
+
+
+/*
+ * g_access_abs()
+ *
+ * Access-check with absolute new values: compute the deltas from the
+ * current counters and hand off to the relative version.
+ */
+int
+g_access_abs(struct g_consumer *cp, int acr, int acw, int ace)
+{
+	int dcr, dcw, dce;
+
+	g_topology_assert();
+	dcr = acr - cp->acr;
+	dcw = acw - cp->acw;
+	dce = ace - cp->ace;
+	return (g_access_rel(cp, dcr, dcw, dce));
+}
+
+/*
+ * g_access_rel()
+ *
+ * Access-check with delta values.  The question asked is "can provider
+ * "cp" change the access counters by the relative amounts dc[rwe] ?"
+ * On success the counters on both the consumer and its provider are
+ * updated; spoiling and re-taste events are triggered as appropriate.
+ */
+
+int
+g_access_rel(struct g_consumer *cp, int dcr, int dcw, int dce)
+{
+	struct g_provider *pp;
+	int pr,pw,pe;
+	int error;
+
+	pp = cp->provider;
+
+	g_trace(G_T_ACCESS, "g_access_rel(%p(%s), %d, %d, %d)",
+	    cp, pp->name, dcr, dcw, dce);
+
+	g_topology_assert();
+	KASSERT(cp->provider != NULL, ("access but not attached"));
+	KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr"));
+	KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw"));
+	KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace"));
+	KASSERT(pp->geom->access != NULL, ("NULL geom->access"));
+
+	/*
+	 * If our class cares about being spoiled, and we have been, we
+	 * are probably just ahead of the event telling us that.  Fail
+	 * now rather than having to unravel this later.
+	 */
+	if (cp->geom->spoiled != NULL && cp->spoiled) {
+		KASSERT(dcr <= 0, ("spoiled but dcr = %d", dcr));
+		KASSERT(dcw <= 0, ("spoiled but dcw = %d", dcw));
+		KASSERT(dce <= 0, ("spoiled but dce = %d", dce));
+	}
+
+	/*
+	 * Figure out what counts the provider would have had, if this
+	 * consumer had (r0w0e0) at this time.
+	 */
+	pr = pp->acr - cp->acr;
+	pw = pp->acw - cp->acw;
+	pe = pp->ace - cp->ace;
+
+	g_trace(G_T_ACCESS,
+    "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)",
+	    dcr, dcw, dce,
+	    cp->acr, cp->acw, cp->ace,
+	    pp->acr, pp->acw, pp->ace,
+	    pp, pp->name);
+
+	/* If foot-shooting is enabled, any open on rank#1 is OK */
+	if ((g_debugflags & 16) && pp->geom->rank == 1)
+		;
+	/* If we try exclusive but already write: fail */
+	else if (dce > 0 && pw > 0)
+		return (EPERM);
+	/* If we try write but already exclusive: fail */
+	else if (dcw > 0 && pe > 0)
+		return (EPERM);
+	/* If we try to open more but provider is error'ed: fail */
+	else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0)
+		return (pp->error);
+
+	/* Ok then... */
+
+	error = pp->geom->access(pp, dcr, dcw, dce);
+	if (!error) {
+		/*
+		 * If we open first write, spoil any partner consumers.
+		 * If we close last write, trigger re-taste.
+		 */
+		if (pp->acw == 0 && dcw != 0)
+			g_spoil(pp, cp);
+		else if (pp->acw != 0 && pp->acw == -dcw &&
+		    !(pp->geom->flags & G_GEOM_WITHER))
+			g_post_event(g_new_provider_event, pp, M_WAITOK,
+			    pp, NULL);
+
+		pp->acr += dcr;
+		pp->acw += dcw;
+		pp->ace += dce;
+		cp->acr += dcr;
+		cp->acw += dcw;
+		cp->ace += dce;
+	}
+	return (error);
+}
+
+/* Convenience wrapper: answer an "int"-valued BIO_GETATTR request. */
+int
+g_handleattr_int(struct bio *bp, const char *attribute, int val)
+{
+
+	return (g_handleattr(bp, attribute, &val, sizeof val));
+}
+
+/* Convenience wrapper: answer an "off_t"-valued BIO_GETATTR request. */
+int
+g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val)
+{
+
+	return (g_handleattr(bp, attribute, &val, sizeof val));
+}
+
+/*
+ * Answer a BIO_GETATTR request if it asks for "attribute".  Returns 1
+ * (and completes the bio) when the attribute matched, 0 when the
+ * caller should keep looking.
+ */
+int
+g_handleattr(struct bio *bp, const char *attribute, void *val, int len)
+{
+	int error;
+
+	if (strcmp(bp->bio_attribute, attribute) != 0)
+		return (0);
+	if (bp->bio_length == len) {
+		bcopy(val, bp->bio_data, len);
+		bp->bio_completed = len;
+		error = 0;
+	} else {
+		printf("bio_length %jd len %d -> EFAULT\n",
+		    (intmax_t)bp->bio_length, len);
+		error = EFAULT;
+	}
+	g_io_deliver(bp, error);
+	return (1);
+}
+
+/*
+ * Standard access method for classes needing no open bookkeeping of
+ * their own: permit everything.
+ */
+int
+g_std_access(struct g_provider *pp __unused,
+	int dr __unused, int dw __unused, int de __unused)
+{
+
+	return (0);
+}
+
+/*
+ * Standard done method: propagate completion (and the first error) to
+ * the parent bio and deliver it once all children have come home.
+ */
+void
+g_std_done(struct bio *bp)
+{
+	struct bio *parent;
+
+	parent = bp->bio_parent;
+	if (parent->bio_error == 0)
+		parent->bio_error = bp->bio_error;
+	parent->bio_completed += bp->bio_completed;
+	g_destroy_bio(bp);
+	parent->bio_inbed++;
+	if (parent->bio_children == parent->bio_inbed)
+		g_io_deliver(parent, parent->bio_error);
+}
+
+/* XXX: maybe this is only g_slice_spoiled */
+
+/*
+ * Standard spoiled method: detach and destroy the consumer, orphan all
+ * providers and schedule the geom itself for destruction.
+ */
+void
+g_std_spoiled(struct g_consumer *cp)
+{
+	struct g_geom *gp;
+	struct g_provider *pp;
+
+	g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
+	g_topology_assert();
+	g_detach(cp);
+	gp = cp->geom;
+	LIST_FOREACH(pp, &gp->provider, provider)
+		g_orphan_provider(pp, ENXIO);
+	g_destroy_consumer(cp);
+	/* If nothing is left the geom can go now, else let it wither. */
+	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
+		g_destroy_geom(gp);
+	else
+		gp->flags |= G_GEOM_WITHER;
+}
+
+/*
+ * Spoiling happens when a provider is opened for writing, but consumers
+ * which are configured by in-band data are attached (slicers for instance).
+ * Since the write might potentially change the in-band data, such consumers
+ * need to re-evaluate their existence after the writing session closes.
+ * We do this by (offering to) tear them down when the open for write happens
+ * in return for a re-taste when it closes again.
+ * Together with the fact that such consumers grab an 'e' bit whenever they
+ * are open, regardless of mode, this ends up DTRT.
+ */
+
+static void
+g_spoil_event(void *arg, int flag)
+{
+	struct g_provider *pp;
+	struct g_consumer *cp, *cp2;
+
+	g_topology_assert();
+	if (flag == EV_CANCEL)
+		return;
+	pp = arg;
+	/*
+	 * Hand-rolled safe iteration: the spoiled method may remove "cp"
+	 * from the list (e.g. g_std_spoiled() destroys it), so grab the
+	 * successor before the call.
+	 */
+	for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) {
+		cp2 = LIST_NEXT(cp, consumers);
+		if (!cp->spoiled)
+			continue;
+		cp->spoiled = 0;
+		if (cp->geom->spoiled == NULL)
+			continue;
+		cp->geom->spoiled(cp);
+		g_topology_assert();
+	}
+}
+
+/*
+ * Mark every other consumer on "pp" spoiled and post the event which
+ * will run their spoiled methods.  "cp" is the consumer doing the
+ * first write open and is exempt.
+ */
+void
+g_spoil(struct g_provider *pp, struct g_consumer *cp)
+{
+	struct g_consumer *cp2;
+
+	g_topology_assert();
+
+	LIST_FOREACH(cp2, &pp->consumers, consumers) {
+		if (cp2 == cp)
+			continue;
+/*
+		KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr));
+		KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw));
+*/
+		KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace));
+		cp2->spoiled++;
+	}
+	g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL);
+}
+
+/*
+ * Fetch attribute "attr" from consumer "cp" into "var", insisting that
+ * exactly "len" bytes come back; EINVAL otherwise.
+ */
+int
+g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len)
+{
+	int error, reported;
+
+	reported = len;
+	error = g_io_getattr(attr, cp, &reported, var);
+	if (error != 0)
+		return (error);
+	if (reported != len)
+		return (EINVAL);
+	return (0);
+}
+
+/*
+ * Check if the given pointer is a live object
+ */
+
+void
+g_sanity(void const *ptr)
+{
+	struct g_class *mp;
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_provider *pp;
+
+	/* Only active when the 0x8 debug flag is set; a hit panics. */
+	if (!(g_debugflags & 0x8))
+		return;
+	LIST_FOREACH(mp, &g_classes, class) {
+		KASSERT(mp != ptr, ("Ptr is live class"));
+		LIST_FOREACH(gp, &mp->geom, geom) {
+			KASSERT(gp != ptr, ("Ptr is live geom"));
+			KASSERT(gp->name != ptr, ("Ptr is live geom's name"));
+			LIST_FOREACH(cp, &gp->consumer, consumer) {
+				KASSERT(cp != ptr, ("Ptr is live consumer"));
+			}
+			LIST_FOREACH(pp, &gp->provider, provider) {
+				KASSERT(pp != ptr, ("Ptr is live provider"));
+			}
+		}
+	}
+}
+
diff --git a/sys/geom/geom_sunlabel.c b/sys/geom/geom_sunlabel.c
new file mode 100644
index 0000000..0718056
--- /dev/null
+++ b/sys/geom/geom_sunlabel.c
@@ -0,0 +1,281 @@
+/*-
+ * Copyright (c) 2002 Poul-Henning Kamp
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Poul-Henning Kamp
+ * and NAI Labs, the Security Research Division of Network Associates, Inc.
+ * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/bio.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sun_disklabel.h>
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+#include <machine/endian.h>
+
+#define SUNLABEL_CLASS_NAME "SUN"
+
+/* Per-geom state: disk geometry gleaned from the Sun disklabel. */
+struct g_sunlabel_softc {
+	int sectorsize;		/* Sector size of the underlying provider. */
+	int nheads;		/* Tracks per cylinder (sl_ntracks). */
+	int nsects;		/* Sectors per track (sl_nsectors). */
+	int nalt;		/* Alternate cylinders (sl_acylinders). */
+};
+
+/*
+ * Decode a Sun disklabel from sector 0 ("sec0") and (re)configure the
+ * slices from its partition table.  The CHECK pass validates all
+ * entries before the SET pass commits any, so a bad label leaves the
+ * existing configuration untouched.  Returns 0 or an errno.
+ */
+static int
+g_sunlabel_modify(struct g_geom *gp, struct g_sunlabel_softc *ms, u_char *sec0)
+{
+	int i, error;
+	u_int u, v, csize;
+	struct sun_disklabel sl;
+
+	error = sunlabel_dec(sec0, &sl);
+	if (error)
+		return (error);
+
+	/* Sectors per cylinder; partition offsets are in cylinders. */
+	csize = sl.sl_ntracks * sl.sl_nsectors;
+
+	for (i = 0; i < SUN_NPART; i++) {
+		v = sl.sl_part[i].sdkp_cyloffset;
+		u = sl.sl_part[i].sdkp_nsectors;
+		/*
+		 * NOTE(review): "<< 9" hard-codes 512-byte sectors even
+		 * though ms->sectorsize may differ -- confirm intended.
+		 */
+		error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
+		    ((off_t)v * csize) << 9ULL,
+		    ((off_t)u) << 9ULL,
+		    ms->sectorsize,
+		    "%s%c", gp->name, 'a' + i);
+		if (error)
+			return (error);
+	}
+	/* Everything validated: commit all entries. */
+	for (i = 0; i < SUN_NPART; i++) {
+		v = sl.sl_part[i].sdkp_cyloffset;
+		u = sl.sl_part[i].sdkp_nsectors;
+		g_slice_config(gp, i, G_SLICE_CONFIG_SET,
+		    ((off_t)v * csize) << 9ULL,
+		    ((off_t)u) << 9ULL,
+		    ms->sectorsize,
+		    "%s%c", gp->name, 'a' + i);
+	}
+	/* Remember the geometry for dumpconf. */
+	ms->nalt = sl.sl_acylinders;
+	ms->nheads = sl.sl_ntracks;
+	ms->nsects = sl.sl_nsectors;
+
+	return (0);
+}
+
+/*
+ * Hot-spot handler: a write hit the label sector.  Re-parse the new
+ * label and only let the write through if it decodes and validates.
+ */
+static void
+g_sunlabel_hotwrite(void *arg, int flag)
+{
+	struct bio *bp;
+	struct g_geom *gp;
+	struct g_slicer *gsp;
+	struct g_slice *gsl;
+	struct g_sunlabel_softc *ms;
+	u_char *p;
+	int error;
+
+	KASSERT(flag != EV_CANCEL, ("g_sunlabel_hotwrite cancelled"));
+	bp = arg;
+	gp = bp->bio_to->geom;
+	gsp = gp->softc;
+	ms = gsp->softc;
+	gsl = &gsp->slices[bp->bio_to->index];
+	/*
+	 * XXX: For all practical purposes, this would be equivalent to
+	 * XXX: "p = (u_char *)bp->bio_data;" because the label is always
+	 * XXX: in the first sector and we refuse sectors smaller than the
+	 * XXX: label.
+	 */
+	p = (u_char *)bp->bio_data - (bp->bio_offset + gsl->offset);
+
+	error = g_sunlabel_modify(gp, ms, p);
+	if (error) {
+		/* Reject writes carrying an invalid label. */
+		g_io_deliver(bp, EPERM);
+		return;
+	}
+	g_slice_finish_hot(bp);
+}
+
+/*
+ * Dumpconf method: standard slice output, plus the disk geometry when
+ * the single-line (NULL indent) format is requested.
+ */
+static void
+g_sunlabel_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp)
+{
+	struct g_sunlabel_softc *ms;
+	struct g_slicer *gsp;
+
+	gsp = gp->softc;
+	ms = gsp->softc;
+	g_slice_dumpconf(sb, indent, gp, cp, pp);
+	if (indent != NULL)
+		return;
+	sbuf_printf(sb, " sc %u hd %u alt %u",
+	    ms->nsects, ms->nheads, ms->nalt);
+}
+
+/* Argument block for g_sunlabel_callconfig(). */
+struct g_hh01 {
+	struct g_geom *gp;
+	struct g_sunlabel_softc *ms;
+	u_char *label;		/* SUN_SIZE bytes of encoded label. */
+	int error;		/* Out: result of modify/write. */
+};
+
+/*
+ * Apply a new label to the slicer configuration and, if that worked,
+ * write it to the first sector of the disk.  The result is reported
+ * through hp->error.
+ */
+static void
+g_sunlabel_callconfig(void *arg, int flag)
+{
+	struct g_hh01 *hp;
+
+	hp = arg;
+	hp->error = g_sunlabel_modify(hp->gp, hp->ms, hp->label);
+	if (hp->error != 0)
+		return;
+	hp->error = g_write_data(LIST_FIRST(&hp->gp->consumer),
+	    0, hp->label, SUN_SIZE);
+}
+
+/*
+ * NB! curthread is user process which GCTL'ed.
+ *
+ * GCTL request handler.  Supported verbs: "write label" (validate and
+ * store a new disklabel) and "write bootcode" (copy boot blocks onto
+ * each sufficiently large slice, skipping the label bytes).
+ */
+static void
+g_sunlabel_config(struct gctl_req *req, struct g_class *mp, const char *verb)
+{
+	u_char *label;
+	int error, i;
+	struct g_hh01 h0h0;
+	struct g_slicer *gsp;
+	struct g_geom *gp;
+	struct g_consumer *cp;
+
+	g_topology_assert();
+	gp = gctl_get_geom(req, mp, "geom");
+	if (gp == NULL)
+		return;
+	cp = LIST_FIRST(&gp->consumer);
+	gsp = gp->softc;
+	if (!strcmp(verb, "write label")) {
+		label = gctl_get_paraml(req, "label", SUN_SIZE);
+		if (label == NULL)
+			return;
+		h0h0.gp = gp;
+		h0h0.ms = gsp->softc;
+		h0h0.label = label;
+		h0h0.error = -1;
+		/* XXX: Does this reference register with our selfdestruct code ? */
+		error = g_access_rel(cp, 1, 1, 1);
+		if (error) {
+			gctl_error(req, "could not access consumer");
+			return;
+		}
+		g_sunlabel_callconfig(&h0h0, 0);
+		/*
+		 * NOTE(review): h0h0.error is never reported back to the
+		 * requester, so a failed label write goes unnoticed -- verify.
+		 */
+		g_access_rel(cp, -1, -1, -1);
+	} else if (!strcmp(verb, "write bootcode")) {
+		label = gctl_get_paraml(req, "bootcode", SUN_BOOTSIZE);
+		if (label == NULL)
+			return;
+		/* XXX: Does this reference register with our selfdestruct code ? */
+		error = g_access_rel(cp, 1, 1, 1);
+		if (error) {
+			gctl_error(req, "could not access consumer");
+			return;
+		}
+		for (i = 0; i < SUN_NPART; i++) {
+			if (gsp->slices[i].length <= SUN_BOOTSIZE)
+				continue;
+			/* Skip the first SUN_SIZE bytes: the label itself. */
+			g_write_data(cp,
+			    gsp->slices[i].offset + SUN_SIZE, label + SUN_SIZE,
+			    SUN_BOOTSIZE - SUN_SIZE);
+		}
+		g_access_rel(cp, -1, -1, -1);
+	} else {
+		gctl_error(req, "Unknown verb parameter");
+	}
+}
+
+/*
+ * Taste method: put a slicer on top of any provider which carries a
+ * decodable Sun disklabel in its first sector.
+ */
+static struct g_geom *
+g_sunlabel_taste(struct g_class *mp, struct g_provider *pp, int flags)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	int error;
+	u_char *buf;
+	struct g_sunlabel_softc *ms;
+	struct g_slicer *gsp;
+
+	g_trace(G_T_TOPOLOGY, "g_sunlabel_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+	/* Don't stack on top of ourselves (unless forced). */
+	if (flags == G_TF_NORMAL &&
+	    !strcmp(pp->geom->class->name, SUNLABEL_CLASS_NAME))
+		return (NULL);
+	gp = g_slice_new(mp, 8, pp, &cp, &ms, sizeof *ms, NULL);
+	if (gp == NULL)
+		return (NULL);
+	gsp = gp->softc;
+	gp->dumpconf = g_sunlabel_dumpconf;
+	do {
+		/* Normally only taste one level above the bottom (rank 2). */
+		if (gp->rank != 2 && flags == G_TF_NORMAL)
+			break;
+		ms->sectorsize = cp->provider->sectorsize;
+		/* Refuse sectors too small to hold the label. */
+		if (ms->sectorsize < 512)
+			break;
+		/* g_read_data() may sleep; drop the topology lock meanwhile. */
+		g_topology_unlock();
+		buf = g_read_data(cp, 0, ms->sectorsize, &error);
+		g_topology_lock();
+		if (buf == NULL || error != 0)
+			break;
+
+		g_sunlabel_modify(gp, ms, buf);
+		g_free(buf);
+
+		break;
+	} while (0);
+	g_access_rel(cp, -1, 0, 0);
+	/* No slices were configured: self-destruct again. */
+	if (LIST_EMPTY(&gp->provider)) {
+		g_slice_spoiled(cp);
+		return (NULL);
+	}
+	/* Intercept writes to the label bytes at the start of the disk. */
+	g_slice_conf_hot(gp, 0, 0, SUN_SIZE,
+	    G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL);
+	gsp->hot = g_sunlabel_hotwrite;
+	return (gp);
+}
+
+/* Class glue: taste providers and accept GCTL configuration verbs. */
+static struct g_class g_sunlabel_class = {
+	.name = SUNLABEL_CLASS_NAME,
+	.taste = g_sunlabel_taste,
+	.ctlreq = g_sunlabel_config,
+};
+
+DECLARE_GEOM_CLASS(g_sunlabel_class, g_sunlabel);
diff --git a/sys/geom/geom_sunlabel_enc.c b/sys/geom/geom_sunlabel_enc.c
new file mode 100644
index 0000000..d153e11
--- /dev/null
+++ b/sys/geom/geom_sunlabel_enc.c
@@ -0,0 +1,127 @@
+/*-
+ * Copyright (c) 2003 Jake Burkholder
+ * Copyright (c) 2003 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Functions to encode or decode struct sun_disklabel into a bytestream
+ * of correct endianness and packing.
+ *
+ * NB! This file must be usable both in kernel and userland.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/endian.h>
+#include <sys/errno.h>
+#include <sys/sun_disklabel.h>
+
+/*
+ * Byte offsets of the fields we care about within the on-disk
+ * Sun disklabel sector.
+ */
+#define SL_TEXT 0x0
+#define SL_TEXT_SIZEOF 0x80
+#define SL_RPM 0x1a4
+#define SL_PCYLINDERS 0x1a6
+#define SL_SPARESPERCYL 0x1a8
+#define SL_INTERLEAVE 0x1ae
+#define SL_NCYLINDERS 0x1b0
+#define SL_ACYLINDERS 0x1b2
+#define SL_NTRACKS 0x1b4
+#define SL_NSECTORS 0x1b6
+#define SL_PART 0x1bc
+#define SL_MAGIC 0x1fc
+#define SL_CKSUM 0x1fe
+
+/* Offsets within one partition table entry of SDKP_SIZEOF bytes. */
+#define SDKP_CYLOFFSET 0
+#define SDKP_NSECTORS 0x4
+#define SDKP_SIZEOF 0x8
+
+/*
+ * Decode the relevant fields of a sun disk label, and return zero if the
+ * magic and checksum works out OK.
+ */
+int
+sunlabel_dec(void const *pp, struct sun_disklabel *sl)
+{
+	const uint8_t *p;
+	size_t i;
+	u_int u;
+
+	p = pp;
+	/* The ASCII description text is copied verbatim. */
+	for (i = 0; i < sizeof(sl->sl_text); i++)
+		sl->sl_text[i] = p[SL_TEXT + i];
+	/* All multi-byte fields are stored big-endian on disk. */
+	sl->sl_rpm = be16dec(p + SL_RPM);
+	sl->sl_pcylinders = be16dec(p + SL_PCYLINDERS);
+	sl->sl_sparespercyl = be16dec(p + SL_SPARESPERCYL);
+	sl->sl_interleave = be16dec(p + SL_INTERLEAVE);
+	sl->sl_ncylinders = be16dec(p + SL_NCYLINDERS);
+	sl->sl_acylinders = be16dec(p + SL_ACYLINDERS);
+	sl->sl_ntracks = be16dec(p + SL_NTRACKS);
+	sl->sl_nsectors = be16dec(p + SL_NSECTORS);
+	/* Partition table: cylinder offset and sector count per entry. */
+	for (i = 0; i < SUN_NPART; i++) {
+		sl->sl_part[i].sdkp_cyloffset = be32dec(p + SL_PART +
+		    (i * SDKP_SIZEOF) + SDKP_CYLOFFSET);
+		sl->sl_part[i].sdkp_nsectors = be32dec(p + SL_PART +
+		    (i * SDKP_SIZEOF) + SDKP_NSECTORS);
+	}
+	sl->sl_magic = be16dec(p + SL_MAGIC);
+	/*
+	 * A valid label XORs to zero over all 16-bit words of the label
+	 * (the checksum word at SL_CKSUM included) and carries the magic.
+	 */
+	for (i = u = 0; i < SUN_SIZE; i += 2)
+		u ^= be16dec(p + i);
+	if (u == 0 && sl->sl_magic == SUN_DKMAGIC)
+		return (0);
+	else
+		return (EINVAL);
+}
+
+/*
+ * Encode the relevant fields into a sun disklabel, compute new checksum.
+ *
+ * The on-disk convention (see sunlabel_dec()) is that the XOR over all
+ * 16-bit words of the label, the checksum word included, is zero.  The
+ * checksum must therefore be the XOR of all words *before* SL_CKSUM.
+ * The previous contents of the checksum field must not be folded in:
+ * doing so would make the result valid only when those bytes happened
+ * to be zero, and re-encoding a buffer already holding a valid label
+ * would produce a checksum sunlabel_dec() rejects.
+ */
+void
+sunlabel_enc(void *pp, struct sun_disklabel *sl)
+{
+	uint8_t *p;
+	size_t i;
+	u_int u;
+
+	p = pp;
+	/* Copy the ASCII label text verbatim. */
+	for (i = 0; i < SL_TEXT_SIZEOF; i++)
+		p[SL_TEXT + i] = sl->sl_text[i];
+	/* All multi-byte fields are stored big-endian on disk. */
+	be16enc(p + SL_RPM, sl->sl_rpm);
+	be16enc(p + SL_PCYLINDERS, sl->sl_pcylinders);
+	be16enc(p + SL_SPARESPERCYL, sl->sl_sparespercyl);
+	be16enc(p + SL_INTERLEAVE, sl->sl_interleave);
+	be16enc(p + SL_NCYLINDERS, sl->sl_ncylinders);
+	be16enc(p + SL_ACYLINDERS, sl->sl_acylinders);
+	be16enc(p + SL_NTRACKS, sl->sl_ntracks);
+	be16enc(p + SL_NSECTORS, sl->sl_nsectors);
+	for (i = 0; i < SUN_NPART; i++) {
+		be32enc(p + SL_PART + (i * SDKP_SIZEOF) + SDKP_CYLOFFSET,
+		    sl->sl_part[i].sdkp_cyloffset);
+		be32enc(p + SL_PART + (i * SDKP_SIZEOF) + SDKP_NSECTORS,
+		    sl->sl_part[i].sdkp_nsectors);
+	}
+	be16enc(p + SL_MAGIC, sl->sl_magic);
+	/* Checksum everything except the checksum word itself. */
+	for (i = u = 0; i < SL_CKSUM; i += 2)
+		u ^= be16dec(p + i);
+	be16enc(p + SL_CKSUM, u);
+}
diff --git a/sys/geom/geom_vol_ffs.c b/sys/geom/geom_vol_ffs.c
new file mode 100644
index 0000000..de046ed
--- /dev/null
+++ b/sys/geom/geom_vol_ffs.c
@@ -0,0 +1,143 @@
+/*-
+ * Copyright (c) 2002, 2003 Gordon Tetlow
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <ufs/ufs/dinode.h>
+#include <ufs/ffs/fs.h>
+
+#include <geom/geom.h>
+#include <geom/geom_slice.h>
+
+#define VOL_FFS_CLASS_NAME "VOL_FFS"
+
+/* Standard superblock offsets to probe; the list ends with -1. */
+static int superblocks[] = SBLOCKSEARCH;
+
+struct g_vol_ffs_softc {
+	char * vol;	/* NOTE(review): not referenced in the code visible here. */
+};
+
+/*
+ * Start method for the slicer: nothing is handled specially here, so
+ * always return 0 and let the generic slice code process the request.
+ */
+static int
+g_vol_ffs_start(struct bio *bp __unused)
+{
+	return(0);
+}
+
+/*
+ * Taste method for the VOL_FFS class.
+ *
+ * Probe provider "pp" for a UFS1/UFS2 superblock whose size matches
+ * the provider, and publish the filesystem under /dev/vol/<volname>
+ * when the superblock carries a volume label.  Returns the new geom,
+ * or NULL if nothing suitable was found.
+ */
+static struct g_geom *
+g_vol_ffs_taste(struct g_class *mp, struct g_provider *pp, int flags)
+{
+	struct g_geom *gp;
+	struct g_consumer *cp;
+	struct g_vol_ffs_softc *ms;
+	int error, sb, superblock;
+	struct fs *fs;
+
+	g_trace(G_T_TOPOLOGY, "vol_taste(%s,%s)", mp->name, pp->name);
+	g_topology_assert();
+
+	/*
+	 * XXX This is a really weak way to make sure we don't recurse.
+	 * Probably ought to use BIO_GETATTR to check for this.
+	 */
+	if (flags == G_TF_NORMAL &&
+	    !strcmp(pp->geom->class->name, VOL_FFS_CLASS_NAME))
+		return (NULL);
+
+	gp = g_slice_new(mp, 1, pp, &cp, &ms, sizeof(*ms), g_vol_ffs_start);
+	if (gp == NULL)
+		return (NULL);
+	g_topology_unlock();
+	/*
+	 * Walk through the standard places that superblocks hide and look
+	 * for UFS magic. If we find magic, then check that the size in the
+	 * superblock corresponds to the size of the underlying provider.
+	 * Finally, look for a volume label and create an appropriate
+	 * provider based on that.
+	 */
+	for (sb=0; (superblock = superblocks[sb]) != -1; sb++) {
+		fs = (struct fs *) g_read_data(cp, superblock,
+		    SBLOCKSIZE, &error);
+		if (fs == NULL || error != 0)
+			continue;
+		/* Check for magic and make sure things are the right size */
+		if (fs->fs_magic == FS_UFS1_MAGIC) {
+			/*
+			 * Widen the arithmetic to off_t: the 32-bit
+			 * product fs_old_size * fs_fsize overflows for
+			 * filesystems larger than 2GB, and mediasize
+			 * must not be truncated to int32_t either, or
+			 * such filesystems would be misjudged.
+			 */
+			if ((off_t)fs->fs_old_size * fs->fs_fsize !=
+			    pp->mediasize) {
+				g_free(fs);
+				continue;
+			}
+		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
+			if ((off_t)fs->fs_size * fs->fs_fsize !=
+			    pp->mediasize) {
+				g_free(fs);
+				continue;
+			}
+		} else {
+			/* No known magic at this offset. */
+			g_free(fs);
+			continue;
+		}
+		/* Check for volume label */
+		if (fs->fs_volname[0] == '\0') {
+			g_free(fs);
+			continue;
+		}
+		/* XXX We need to check for namespace conflicts. */
+		/* XXX How do you handle a mirror set? */
+		/* XXX We don't validate the volume name. */
+		g_topology_lock();
+		/* Alright, we have a label and a volume name, reconfig. */
+		g_slice_config(gp, 0, G_SLICE_CONFIG_SET, (off_t) 0,
+		    pp->mediasize, pp->sectorsize, "vol/%s",
+		    fs->fs_volname);
+		g_free(fs);
+		g_topology_unlock();
+		break;
+	}
+	g_topology_lock();
+	/* Drop the read access taken when the consumer was set up. */
+	g_access_rel(cp, -1, 0, 0);
+	if (LIST_EMPTY(&gp->provider)) {
+		/* Nothing published: withdraw the geom again. */
+		g_slice_spoiled(cp);
+		return (NULL);
+	}
+	return (gp);
+}
+
+/* Class descriptor: tasting only, no control request handler. */
+static struct g_class g_vol_ffs_class = {
+	.name = VOL_FFS_CLASS_NAME,
+	.taste = g_vol_ffs_taste,
+};
+
+DECLARE_GEOM_CLASS(g_vol_ffs_class, g_vol_ffs);
diff --git a/sys/geom/notes b/sys/geom/notes
new file mode 100644
index 0000000..3b0f811
--- /dev/null
+++ b/sys/geom/notes
@@ -0,0 +1,140 @@
+$FreeBSD$
+
+For the lack of a better place to put them, this file will contain
+notes on some of the more intricate details of geom.
+
+-----------------------------------------------------------------------
+Locking of bio_children and bio_inbed
+
+bio_children is used by g_std_done() and g_clone_bio() to keep track
+of children cloned off a request. g_clone_bio will increment the
+bio_children counter for each time it is called and g_std_done will
+increment bio_inbed for every call, and if the two counters are
+equal, call g_io_deliver() on the parent bio.
+
+The general assumption is that g_clone_bio() is called only in
+the g_down thread, and g_std_done() only in the g_up thread and
+therefore the two fields do not generally need locking. These
+restrictions are not enforced by the code, but only with great
+care should they be violated.
+
+It is the responsibility of the class implementation to avoid the
+following race condition: A class intends to split a bio in two
+children. It clones the bio, and requests I/O on the child.
+This I/O operation completes before the second child is cloned
+and g_std_done() sees the counters both equal 1 and finishes off
+the bio.
+
+There is no race present in the common case where the bio is split
+in multiple parts in the class start method and the I/O is requested
+on another GEOM class below: There is only one g_down thread and
+the class below will not get its start method run until we return
+from our start method, and consequently the I/O cannot complete
+prematurely.
+
+In all other cases, this race needs to be mitigated, for instance
+by cloning all children before I/O is requested on any of them.
+
+Notice that cloning an "extra" child and calling g_std_done() on
+it directly opens another race since the assumption is that
+g_std_done() only is called in the g_up thread.
+
+-----------------------------------------------------------------------
+Statistics collection
+
+Statistics collection can run at three levels controlled by the
+"kern.geom.collectstats" sysctl.
+
+At level zero, only the number of transactions started and completed
+are counted, and this is only because GEOM internally uses the difference
+between these two as sanity checks.
+
+At level one we collect the full statistics. Higher levels are
+reserved for future use. Statistics are collected independently
+on both the provider and the consumer, because multiple consumers
+can be active against the same provider at the same time.
+
+The statistics collection falls in two parts:
+
+The first and simpler part consists of g_io_request() timestamping
+the struct bio when the request is first started and g_io_deliver()
+updating the consumer and providers statistics based on fields in
+the bio when it is completed. There are no concurrency or locking
+concerns in this part. The statistics collected consists of number
+of requests, number of bytes, number of ENOMEM errors, number of
+other errors and duration of the request for each of the three
+major request types: BIO_READ, BIO_WRITE and BIO_DELETE.
+
+The second part is trying to keep track of the "busy%".
+
+If in g_io_request() we find that there are no outstanding requests,
+(based on the counters for scheduled and completed requests being
+equal), we set a timestamp in the "wentbusy" field. Since there
+are no outstanding requests, and as long as there is only one thread
+pushing the g_down queue, we cannot possibly conflict with
+g_io_deliver() until we ship the current request down.
+
+In g_io_deliver() we calculate the delta-T from wentbusy and add this
+to the "bt" field, and set wentbusy to the current timestamp. We
+take care to do this before we increment the "requests completed"
+counter, since that prevents g_io_request() from touching the
+"wentbusy" timestamp concurrently.
+
+The statistics data is made available to userland through the use
+of a special allocator (in geom_stats.c) which through a device
+allows userland to mmap(2) the pages containing the statistics data.
+In order to indicate to userland when the data in a statistics
+structure might be inconsistent, g_io_deliver() atomically sets a
+flag "updating" and resets it when the structure is again consistent.
+-----------------------------------------------------------------------
+maxsize, stripesize and stripeoffset
+
+maxsize is the biggest request we are willing to handle. If not
+set there is no upper bound on the size of a request and the code
+is responsible for chopping it up. Only hardware methods should
+set an upper bound in this field. Geom_disk will inherit the upper
+bound set by the device driver.
+
+stripesize is the width of any natural request boundaries for the
+device. This would be the width of a stripe on a raid-5 unit or
+one zone in GBDE. The idea with this field is to hint to clustering
+type code to not trivially overrun these boundaries.
+
+stripeoffset is the amount of the first stripe which lies before the
+device's beginning.
+
+If we have a device with 64k stripes:
+ [0...64k[
+ [64k...128k[
+ [128k..192k[
+Then it will have stripesize = 64k and stripeoffset = 0.
+
+If we put a MBR on this device, where slice#1 starts on sector#63,
+then this slice will have: stripesize = 64k, stripeoffset = 63 * sectorsize.
+
+If the clustering code wants to widen a request which writes to
+sector#53 of the slice, it can calculate how many bytes till the end of
+the stripe as:
+	stripesize - (53 * sectorsize + stripeoffset) % stripesize.
+-----------------------------------------------------------------------
+
+#include file usage:
+
+ geom.h|geom_int.h|geom_ext.h|geom_ctl.h|libgeom.h
+----------------+------+----------+----------+----------+--------+
+geom class | | | | | |
+implementation | X | | | | |
+----------------+------+----------+----------+----------+--------+
+geom kernel | | | | | |
+infrastructure | X | X | X | X | |
+----------------+------+----------+----------+----------+--------+
+libgeom | | | | | |
+implementation | | | X | X | X |
+----------------+------+----------+----------+----------+--------+
+geom aware | | | | | |
+application | | | | X | X |
+----------------+------+----------+----------+----------+--------+
+
+geom_slice.h is special in that it documents a "library" for implementing
+a specific kind of class, and consequently does not appear in the above
+matrix.
OpenPOWER on IntegriCloud