author     le <le@FreeBSD.org>  2004-06-12 21:16:10 +0000
committer  le <le@FreeBSD.org>  2004-06-12 21:16:10 +0000
commit     cf31d52b42bd2309bb855b34e8260283eabfc570 (patch)
tree       b37e9b83eff28125aba7f626ab2e3bea5b487658 /sys/geom
parent     f66d897510d4772f7c5efd834cd66203558e9cb5 (diff)
Add a first version of a GEOMified vinum.
Diffstat (limited to 'sys/geom')
-rw-r--r--   sys/geom/vinum/geom_vinum.c          569
-rw-r--r--   sys/geom/vinum/geom_vinum.h           79
-rw-r--r--   sys/geom/vinum/geom_vinum_drive.c    476
-rw-r--r--   sys/geom/vinum/geom_vinum_init.c     405
-rw-r--r--   sys/geom/vinum/geom_vinum_list.c     466
-rw-r--r--   sys/geom/vinum/geom_vinum_plex.c     456
-rw-r--r--   sys/geom/vinum/geom_vinum_raid5.c    616
-rw-r--r--   sys/geom/vinum/geom_vinum_raid5.h     93
-rw-r--r--   sys/geom/vinum/geom_vinum_rm.c       346
-rw-r--r--   sys/geom/vinum/geom_vinum_share.c    651
-rw-r--r--   sys/geom/vinum/geom_vinum_share.h     62
-rw-r--r--   sys/geom/vinum/geom_vinum_state.c    289
-rw-r--r--   sys/geom/vinum/geom_vinum_subr.c     804
-rw-r--r--   sys/geom/vinum/geom_vinum_var.h      279
-rw-r--r--   sys/geom/vinum/geom_vinum_volume.c   260
15 files changed, 5851 insertions, 0 deletions
diff --git a/sys/geom/vinum/geom_vinum.c b/sys/geom/vinum/geom_vinum.c
new file mode 100644
index 0000000..44a8061
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum.c
@@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum.h>
+#include <geom/vinum/geom_vinum_share.h>
+
+#if 0
+SYSCTL_DECL(_kern_geom);
+SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW, 0, "GEOM_VINUM stuff");
+SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RW, &gv_debug, 0,
+ "Debug level");
+#endif
+
+int gv_create(struct g_geom *, struct gctl_req *);
+void config_new_drive(struct gv_drive *);
+
+static void
+gv_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ struct gv_softc *sc;
+ int error;
+
+ g_topology_assert();
+
+ KASSERT(cp != NULL, ("gv_orphan: null cp"));
+ gp = cp->geom;
+ KASSERT(gp != NULL, ("gv_orphan: null gp"));
+ sc = gp->softc;
+
+ g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", gp->name);
+
+ if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
+ g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+ error = cp->provider->error;
+ if (error == 0)
+ error = ENXIO;
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (!LIST_EMPTY(&gp->consumer))
+ return;
+ g_free(sc);
+ g_wither_geom(gp, error);
+}
+
+static void
+gv_start(struct bio *bp)
+{
+ struct bio *bp2;
+ struct g_geom *gp;
+
+ gp = bp->bio_to->geom;
+ switch(bp->bio_cmd) {
+ case BIO_READ:
+ case BIO_WRITE:
+ case BIO_DELETE:
+ bp2 = g_clone_bio(bp);
+ bp2->bio_done = g_std_done;
+ g_io_request(bp2, LIST_FIRST(&gp->consumer));
+ return;
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ return;
+ }
+}
+
+static int
+gv_access(struct g_provider *pp, int dr, int dw, int de)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ int error;
+
+ gp = pp->geom;
+ error = ENXIO;
+ cp = LIST_FIRST(&gp->consumer);
+ error = g_access(cp, dr, dw, de);
+ return (error);
+}
+
+static struct g_geom *
+gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct gv_softc *sc;
+ struct gv_hdr *vhdr;
+ int error, first;
+ char *buf;
+
+ vhdr = NULL;
+ buf = NULL;
+ first = 0;
+
+ g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name);
+ g_topology_assert();
+
+ if (pp->sectorsize == 0)
+ return (NULL);
+
+ /* Check if we already have a VINUM geom, or create a new one. */
+ if (LIST_EMPTY(&mp->geom)) {
+ gp = g_new_geomf(mp, "VINUM");
+ gp->spoiled = gv_orphan;
+ gp->orphan = gv_orphan;
+ gp->access = gv_access;
+ gp->start = gv_start;
+ gp->softc = g_malloc(sizeof(struct gv_softc),
+ M_WAITOK | M_ZERO);
+ sc = gp->softc;
+ sc->geom = gp;
+ LIST_INIT(&sc->drives);
+ LIST_INIT(&sc->subdisks);
+ LIST_INIT(&sc->plexes);
+ LIST_INIT(&sc->volumes);
+ first++;
+ } else {
+ gp = LIST_FIRST(&mp->geom);
+ sc = gp->softc;
+ }
+
+
+ /* We need a temporary consumer to read the config from. */
+ cp = g_new_consumer(gp);
+ error = g_attach(cp, pp);
+ if (error) {
+ g_destroy_consumer(cp);
+ if (first) {
+ g_free(sc);
+ g_destroy_geom(gp);
+ }
+ return (NULL);
+ }
+ error = g_access(cp, 1, 0, 0);
+ if (error) {
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (first) {
+ g_free(gp->softc);
+ g_destroy_geom(gp);
+ }
+ return (NULL);
+ }
+
+ g_topology_unlock();
+
+ /* Check if the provided slice is a valid vinum drive. */
+ vhdr = g_read_data(cp, GV_HDR_OFFSET, GV_HDR_LEN, &error);
+ if (vhdr == NULL || error != 0) {
+ g_topology_lock();
+ g_access(cp, -1, 0, 0);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (first) {
+ g_free(sc);
+ g_destroy_geom(gp);
+ }
+ return (NULL);
+ }
+
+ /* This provider has no vinum magic on board. */
+ if (vhdr->magic != GV_MAGIC) {
+ /* Release the temporary consumer, we don't need it anymore. */
+ g_topology_lock();
+ g_access(cp, -1, 0, 0);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+
+ g_free(vhdr);
+
+ /*
+ * If there is no other VINUM geom yet just take this one; the
+ * configuration is still empty, but it can be filled by other
+ * valid vinum drives later.
+ */
+ if (first)
+ return (gp);
+ else
+ return (NULL);
+
+ /*
+ * We have found a valid vinum drive, now read the on-disk
+ * configuration.
+ */
+ } else {
+ g_free(vhdr);
+
+ buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN,
+ &error);
+ if (buf == NULL || error != 0) {
+ g_topology_lock();
+ g_access(cp, -1, 0, 0);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (first) {
+ g_free(sc);
+ g_destroy_geom(gp);
+ }
+ return (NULL);
+ }
+
+ /* Release the temporary consumer, we don't need it anymore. */
+ g_topology_lock();
+ g_access(cp, -1, 0, 0);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+
+ /* We are the first VINUM geom. */
+ if (first) {
+ gv_parse_config(sc, buf, 0);
+ g_free(buf);
+ return (gp);
+
+ /* Just merge the configs. */
+ } else {
+ gv_parse_config(sc, buf, 1);
+ g_free(buf);
+ return (NULL);
+ }
+ }
+}
+
+/* XXX this really belongs somewhere else */
+void
+config_new_drive(struct gv_drive *d)
+{
+ struct gv_hdr *vhdr;
+ struct gv_freelist *fl;
+
+ KASSERT(d != NULL, ("config_new_drive: NULL d"));
+
+ vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
+ vhdr->magic = GV_MAGIC;
+ vhdr->config_length = GV_CFG_LEN;
+
+ bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
+ strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
+ microtime(&vhdr->label.date_of_birth);
+
+ d->hdr = vhdr;
+
+ LIST_INIT(&d->subdisks);
+ LIST_INIT(&d->freelist);
+
+ fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
+ fl->offset = GV_DATA_START;
+ fl->size = d->avail;
+ LIST_INSERT_HEAD(&d->freelist, fl, freelist);
+ d->freelist_entries = 1;
+
+}
+
+/* Handle userland requests for creating new objects. */
+int
+gv_create(struct g_geom *gp, struct gctl_req *req)
+{
+ struct gv_softc *sc;
+ struct gv_drive *d, *d2;
+ struct gv_plex *p, *p2;
+ struct gv_sd *s, *s2;
+ struct gv_volume *v, *v2;
+ struct g_consumer *cp;
+ struct g_provider *pp;
+ int error, i, *drives, *plexes, *subdisks, *volumes;
+ char buf[20], errstr[ERRBUFSIZ];
+
+ g_topology_assert();
+
+ sc = gp->softc;
+
+ /* Find out how many of each object have been passed in. */
+ volumes = gctl_get_paraml(req, "volumes", sizeof(*volumes));
+ plexes = gctl_get_paraml(req, "plexes", sizeof(*plexes));
+ subdisks = gctl_get_paraml(req, "subdisks", sizeof(*subdisks));
+ drives = gctl_get_paraml(req, "drives", sizeof(*drives));
+
+ /* First, handle drive definitions ... */
+ for (i = 0; i < *drives; i++) {
+ snprintf(buf, sizeof(buf), "drive%d", i);
+ d2 = gctl_get_paraml(req, buf, sizeof(*d2));
+ d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
+ bcopy(d2, d, sizeof(*d));
+
+ /* XXX */
+ pp = g_provider_by_name(d->device);
+ d->size = pp->mediasize - GV_DATA_START;
+ d->avail = d->size;
+
+ config_new_drive(d);
+
+ LIST_INSERT_HEAD(&sc->drives, d, drive);
+ }
+
+ /* ... then volume definitions ... */
+ for (i = 0; i < *volumes; i++) {
+ error = 0;
+ snprintf(buf, sizeof(buf), "volume%d", i);
+ v2 = gctl_get_paraml(req, buf, sizeof(*v2));
+
+ v = gv_find_vol(sc, v2->name);
+ if (v != NULL) {
+ gctl_error(req, "volume '%s' is already known",
+ v->name);
+ return (-1);
+ }
+
+ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO);
+ bcopy(v2, v, sizeof(*v));
+
+ v->vinumconf = sc;
+ LIST_INIT(&v->plexes);
+ LIST_INSERT_HEAD(&sc->volumes, v, volume);
+ }
+
+ /* ... then plex definitions ... */
+ for (i = 0; i < *plexes; i++) {
+ error = 0;
+ snprintf(buf, sizeof(buf), "plex%d", i);
+ p2 = gctl_get_paraml(req, buf, sizeof(*p2));
+
+ p = gv_find_plex(sc, p2->name);
+ if (p != NULL) {
+ gctl_error(req, "plex '%s' is already known", p->name);
+ return (-1);
+ }
+
+ p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO);
+ bcopy(p2, p, sizeof(*p));
+
+ /* Find the volume this plex should be attached to. */
+ v = gv_find_vol(sc, p->volume);
+ if (v != NULL) {
+ if (v->plexcount)
+ p->flags |= GV_PLEX_ADDED;
+ p->vol_sc = v;
+ v->plexcount++;
+ LIST_INSERT_HEAD(&v->plexes, p, in_volume);
+ }
+
+ p->vinumconf = sc;
+ LIST_INIT(&p->subdisks);
+ LIST_INSERT_HEAD(&sc->plexes, p, plex);
+ }
+
+ /* ... and finally, subdisk definitions. */
+ for (i = 0; i < *subdisks; i++) {
+ error = 0;
+ snprintf(buf, sizeof(buf), "sd%d", i);
+ s2 = gctl_get_paraml(req, buf, sizeof(*s2));
+
+ s = gv_find_sd(sc, s2->name);
+ if (s != NULL) {
+ gctl_error(req, "subdisk '%s' is already known",
+ s->name);
+ return (-1);
+ }
+
+ s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO);
+ bcopy(s2, s, sizeof(*s));
+
+		/* Find the drive this subdisk should be put on. */
+ d = gv_find_drive(sc, s->drive);
+
+ /* drive not found - XXX */
+ if (d == NULL) {
+ printf("FOO: drive '%s' not found\n", s->drive);
+ g_free(s);
+ continue;
+ }
+
+		/* Find the plex this subdisk belongs to. */
+ p = gv_find_plex(sc, s->plex);
+
+ /* plex not found - XXX */
+ if (p == NULL) {
+ printf("FOO: plex '%s' not found\n", s->plex);
+ g_free(s);
+ continue;
+ }
+
+ /*
+ * First we give the subdisk to the drive, to handle autosized
+ * values ...
+ */
+ error = gv_sd_to_drive(sc, d, s, errstr, sizeof(errstr));
+ if (error) {
+ gctl_error(req, errstr);
+ g_free(s);
+ continue;
+ }
+
+ /*
+ * Then, we give the subdisk to the plex; we check if the
+ * given values are correct and maybe adjust them.
+ */
+ error = gv_sd_to_plex(p, s, 1);
+ if (error) {
+ printf("FOO: couldn't give sd '%s' to plex '%s'\n",
+ s->name, p->name);
+ }
+ s->flags |= GV_SD_NEWBORN;
+
+ s->vinumconf = sc;
+ LIST_INSERT_HEAD(&sc->subdisks, s, sd);
+ }
+
+ LIST_FOREACH(s, &sc->subdisks, sd)
+ gv_update_sd_state(s);
+ LIST_FOREACH(p, &sc->plexes, plex)
+ gv_update_plex_config(p);
+ LIST_FOREACH(v, &sc->volumes, volume)
+ gv_update_vol_state(v);
+
+ /*
+ * Write out the configuration to each drive. If the drive doesn't
+ * have a valid geom_slice geom yet, attach it temporarily to our VINUM
+ * geom.
+ */
+ LIST_FOREACH(d, &sc->drives, drive) {
+ if (d->geom == NULL) {
+ /* XXX */
+ pp = g_provider_by_name(d->device);
+ cp = g_new_consumer(gp);
+ g_attach(cp, pp);
+ gv_save_config(cp, d, sc);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ } else
+ gv_save_config(NULL, d, sc);
+ }
+
+ return (0);
+}
+
+static void
+gv_config(struct gctl_req *req, struct g_class *mp, char const *verb)
+{
+ struct g_geom *gp;
+ struct gv_softc *sc;
+ struct sbuf *sb;
+ char *comment;
+
+ g_topology_assert();
+
+ gp = LIST_FIRST(&mp->geom);
+ sc = gp->softc;
+
+ if (!strcmp(verb, "list")) {
+ gv_list(gp, req);
+
+ /* Save our configuration back to disk. */
+ } else if (!strcmp(verb, "saveconfig")) {
+
+ gv_save_config_all(sc);
+
+ /* Return configuration in string form. */
+ } else if (!strcmp(verb, "getconfig")) {
+ comment = gctl_get_param(req, "comment", NULL);
+
+ sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
+ gv_format_config(sc, sb, 0, comment);
+ sbuf_finish(sb);
+ gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1);
+ sbuf_delete(sb);
+
+ } else if (!strcmp(verb, "create")) {
+ gv_create(gp, req);
+
+ } else if (!strcmp(verb, "remove")) {
+ gv_remove(gp, req);
+
+ } else if (!strcmp(verb, "start")) {
+ gv_start_obj(gp, req);
+
+ } else
+ gctl_error(req, "Unknown verb parameter");
+}
+
+static int
+gv_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
+{
+ struct g_geom *gp2;
+ struct gv_softc *sc;
+ struct gv_drive *d, *d2;
+ struct gv_freelist *fl, *fl2;
+
+ g_trace(G_T_TOPOLOGY, "gv_destroy_geom: %s", gp->name);
+ g_topology_assert();
+
+ KASSERT(gp != NULL, ("gv_destroy_geom: null gp"));
+ KASSERT(gp->softc != NULL, ("gv_destroy_geom: null sc"));
+
+ sc = gp->softc;
+
+ /*
+ * Check if any of our drives is still open; if so, refuse destruction.
+ */
+ LIST_FOREACH(d, &sc->drives, drive) {
+ gp2 = d->geom;
+ if (gv_is_open(gp2))
+ return (EBUSY);
+ }
+
+ LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
+ g_free(d->hdr);
+ d->hdr = NULL;
+ LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
+ d->freelist_entries--;
+ LIST_REMOVE(fl, freelist);
+ g_free(fl);
+ fl = NULL;
+ }
+ LIST_REMOVE(d, drive);
+ }
+
+ g_free(sc);
+ sc = NULL;
+ g_wither_geom(gp, ENXIO);
+ return (0);
+}
+
+#define VINUM_CLASS_NAME "VINUM"
+
+static struct g_class g_vinum_class = {
+ .name = VINUM_CLASS_NAME,
+ .taste = gv_taste,
+ .destroy_geom = gv_destroy_geom,
+ .ctlreq = gv_config,
+};
+
+DECLARE_GEOM_CLASS(g_vinum_class, g_vinum);
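
The gv_config() handler above is reached through GEOM's gctl request interface: userland names the class ("VINUM") and a verb, and gv_config()/gv_list() pull the remaining parameters ("cmd", "argc", "argv%d", "flags") out of the request and return the result in the "config" parameter. For illustration only (this is not part of the commit), a minimal libgeom caller for the "list" verb could look like the sketch below; the function name and the 64 KB reply buffer standing in for GV_CFG_LEN are assumptions, and the program would be linked against -lgeom.

/*
 * Illustrative sketch, not part of this commit: issue the "list" verb to
 * the VINUM class from userland via libgeom.  Parameter names mirror
 * gv_config() and gv_list() above; the buffer size is an assumption.
 */
#include <sys/types.h>
#include <err.h>
#include <stdio.h>
#include <libgeom.h>

int
list_vinum(void)
{
	struct gctl_req *req;
	const char *errstr;
	char config[65536];		/* assumed to cover GV_CFG_LEN */
	int argc = 0, flags = 0;

	config[0] = '\0';
	req = gctl_get_handle();
	gctl_ro_param(req, "class", -1, "VINUM");
	gctl_ro_param(req, "verb", -1, "list");
	gctl_ro_param(req, "cmd", -1, "list");
	gctl_ro_param(req, "argc", sizeof(argc), &argc);
	gctl_ro_param(req, "flags", sizeof(flags), &flags);
	gctl_rw_param(req, "config", sizeof(config), config);
	errstr = gctl_issue(req);
	if (errstr != NULL) {
		warnx("list failed: %s", errstr);
		gctl_free(req);
		return (-1);
	}
	printf("%s", config);
	gctl_free(req);
	return (0);
}

The other verbs handled by gv_config() ("create", "remove", "start", "saveconfig", "getconfig") follow the same pattern with their respective parameters.
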
diff --git a/sys/geom/vinum/geom_vinum.h b/sys/geom/vinum/geom_vinum.h
new file mode 100644
index 0000000..567c8b6
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_VINUM_H_
+#define _GEOM_VINUM_H_
+
+#define ERRBUFSIZ 1024
+
+/* geom_vinum_drive.c */
+void gv_save_config_all(struct gv_softc *);
+void gv_save_config(struct g_consumer *, struct gv_drive *,
+ struct gv_softc *);
+
+/* geom_vinum_init.c */
+void gv_start_obj(struct g_geom *, struct gctl_req *);
+
+/* geom_vinum_list.c */
+void gv_ld(struct g_geom *, struct gctl_req *, struct sbuf *);
+void gv_lp(struct g_geom *, struct gctl_req *, struct sbuf *);
+void gv_ls(struct g_geom *, struct gctl_req *, struct sbuf *);
+void gv_lv(struct g_geom *, struct gctl_req *, struct sbuf *);
+void gv_list(struct g_geom *, struct gctl_req *);
+
+/* geom_vinum_rm.c */
+void gv_remove(struct g_geom *, struct gctl_req *);
+
+/* geom_vinum_state.c */
+int gv_sdstatemap(struct gv_plex *);
+int gv_set_drive_state(struct gv_drive *, int, int);
+int gv_set_sd_state(struct gv_sd *, int, int);
+void gv_update_sd_state(struct gv_sd *);
+void gv_update_plex_state(struct gv_plex *);
+void gv_update_vol_state(struct gv_volume *);
+
+/* geom_vinum_subr.c */
+void gv_adjust_freespace(struct gv_sd *, off_t);
+struct g_geom *find_vinum_geom(void);
+struct gv_drive *gv_find_drive(struct gv_softc *, char *);
+struct gv_plex *gv_find_plex(struct gv_softc *, char *);
+struct gv_sd *gv_find_sd(struct gv_softc *, char *);
+struct gv_volume *gv_find_vol(struct gv_softc *, char *);
+void gv_format_config(struct gv_softc *, struct sbuf *, int, char *);
+int gv_is_striped(struct gv_plex *);
+int gv_is_open(struct g_geom *);
+void gv_kill_thread(struct gv_plex *);
+int gv_object_type(struct gv_softc *, char *);
+void gv_parse_config(struct gv_softc *, u_char *, int);
+const char *gv_roughlength(off_t, int);
+int gv_sd_to_drive(struct gv_softc *, struct gv_drive *, struct gv_sd *,
+ char *, int);
+int gv_sd_to_plex(struct gv_plex *, struct gv_sd *, int);
+void gv_update_plex_config(struct gv_plex *);
+
+#endif /* !_GEOM_VINUM_H_ */
diff --git a/sys/geom/vinum/geom_vinum_drive.c b/sys/geom/vinum/geom_vinum_drive.c
new file mode 100644
index 0000000..161b6ac
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_drive.c
@@ -0,0 +1,476 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/errno.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum.h>
+#include <geom/vinum/geom_vinum_share.h>
+
+void gv_drive_modify(struct gv_drive *);
+
+void
+gv_save_config_all(struct gv_softc *sc)
+{
+ struct gv_drive *d;
+
+ g_topology_assert();
+
+ LIST_FOREACH(d, &sc->drives, drive) {
+ if (d->geom == NULL)
+ continue;
+ gv_save_config(NULL, d, sc);
+ }
+}
+
+/* Save the vinum configuration back to disk. */
+void
+gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp2;
+ struct gv_hdr *vhdr, *hdr;
+ struct sbuf *sb;
+ int error;
+
+ g_topology_assert();
+
+ KASSERT(d != NULL, ("gv_save_config: null d"));
+ KASSERT(sc != NULL, ("gv_save_config: null sc"));
+
+ if (cp == NULL) {
+ gp = d->geom;
+ KASSERT(gp != NULL, ("gv_save_config: null gp"));
+ cp2 = LIST_FIRST(&gp->consumer);
+ KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
+ } else
+ cp2 = cp;
+
+ vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
+ vhdr->magic = GV_MAGIC;
+ vhdr->config_length = GV_CFG_LEN;
+
+ hdr = d->hdr;
+ if (hdr == NULL) {
+ printf("NULL hdr!!!\n");
+ g_free(vhdr);
+ return;
+ }
+ microtime(&hdr->label.last_update);
+ bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
+
+ sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
+ gv_format_config(sc, sb, 1, NULL);
+ sbuf_finish(sb);
+
+ error = g_access(cp2, 0, 1, 0);
+ if (error) {
+ printf("g_access failed: %d\n", error);
+ sbuf_delete(sb);
+ return;
+ }
+ g_topology_unlock();
+
+ do {
+ error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN);
+ if (error) {
+ printf("writing vhdr failed: %d", error);
+ break;
+ }
+
+ error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
+ GV_CFG_LEN);
+ if (error) {
+ printf("writing first config copy failed: %d", error);
+ break;
+ }
+
+ error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
+ sbuf_data(sb), GV_CFG_LEN);
+ if (error)
+ printf("writing second config copy failed: %d", error);
+ } while (0);
+
+ g_topology_lock();
+ g_access(cp2, 0, -1, 0);
+ sbuf_delete(sb);
+ g_free(vhdr);
+
+ if (d->geom != NULL)
+ gv_drive_modify(d);
+}
+
+/* This resembles g_slice_access(). */
+static int
+gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_provider *pp2;
+ struct gv_drive *d;
+ struct gv_sd *s, *s2;
+ int error;
+
+ gp = pp->geom;
+ cp = LIST_FIRST(&gp->consumer);
+ KASSERT(cp != NULL, ("gv_drive_access: NULL cp"));
+
+ d = gp->softc;
+
+ s = pp->private;
+ KASSERT(s != NULL, ("gv_drive_access: NULL s"));
+
+ LIST_FOREACH(s2, &d->subdisks, from_drive) {
+ if (s == s2)
+ continue;
+ if (s->drive_offset + s->size <= s2->drive_offset)
+ continue;
+ if (s2->drive_offset + s2->size <= s->drive_offset)
+ continue;
+
+ /* Overlap. */
+ pp2 = s2->provider;
+ KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
+ if ((pp->acw + dw) > 0 && pp2->ace > 0) {
+ printf("FOOO: permission denied - e\n");
+ return (EPERM);
+ }
+ if ((pp->ace + de) > 0 && pp2->acw > 0) {
+ printf("FOOO: permission denied - w\n");
+ return (EPERM);
+ }
+ }
+
+ /* On first open, grab an extra "exclusive" bit */
+ if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0)
+ de++;
+ /* ... and let go of it on last close */
+ if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1)
+ de--;
+ error = g_access(cp, dr, dw, de);
+ if (error) {
+ printf("FOOO: g_access failed: %d\n", error);
+ }
+ return (error);
+}
+
+static void
+gv_drive_start(struct bio *bp)
+{
+ struct bio *bp2;
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_provider *pp;
+ struct gv_drive *d;
+ struct gv_sd *s;
+
+ pp = bp->bio_to;
+ gp = pp->geom;
+ cp = LIST_FIRST(&gp->consumer);
+ d = gp->softc;
+ s = pp->private;
+
+ if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
+ g_io_deliver(bp, ENXIO);
+ return;
+ }
+
+ switch(bp->bio_cmd) {
+ case BIO_READ:
+ case BIO_WRITE:
+ case BIO_DELETE:
+ if (bp->bio_offset > s->size) {
+ g_io_deliver(bp, EINVAL); /* XXX: EWHAT ? */
+ return;
+ }
+ bp2 = g_clone_bio(bp);
+ if (bp2 == NULL) {
+ g_io_deliver(bp, ENOMEM);
+ return;
+ }
+ if (bp2->bio_offset + bp2->bio_length > s->size)
+ bp2->bio_length = s->size - bp2->bio_offset;
+ bp2->bio_done = g_std_done;
+ bp2->bio_offset += s->drive_offset;
+ g_io_request(bp2, cp);
+ return;
+
+ case BIO_GETATTR:
+ if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) {
+ struct g_kerneldump *gkd;
+
+ gkd = (struct g_kerneldump *)bp->bio_data;
+ gkd->offset += s->drive_offset;
+ if (gkd->length > s->size)
+ gkd->length = s->size;
+ /* now, pass it on downwards... */
+ }
+ bp2 = g_clone_bio(bp);
+ if (bp2 == NULL) {
+ g_io_deliver(bp, ENOMEM);
+ return;
+ }
+ bp2->bio_done = g_std_done;
+ g_io_request(bp2, cp);
+ return;
+
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ return;
+ }
+}
+
+static void
+gv_drive_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ int error;
+
+ g_topology_assert();
+ gp = cp->geom;
+ g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
+ if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
+ g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+ error = cp->provider->error;
+ if (error == 0)
+ error = ENXIO;
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (!LIST_EMPTY(&gp->consumer))
+ return;
+ g_free(gp->softc);
+ g_wither_geom(gp, error);
+}
+
+static struct g_geom *
+gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+ struct g_geom *gp, *gp2;
+ struct g_consumer *cp;
+ struct gv_drive *d;
+ struct gv_sd *s;
+ struct gv_softc *sc;
+ struct gv_freelist *fl;
+ struct gv_hdr *vhdr;
+ int error;
+ char errstr[ERRBUFSIZ];
+
+ vhdr = NULL;
+ d = NULL;
+
+ g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
+ g_topology_assert();
+
+ if (pp->sectorsize == 0)
+ return(NULL);
+
+ /* Find the VINUM class and its associated geom. */
+ gp2 = find_vinum_geom();
+ if (gp2 == NULL)
+ return (NULL);
+ sc = gp2->softc;
+
+ gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
+ gp->start = gv_drive_start;
+ gp->spoiled = gv_drive_orphan;
+ gp->orphan = gv_drive_orphan;
+ gp->access = gv_drive_access;
+ gp->start = gv_drive_start;
+
+ cp = g_new_consumer(gp);
+ g_attach(cp, pp);
+ error = g_access(cp, 1, 0, 0);
+ if (error) {
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ g_destroy_geom(gp);
+ return (NULL);
+ }
+
+ g_topology_unlock();
+
+ /* Now check if the provided slice is a valid vinum drive. */
+ do {
+ vhdr = g_read_data(cp, GV_HDR_OFFSET, GV_HDR_LEN, &error);
+ if (vhdr == NULL || error != 0)
+ break;
+ if (vhdr->magic != GV_MAGIC) {
+ g_free(vhdr);
+ break;
+ }
+
+ /*
+ * We have found a valid vinum drive. Let's see if it is
+ * already known in the configuration.
+ */
+ g_topology_lock();
+ g_access(cp, -1, 0, 0);
+
+ d = gv_find_drive(sc, vhdr->label.name);
+
+ /* We already know about this drive. */
+ if (d != NULL) {
+ bcopy(vhdr, d->hdr, sizeof(*vhdr));
+
+ /* This is a new drive. */
+ } else {
+ d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
+
+ /* Initialize all needed variables. */
+ d->size = pp->mediasize - GV_DATA_START;
+ d->avail = d->size;
+ d->hdr = vhdr;
+ strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
+ LIST_INIT(&d->subdisks);
+ LIST_INIT(&d->freelist);
+
+ /* We also need a freelist entry. */
+ fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
+ fl->offset = GV_DATA_START;
+ fl->size = d->avail;
+ LIST_INSERT_HEAD(&d->freelist, fl, freelist);
+ d->freelist_entries = 1;
+
+ /* Save it into the main configuration. */
+ LIST_INSERT_HEAD(&sc->drives, d, drive);
+ }
+
+ gp->softc = d;
+ d->geom = gp;
+ strncpy(d->device, pp->name, GV_MAXDRIVENAME);
+
+ /*
+ * Find out which subdisks belong to this drive and crosslink
+ * them.
+ */
+ LIST_FOREACH(s, &sc->subdisks, sd) {
+ if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
+ /* XXX: errors ignored */
+ gv_sd_to_drive(sc, d, s, errstr,
+ sizeof(errstr));
+ }
+
+ /* This drive is now up for sure. */
+ gv_set_drive_state(d, GV_DRIVE_UP, 0);
+
+ /*
+ * If there are subdisks on this drive, we need to create
+ * providers for them.
+ */
+ if (d->sdcount)
+ gv_drive_modify(d);
+
+ return (gp);
+
+ } while (0);
+
+ g_topology_lock();
+ g_access(cp, -1, 0, 0);
+
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ g_free(gp->softc);
+ g_destroy_geom(gp);
+ return (NULL);
+}
+
+/*
+ * Modify the providers for the given drive 'd'. It is assumed that the
+ * subdisk list of 'd' is already correctly set up.
+ */
+void
+gv_drive_modify(struct gv_drive *d)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_provider *pp, *pp2;
+ struct gv_sd *s;
+ int nsd;
+
+ KASSERT(d != NULL, ("gv_drive_modify: null d"));
+ gp = d->geom;
+ KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
+ cp = LIST_FIRST(&gp->consumer);
+ KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
+ pp = cp->provider;
+ KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
+
+ g_topology_assert();
+
+ nsd = 0;
+ LIST_FOREACH(s, &d->subdisks, from_drive) {
+ /* This subdisk already has a provider. */
+ if (s->provider != NULL)
+ continue;
+ pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
+ pp2->mediasize = s->size;
+ pp2->sectorsize = pp->sectorsize;
+ g_error_provider(pp2, 0);
+ s->provider = pp2;
+ pp2->private = s;
+ }
+}
+
+static int
+gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
+ struct g_geom *gp)
+{
+ /*struct gv_drive *d;*/
+
+ g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
+ g_topology_assert();
+
+ /* g_free(sc); */
+ g_wither_geom(gp, ENXIO);
+ return (0);
+}
+
+#define VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
+
+static struct g_class g_vinum_drive_class = {
+ .name = VINUMDRIVE_CLASS_NAME,
+ .taste = gv_drive_taste,
+ .destroy_geom = gv_drive_destroy_geom
+};
+
+DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);
diff --git a/sys/geom/vinum/geom_vinum_init.c b/sys/geom/vinum/geom_vinum_init.c
new file mode 100644
index 0000000..1eaa63d
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_init.c
@@ -0,0 +1,405 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum.h>
+#include <geom/vinum/geom_vinum_share.h>
+
+int gv_init_plex(struct gv_plex *);
+int gv_init_sd(struct gv_sd *);
+void gv_init_td(void *);
+void gv_start_plex(struct gv_plex *);
+void gv_start_vol(struct gv_volume *);
+void gv_sync(struct gv_volume *);
+void gv_sync_td(void *);
+
+struct gv_sync_args {
+ struct gv_volume *v;
+ struct gv_plex *from;
+ struct gv_plex *to;
+ off_t syncsize;
+};
+
+void
+gv_start_obj(struct g_geom *gp, struct gctl_req *req)
+{
+ struct gv_softc *sc;
+ struct gv_volume *v;
+ struct gv_plex *p;
+ int *argc, *initsize;
+ char *argv, buf[20];
+ int i, type;
+
+ argc = gctl_get_paraml(req, "argc", sizeof(*argc));
+ initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
+
+ if (argc == NULL || *argc == 0) {
+ gctl_error(req, "no arguments given");
+ return;
+ }
+
+ sc = gp->softc;
+
+ for (i = 0; i < *argc; i++) {
+ snprintf(buf, sizeof(buf), "argv%d", i);
+ argv = gctl_get_param(req, buf, NULL);
+ if (argv == NULL)
+ continue;
+ type = gv_object_type(sc, argv);
+ switch (type) {
+ case GV_TYPE_VOL:
+ v = gv_find_vol(sc, argv);
+ gv_start_vol(v);
+ break;
+
+ case GV_TYPE_PLEX:
+ p = gv_find_plex(sc, argv);
+ gv_start_plex(p);
+ break;
+
+ case GV_TYPE_SD:
+ case GV_TYPE_DRIVE:
+ /* XXX not yet */
+ gctl_error(req, "cannot start '%s'", argv);
+ return;
+ default:
+ gctl_error(req, "unknown object '%s'", argv);
+ return;
+ }
+ }
+}
+
+void
+gv_start_plex(struct gv_plex *p)
+{
+ struct gv_volume *v;
+
+ KASSERT(p != NULL, ("gv_start_plex: NULL p"));
+
+ if (p->state == GV_PLEX_UP)
+ return;
+
+ v = p->vol_sc;
+ if ((v != NULL) && (v->plexcount > 1))
+ gv_sync(v);
+ else if (p->org == GV_PLEX_RAID5)
+ gv_init_plex(p);
+
+ return;
+}
+
+void
+gv_start_vol(struct gv_volume *v)
+{
+ struct gv_plex *p;
+
+ KASSERT(v != NULL, ("gv_start_vol: NULL v"));
+
+ if (v->plexcount == 0)
+ return;
+
+ else if (v->plexcount == 1) {
+ p = LIST_FIRST(&v->plexes);
+ KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
+ if (p->org == GV_PLEX_RAID5) {
+ switch (p->state) {
+ case GV_PLEX_DOWN:
+ gv_init_plex(p);
+ break;
+ case GV_PLEX_DEGRADED: /* XXX not yet */
+ default:
+ return;
+ }
+ }
+ } else
+ gv_sync(v);
+}
+
+void
+gv_sync(struct gv_volume *v)
+{
+ struct gv_softc *sc;
+ struct gv_plex *p, *up;
+ struct gv_sync_args *sync;
+
+ KASSERT(v != NULL, ("gv_sync: NULL v"));
+ sc = v->vinumconf;
+ KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
+
+ /* Find the plex that's up. */
+ up = NULL;
+ LIST_FOREACH(up, &v->plexes, in_volume) {
+ if (up->state == GV_PLEX_UP)
+ break;
+ }
+
+ /* Didn't find a good plex. */
+ if (up == NULL)
+ return;
+
+ LIST_FOREACH(p, &v->plexes, in_volume) {
+ if ((p == up) || (p->state == GV_PLEX_UP))
+ continue;
+ sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
+ sync->v = v;
+ sync->from = up;
+ sync->to = p;
+ sync->syncsize = GV_DFLT_SYNCSIZE;
+ kthread_create(gv_sync_td, sync, NULL, 0, 0, "sync_p '%s'",
+ p->name);
+ }
+}
+
+int
+gv_init_plex(struct gv_plex *p)
+{
+ struct gv_sd *s;
+ int err;
+
+ KASSERT(p != NULL, ("gv_init_plex: NULL p"));
+
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ err = gv_init_sd(s);
+ if (err)
+ return (err);
+ }
+
+ return (0);
+}
+
+int
+gv_init_sd(struct gv_sd *s)
+{
+ KASSERT(s != NULL, ("gv_init_sd: NULL s"));
+
+ if (gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE))
+ return (-1);
+
+ s->init_size = GV_DFLT_SYNCSIZE;
+ s->flags &= ~GV_SD_INITCANCEL;
+
+ /* Spawn the thread that does the work for us. */
+ kthread_create(gv_init_td, s, NULL, 0, 0, "init_sd %s", s->name);
+
+ return (0);
+}
+
+void
+gv_sync_td(void *arg)
+{
+ struct bio *bp;
+ struct gv_plex *p;
+ struct g_consumer *from, *to;
+ struct gv_sync_args *sync;
+ u_char *buf;
+ off_t i;
+ int error;
+
+ sync = arg;
+
+ from = sync->from->consumer;
+ to = sync->to->consumer;
+
+ p = sync->to;
+ p->synced = 0;
+ p->flags |= GV_PLEX_SYNCING;
+
+ error = 0;
+
+ g_topology_lock();
+ error = g_access(from, 1, 0, 0);
+ if (error) {
+ g_topology_unlock();
+ printf("gvinum: sync from '%s' failed to access consumer: %d\n",
+ sync->from->name, error);
+ kthread_exit(error);
+ }
+ error = g_access(to, 0, 1, 0);
+ if (error) {
+ g_access(from, -1, 0, 0);
+ g_topology_unlock();
+ printf("gvinum: sync to '%s' failed to access consumer: %d\n",
+ p->name, error);
+ kthread_exit(error);
+ }
+ g_topology_unlock();
+
+ for (i = 0; i < p->size; i+= sync->syncsize) {
+ /* Read some bits from the good plex. */
+ buf = g_read_data(from, i, sync->syncsize, &error);
+ if (buf == NULL) {
+ printf("gvinum: sync read from '%s' failed at offset "
+ "%jd, errno: %d\n", sync->from->name, i, error);
+ break;
+ }
+
+ /*
+ * Create a bio and schedule it down on the 'bad' plex. We
+ * cannot simply use g_write_data() because we have to let the
+ * lower parts know that we are an initialization process and
+ * not a 'normal' request.
+ */
+ bp = g_new_bio();
+ if (bp == NULL) {
+ printf("gvinum: sync write to '%s' failed at offset "
+ "%jd, out of memory\n", p->name, i);
+ g_free(buf);
+ break;
+ }
+ bp->bio_cmd = BIO_WRITE;
+ bp->bio_offset = i;
+ bp->bio_length = sync->syncsize;
+ bp->bio_data = buf;
+ bp->bio_done = NULL;
+
+ /*
+		 * This hack declares this bio as part of an initialization
+ * process, so that the lower levels allow it to get through.
+ */
+ bp->bio_caller1 = p;
+
+ /* Schedule it down ... */
+ g_io_request(bp, to);
+
+ /* ... and wait for the result. */
+ error = biowait(bp, "gwrite");
+ g_destroy_bio(bp);
+ g_free(buf);
+ if (error) {
+ printf("gvinum: sync write to '%s' failed at offset "
+ "%jd, errno: %d\n", p->name, i, error);
+ break;
+ }
+
+ /* Note that we have synced a little bit more. */
+ p->synced += sync->syncsize;
+ }
+
+ g_topology_lock();
+ g_access(from, -1, 0, 0);
+ g_access(to, 0, -1, 0);
+ g_topology_unlock();
+
+ /* Successful initialization. */
+ if (!error) {
+ p->flags &= ~GV_PLEX_SYNCING;
+ printf("gvinum: plex '%s': sync finished\n", p->name);
+ }
+
+ g_free(sync);
+ kthread_exit(error);
+}
+
+void
+gv_init_td(void *arg)
+{
+ struct gv_sd *s;
+ struct gv_drive *d;
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ int error;
+ off_t i, init_size, start, offset, length;
+ u_char *buf;
+
+ s = arg;
+ KASSERT(s != NULL, ("gv_init_td: NULL s"));
+ d = s->drive_sc;
+ KASSERT(d != NULL, ("gv_init_td: NULL d"));
+ gp = d->geom;
+ KASSERT(gp != NULL, ("gv_init_td: NULL gp"));
+
+ cp = LIST_FIRST(&gp->consumer);
+ KASSERT(cp != NULL, ("gv_init_td: NULL cp"));
+
+ s->init_error = 0;
+ init_size = s->init_size;
+ start = s->drive_offset + s->initialized;
+ offset = s->drive_offset;
+ length = s->size;
+
+ buf = g_malloc(s->init_size, M_WAITOK | M_ZERO);
+
+ g_topology_lock();
+ error = g_access(cp, 0, 1, 0);
+ if (error) {
+ s->init_error = error;
+ g_topology_unlock();
+ printf("geom_vinum: init '%s' failed to access consumer: %d\n",
+ s->name, error);
+ kthread_exit(error);
+ }
+ g_topology_unlock();
+
+ for (i = start; i < offset + length; i += init_size) {
+ if (s->flags & GV_SD_INITCANCEL) {
+ printf("geom_vinum: subdisk '%s' init: cancelled at"
+ " offset %jd (drive offset %jd)\n", s->name,
+ (intmax_t)s->initialized, (intmax_t)i);
+ error = EAGAIN;
+ break;
+ }
+ error = g_write_data(cp, i, buf, init_size);
+ if (error) {
+ printf("geom_vinum: subdisk '%s' init: write failed"
+ " at offset %jd (drive offset %jd)\n", s->name,
+ (intmax_t)s->initialized, (intmax_t)i);
+ break;
+ }
+ s->initialized += init_size;
+ }
+
+ g_free(buf);
+
+ g_topology_lock();
+ g_access(cp, 0, -1, 0);
+ g_topology_unlock();
+ if (error) {
+ s->init_error = error;
+ g_topology_lock();
+ gv_set_sd_state(s, GV_SD_STALE,
+ GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
+ g_topology_unlock();
+ } else {
+ g_topology_lock();
+ gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
+ g_topology_unlock();
+ s->initialized = 0;
+ printf("geom_vinum: init '%s' finished\n", s->name);
+ }
+ kthread_exit(error);
+}
diff --git a/sys/geom/vinum/geom_vinum_list.c b/sys/geom/vinum/geom_vinum_list.c
new file mode 100644
index 0000000..f70cffb
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_list.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum.h>
+#include <geom/vinum/geom_vinum_share.h>
+
+void gv_lvi(struct gv_volume *, struct sbuf *, int);
+void gv_lpi(struct gv_plex *, struct sbuf *, int);
+void gv_lsi(struct gv_sd *, struct sbuf *, int);
+void gv_ldi(struct gv_drive *, struct sbuf *, int);
+
+void
+gv_list(struct g_geom *gp, struct gctl_req *req)
+{
+ struct gv_softc *sc;
+ struct gv_drive *d;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct gv_volume *v;
+ struct sbuf *sb;
+ int *argc, i, *flags, type;
+ char *arg, buf[20], *cmd;
+
+ argc = gctl_get_paraml(req, "argc", sizeof(*argc));
+
+ if (argc == NULL) {
+ gctl_error(req, "no arguments given");
+ return;
+ }
+
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+
+ sc = gp->softc;
+
+ sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
+
+ /* Figure out which command was given. */
+ cmd = gctl_get_param(req, "cmd", NULL);
+
+ /* List specific objects or everything. */
+ if (!strcmp(cmd, "list") || !strcmp(cmd, "l")) {
+ if (*argc) {
+ for (i = 0; i < *argc; i++) {
+ snprintf(buf, sizeof(buf), "argv%d", i);
+ arg = gctl_get_param(req, buf, NULL);
+ if (arg == NULL)
+ continue;
+ type = gv_object_type(sc, arg);
+ switch (type) {
+ case GV_TYPE_VOL:
+ v = gv_find_vol(sc, arg);
+ gv_lvi(v, sb, *flags);
+ break;
+ case GV_TYPE_PLEX:
+ p = gv_find_plex(sc, arg);
+ gv_lpi(p, sb, *flags);
+ break;
+ case GV_TYPE_SD:
+ s = gv_find_sd(sc, arg);
+ gv_lsi(s, sb, *flags);
+ break;
+ case GV_TYPE_DRIVE:
+ d = gv_find_drive(sc, arg);
+ gv_ldi(d, sb, *flags);
+ break;
+ default:
+ gctl_error(req, "unknown object '%s'",
+ arg);
+ break;
+ }
+ }
+ } else {
+ gv_ld(gp, req, sb);
+ sbuf_printf(sb, "\n");
+ gv_lv(gp, req, sb);
+ sbuf_printf(sb, "\n");
+ gv_lp(gp, req, sb);
+ sbuf_printf(sb, "\n");
+ gv_ls(gp, req, sb);
+ }
+
+ /* List drives. */
+ } else if (!strcmp(cmd, "ld")) {
+ if (*argc) {
+ for (i = 0; i < *argc; i++) {
+ snprintf(buf, sizeof(buf), "argv%d", i);
+ arg = gctl_get_param(req, buf, NULL);
+ if (arg == NULL)
+ continue;
+ type = gv_object_type(sc, arg);
+ if (type != GV_TYPE_DRIVE) {
+ gctl_error(req, "'%s' is not a drive",
+ arg);
+ continue;
+ } else {
+ d = gv_find_drive(sc, arg);
+ gv_ldi(d, sb, *flags);
+ }
+ }
+ } else
+ gv_ld(gp, req, sb);
+
+ /* List volumes. */
+ } else if (!strcmp(cmd, "lv")) {
+ if (*argc) {
+ for (i = 0; i < *argc; i++) {
+ snprintf(buf, sizeof(buf), "argv%d", i);
+ arg = gctl_get_param(req, buf, NULL);
+ if (arg == NULL)
+ continue;
+ type = gv_object_type(sc, arg);
+ if (type != GV_TYPE_VOL) {
+ gctl_error(req, "'%s' is not a volume",
+ arg);
+ continue;
+ } else {
+ v = gv_find_vol(sc, arg);
+ gv_lvi(v, sb, *flags);
+ }
+ }
+ } else
+ gv_lv(gp, req, sb);
+
+ /* List plexes. */
+ } else if (!strcmp(cmd, "lp")) {
+ if (*argc) {
+ for (i = 0; i < *argc; i++) {
+ snprintf(buf, sizeof(buf), "argv%d", i);
+ arg = gctl_get_param(req, buf, NULL);
+ if (arg == NULL)
+ continue;
+ type = gv_object_type(sc, arg);
+ if (type != GV_TYPE_PLEX) {
+ gctl_error(req, "'%s' is not a plex",
+ arg);
+ continue;
+ } else {
+ p = gv_find_plex(sc, arg);
+ gv_lpi(p, sb, *flags);
+ }
+ }
+ } else
+ gv_lp(gp, req, sb);
+
+ /* List subdisks. */
+ } else if (!strcmp(cmd, "ls")) {
+ if (*argc) {
+ for (i = 0; i < *argc; i++) {
+ snprintf(buf, sizeof(buf), "argv%d", i);
+ arg = gctl_get_param(req, buf, NULL);
+ if (arg == NULL)
+ continue;
+ type = gv_object_type(sc, arg);
+ if (type != GV_TYPE_SD) {
+ gctl_error(req, "'%s' is not a subdisk",
+ arg);
+ continue;
+ } else {
+ s = gv_find_sd(sc, arg);
+ gv_lsi(s, sb, *flags);
+ }
+ }
+ } else
+ gv_ls(gp, req, sb);
+
+ } else
+ gctl_error(req, "unknown command '%s'", cmd);
+
+ sbuf_finish(sb);
+ gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1);
+ sbuf_delete(sb);
+}
+
+/* List one or more volumes. */
+void
+gv_lv(struct g_geom *gp, struct gctl_req *req, struct sbuf *sb)
+{
+ struct gv_softc *sc;
+ struct gv_volume *v;
+ int i, *flags;
+
+ sc = gp->softc;
+ i = 0;
+
+ LIST_FOREACH(v, &sc->volumes, volume)
+ i++;
+
+ sbuf_printf(sb, "%d volume%s:\n", i, i == 1 ? "" : "s");
+
+ if (i) {
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+ LIST_FOREACH(v, &sc->volumes, volume)
+ gv_lvi(v, sb, *flags);
+ }
+}
+
+/* List a single volume. */
+void
+gv_lvi(struct gv_volume *v, struct sbuf *sb, int flags)
+{
+ struct gv_plex *p;
+ int i;
+
+ if (flags & GV_FLAG_V) {
+ sbuf_printf(sb, "Volume %s:\tSize: %jd bytes (%jd MB)\n",
+ v->name, (intmax_t)v->size, (intmax_t)v->size / MEGABYTE);
+ sbuf_printf(sb, "\t\tState: %s\n", gv_volstate(v->state));
+ } else {
+ sbuf_printf(sb, "V %-21s State: %s\tPlexes: %7d\tSize: %s\n",
+ v->name, gv_volstate(v->state), v->plexcount,
+ gv_roughlength(v->size, 0));
+ }
+
+ if (flags & GV_FLAG_VV) {
+ i = 0;
+ LIST_FOREACH(p, &v->plexes, in_volume) {
+ sbuf_printf(sb, "\t\tPlex %2d:\t%s\t(%s), %s\n", i,
+ p->name, gv_plexstate(p->state),
+ gv_roughlength(p->size, 0));
+ i++;
+ }
+ }
+
+ if (flags & GV_FLAG_R) {
+ LIST_FOREACH(p, &v->plexes, in_volume)
+ gv_lpi(p, sb, flags);
+ }
+}
+
+/* List one or more plexes. */
+void
+gv_lp(struct g_geom *gp, struct gctl_req *req, struct sbuf *sb)
+{
+ struct gv_softc *sc;
+ struct gv_plex *p;
+ int i, *flags;
+
+ sc = gp->softc;
+ i = 0;
+
+ LIST_FOREACH(p, &sc->plexes, plex)
+ i++;
+
+ sbuf_printf(sb, "%d plex%s:\n", i, i == 1 ? "" : "es");
+
+ if (i) {
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+ LIST_FOREACH(p, &sc->plexes, plex)
+ gv_lpi(p, sb, *flags);
+ }
+}
+
+/* List a single plex. */
+void
+gv_lpi(struct gv_plex *p, struct sbuf *sb, int flags)
+{
+ struct gv_sd *s;
+ int i;
+
+ if (flags & GV_FLAG_V) {
+ sbuf_printf(sb, "Plex %s:\tSize:\t%9jd bytes (%jd MB)\n",
+ p->name, (intmax_t)p->size, (intmax_t)p->size / MEGABYTE);
+ sbuf_printf(sb, "\t\tSubdisks: %8d\n", p->sdcount);
+ sbuf_printf(sb, "\t\tState: %s\n\t\tOrganization: %s",
+ gv_plexstate(p->state), gv_plexorg(p->org));
+ if (gv_is_striped(p)) {
+ sbuf_printf(sb, "\tStripe size: %s\n",
+ gv_roughlength(p->stripesize, 1));
+ }
+ if (p->vol_sc != NULL) {
+ sbuf_printf(sb, "\t\tPart of volume %s\n", p->volume);
+ }
+ } else {
+ sbuf_printf(sb, "P %-18s %2s State: %s\tSubdisks: %5d"
+ "\tSize: %s\n", p->name, gv_plexorg_short(p->org),
+ gv_plexstate(p->state), p->sdcount,
+ gv_roughlength(p->size, 0));
+ }
+
+ if (flags & GV_FLAG_VV) {
+ i = 0;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ sbuf_printf(sb, "\t\tSubdisk %d:\t%s\n", i, s->name);
+ sbuf_printf(sb, "\t\t state: %s\tsize %11jd "
+ "(%jd MB)\n", gv_sdstate(s->state),
+ (intmax_t)s->size, (intmax_t)s->size / MEGABYTE);
+ if (p->org == GV_PLEX_CONCAT) {
+ sbuf_printf(sb, "\t\t\toffset %9jd (0x%jx)\n",
+ (intmax_t)s->plex_offset,
+ (intmax_t)s->plex_offset);
+ }
+ i++;
+ }
+ }
+
+ if (flags & GV_FLAG_R) {
+ LIST_FOREACH(s, &p->subdisks, in_plex)
+ gv_lsi(s, sb, flags);
+ }
+}
+
+/* List one or more subdisks. */
+void
+gv_ls(struct g_geom *gp, struct gctl_req *req, struct sbuf *sb)
+{
+ struct gv_softc *sc;
+ struct gv_sd *s;
+ int i, *flags;
+
+ sc = gp->softc;
+ i = 0;
+
+ LIST_FOREACH(s, &sc->subdisks, sd)
+ i++;
+
+ sbuf_printf(sb, "%d subdisk%s:\n", i, i == 1 ? "" : "s");
+
+ if (i) {
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+ LIST_FOREACH(s, &sc->subdisks, sd)
+ gv_lsi(s, sb, *flags);
+ }
+}
+
+/* List a single subdisk. */
+void
+gv_lsi(struct gv_sd *s, struct sbuf *sb, int flags)
+{
+ if (flags & GV_FLAG_V) {
+ sbuf_printf(sb, "Subdisk %s:\n", s->name);
+ sbuf_printf(sb, "\t\tSize: %16jd bytes (%jd MB)\n",
+ (intmax_t)s->size, (intmax_t)s->size / MEGABYTE);
+ sbuf_printf(sb, "\t\tState: %s\n", gv_sdstate(s->state));
+
+ if (s->state == GV_SD_INITIALIZING) {
+ sbuf_printf(sb, "\t\tInitialized: %16jd bytes "
+ "(%d%%)\n", (intmax_t)s->initialized,
+ (int)((s->initialized * 100) / s->size));
+ }
+
+ if (s->plex_sc != NULL) {
+ sbuf_printf(sb, "\t\tPlex %s at offset %jd (%s)\n",
+ s->plex, (intmax_t)s->plex_offset,
+ gv_roughlength(s->plex_offset, 1));
+ }
+
+ if (s->state == GV_SD_REVIVING) {
+ /* XXX */
+ }
+
+ sbuf_printf(sb, "\t\tDrive %s (%s) at offset %jd (%s)\n",
+ s->drive,
+ s->drive_sc == NULL ? "*missing*" : s->drive_sc->name,
+ (intmax_t)s->drive_offset,
+ gv_roughlength(s->drive_offset, 1));
+ } else {
+ /* XXX reviving and initializing... */
+ sbuf_printf(sb, "S %-21s State: ", s->name);
+ if (s->state == GV_SD_INITIALIZING) {
+ sbuf_printf(sb, "I %d%%\t",
+ (int)((s->initialized * 100) / s->size));
+ } else {
+ sbuf_printf(sb, "%s\t", gv_sdstate(s->state));
+ }
+ sbuf_printf(sb, "D: %-12s Size: %s\n", s->drive,
+ gv_roughlength(s->size, 0));
+ }
+}
+
+/* List one or more drives. */
+void
+gv_ld(struct g_geom *gp, struct gctl_req *req, struct sbuf *sb)
+{
+ struct gv_softc *sc;
+ struct gv_drive *d;
+ int i, *flags;
+
+ sc = gp->softc;
+ i = 0;
+
+ LIST_FOREACH(d, &sc->drives, drive)
+ i++;
+
+ sbuf_printf(sb, "%d drive%s:\n", i, i == 1 ? "" : "s");
+
+ if (i) {
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+ LIST_FOREACH(d, &sc->drives, drive)
+ gv_ldi(d, sb, *flags);
+ }
+}
+
+/* List a single drive. */
+void
+gv_ldi(struct gv_drive *d, struct sbuf *sb, int flags)
+{
+ struct gv_freelist *fl;
+ struct gv_sd *s;
+
+ /* Verbose listing. */
+ if (flags & GV_FLAG_V) {
+ sbuf_printf(sb, "Drive %s:\tDevice %s\n", d->name, d->device);
+ sbuf_printf(sb, "\t\tSize: %16jd bytes (%jd MB)\n",
+ (intmax_t)d->size, (intmax_t)d->size / MEGABYTE);
+ sbuf_printf(sb, "\t\tUsed: %16jd bytes (%jd MB)\n",
+ (intmax_t)d->size - d->avail,
+ (intmax_t)(d->size - d->avail) / MEGABYTE);
+ sbuf_printf(sb, "\t\tAvailable: %11jd bytes (%jd MB)\n",
+ (intmax_t)d->avail, (intmax_t)d->avail / MEGABYTE);
+ sbuf_printf(sb, "\t\tState: %s\n", gv_drivestate(d->state));
+
+ /* Be very verbose. */
+ if (flags & GV_FLAG_VV) {
+ sbuf_printf(sb, "\t\tFree list contains %d entries:\n",
+ d->freelist_entries);
+ sbuf_printf(sb, "\t\t Offset\t Size\n");
+ LIST_FOREACH(fl, &d->freelist, freelist)
+ sbuf_printf(sb, "\t\t%9jd\t%9jd\n",
+ (intmax_t)fl->offset, (intmax_t)fl->size);
+ }
+ } else {
+ sbuf_printf(sb, "D %-21s State: %s\t/dev/%s\tA: %jd/%jd MB "
+ "(%d%%)\n", d->name, gv_drivestate(d->state), d->device,
+ (intmax_t)d->avail / MEGABYTE, (intmax_t)d->size / MEGABYTE,
+ (int)((d->avail * 100) / d->size));
+ }
+
+ /* Recursive listing. */
+ if (flags & GV_FLAG_R) {
+ LIST_FOREACH(s, &d->subdisks, from_drive)
+ gv_lsi(s, sb, flags);
+ }
+}
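
The plex I/O code in geom_vinum_plex.c below maps a plex-relative offset onto a subdisk in gv_plexbuffer(). For the striped case this reduces to a few lines of integer arithmetic; the standalone sketch below (not part of the commit, names are hypothetical) restates that mapping with a worked example.

/*
 * Illustrative sketch, not part of this commit: the striped-plex offset
 * mapping performed by gv_plexbuffer().  For a plex offset boff, stripe
 * size S and n subdisks, compute the subdisk the request starts on, the
 * offset on that subdisk, and how many bytes remain in the stripe.
 */
#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>

static void
map_striped(off_t boff, off_t S, int n)
{
	off_t stripeno, stripestart, real_off, len_left;
	int sdno;

	stripeno = boff / S;			/* stripe number within the plex */
	sdno = stripeno % n;			/* subdisk holding that stripe */
	stripestart = (stripeno / n) * S;	/* stripe's start on that subdisk */
	real_off = (boff - stripeno * S) + stripestart;
	len_left = (stripestart + S) - real_off;	/* bytes left in the stripe */

	printf("boff %jd -> subdisk %d, offset %jd, %jd bytes left in stripe\n",
	    (intmax_t)boff, sdno, (intmax_t)real_off, (intmax_t)len_left);
}

int
main(void)
{
	/*
	 * Example: 64 KB stripes over 3 subdisks, request at plex offset
	 * 300000: stripe 4 sits on subdisk 1 at offset 103392, with 27680
	 * bytes left before the stripe boundary.
	 */
	map_striped(300000, 65536, 3);
	return (0);
}

A request that crosses the stripe boundary is carried on by the loop in gv_plex_start(), which advances the offset and data pointer by the returned length and builds the next sub-request.
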
diff --git a/sys/geom/vinum/geom_vinum_plex.c b/sys/geom/vinum/geom_vinum_plex.c
new file mode 100644
index 0000000..a7acf72
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_plex.c
@@ -0,0 +1,456 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum_raid5.h>
+#include <geom/vinum/geom_vinum.h>
+
+/* XXX: is this the place to catch dying subdisks? */
+static void
+gv_plex_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ struct gv_plex *p;
+ int error;
+
+ g_topology_assert();
+ gp = cp->geom;
+ g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);
+
+ if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
+ g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+ error = cp->provider->error;
+ if (error == 0)
+ error = ENXIO;
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (!LIST_EMPTY(&gp->consumer))
+ return;
+
+ p = gp->softc;
+ gv_kill_thread(p);
+ g_free(p);
+ g_wither_geom(gp, error);
+}
+
+static void
+gv_plex_done(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct gv_sd *s;
+
+ gp = bp->bio_to->geom;
+
+ s = bp->bio_caller1;
+ KASSERT(s != NULL, ("gv_plex_done: NULL s"));
+
+ if (bp->bio_error == 0)
+ s->initialized += bp->bio_length;
+
+ if (s->initialized >= s->size) {
+ gv_set_sd_state(s, GV_SD_UP, 0);
+ s->initialized = 0;
+ }
+
+ g_std_done(bp);
+}
+
+/* Find the correct subdisk to send the bio to and build a bio to send. */
+static int
+gv_plexbuffer(struct bio *bp, struct bio **bp2, struct g_consumer **cp,
+ caddr_t addr, long bcount, off_t boff)
+{
+ struct g_geom *gp;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct bio *cbp;
+ int i, sdno;
+ off_t len_left, real_len, real_off, stripeend, stripeno, stripestart;
+
+ s = NULL;
+
+ gp = bp->bio_to->geom;
+ p = gp->softc;
+
+ if (p == NULL || LIST_EMPTY(&p->subdisks))
+ return (ENXIO);
+
+ /*
+ * We only handle concatenated and striped plexes here. RAID5 plexes
+ * are handled in build_raid5_request().
+ */
+ switch (p->org) {
+ case GV_PLEX_CONCAT:
+ /*
+ * Find the subdisk where this request starts. The subdisks in
+ * this list must be ordered by plex_offset.
+ */
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->plex_offset <= boff &&
+ s->plex_offset + s->size > boff)
+ break;
+ }
+ /* Subdisk not found. */
+ if (s == NULL)
+ return (ENXIO);
+
+ /* Calculate corresponding offsets on disk. */
+ real_off = boff - s->plex_offset;
+ len_left = s->size - real_off;
+ real_len = (bcount > len_left) ? len_left : bcount;
+ break;
+
+ case GV_PLEX_STRIPED:
+ /* The number of the stripe where the request starts. */
+ stripeno = boff / p->stripesize;
+
+ /* The number of the subdisk where the stripe resides. */
+ sdno = stripeno % p->sdcount;
+
+ /* Find the right subdisk. */
+ i = 0;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (i == sdno)
+ break;
+ i++;
+ }
+
+ /* Subdisk not found. */
+ if (s == NULL)
+ return (ENXIO);
+
+ /* The offset of the stripe from the start of the subdisk. */
+ stripestart = (stripeno / p->sdcount) *
+ p->stripesize;
+
+ /* The offset at the end of the stripe. */
+ stripeend = stripestart + p->stripesize;
+
+ /* The offset of the request on this subdisk. */
+ real_off = boff - (stripeno * p->stripesize) +
+ stripestart;
+
+ /* The length left in this stripe. */
+ len_left = stripeend - real_off;
+
+ real_len = (bcount <= len_left) ? bcount : len_left;
+ break;
+
+ default:
+ return (EINVAL);
+ }
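+	/*
+	 * Illustrative example (made-up numbers): with a stripe size of
+	 * 64 KB and three subdisks, a striped request at plex offset 300 KB
+	 * falls into stripe number 4, which lives on subdisk 1.  That stripe
+	 * starts at offset 64 KB on the subdisk, so real_off becomes
+	 * 64 KB + 44 KB = 108 KB and len_left is 20 KB.
+	 */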
+
+ /* Now check if we can handle the request on this subdisk. */
+ switch (s->state) {
+ case GV_SD_UP:
+ /* If the subdisk is up, just continue. */
+ break;
+
+ case GV_SD_STALE:
+ if (bp->bio_caller1 != p)
+ return (ENXIO);
+
+ printf("FOO: setting sd %s to GV_SD_INITIALIZING\n", s->name);
+ gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
+ break;
+
+ case GV_SD_INITIALIZING:
+ if (bp->bio_cmd == BIO_READ)
+ return (ENXIO);
+ break;
+
+ default:
+ /* All other subdisk states mean it's not accessible. */
+ return (ENXIO);
+ }
+
+ /* Clone the bio and adjust the offsets and sizes. */
+ cbp = g_clone_bio(bp);
+ if (cbp == NULL)
+ return (ENOMEM);
+ cbp->bio_offset = real_off;
+ cbp->bio_length = real_len;
+ cbp->bio_data = addr;
+ if (bp->bio_caller1 == p) {
+ cbp->bio_caller1 = s;
+ cbp->bio_done = gv_plex_done;
+ } else
+ cbp->bio_done = g_std_done;
+ *bp2 = cbp;
+ *cp = s->consumer;
+ return (0);
+}
+
+static void
+gv_plex_start(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct gv_plex *p;
+ struct gv_raid5_packet *wp;
+ struct bio *bp2;
+ caddr_t addr;
+ off_t boff;
+ long bcount, rcount;
+ int err;
+
+ gp = bp->bio_to->geom;
+ p = gp->softc;
+
+ /*
+ * We cannot handle this request if too many of our subdisks are
+ * inaccessible.
+ */
+ if ((p->state < GV_PLEX_DEGRADED) && (bp->bio_caller1 != p)) {
+ g_io_deliver(bp, ENXIO); /* XXX: correct way? */
+ return;
+ }
+
+ switch(bp->bio_cmd) {
+ case BIO_READ:
+ case BIO_WRITE:
+ case BIO_DELETE:
+ /*
+		 * We split the request up into smaller packets and hand them
+ * down to our subdisks.
+ */
+ wp = NULL;
+ addr = bp->bio_data;
+ boff = bp->bio_offset;
+ for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
+ /*
+			 * RAID5 requests usually need to be split up into
+ * several subrequests.
+ */
+ if (p->org == GV_PLEX_RAID5) {
+ wp = gv_new_raid5_packet();
+ wp->bio = bp;
+ err = gv_build_raid5_req(wp, bp, addr, bcount,
+ boff);
+ } else
+ err = gv_plexbuffer(bp, &bp2, &cp, addr, bcount,
+ boff);
+
+ if (err) {
+ bp->bio_completed += bcount;
+ if (bp->bio_error == 0)
+ bp->bio_error = err;
+ if (bp->bio_completed == bp->bio_length)
+ g_io_deliver(bp, bp->bio_error);
+ return;
+ }
+
+ if (p->org != GV_PLEX_RAID5) {
+ rcount = bp2->bio_length;
+ g_io_request(bp2, cp);
+
+ /*
+ * RAID5 subrequests are queued on a worklist
+			 * and picked up by the worker thread. This
+ * ensures correct order.
+ */
+ } else {
+ mtx_lock(&p->worklist_mtx);
+				TAILQ_INSERT_TAIL(&p->worklist, wp, list);
+ mtx_unlock(&p->worklist_mtx);
+				wakeup(p);
+ rcount = wp->length;
+ }
+
+ boff += rcount;
+ addr += rcount;
+ }
+ return;
+
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ return;
+ }
+}
+
+static int
+gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp, *cp2;
+ int error;
+
+ gp = pp->geom;
+
+ error = ENXIO;
+ LIST_FOREACH(cp, &gp->consumer, consumer) {
+ error = g_access(cp, dr, dw, de);
+ if (error) {
+ LIST_FOREACH(cp2, &gp->consumer, consumer) {
+ if (cp == cp2)
+ break;
+ g_access(cp2, -dr, -dw, -de);
+ }
+ return (error);
+ }
+ }
+ return (error);
+}
+
+static struct g_geom *
+gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_provider *pp2;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct gv_softc *sc;
+
+ g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
+ g_topology_assert();
+
+ /* We only want to attach to subdisks. */
+ if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
+ return (NULL);
+
+ /* Find the VINUM class and its associated geom. */
+ gp = find_vinum_geom();
+ if (gp == NULL)
+ return (NULL);
+ sc = gp->softc;
+ KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));
+
+ /* Find out which subdisk the offered provider corresponds to. */
+ s = pp->private;
+ KASSERT(s != NULL, ("gv_plex_taste: NULL s"));
+
+ /* Now find the correct plex where this subdisk belongs to. */
+ p = gv_find_plex(sc, s->plex);
+ KASSERT(p != NULL, ("gv_plex_taste: NULL p"));
+
+ /*
+ * Add this subdisk to this plex. Since we trust the on-disk
+ * configuration, we don't check the given value (should we?).
+ * XXX: shouldn't be done here
+ */
+ gv_sd_to_plex(p, s, 0);
+
+ /* Now check if there's already a geom for this plex. */
+ gp = p->geom;
+
+ /* Yes, there is already a geom, so we just add the consumer. */
+ if (gp != NULL) {
+ /* Need to attach a new consumer to this subdisk. */
+ cp = g_new_consumer(gp);
+ g_attach(cp, pp);
+ s->consumer = cp;
+
+ /* Adjust the size of the providers this plex has. */
+ LIST_FOREACH(pp2, &gp->provider, provider)
+ pp2->mediasize = p->size;
+
+ return (NULL);
+
+ /* We need to create a new geom. */
+ } else {
+ gp = g_new_geomf(mp, "%s", p->name);
+ gp->start = gv_plex_start;
+ gp->orphan = gv_plex_orphan;
+ gp->access = gv_plex_access;
+ gp->softc = p;
+ p->geom = gp;
+
+ /* RAID5 plexes need a 'worker' thread, where IO is handled. */
+ if (p->org == GV_PLEX_RAID5) {
+ TAILQ_INIT(&p->worklist);
+ mtx_init(&p->worklist_mtx, "gvinum_worklist", NULL,
+ MTX_DEF);
+ p->flags &= ~GV_PLEX_THREAD_DIE;
+ kthread_create(gv_raid5_worker, gp, NULL, 0, 0,
+ "gv_raid5");
+ p->flags |= GV_PLEX_THREAD_ACTIVE;
+ }
+
+ /* Attach a consumer to this provider. */
+ cp = g_new_consumer(gp);
+ g_attach(cp, pp);
+ s->consumer = cp;
+
+ /* Create a provider for the outside world. */
+ pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
+ pp2->mediasize = p->size;
+ pp2->sectorsize = pp->sectorsize;
+ p->provider = pp2;
+ g_error_provider(pp2, 0);
+ return (gp);
+ }
+}
+
+static int
+gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
+ struct g_geom *gp)
+{
+ struct gv_plex *p;
+
+ g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
+ g_topology_assert();
+
+ p = gp->softc;
+
+ KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));
+
+ /*
+ * If this is a RAID5 plex, check if its worker thread is still active
+ * and signal it to self destruct.
+ */
+	gv_kill_thread(p);
+	if (p->org == GV_PLEX_RAID5)
+		mtx_destroy(&p->worklist_mtx);
+ /* g_free(sc); */
+ g_wither_geom(gp, ENXIO);
+ return (0);
+}
+
+#define VINUMPLEX_CLASS_NAME "VINUMPLEX"
+
+static struct g_class g_vinum_plex_class = {
+ .name = VINUMPLEX_CLASS_NAME,
+ .taste = gv_plex_taste,
+ .destroy_geom = gv_plex_destroy_geom,
+};
+
+DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);
diff --git a/sys/geom/vinum/geom_vinum_raid5.c b/sys/geom/vinum/geom_vinum_raid5.c
new file mode 100644
index 0000000..0c604fe
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_raid5.c
@@ -0,0 +1,616 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/conf.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum_raid5.h>
+#include <geom/vinum/geom_vinum.h>
+
+int gv_raid5_parity(struct gv_raid5_packet *);
+int gv_stripe_active(struct gv_raid5_packet *, struct gv_plex *);
+
+struct gv_raid5_bit *
+gv_new_raid5_bit(void)
+{
+ struct gv_raid5_bit *r;
+ r = g_malloc(sizeof(*r), M_NOWAIT | M_ZERO);
+ KASSERT(r != NULL, ("gv_new_raid5_bit: NULL r"));
+ return (r);
+}
+
+struct gv_raid5_packet *
+gv_new_raid5_packet(void)
+{
+ struct gv_raid5_packet *wp;
+
+ wp = g_malloc(sizeof(*wp), M_NOWAIT | M_ZERO);
+ KASSERT(wp != NULL, ("gv_new_raid5_packet: NULL wp"));
+ wp->state = SETUP;
+ wp->type = JUNK;
+ TAILQ_INIT(&wp->bits);
+
+ return (wp);
+}
+
+/*
+ * Check if the stripe that the work packet wants is already being used by
+ * some other work packet.
+ */
+int
+gv_stripe_active(struct gv_raid5_packet *wp, struct gv_plex *sc)
+{
+ struct gv_raid5_packet *wpa;
+
+ TAILQ_FOREACH(wpa, &sc->worklist, list) {
+ if (wpa->lockbase == wp->lockbase) {
+ if (wpa->bio == wp->bio)
+ return (0);
+ return (1);
+ }
+ }
+ return (0);
+}
+
+/*
+ * The "worker" thread that runs through the worklist and fires off the
+ * "subrequests" needed to fulfill a RAID5 read or write request.
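+ *
+ * Work packets move through the states SETUP -> VALID -> IO -> FINISH (see
+ * geom_vinum_raid5.h); this loop only acts on packets that are VALID (their
+ * subrequests get issued) or FINISH (they are completed and freed again).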
+ */
+void
+gv_raid5_worker(void *arg)
+{
+ struct bio *bp;
+ struct g_geom *gp;
+ struct gv_plex *p;
+ struct gv_raid5_packet *wp, *wpt;
+ struct gv_raid5_bit *rbp, *rbpt;
+ int error, restart;
+
+ gp = arg;
+ p = gp->softc;
+
+ mtx_lock(&p->worklist_mtx);
+ for (;;) {
+ restart = 0;
+ g_trace(G_T_TOPOLOGY, "gv_raid5_worker scan");
+ TAILQ_FOREACH_SAFE(wp, &p->worklist, list, wpt) {
+ /* This request packet is already being processed. */
+ if (wp->state == IO)
+ continue;
+ /* This request packet is ready for processing. */
+ if (wp->state == VALID) {
+ /* Couldn't get the lock, try again. */
+ if ((wp->lockbase != -1) &&
+ gv_stripe_active(wp, p))
+ continue;
+
+ wp->state = IO;
+ mtx_unlock(&p->worklist_mtx);
+ TAILQ_FOREACH_SAFE(rbp, &wp->bits, list, rbpt)
+ g_io_request(rbp->bio, rbp->consumer);
+ mtx_lock(&p->worklist_mtx);
+ continue;
+ }
+ if (wp->state == FINISH) {
+ bp = wp->bio;
+ bp->bio_completed += wp->length;
+ /*
+ * Deliver the original request if we have
+ * finished.
+ */
+ if (bp->bio_completed == bp->bio_length) {
+ mtx_unlock(&p->worklist_mtx);
+ g_io_deliver(bp, 0);
+ mtx_lock(&p->worklist_mtx);
+ }
+ TAILQ_REMOVE(&p->worklist, wp, list);
+ if (wp->bufmalloc == 1)
+ g_free(wp->buf);
+ g_free(wp);
+ restart++;
+ /*break;*/
+ }
+ }
+ if (!restart) {
+ /* Self-destruct. */
+ if (p->flags & GV_PLEX_THREAD_DIE)
+ break;
+ g_trace(G_T_TOPOLOGY, "gv_raid5_worker sleep");
+ error = msleep(p, &p->worklist_mtx, PRIBIO, "-",
+ hz/100);
+ }
+ }
+ mtx_unlock(&p->worklist_mtx);
+
+ g_trace(G_T_TOPOLOGY, "gv_raid5_worker die");
+
+ /* Signal our plex that we are dead. */
+ p->flags |= GV_PLEX_THREAD_DEAD;
+ wakeup(p);
+ kthread_exit(0);
+}
+
+/* Final bio transaction to write out the parity data. */
+int
+gv_raid5_parity(struct gv_raid5_packet *wp)
+{
+ struct bio *bp;
+
+ bp = g_new_bio();
+ if (bp == NULL)
+ return (ENOMEM);
+
+ wp->type = ISPARITY;
+ bp->bio_cmd = BIO_WRITE;
+ bp->bio_data = wp->buf;
+ bp->bio_offset = wp->offset;
+ bp->bio_length = wp->length;
+ bp->bio_done = gv_raid5_done;
+ bp->bio_caller1 = wp;
+ bp->bio_caller2 = NULL;
+ g_io_request(bp, wp->parity);
+
+ return (0);
+}
+
+/* We end up here after each subrequest. */
+void
+gv_raid5_done(struct bio *bp)
+{
+ struct bio *obp;
+ struct g_geom *gp;
+ struct gv_plex *p;
+ struct gv_raid5_packet *wp;
+ struct gv_raid5_bit *rbp;
+ off_t i;
+ int error;
+
+ wp = bp->bio_caller1;
+ rbp = bp->bio_caller2;
+ obp = wp->bio;
+ gp = bp->bio_from->geom;
+ p = gp->softc;
+
+ /* One less active subrequest. */
+ wp->active--;
+
+ switch (obp->bio_cmd) {
+ case BIO_READ:
+ /* Degraded reads need to handle parity data. */
+ if (wp->type == DEGRADED) {
+ for (i = 0; i < wp->length; i++)
+ wp->buf[i] ^= bp->bio_data[i];
+
+ /* When we're finished copy back the data we want. */
+ if (wp->active == 0)
+ bcopy(wp->buf, wp->data, wp->length);
+ }
+
+ break;
+
+ case BIO_WRITE:
+ /* Handle the parity data, if needed. */
+ if ((wp->type != NOPARITY) && (wp->type != ISPARITY)) {
+ for (i = 0; i < wp->length; i++)
+ wp->buf[i] ^= bp->bio_data[i];
+
+ /* Write out the parity data we calculated. */
+ if (wp->active == 0) {
+ wp->active++;
+ error = gv_raid5_parity(wp);
+ }
+ }
+ break;
+ }
+
+ g_destroy_bio(bp);
+
+ if (rbp != NULL) {
+ if (rbp->malloc == 1)
+ g_free(rbp->buf);
+ TAILQ_REMOVE(&wp->bits, rbp, list);
+ g_free(rbp);
+ }
+
+ /* This request group is done. */
+ if (wp->active == 0)
+ wp->state = FINISH;
+}
+
+/* Build a request group to perform (part of) a RAID5 request. */
+int
+gv_build_raid5_req(struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr,
+ long bcount, off_t boff)
+{
+ struct g_geom *gp;
+ struct gv_plex *p;
+ struct gv_raid5_bit *rbp;
+ struct gv_sd *broken, *original, *parity, *s;
+ int i, psdno, sdno;
+ off_t len_left, real_off, stripeend, stripeoff, stripestart;
+
+ gp = bp->bio_to->geom;
+ p = gp->softc;
+
+ if (p == NULL || LIST_EMPTY(&p->subdisks))
+ return (ENXIO);
+
+ /* We are optimistic and assume that this request will be OK. */
+ wp->type = NORMAL;
+ original = parity = broken = NULL;
+
+ /* The number of the subdisk containing the parity stripe. */
+	psdno = p->sdcount - 1 - (boff / (p->stripesize * (p->sdcount - 1))) %
+ p->sdcount;
+ KASSERT(psdno >= 0, ("gv_build_raid5_request: psdno < 0"));
+
+ /* Offset of the start address from the start of the stripe. */
+ stripeoff = boff % (p->stripesize * (p->sdcount - 1));
+ KASSERT(stripeoff >= 0, ("gv_build_raid5_request: stripeoff < 0"));
+
+ /* The number of the subdisk where the stripe resides. */
+ sdno = stripeoff / p->stripesize;
+ KASSERT(sdno >= 0, ("gv_build_raid5_request: sdno < 0"));
+
+ /* At or past parity subdisk. */
+ if (sdno >= psdno)
+ sdno++;
+
+ /* The offset of the stripe on this subdisk. */
+ stripestart = (boff - stripeoff) / (p->sdcount - 1);
+ KASSERT(stripestart >= 0, ("gv_build_raid5_request: stripestart < 0"));
+
+ if (stripeoff >= p->stripesize)
+ stripeoff -= p->stripesize;
+
+ /* The offset of the request on this subdisk. */
+ real_off = stripestart + stripeoff;
+
+ stripeend = stripestart + p->stripesize;
+ len_left = stripeend - real_off;
+ KASSERT(len_left >= 0, ("gv_build_raid5_request: len_left < 0"));
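+
+	/*
+	 * Illustrative example (made-up numbers): with three subdisks and a
+	 * stripe size of 64 KB each stripe group covers 128 KB of data, so a
+	 * request at plex offset 300 KB lies in group 2.  Its parity lives on
+	 * subdisk 0 and its data on subdisk 1, at subdisk offset
+	 * 128 KB + 44 KB = 172 KB, with 20 KB left in the stripe.
+	 */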
+
+ /* Find the right subdisks. */
+ i = 0;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (i == sdno)
+ original = s;
+ if (i == psdno)
+ parity = s;
+ if (s->state != GV_SD_UP)
+ broken = s;
+ i++;
+ }
+
+ if ((original == NULL) || (parity == NULL))
+ return (ENXIO);
+
+ /* Our data stripe is missing. */
+ if (original->state != GV_SD_UP)
+ wp->type = DEGRADED;
+ /* Our parity stripe is missing. */
+ if (parity->state != GV_SD_UP) {
+ /* We cannot take another failure if we're already degraded. */
+ if (wp->type != NORMAL)
+ return (ENXIO);
+ else
+ wp->type = NOPARITY;
+ }
+
+ /*
+ * A combined write is necessary when the original data subdisk and the
+ * parity subdisk are both up, but one of the other subdisks isn't.
+ */
+ if ((broken != NULL) && (broken != parity) && (broken != original))
+ wp->type = COMBINED;
+
+ wp->offset = real_off;
+ wp->length = (bcount <= len_left) ? bcount : len_left;
+ wp->data = addr;
+ wp->original = original->consumer;
+ wp->parity = parity->consumer;
+ wp->lockbase = stripestart;
+
+ KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));
+
+ switch (bp->bio_cmd) {
+ case BIO_READ:
+ /*
+		 * For a degraded read we need to read in all stripes except
+		 * the broken one, including the parity stripe, and then
+		 * recalculate the desired data.
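+		 * (RAID5 keeps parity = D0 ^ D1 ^ ... ^ Dn-1, so the missing
+		 * stripe is simply the XOR of all surviving stripes.)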
+ */
+ if (wp->type == DEGRADED) {
+ wp->buf = g_malloc(wp->length, M_WAITOK | M_ZERO);
+ wp->bufmalloc = 1;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ /* Skip the broken subdisk. */
+ if (s == broken)
+ continue;
+ rbp = gv_new_raid5_bit();
+ rbp->consumer = s->consumer;
+ rbp->bio = g_new_bio();
+ if (rbp->bio == NULL)
+ return (ENOMEM);
+ rbp->buf = g_malloc(wp->length,
+ M_WAITOK | M_ZERO);
+ rbp->malloc = 1;
+ rbp->bio->bio_cmd = BIO_READ;
+ rbp->bio->bio_offset = wp->offset;
+ rbp->bio->bio_length = wp->length;
+ rbp->bio->bio_data = rbp->buf;
+ rbp->bio->bio_done = gv_raid5_done;
+ rbp->bio->bio_caller1 = wp;
+ rbp->bio->bio_caller2 = rbp;
+ TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
+ wp->active++;
+ wp->rqcount++;
+ }
+
+ /* A normal read can be fulfilled with the original subdisk. */
+ } else {
+ rbp = gv_new_raid5_bit();
+ rbp->consumer = wp->original;
+ rbp->bio = g_new_bio();
+ if (rbp->bio == NULL)
+ return (ENOMEM);
+ rbp->bio->bio_cmd = BIO_READ;
+ rbp->bio->bio_offset = wp->offset;
+ rbp->bio->bio_length = wp->length;
+ rbp->buf = addr;
+ rbp->bio->bio_data = rbp->buf;
+ rbp->bio->bio_done = gv_raid5_done;
+ rbp->bio->bio_caller1 = wp;
+ rbp->bio->bio_caller2 = rbp;
+ TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
+ wp->active++;
+ wp->rqcount++;
+ }
+ if (wp->type != COMBINED)
+ wp->lockbase = -1;
+ break;
+
+ case BIO_WRITE:
+ /*
+ * A degraded write means we cannot write to the original data
+ * subdisk. Thus we need to read in all valid stripes,
+ * recalculate the parity from the original data, and then
+ * write the parity stripe back out.
+ */
+ if (wp->type == DEGRADED) {
+ wp->buf = g_malloc(wp->length, M_WAITOK | M_ZERO);
+ wp->bufmalloc = 1;
+
+ /* Copy the original data. */
+ bcopy(wp->data, wp->buf, wp->length);
+
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ /* Skip the broken and the parity subdisk. */
+ if ((s == broken) ||
+ (s->consumer == wp->parity))
+ continue;
+
+ rbp = gv_new_raid5_bit();
+ rbp->consumer = s->consumer;
+ rbp->bio = g_new_bio();
+ if (rbp->bio == NULL)
+ return (ENOMEM);
+ rbp->buf = g_malloc(wp->length,
+ M_WAITOK | M_ZERO);
+ rbp->malloc = 1;
+ rbp->bio->bio_cmd = BIO_READ;
+ rbp->bio->bio_data = rbp->buf;
+ rbp->bio->bio_offset = wp->offset;
+ rbp->bio->bio_length = wp->length;
+ rbp->bio->bio_done = gv_raid5_done;
+ rbp->bio->bio_caller1 = wp;
+ rbp->bio->bio_caller2 = rbp;
+ TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
+ wp->active++;
+ wp->rqcount++;
+ }
+
+ /*
+ * When we don't have the parity stripe we just write out the
+ * data.
+ */
+ } else if (wp->type == NOPARITY) {
+ rbp = gv_new_raid5_bit();
+ rbp->consumer = wp->original;
+ rbp->bio = g_new_bio();
+ if (rbp->bio == NULL)
+ return (ENOMEM);
+ rbp->bio->bio_cmd = BIO_WRITE;
+ rbp->bio->bio_offset = wp->offset;
+ rbp->bio->bio_length = wp->length;
+ rbp->bio->bio_data = addr;
+ rbp->bio->bio_done = gv_raid5_done;
+ rbp->bio->bio_caller1 = wp;
+ rbp->bio->bio_caller2 = rbp;
+ TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
+ wp->active++;
+ wp->rqcount++;
+
+ /*
+ * A combined write means that our data subdisk and the parity
+ * subdisks are both up, but another subdisk isn't. We need to
+ * read all valid stripes including the parity to recalculate
+ * the data of the stripe that is missing. Then we write our
+ * original data, and together with the other data stripes
+ * recalculate the parity again.
+ */
+ } else if (wp->type == COMBINED) {
+ wp->buf = g_malloc(wp->length, M_WAITOK | M_ZERO);
+ wp->bufmalloc = 1;
+
+ /* Get the data from all subdisks. */
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ /* Skip the broken subdisk. */
+ if (s == broken)
+ continue;
+
+ rbp = gv_new_raid5_bit();
+ rbp->consumer = s->consumer;
+ rbp->bio = g_new_bio();
+ if (rbp->bio == NULL)
+ return (ENOMEM);
+ rbp->bio->bio_cmd = BIO_READ;
+ rbp->buf = g_malloc(wp->length,
+ M_WAITOK | M_ZERO);
+ rbp->malloc = 1;
+ rbp->bio->bio_data = rbp->buf;
+ rbp->bio->bio_offset = wp->offset;
+ rbp->bio->bio_length = wp->length;
+ rbp->bio->bio_done = gv_raid5_done;
+ rbp->bio->bio_caller1 = wp;
+ rbp->bio->bio_caller2 = rbp;
+ TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
+ wp->active++;
+ wp->rqcount++;
+ }
+
+ /* Write the original data. */
+ rbp = gv_new_raid5_bit();
+ rbp->consumer = wp->original;
+ rbp->buf = addr;
+ rbp->bio = g_new_bio();
+ if (rbp->bio == NULL)
+ return (ENOMEM);
+ rbp->bio->bio_cmd = BIO_WRITE;
+ rbp->bio->bio_data = rbp->buf;
+ rbp->bio->bio_offset = wp->offset;
+ rbp->bio->bio_length = wp->length;
+ rbp->bio->bio_done = gv_raid5_done;
+ rbp->bio->bio_caller1 = wp;
+ rbp->bio->bio_caller2 = rbp;
+ /*
+ * Insert at the tail, because we want to read the old
+ * data first.
+ */
+ TAILQ_INSERT_TAIL(&wp->bits, rbp, list);
+ wp->active++;
+ wp->rqcount++;
+
+ /* Get the rest of the data again. */
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ /*
+ * Skip the broken subdisk, the parity, and the
+ * one we just wrote.
+ */
+ if ((s == broken) ||
+ (s->consumer == wp->parity) ||
+ (s->consumer == wp->original))
+ continue;
+ rbp = gv_new_raid5_bit();
+ rbp->consumer = s->consumer;
+ rbp->bio = g_new_bio();
+ if (rbp->bio == NULL)
+ return (ENOMEM);
+ rbp->bio->bio_cmd = BIO_READ;
+ rbp->buf = g_malloc(wp->length,
+ M_WAITOK | M_ZERO);
+ rbp->malloc = 1;
+ rbp->bio->bio_data = rbp->buf;
+ rbp->bio->bio_offset = wp->offset;
+ rbp->bio->bio_length = wp->length;
+ rbp->bio->bio_done = gv_raid5_done;
+ rbp->bio->bio_caller1 = wp;
+ rbp->bio->bio_caller2 = rbp;
+ /*
+ * Again, insert at the tail to keep correct
+ * order.
+ */
+ TAILQ_INSERT_TAIL(&wp->bits, rbp, list);
+ wp->active++;
+ wp->rqcount++;
+ }
+
+
+ /*
+ * A normal write request goes to the original subdisk, then we
+ * read in all other stripes, recalculate the parity and write
+ * out the parity again.
+ */
+ } else {
+ wp->buf = g_malloc(wp->length, M_WAITOK | M_ZERO);
+ wp->bufmalloc = 1;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ /* Skip the parity stripe. */
+ if (s->consumer == wp->parity)
+ continue;
+
+ rbp = gv_new_raid5_bit();
+ rbp->consumer = s->consumer;
+ rbp->bio = g_new_bio();
+ if (rbp->bio == NULL)
+ return (ENOMEM);
+ /*
+				 * The data for the original stripe is written;
+				 * the others need to be read in for the parity
+ * calculation.
+ */
+ if (s->consumer == wp->original) {
+ rbp->bio->bio_cmd = BIO_WRITE;
+ rbp->buf = addr;
+ } else {
+ rbp->bio->bio_cmd = BIO_READ;
+ rbp->buf = g_malloc(wp->length,
+ M_WAITOK | M_ZERO);
+ rbp->malloc = 1;
+ }
+ rbp->bio->bio_data = rbp->buf;
+ rbp->bio->bio_offset = wp->offset;
+ rbp->bio->bio_length = wp->length;
+ rbp->bio->bio_done = gv_raid5_done;
+ rbp->bio->bio_caller1 = wp;
+ rbp->bio->bio_caller2 = rbp;
+ TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
+ wp->active++;
+ wp->rqcount++;
+ }
+ }
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ wp->state = VALID;
+ return (0);
+}
diff --git a/sys/geom/vinum/geom_vinum_raid5.h b/sys/geom/vinum/geom_vinum_raid5.h
new file mode 100644
index 0000000..c43cb10
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_raid5.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_VINUM_RAID5_H_
+#define _GEOM_VINUM_RAID5_H_
+
+/*
+ * A single RAID5 request usually needs more than one I/O transaction,
+ * depending on the state of the associated subdisks and the direction of the
+ * transaction (read or write).  A RAID5 request is represented by a
+ * gv_raid5_packet; each of its subrequests is defined by a gv_raid5_bit.
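+ *
+ * As an illustration (made-up configuration): a degraded read on a
+ * three-disk plex becomes one gv_raid5_packet carrying two gv_raid5_bits,
+ * one reading the surviving data stripe and one reading the parity stripe;
+ * the two buffers are then XORed to reconstruct the missing data.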
+ */
+
+/* A subrequest of a RAID5 read/write operation. */
+struct gv_raid5_bit {
+ struct bio *bio; /* BIO of this subrequest. */
+ caddr_t buf; /* Data buffer of this subrequest. */
+ int malloc; /* Flag if data buffer was malloced. */
+ struct g_consumer *consumer; /* Consumer to send the BIO to. */
+ TAILQ_ENTRY(gv_raid5_bit) list; /* Entry in the list of this request. */
+};
+
+/* Container for one or more gv_raid5_bits; represents a RAID5 I/O request. */
+struct gv_raid5_packet {
+ caddr_t buf; /* Data buffer of this RAID5 request. */
+ off_t length; /* Size of data buffer. */
+ off_t lockbase; /* Deny access to our plex offset. */
+ off_t offset; /* The drive offset of the subdisk. */
+ int bufmalloc; /* Flag if data buffer was malloced. */
+ int active; /* Count of active subrequests. */
+ int rqcount; /* Count of subrequests. */
+
+ struct bio *bio; /* Pointer to the original bio. */
+ caddr_t data; /* Pointer to the original data. */
+
+ struct g_consumer *original; /* Consumer to the data stripe. */
+ struct g_consumer *parity; /* Consumer to the parity stripe. */
+
+ /* State of this RAID5 packet. */
+ enum {
+ SETUP, /* Newly created. */
+ VALID, /* Ready for processing. */
+ IO, /* Currently doing I/O. */
+ FINISH /* Packet has finished. */
+ } state;
+
+ /* Type of this RAID5 transaction. */
+ enum {
+ JUNK, /* Newly created, not valid. */
+ NORMAL, /* Normal read or write. */
+ ISPARITY, /* Containing only parity data. */
+ NOPARITY, /* Parity stripe not available. */
+ DEGRADED, /* Data stripe not available. */
+ COMBINED /* Data and parity stripes ok, others not. */
+ } type;
+
+ TAILQ_HEAD(,gv_raid5_bit) bits; /* List of subrequests. */
+ TAILQ_ENTRY(gv_raid5_packet) list; /* Entry in plex's packet list. */
+};
+
+int gv_build_raid5_req(struct gv_raid5_packet *, struct bio *, caddr_t,
+ long, off_t);
+void gv_raid5_done(struct bio *);
+void gv_raid5_worker(void *);
+struct gv_raid5_packet *gv_new_raid5_packet(void);
+struct gv_raid5_bit *gv_new_raid5_bit(void);
+
+#endif /* !_GEOM_VINUM_RAID5_H_ */
diff --git a/sys/geom/vinum/geom_vinum_rm.c b/sys/geom/vinum/geom_vinum_rm.c
new file mode 100644
index 0000000..181a954
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_rm.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/libkern.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum.h>
+#include <geom/vinum/geom_vinum_share.h>
+
+static void gv_cleanup_pp(void *, int);
+static void gv_free_sd(struct gv_sd *);
+static int gv_rm_plex(struct gv_softc *, struct gctl_req *,
+ struct gv_plex *, int);
+static int gv_rm_sd(struct gv_softc *, struct gctl_req *, struct gv_sd *,
+ int);
+static int gv_rm_vol(struct gv_softc *, struct gctl_req *,
+ struct gv_volume *, int);
+
+/* General 'remove' routine. */
+void
+gv_remove(struct g_geom *gp, struct gctl_req *req)
+{
+ struct gv_softc *sc;
+ struct gv_volume *v;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ int *argc, *flags;
+ char *argv, buf[20];
+ int i, type, err;
+
+ argc = gctl_get_paraml(req, "argc", sizeof(*argc));
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+
+ if (argc == NULL || *argc == 0) {
+ gctl_error(req, "no arguments given");
+ return;
+ }
+
+ sc = gp->softc;
+
+ for (i = 0; i < *argc; i++) {
+ snprintf(buf, sizeof(buf), "argv%d", i);
+ argv = gctl_get_param(req, buf, NULL);
+ if (argv == NULL)
+ continue;
+ type = gv_object_type(sc, argv);
+ switch (type) {
+ case GV_TYPE_VOL:
+ v = gv_find_vol(sc, argv);
+ if (v == NULL) {
+ gctl_error(req, "unknown volume '%s'", argv);
+ return;
+ }
+ err = gv_rm_vol(sc, req, v, *flags);
+ if (err)
+ return;
+ break;
+ case GV_TYPE_PLEX:
+ p = gv_find_plex(sc, argv);
+ if (p == NULL) {
+ gctl_error(req, "unknown plex '%s'", argv);
+ return;
+ }
+ err = gv_rm_plex(sc, req, p, *flags);
+ if (err)
+ return;
+ break;
+ case GV_TYPE_SD:
+ s = gv_find_sd(sc, argv);
+ if (s == NULL) {
+ gctl_error(req, "unknown subdisk '%s'", argv);
+ return;
+ }
+ err = gv_rm_sd(sc, req, s, *flags);
+ if (err)
+ return;
+ break;
+ default:
+ gctl_error(req, "unknown object '%s'", argv);
+ return;
+ }
+ }
+
+ gv_save_config_all(sc);
+}
+
+/* Remove a volume. */
+static int
+gv_rm_vol(struct gv_softc *sc, struct gctl_req *req, struct gv_volume *v, int flags)
+{
+ struct g_geom *gp;
+ struct gv_plex *p, *p2;
+ int err;
+
+ g_topology_assert();
+ KASSERT(v != NULL, ("gv_rm_vol: NULL v"));
+
+	/* A volume with attached plexes may only be removed recursively. */
+ if (!LIST_EMPTY(&v->plexes) && !(flags & GV_FLAG_R)) {
+ gctl_error(req, "volume '%s' has attached plexes", v->name);
+ return (-1);
+ }
+
+ gp = v->geom;
+
+ /* Check if any of our consumers is open. */
+ if (gp != NULL && gv_is_open(gp)) {
+ gctl_error(req, "volume '%s' is busy", v->name);
+ return (-1);
+ }
+
+ /* Remove the plexes our volume has. */
+ LIST_FOREACH_SAFE(p, &v->plexes, in_volume, p2) {
+ v->plexcount--;
+ LIST_REMOVE(p, in_volume);
+ p->vol_sc = NULL;
+
+ err = gv_rm_plex(sc, req, p, flags);
+ if (err)
+ return (err);
+ }
+
+ /* Clean up and let our geom fade away. */
+ LIST_REMOVE(v, volume);
+ g_free(v);
+ if (gp != NULL) {
+ gp->softc = NULL;
+ g_wither_geom(gp, ENXIO);
+ }
+
+ return (0);
+}
+
+/* Remove a plex. */
+static int
+gv_rm_plex(struct gv_softc *sc, struct gctl_req *req, struct gv_plex *p, int flags)
+{
+ struct g_geom *gp;
+ struct gv_sd *s, *s2;
+ int err;
+
+ g_topology_assert();
+
+ KASSERT(p != NULL, ("gv_rm_plex: NULL p"));
+
+	/* A plex with attached subdisks may only be removed recursively. */
+ if (!LIST_EMPTY(&p->subdisks) && !(flags & GV_FLAG_R)) {
+ gctl_error(req, "plex '%s' has attached subdisks", p->name);
+ return (-1);
+ }
+
+ if (p->vol_sc != NULL && p->vol_sc->plexcount == 1) {
+ gctl_error(req, "plex '%s' is still attached to volume '%s'",
+ p->name, p->volume);
+ return (-1);
+ }
+
+ gp = p->geom;
+
+ /* Check if any of our consumers is open. */
+ if (gp != NULL && gv_is_open(gp)) {
+ gctl_error(req, "plex '%s' is busy", p->name);
+ return (-1);
+ }
+
+ /* Remove the subdisks our plex has. */
+ LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) {
+ p->sdcount--;
+#if 0
+ LIST_REMOVE(s, in_plex);
+ s->plex_sc = NULL;
+#endif
+
+ err = gv_rm_sd(sc, req, s, flags);
+ if (err)
+ return (err);
+ }
+
+ /* Clean up and let our geom fade away. */
+ LIST_REMOVE(p, plex);
+ if (p->vol_sc != NULL) {
+ p->vol_sc->plexcount--;
+ LIST_REMOVE(p, in_volume);
+ p->vol_sc = NULL;
+ }
+
+ gv_kill_thread(p);
+ g_free(p);
+
+ if (gp != NULL) {
+ gp->softc = NULL;
+ g_wither_geom(gp, ENXIO);
+ }
+
+ return (0);
+}
+
+/* Remove a subdisk. */
+static int
+gv_rm_sd(struct gv_softc *sc, struct gctl_req *req, struct gv_sd *s, int flags)
+{
+ struct gv_drive *d;
+ struct g_geom *gp;
+ struct g_provider *pp;
+
+ KASSERT(s != NULL, ("gv_rm_sd: NULL s"));
+ d = s->drive_sc;
+ KASSERT(d != NULL, ("gv_rm_sd: NULL d"));
+ gp = d->geom;
+ KASSERT(gp != NULL, ("gv_rm_sd: NULL gp"));
+
+ pp = s->provider;
+
+ /* Clean up. */
+ LIST_REMOVE(s, in_plex);
+ LIST_REMOVE(s, from_drive);
+ LIST_REMOVE(s, sd);
+ gv_free_sd(s);
+ g_free(s);
+
+ /* If the subdisk has a provider we need to clean up this one too. */
+ if (pp != NULL) {
+ g_orphan_provider(pp, ENXIO);
+ if (LIST_EMPTY(&pp->consumers))
+ g_destroy_provider(pp);
+ else
+ /* Schedule this left-over provider for destruction. */
+ g_post_event(gv_cleanup_pp, pp, M_WAITOK, pp, NULL);
+ }
+
+ return (0);
+}
+
+/*
+ * This function is called from the event queue to clean up left-over subdisk
+ * providers.
+ */
+static void
+gv_cleanup_pp(void *arg, int flag)
+{
+ struct g_provider *pp;
+
+ g_topology_assert();
+
+ if (flag == EV_CANCEL)
+ return;
+
+ pp = arg;
+ if (pp == NULL) {
+ printf("gv_cleanup_pp: provider has gone\n");
+ return;
+ }
+
+ if (!LIST_EMPTY(&pp->consumers)) {
+ printf("gv_cleanup_pp: provider still not empty\n");
+ return;
+ }
+
+ g_destroy_provider(pp);
+}
+
+static void
+gv_free_sd(struct gv_sd *s)
+{
+ struct gv_drive *d;
+ struct gv_freelist *fl, *fl2;
+
+ KASSERT(s != NULL, ("gv_free_sd: NULL s"));
+ d = s->drive_sc;
+ KASSERT(d != NULL, ("gv_free_sd: NULL d"));
+
+ /*
+ * First, find the free slot that's immediately before or after this
+ * subdisk.
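+	 * If one is found it is simply grown to cover the returned space;
+	 * otherwise a new slot is created and inserted into the drive's
+	 * freelist, which is kept sorted by offset.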
+ */
+ fl = NULL;
+ LIST_FOREACH(fl, &d->freelist, freelist) {
+ if (fl->offset == s->drive_offset + s->size)
+ break;
+ if (fl->offset + fl->size == s->drive_offset)
+ break;
+ }
+
+	/* If there is no adjacent free slot, create a new one. */
+ if (fl == NULL) {
+
+ fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
+ fl->size = s->size;
+ fl->offset = s->drive_offset;
+
+ if (d->freelist_entries == 0) {
+ LIST_INSERT_HEAD(&d->freelist, fl, freelist);
+ } else {
+ LIST_FOREACH(fl2, &d->freelist, freelist) {
+ if (fl->offset < fl2->offset) {
+ LIST_INSERT_BEFORE(fl2, fl, freelist);
+ break;
+ } else if (LIST_NEXT(fl2, freelist) == NULL) {
+ LIST_INSERT_AFTER(fl2, fl, freelist);
+ break;
+ }
+ }
+ }
+
+ d->freelist_entries++;
+
+ /* Expand the free slot we just found. */
+ } else {
+ fl->size += s->size;
+ if (fl->offset > s->drive_offset)
+ fl->offset = s->drive_offset;
+ }
+
+ d->avail += s->size;
+}
diff --git a/sys/geom/vinum/geom_vinum_share.c b/sys/geom/vinum/geom_vinum_share.c
new file mode 100644
index 0000000..2c6530e
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_share.c
@@ -0,0 +1,651 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 1997, 1998, 1999
+ * Nan Yang Computer Services Limited. All rights reserved.
+ *
+ * Parts written by Greg Lehey
+ *
+ * This software is distributed under the so-called ``Berkeley
+ * License'':
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Nan Yang Computer
+ * Services Limited.
+ * 4. Neither the name of the Company nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * This software is provided ``as is'', and any express or implied
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose are disclaimed.
+ * In no event shall the company or contributors be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential
+ * damages (including, but not limited to, procurement of substitute
+ * goods or services; loss of use, data, or profits; or business
+ * interruption) however caused and on any theory of liability, whether
+ * in contract, strict liability, or tort (including negligence or
+ * otherwise) arising in any way out of the use of this software, even if
+ * advised of the possibility of such damage.
+ *
+ */
+
+/* This file is shared between kernel and userland. */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#ifdef _KERNEL
+#include <sys/bio.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+
+#include <geom/geom.h>
+#define iswhite(c) (((c) == ' ') || ((c) == '\t'))
+#else
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define iswhite isspace
+#define g_free free
+#endif /* _KERNEL */
+
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum_share.h>
+
+/*
+ * Take a blank separated list of tokens and turn it into a list of
+ * individual nul-delimited strings. Build a list of pointers at
+ * token, which must have enough space for the tokens. Return the
+ * number of tokens, or -1 on error (typically a missing string
+ * delimiter).
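+ *
+ * For example (illustrative input), the line
+ *   "sd name vol0.p0.s0 drive mydrive state up"
+ * is split into the seven tokens "sd", "name", "vol0.p0.s0", "drive",
+ * "mydrive", "state" and "up".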
+ */
+int
+gv_tokenize(char *cptr, char *token[], int maxtoken)
+{
+ int tokennr; /* Index of this token. */
+ char delim; /* Delimiter for searching for the partner. */
+
+ for (tokennr = 0; tokennr < maxtoken;) {
+
+ /* Skip leading white space. */
+ while (iswhite(*cptr))
+ cptr++;
+
+ /* End of line. */
+ if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#'))
+ return tokennr;
+
+ delim = *cptr;
+ token[tokennr] = cptr; /* Point to it. */
+ tokennr++; /* One more. */
+
+ /* Run off the end? */
+ if (tokennr == maxtoken)
+ return tokennr;
+
+ /* Quoted? */
+ if ((delim == '\'') || (delim == '"')) {
+ for (;;) {
+ cptr++;
+
+ /* Found the partner. */
+ if ((*cptr == delim) && (cptr[-1] != '\\')) {
+ cptr++;
+
+ /* Space after closing quote needed. */
+ if (!iswhite(*cptr))
+ return -1;
+
+ /* Delimit. */
+					*cptr++ = '\0';
+					break;
+
+ /* End-of-line? */
+ } else if ((*cptr == '\0') || (*cptr == '\n'))
+ return -1;
+ }
+
+ /* Not quoted. */
+ } else {
+ while ((*cptr != '\0') &&
+ (!iswhite(*cptr)) &&
+ (*cptr != '\n'))
+ cptr++;
+
+ /* Not end-of-line; delimit and move to the next. */
+ if (*cptr != '\0')
+ *cptr++ = '\0';
+ }
+ }
+
+ /* Can't get here. */
+ return maxtoken;
+}
+
+
+/*
+ * Take a number with an optional scale factor and convert it to a number of
+ * bytes.
+ *
+ * The scale factors are:
+ *
+ * s sectors (of 512 bytes)
+ * b blocks (of 512 bytes). This unit is deprecated, because it's
+ * confusing, but maintained to avoid confusing Veritas users.
+ * k kilobytes (1024 bytes)
+ * m megabytes (of 1024 * 1024 bytes)
+ * g gigabytes (of 1024 * 1024 * 1024 bytes)
+ *
+ * XXX: need a way to signal error
+ */
+off_t
+gv_sizespec(char *spec)
+{
+ uint64_t size;
+ char *s;
+ int sign;
+
+ size = 0;
+ sign = 1;
+ if (spec != NULL) { /* we have a parameter */
+ s = spec;
+ if (*s == '-') { /* negative, */
+ sign = -1;
+ s++; /* skip */
+ }
+
+ /* It's numeric. */
+ if ((*s >= '0') && (*s <= '9')) {
+
+ /* It's numeric. */
+ while ((*s >= '0') && (*s <= '9'))
+ /* Convert it. */
+ size = size * 10 + *s++ - '0';
+
+ switch (*s) {
+ case '\0':
+ return size * sign;
+
+ case 'B':
+ case 'b':
+ case 'S':
+ case 's':
+ return size * sign * 512;
+
+ case 'K':
+ case 'k':
+ return size * sign * 1024;
+
+ case 'M':
+ case 'm':
+ return size * sign * 1024 * 1024;
+
+ case 'G':
+ case 'g':
+ return size * sign * 1024 * 1024 * 1024;
+ }
+ }
+ }
+
+ return (0);
+}
+
+const char *
+gv_drivestate(int state)
+{
+ switch (state) {
+ case GV_DRIVE_DOWN:
+ return "down";
+ case GV_DRIVE_UP:
+ return "up";
+ default:
+ return "??";
+ }
+}
+
+int
+gv_drivestatei(char *buf)
+{
+ if (!strcmp(buf, "up"))
+ return (GV_DRIVE_UP);
+ else
+ return (GV_DRIVE_DOWN);
+}
+
+/* Translate from a string to a subdisk state. */
+int
+gv_sdstatei(char *buf)
+{
+ if (!strcmp(buf, "up"))
+ return (GV_SD_UP);
+ else if (!strcmp(buf, "reviving"))
+ return (GV_SD_REVIVING);
+ else if (!strcmp(buf, "stale"))
+ return (GV_SD_STALE);
+ else
+ return (GV_SD_DOWN);
+}
+
+/* Translate from a subdisk state to a string. */
+const char *
+gv_sdstate(int state)
+{
+ switch (state) {
+ case GV_SD_INITIALIZING:
+ return "initializing";
+ case GV_SD_STALE:
+ return "stale";
+ case GV_SD_DOWN:
+ return "down";
+ case GV_SD_REVIVING:
+ return "reviving";
+ case GV_SD_UP:
+ return "up";
+ default:
+ return "??";
+ }
+}
+
+/* Translate from a string to a plex state. */
+int
+gv_plexstatei(char *buf)
+{
+ if (!strcmp(buf, "up"))
+ return (GV_PLEX_UP);
+ else if (!strcmp(buf, "initializing"))
+ return (GV_PLEX_INITIALIZING);
+ else if (!strcmp(buf, "degraded"))
+ return (GV_PLEX_DEGRADED);
+ else
+ return (GV_PLEX_DOWN);
+}
+
+/* Translate from a plex state to a string. */
+const char *
+gv_plexstate(int state)
+{
+ switch (state) {
+ case GV_PLEX_DOWN:
+ return "down";
+ case GV_PLEX_INITIALIZING:
+ return "initializing";
+ case GV_PLEX_DEGRADED:
+ return "degraded";
+ case GV_PLEX_UP:
+ return "up";
+ default:
+ return "??";
+ }
+}
+
+/* Translate from a string to a plex organization. */
+int
+gv_plexorgi(char *buf)
+{
+ if (!strcmp(buf, "concat"))
+ return (GV_PLEX_CONCAT);
+ else if (!strcmp(buf, "striped"))
+ return (GV_PLEX_STRIPED);
+ else if (!strcmp(buf, "raid5"))
+ return (GV_PLEX_RAID5);
+ else
+ return (GV_PLEX_DISORG);
+}
+
+int
+gv_volstatei(char *buf)
+{
+ if (!strcmp(buf, "up"))
+ return (GV_VOL_UP);
+ else
+ return (GV_VOL_DOWN);
+}
+
+const char *
+gv_volstate(int state)
+{
+ switch (state) {
+ case GV_VOL_UP:
+ return "up";
+ case GV_VOL_DOWN:
+ return "down";
+ default:
+ return "??";
+ }
+}
+
+/* Translate from a plex organization to a string. */
+const char *
+gv_plexorg(int org)
+{
+ switch (org) {
+ case GV_PLEX_DISORG:
+ return "??";
+ case GV_PLEX_CONCAT:
+ return "concat";
+ case GV_PLEX_STRIPED:
+ return "striped";
+ case GV_PLEX_RAID5:
+ return "raid5";
+ default:
+ return "??";
+ }
+}
+
+const char *
+gv_plexorg_short(int org)
+{
+ switch (org) {
+ case GV_PLEX_DISORG:
+ return "??";
+ case GV_PLEX_CONCAT:
+ return "C";
+ case GV_PLEX_STRIPED:
+ return "S";
+ case GV_PLEX_RAID5:
+ return "R5";
+ default:
+ return "??";
+ }
+}
+
+/* Get a new drive object. */
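+/*
+ * The token array holds a tokenized config line of the (illustrative) form
+ * "drive mydrive device /dev/da0s1h state up"; only the last path component
+ * of the device is kept, and a token that is not a keyword argument is
+ * taken to be the drive name.
+ */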
+struct gv_drive *
+gv_new_drive(int max, char *token[])
+{
+ struct gv_drive *d;
+ int j, errors;
+ char *ptr;
+
+ if (token[1] == NULL || *token[1] == '\0')
+ return (NULL);
+
+#ifdef _KERNEL
+ d = g_malloc(sizeof(struct gv_drive), M_WAITOK | M_ZERO);
+
+#else
+ d = malloc(sizeof(struct gv_drive));
+ if (d == NULL)
+ return (NULL);
+ bzero(d, sizeof(struct gv_drive));
+#endif
+
+ errors = 0;
+ for (j = 1; j < max; j++) {
+ if (!strcmp(token[j], "state")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ d->state = gv_drivestatei(token[j]);
+ } else if (!strcmp(token[j], "device")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ ptr = token[j] + strlen(token[j]);
+ while (ptr != token[j] && *ptr != '/')
+ ptr--;
+ ptr++;
+ strncpy(d->device, ptr, GV_MAXDRIVENAME);
+ } else {
+ /* We assume this is the drive name. */
+ strncpy(d->name, token[j], GV_MAXDRIVENAME);
+ }
+ }
+
+ if (strlen(d->name) == 0 || strlen(d->device) == 0)
+ errors++;
+
+ if (errors) {
+ g_free(d);
+ return (NULL);
+ }
+
+ return (d);
+}
+
+/* Get a new volume object. */
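+/*
+ * An (illustrative) input line is simply "volume vol0 state up"; any token
+ * that is not a keyword argument becomes the volume name.
+ */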
+struct gv_volume *
+gv_new_volume(int max, char *token[])
+{
+ struct gv_volume *v;
+ int j, errors;
+
+ if (token[1] == NULL || *token[1] == '\0')
+ return (NULL);
+
+#ifdef _KERNEL
+ v = g_malloc(sizeof(struct gv_volume), M_WAITOK | M_ZERO);
+
+#else
+ v = malloc(sizeof(struct gv_volume));
+ if (v == NULL)
+ return (NULL);
+ bzero(v, sizeof(struct gv_volume));
+#endif
+
+ errors = 0;
+ for (j = 1; j < max; j++) {
+ if (!strcmp(token[j], "state")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ v->state = gv_volstatei(token[j]);
+ } else {
+ /* We assume this is the volume name. */
+ strncpy(v->name, token[j], GV_MAXVOLNAME);
+ }
+ }
+
+ if (strlen(v->name) == 0)
+ errors++;
+
+ if (errors) {
+ g_free(v);
+ return (NULL);
+ }
+
+ return (v);
+}
+
+/* Get a new plex object. */
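+/*
+ * An (illustrative) input line looks like
+ * "plex name vol0.p0 org striped 256s vol vol0 state up"; striped and
+ * raid5 plexes carry a stripe size right after the organization keyword.
+ */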
+struct gv_plex *
+gv_new_plex(int max, char *token[])
+{
+ struct gv_plex *p;
+ int j, errors;
+
+ if (token[1] == NULL || *token[1] == '\0')
+ return (NULL);
+
+#ifdef _KERNEL
+ p = g_malloc(sizeof(struct gv_plex), M_WAITOK | M_ZERO);
+#else
+ p = malloc(sizeof(struct gv_plex));
+ if (p == NULL)
+ return (NULL);
+ bzero(p, sizeof(struct gv_plex));
+#endif
+
+ errors = 0;
+ for (j = 1; j < max; j++) {
+ if (!strcmp(token[j], "name")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ strncpy(p->name, token[j], GV_MAXPLEXNAME);
+ } else if (!strcmp(token[j], "org")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ p->org = gv_plexorgi(token[j]);
+ if ((p->org == GV_PLEX_RAID5) ||
+ (p->org == GV_PLEX_STRIPED)) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ p->stripesize = gv_sizespec(token[j]);
+ if (p->stripesize == 0) {
+ errors++;
+ break;
+ }
+ }
+ } else if (!strcmp(token[j], "state")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ p->state = gv_plexstatei(token[j]);
+ } else if (!strcmp(token[j], "vol")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ strncpy(p->volume, token[j], GV_MAXVOLNAME);
+ } else {
+ errors++;
+ break;
+ }
+ }
+
+ if (errors) {
+ g_free(p);
+ return (NULL);
+ }
+
+ return (p);
+}
+
+/* Get a new subdisk object. */
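+/*
+ * An (illustrative) input line looks like
+ * "sd name vol0.p0.s0 drive mydrive plex vol0.p0 len 1024s plexoffset 0s
+ * state up"; the optional driveoffset uses the same size syntax, and a
+ * missing drive name is an error.
+ */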
+struct gv_sd *
+gv_new_sd(int max, char *token[])
+{
+ struct gv_sd *s;
+ int j, errors;
+
+ if (token[1] == NULL || *token[1] == '\0')
+ return NULL;
+
+#ifdef _KERNEL
+ s = g_malloc(sizeof(struct gv_sd), M_WAITOK | M_ZERO);
+#else
+ s = malloc(sizeof(struct gv_sd));
+ if (s == NULL)
+ return NULL;
+ bzero(s, sizeof(struct gv_sd));
+#endif
+
+ s->plex_offset = -1;
+ s->size = -1;
+ s->drive_offset = -1;
+ errors = 0;
+ for (j = 1; j < max; j++) {
+ if (!strcmp(token[j], "name")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ strncpy(s->name, token[j], GV_MAXSDNAME);
+ } else if (!strcmp(token[j], "drive")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ strncpy(s->drive, token[j], GV_MAXDRIVENAME);
+ } else if (!strcmp(token[j], "plex")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ strncpy(s->plex, token[j], GV_MAXPLEXNAME);
+ } else if (!strcmp(token[j], "state")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ s->state = gv_sdstatei(token[j]);
+ } else if (!strcmp(token[j], "len") ||
+ !strcmp(token[j], "length")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ s->size = gv_sizespec(token[j]);
+ if (s->size <= 0) {
+ errors++;
+ break;
+ }
+ } else if (!strcmp(token[j], "driveoffset")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ s->drive_offset = gv_sizespec(token[j]);
+ if (s->drive_offset != 0 &&
+ s->drive_offset < GV_DATA_START) {
+ errors++;
+ break;
+ }
+ } else if (!strcmp(token[j], "plexoffset")) {
+ j++;
+ if (j >= max) {
+ errors++;
+ break;
+ }
+ s->plex_offset = gv_sizespec(token[j]);
+ if (s->plex_offset < 0) {
+ errors++;
+ break;
+ }
+ } else {
+ errors++;
+ break;
+ }
+ }
+
+ if (strlen(s->drive) == 0)
+ errors++;
+
+ if (errors) {
+ g_free(s);
+ return (NULL);
+ }
+
+ return (s);
+}
diff --git a/sys/geom/vinum/geom_vinum_share.h b/sys/geom/vinum/geom_vinum_share.h
new file mode 100644
index 0000000..177e971
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_share.h
@@ -0,0 +1,62 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_VINUM_SHARE_H_
+#define _GEOM_VINUM_SHARE_H_
+
+/* Maximum number of arguments for a single command. */
+#define GV_MAXARGS 64
+
+enum {
+ KILOBYTE = 1024,
+ MEGABYTE = 1048576,
+ GIGABYTE = 1073741824
+};
+
+off_t gv_sizespec(char *);
+int gv_tokenize(char *, char **, int);
+
+struct gv_drive *gv_new_drive(int, char **);
+struct gv_plex *gv_new_plex(int, char **);
+struct gv_sd *gv_new_sd(int, char **);
+struct gv_volume *gv_new_volume(int, char **);
+
+int gv_drivestatei(char *);
+int gv_plexorgi(char *);
+int gv_plexstatei(char *);
+int gv_sdstatei(char *);
+int gv_volstatei(char *);
+
+const char *gv_drivestate(int);
+const char *gv_plexorg(int);
+const char *gv_plexorg_short(int);
+const char *gv_plexstate(int);
+const char *gv_sdstate(int);
+const char *gv_volstate(int);
+
+#endif /* _GEOM_VINUM_SHARE_H_ */
diff --git a/sys/geom/vinum/geom_vinum_state.c b/sys/geom/vinum/geom_vinum_state.c
new file mode 100644
index 0000000..fe8a88e
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_state.c
@@ -0,0 +1,289 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum.h>
+#include <geom/vinum/geom_vinum_share.h>
+
+/* Update drive state; return 0 if the state change is refused, otherwise 1. */
+int
+gv_set_drive_state(struct gv_drive *d, int newstate, int flags)
+{
+ struct gv_sd *s;
+ int oldstate;
+
+ KASSERT(d != NULL, ("gv_set_drive_state: NULL d"));
+
+ oldstate = d->state;
+
+ if (newstate == oldstate)
+ return (1);
+
+	/* An open drive may only be taken down if force is used. */
+ if ((newstate == GV_DRIVE_DOWN) && gv_is_open(d->geom) &&
+ (!(flags & GV_SETSTATE_FORCE)))
+ return (0);
+
+ d->state = newstate;
+
+ if (d->state != oldstate) {
+ LIST_FOREACH(s, &d->subdisks, from_drive)
+ gv_update_sd_state(s);
+ }
+
+ return (1);
+}
+
+int
+gv_set_sd_state(struct gv_sd *s, int newstate, int flags)
+{
+ struct gv_drive *d;
+ struct gv_plex *p;
+ int oldstate, status;
+
+ KASSERT(s != NULL, ("gv_set_sd_state: NULL s"));
+
+ oldstate = s->state;
+
+ /* We are optimistic and assume it will work. */
+ status = 0;
+
+ if (newstate == oldstate)
+ return (0);
+
+ switch (newstate) {
+ case GV_SD_DOWN:
+ /*
+ * If we're attached to a plex, we won't go down without use of
+ * force.
+ */
+ if ((s->plex_sc != NULL) && !(flags & GV_SETSTATE_FORCE))
+ return (-1);
+ break;
+
+ case GV_SD_UP:
+ /* We can't bring the subdisk up if our drive is dead. */
+ d = s->drive_sc;
+ if ((d == NULL) || (d->state != GV_DRIVE_UP))
+ return (-1);
+
+ /* Check from where we want to be brought up. */
+ switch (s->state) {
+ case GV_SD_REVIVING:
+ case GV_SD_INITIALIZING:
+ /*
+ * The subdisk was initializing. We allow it to be
+ * brought up.
+ */
+ break;
+
+ case GV_SD_DOWN:
+ /*
+ * The subdisk is currently down. We allow it to be
+ * brought up if it is not attached to a plex.
+ */
+ p = s->plex_sc;
+ if (p == NULL)
+ break;
+
+ /*
+			 * If this subdisk is attached to a plex, we allow it
+			 * to be brought up only if the plex is not a RAID5
+			 * plex; otherwise it is marked 'stale'.
+ */
+
+ if (p->org != GV_PLEX_RAID5)
+ break;
+ else
+ s->state = GV_SD_STALE;
+
+ status = -1;
+ break;
+
+ case GV_SD_STALE:
+ /*
+			 * A stale subdisk can't be brought up directly; it
+			 * needs to be revived or initialized first.
+ */
+ /* FALLTHROUGH */
+ default:
+ return (-1);
+ }
+ break;
+
+ /* Other state transitions are only possible with force. */
+ default:
+ if (!(flags & GV_SETSTATE_FORCE))
+ return (-1);
+ }
+
+	/* The state change is allowed, so do it. */
+ if (status == 0)
+ s->state = newstate;
+
+ /* Update our plex, if we're attached to one. */
+ if (s->plex_sc != NULL)
+ gv_update_plex_state(s->plex_sc);
+
+ /* Save the config back to disk. */
+ if (flags & GV_SETSTATE_CONFIG)
+ gv_save_config_all(s->vinumconf);
+
+ return (status);
+}
+
+
+/* Update the state of a subdisk based on its environment. */
+void
+gv_update_sd_state(struct gv_sd *s)
+{
+ struct gv_drive *d;
+
+ KASSERT(s != NULL, ("gv_update_sd_state: NULL s"));
+ d = s->drive_sc;
+ KASSERT(d != NULL, ("gv_update_sd_state: NULL d"));
+
+ /* If our drive isn't up we cannot be up either. */
+ if (d->state != GV_DRIVE_UP)
+ s->state = GV_SD_DOWN;
+	/* If this subdisk was just created, we assume it is good. */
+ else if (s->flags & GV_SD_NEWBORN) {
+ s->state = GV_SD_UP;
+ s->flags &= ~GV_SD_NEWBORN;
+ } else if (s->state != GV_SD_UP)
+ s->state = GV_SD_STALE;
+ else
+ s->state = GV_SD_UP;
+
+ printf("FOO: sd %s is %s\n", s->name, gv_sdstate(s->state));
+ /* Update the plex, if we have one. */
+ if (s->plex_sc != NULL)
+ gv_update_plex_state(s->plex_sc);
+}
+
+/* Update the state of a plex based on its environment. */
+void
+gv_update_plex_state(struct gv_plex *p)
+{
+ int sdstates;
+
+ KASSERT(p != NULL, ("gv_update_plex_state: NULL p"));
+
+ /* First, check the state of our subdisks. */
+ sdstates = gv_sdstatemap(p);
+
+ /* If all subdisks are up, our plex can be up, too. */
+ if (sdstates == GV_SD_UPSTATE)
+ p->state = GV_PLEX_UP;
+
+ /* One or more of our subdisks are down. */
+ else if (sdstates & GV_SD_DOWNSTATE) {
+ /* A RAID5 plex can handle one dead subdisk. */
+ if ((p->org == GV_PLEX_RAID5) && (p->sddown == 1))
+ p->state = GV_PLEX_DEGRADED;
+ else
+ p->state = GV_PLEX_DOWN;
+
+ /* Some of our subdisks are initializing. */
+ } else if (sdstates & GV_SD_INITSTATE) {
+ if (p->flags & GV_PLEX_SYNCING)
+ p->state = GV_PLEX_DEGRADED;
+ else
+ p->state = GV_PLEX_DOWN;
+ } else
+ p->state = GV_PLEX_DOWN;
+
+ printf("FOO: plex %s is %s\n", p->name, gv_plexstate(p->state));
+ /* Update our volume, if we have one. */
+ if (p->vol_sc != NULL)
+ gv_update_vol_state(p->vol_sc);
+}
+
+/* Update the volume state based on its plexes. */
+void
+gv_update_vol_state(struct gv_volume *v)
+{
+ struct gv_plex *p;
+
+ KASSERT(v != NULL, ("gv_update_vol_state: NULL v"));
+
+ LIST_FOREACH(p, &v->plexes, in_volume) {
+ /* One of our plexes is accessible, and so are we. */
+ if (p->state > GV_PLEX_DEGRADED) {
+ v->state = GV_VOL_UP;
+ return;
+ }
+ }
+
+ /* Not one of our plexes is up, so we can't be either. */
+ v->state = GV_VOL_DOWN;
+}
+
+/* Return a state map for the subdisks of a plex. */
+int
+gv_sdstatemap(struct gv_plex *p)
+{
+ struct gv_sd *s;
+ int statemap;
+
+ KASSERT(p != NULL, ("gv_sdstatemap: NULL p"));
+
+ statemap = 0;
+ p->sddown = 0; /* No subdisks down yet. */
+
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ switch (s->state) {
+ case GV_SD_DOWN:
+ case GV_SD_STALE:
+ statemap |= GV_SD_DOWNSTATE;
+ p->sddown++; /* Another unusable subdisk. */
+ break;
+
+ case GV_SD_UP:
+ statemap |= GV_SD_UPSTATE;
+ break;
+
+ case GV_SD_INITIALIZING:
+ statemap |= GV_SD_INITSTATE;
+ break;
+
+ case GV_SD_REVIVING:
+ statemap |= GV_SD_INITSTATE;
+ p->sddown++; /* XXX: Another unusable subdisk? */
+ break;
+ }
+ }
+ return (statemap);
+}
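Taken together, gv_sdstatemap() and gv_update_plex_state() form a small decision table: all subdisks up means the plex is up; any subdisk down means the plex is down, unless it is a RAID5 plex with exactly one dead subdisk, in which case it is merely degraded; initializing subdisks keep the plex down unless it is syncing. A minimal user-space sketch of that table (the flag values are illustrative copies of the GV_SD_*STATE defines, not the driver code itself):

#include <stdio.h>

/* Illustrative copies of the subdisk state bitmap flags. */
#define SD_DOWNSTATE	0x01
#define SD_INITSTATE	0x04
#define SD_UPSTATE	0x08

/* Simplified mirror of the decision made in gv_update_plex_state(). */
static const char *
plex_state(int statemap, int is_raid5, int sddown, int syncing)
{
	if (statemap == SD_UPSTATE)
		return ("up");
	if (statemap & SD_DOWNSTATE)
		return ((is_raid5 && sddown == 1) ? "degraded" : "down");
	if (statemap & SD_INITSTATE)
		return (syncing ? "degraded" : "down");
	return ("down");
}

int
main(void)
{
	printf("%s\n", plex_state(SD_UPSTATE, 0, 0, 0));		/* up */
	printf("%s\n", plex_state(SD_UPSTATE | SD_DOWNSTATE, 1, 1, 0));	/* degraded */
	printf("%s\n", plex_state(SD_UPSTATE | SD_DOWNSTATE, 0, 1, 0));	/* down */
	return (0);
}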
diff --git a/sys/geom/vinum/geom_vinum_subr.c b/sys/geom/vinum/geom_vinum_subr.c
new file mode 100644
index 0000000..55cf583
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_subr.c
@@ -0,0 +1,804 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 1997, 1998, 1999
+ * Nan Yang Computer Services Limited. All rights reserved.
+ *
+ * Parts written by Greg Lehey
+ *
+ * This software is distributed under the so-called ``Berkeley
+ * License'':
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Nan Yang Computer
+ * Services Limited.
+ * 4. Neither the name of the Company nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * This software is provided ``as is'', and any express or implied
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose are disclaimed.
+ * In no event shall the company or contributors be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential
+ * damages (including, but not limited to, procurement of substitute
+ * goods or services; loss of use, data, or profits; or business
+ * interruption) however caused and on any theory of liability, whether
+ * in contract, strict liability, or tort (including negligence or
+ * otherwise) arising in any way out of the use of this software, even if
+ * advised of the possibility of such damage.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum.h>
+#include <geom/vinum/geom_vinum_share.h>
+
+/* Find the VINUM class and its associated geom. */
+struct g_geom *
+find_vinum_geom(void)
+{
+ struct g_class *mp;
+ struct g_geom *gp;
+
+ g_topology_assert();
+
+ gp = NULL;
+
+ LIST_FOREACH(mp, &g_classes, class) {
+ if (!strcmp(mp->name, "VINUM")) {
+ gp = LIST_FIRST(&mp->geom);
+ break;
+ }
+ }
+
+ return (gp);
+}
+
+/*
+ * Parse the vinum config provided in *buf and store it in the softc *sc.
+ * If parameter 'merge' is non-zero, the given config is merged into the
+ * existing configuration.
+ */
+void
+gv_parse_config(struct gv_softc *sc, u_char *buf, int merge)
+{
+ char *aptr, *bptr, *cptr;
+ struct gv_volume *v, *v2;
+ struct gv_plex *p, *p2;
+ struct gv_sd *s, *s2;
+ int tokens;
+ char *token[GV_MAXARGS];
+
+ g_topology_assert();
+
+ KASSERT(sc != NULL, ("gv_parse_config: NULL softc"));
+
+ /* Until the end of the string *buf. */
+ for (aptr = buf; *aptr != '\0'; aptr = bptr) {
+ bptr = aptr;
+ cptr = aptr;
+
+		/* Separate input lines. */
+ while (*bptr != '\n')
+ bptr++;
+ *bptr = '\0';
+ bptr++;
+
+ tokens = gv_tokenize(cptr, token, GV_MAXARGS);
+
+ if (tokens > 0) {
+ if (!strcmp(token[0], "volume")) {
+ v = gv_new_volume(tokens, token);
+ if (v == NULL) {
+ printf("geom_vinum: failed volume\n");
+ break;
+ }
+
+ if (merge) {
+ v2 = gv_find_vol(sc, v->name);
+ if (v2 != NULL) {
+ g_free(v);
+ continue;
+ }
+ }
+
+ v->vinumconf = sc;
+ LIST_INIT(&v->plexes);
+ LIST_INSERT_HEAD(&sc->volumes, v, volume);
+
+ } else if (!strcmp(token[0], "plex")) {
+ p = gv_new_plex(tokens, token);
+ if (p == NULL) {
+ printf("geom_vinum: failed plex\n");
+ break;
+ }
+
+ if (merge) {
+ p2 = gv_find_plex(sc, p->name);
+ if (p2 != NULL) {
+ g_free(p);
+ continue;
+ }
+ }
+
+ p->vinumconf = sc;
+ LIST_INIT(&p->subdisks);
+ LIST_INSERT_HEAD(&sc->plexes, p, plex);
+
+ } else if (!strcmp(token[0], "sd")) {
+ s = gv_new_sd(tokens, token);
+
+ if (s == NULL) {
+ printf("geom_vinum: failed subdisk\n");
+ break;
+ }
+
+ if (merge) {
+ s2 = gv_find_sd(sc, s->name);
+ if (s2 != NULL) {
+ g_free(s);
+ continue;
+ }
+ }
+
+ s->vinumconf = sc;
+ LIST_INSERT_HEAD(&sc->subdisks, s, sd);
+ }
+ }
+ }
+}
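The parser treats the config buffer as newline-separated lines, tokenizes each line, and dispatches on the first word (volume, plex, or sd), linking new objects into the softc lists. A rough user-space sketch of that split-and-dispatch loop, using strsep(3) in place of the driver's gv_tokenize(); all names here are illustrative only:

#include <stdio.h>
#include <string.h>

/* Sketch: split a vinum-style config buffer into lines and dispatch. */
static void
parse_config(char *buf)
{
	char *line, *kw;

	while ((line = strsep(&buf, "\n")) != NULL) {
		kw = strsep(&line, " \t");
		if (kw == NULL || *kw == '\0')
			continue;
		if (strcmp(kw, "volume") == 0 || strcmp(kw, "plex") == 0 ||
		    strcmp(kw, "sd") == 0)
			printf("new %s: %s\n", kw, line != NULL ? line : "");
	}
}

int
main(void)
{
	char cfg[] =
	    "volume raid\n"
	    "plex name raid.p0 org striped 512s vol raid\n"
	    "sd name raid.p0.s0 drive a len 1024s\n";

	parse_config(cfg);
	return (0);
}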
+
+/*
+ * Format the vinum configuration properly. If ondisk is non-zero then the
+ * configuration is intended to be written to disk later.
+ */
+void
+gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
+{
+ struct gv_drive *d;
+ struct gv_sd *s;
+ struct gv_plex *p;
+ struct gv_volume *v;
+
+ g_topology_assert();
+
+ /*
+ * We don't need the drive configuration if we're not writing the
+ * config to disk.
+ */
+ if (!ondisk) {
+ LIST_FOREACH(d, &sc->drives, drive) {
+ sbuf_printf(sb, "%sdrive %s device %s\n", prefix,
+ d->name, d->device);
+ }
+ }
+
+ LIST_FOREACH(v, &sc->volumes, volume) {
+ if (!ondisk)
+ sbuf_printf(sb, "%s", prefix);
+ sbuf_printf(sb, "volume %s", v->name);
+ if (ondisk)
+ sbuf_printf(sb, " state %s", gv_volstate(v->state));
+ sbuf_printf(sb, "\n");
+ }
+
+ LIST_FOREACH(p, &sc->plexes, plex) {
+ if (!ondisk)
+ sbuf_printf(sb, "%s", prefix);
+ sbuf_printf(sb, "plex name %s org %s ", p->name,
+ gv_plexorg(p->org));
+ if (gv_is_striped(p))
+ sbuf_printf(sb, "%ds ", p->stripesize / 512);
+ if (p->vol_sc != NULL)
+ sbuf_printf(sb, "vol %s", p->volume);
+ if (ondisk)
+ sbuf_printf(sb, " state %s", gv_plexstate(p->state));
+ sbuf_printf(sb, "\n");
+ }
+
+ LIST_FOREACH(s, &sc->subdisks, sd) {
+ if (!ondisk)
+ sbuf_printf(sb, "%s", prefix);
+ sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
+ "%jds", s->name, s->drive, s->size / 512,
+ s->drive_offset / 512);
+ if (s->plex_sc != NULL) {
+ sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
+ s->plex_offset / 512);
+ }
+ if (ondisk)
+ sbuf_printf(sb, " state %s", gv_sdstate(s->state));
+ sbuf_printf(sb, "\n");
+ }
+
+ return;
+}
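For orientation, the text produced here for a hypothetical two-disk striped volume (all names and sizes invented for illustration) would look roughly like this, with the drive lines omitted and "state ..." fields appended when the config is destined for disk (ondisk != 0):

    drive a device /dev/da0s1e
    drive b device /dev/da1s1e
    volume stripe
    plex name stripe.p0 org striped 512s vol stripe
    sd name stripe.p0.s0 drive a len 2097152s driveoffset 265s plex stripe.p0 plexoffset 0s
    sd name stripe.p0.s1 drive b len 2097152s driveoffset 265s plex stripe.p0 plexoffset 512s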
+
+/*
+ * Take a size in bytes and return a pointer to a string which represents the
+ * size best. If lj is != 0, return left justified, otherwise in a fixed 10
+ * character field suitable for columnar printing.
+ *
+ * Note this uses a static string: it's only intended to be used immediately
+ * for printing.
+ */
+const char *
+gv_roughlength(off_t bytes, int lj)
+{
+ static char desc[16];
+
+ /* Gigabytes. */
+ if (bytes > (off_t)MEGABYTE * 10000)
+ snprintf(desc, sizeof(desc), lj ? "%jd GB" : "%10jd GB",
+ bytes / GIGABYTE);
+
+ /* Megabytes. */
+ else if (bytes > KILOBYTE * 10000)
+ snprintf(desc, sizeof(desc), lj ? "%jd MB" : "%10jd MB",
+ bytes / MEGABYTE);
+
+ /* Kilobytes. */
+ else if (bytes > 10000)
+ snprintf(desc, sizeof(desc), lj ? "%jd kB" : "%10jd kB",
+ bytes / KILOBYTE);
+
+ /* Bytes. */
+ else
+ snprintf(desc, sizeof(desc), lj ? "%jd B" : "%10jd B", bytes);
+
+ return (desc);
+}
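A quick worked example, assuming the usual power-of-two KILOBYTE/MEGABYTE/GIGABYTE constants defined elsewhere in vinum: 4,096 bytes falls through to "4096 B", 100,000 bytes prints as "97 kB" (100000 / 1024), 20 MiB prints as "20 MB", and 20 GiB prints as "20 GB"; the 10000-unit thresholds keep smallish values in the finer-grained unit.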
+
+int
+gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
+{
+ struct gv_sd *s2;
+
+ g_topology_assert();
+
+ /* If this subdisk was already given to this plex, do nothing. */
+ if (s->plex_sc == p)
+ return (0);
+
+ /* Find the correct plex offset for this subdisk, if needed. */
+ if (s->plex_offset == -1) {
+ if (p->sdcount) {
+ LIST_FOREACH(s2, &p->subdisks, in_plex) {
+ if (gv_is_striped(p))
+ s->plex_offset = p->sdcount *
+ p->stripesize;
+ else
+ s->plex_offset = s2->plex_offset +
+ s2->size;
+ }
+ } else
+ s->plex_offset = 0;
+ }
+
+ p->sdcount++;
+
+ /* Adjust the size of our plex. */
+ switch (p->org) {
+ case GV_PLEX_CONCAT:
+ case GV_PLEX_STRIPED:
+ p->size += s->size;
+ break;
+
+ case GV_PLEX_RAID5:
+ p->size = (p->sdcount - 1) * s->size;
+ break;
+
+ default:
+ break;
+ }
+
+ /* There are no subdisks for this plex yet, just insert it. */
+ if (LIST_EMPTY(&p->subdisks)) {
+ LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
+
+ /* Insert in correct order, depending on plex_offset. */
+ } else {
+ LIST_FOREACH(s2, &p->subdisks, in_plex) {
+ if (s->plex_offset < s2->plex_offset) {
+ LIST_INSERT_BEFORE(s2, s, in_plex);
+ break;
+ } else if (LIST_NEXT(s2, in_plex) == NULL) {
+ LIST_INSERT_AFTER(s2, s, in_plex);
+ break;
+ }
+ }
+ }
+
+ s->plex_sc = p;
+
+ return (0);
+}
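As a worked example of the offset logic above (invented figures): with a 512-sector (256 KB) stripe size, the first subdisk added to a striped plex gets plex_offset 0, the second 512 sectors, the third 1024 sectors, and so on; in a concatenated plex each new subdisk instead starts where the previous one ends, so two 1 GB subdisks get offsets 0 and 1 GB.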
+
+void
+gv_update_plex_config(struct gv_plex *p)
+{
+ struct gv_sd *s, *s2;
+ off_t remainder;
+ int required_sds, state;
+
+ KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
+
+ /* This is what we want the plex to be. */
+ state = GV_PLEX_UP;
+
+ /* The plex was added to an already running volume. */
+ if (p->flags & GV_PLEX_ADDED)
+ state = GV_PLEX_DOWN;
+
+ switch (p->org) {
+ case GV_PLEX_STRIPED:
+ required_sds = 2;
+ break;
+ case GV_PLEX_RAID5:
+ required_sds = 3;
+ break;
+ case GV_PLEX_CONCAT:
+ default:
+ required_sds = 0;
+ break;
+ }
+
+ if (required_sds) {
+ if (p->sdcount < required_sds) {
+ state = GV_PLEX_DOWN;
+ }
+
+ /*
+ * The subdisks in striped plexes must all have the same size.
+ */
+ s = LIST_FIRST(&p->subdisks);
+ LIST_FOREACH(s2, &p->subdisks, in_plex) {
+ if (s->size != s2->size) {
+ printf("geom_vinum: subdisk size mismatch "
+ "%s (%jd) <> %s (%jd)\n", s->name, s->size,
+ s2->name, s2->size);
+ state = GV_PLEX_DOWN;
+ }
+ }
+
+ /* Trim subdisk sizes so that they match the stripe size. */
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ remainder = s->size % p->stripesize;
+ if (remainder) {
+ printf("gvinum: size of sd %s is not a "
+ "multiple of plex stripesize, taking off "
+ "%jd bytes\n", s->name,
+ (intmax_t)remainder);
+ gv_adjust_freespace(s, remainder);
+ }
+ }
+ }
+
+ /* Adjust the size of our plex. */
+ if (p->sdcount > 0) {
+ p->size = 0;
+ switch (p->org) {
+ case GV_PLEX_CONCAT:
+ LIST_FOREACH(s, &p->subdisks, in_plex)
+ p->size += s->size;
+ break;
+
+ case GV_PLEX_STRIPED:
+ s = LIST_FIRST(&p->subdisks);
+ p->size = p->sdcount * s->size;
+ break;
+
+ case GV_PLEX_RAID5:
+ s = LIST_FIRST(&p->subdisks);
+ p->size = (p->sdcount - 1) * s->size;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (p->sdcount == 0)
+ state = GV_PLEX_DOWN;
+ else if ((p->flags & GV_PLEX_ADDED) || (p->org == GV_PLEX_RAID5)) {
+ LIST_FOREACH(s, &p->subdisks, in_plex)
+ s->state = GV_SD_STALE;
+ p->flags &= ~GV_PLEX_ADDED;
+ p->state = GV_PLEX_DOWN;
+ }
+}
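Concretely, with three 100 GB subdisks the recomputed plex size is (invented figures, for illustration only): 300 GB for a concatenated plex (the sum of all subdisks), 300 GB for a striped plex (3 x the size of the first subdisk, which is why mismatched subdisk sizes are rejected above), and 200 GB for a RAID5 plex ((3 - 1) x the subdisk size, since one subdisk's worth of space holds parity).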
+
+/*
+ * Give a subdisk to a drive, check and adjust several parameters, and
+ * adjust the drive's freelist.
+ */
+int
+gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
+ char *errstr, int errlen)
+{
+ struct gv_sd *s2;
+ struct gv_freelist *fl, *fl2;
+ off_t tmp;
+ int i;
+
+ g_topology_assert();
+
+ fl2 = NULL;
+
+ KASSERT(sc != NULL, ("gv_sd_to_drive: NULL softc"));
+ KASSERT(d != NULL, ("gv_sd_to_drive: NULL drive"));
+ KASSERT(s != NULL, ("gv_sd_to_drive: NULL subdisk"));
+ KASSERT(errstr != NULL, ("gv_sd_to_drive: NULL errstr"));
+	KASSERT(errlen >= ERRBUFSIZ, ("gv_sd_to_drive: short errlen %d", errlen));
+
+ /* Check if this subdisk was already given to this drive. */
+ if (s->drive_sc == d)
+ return (0);
+
+ /* Preliminary checks. */
+ if (s->size > d->avail || d->freelist_entries == 0) {
+ snprintf(errstr, errlen, "not enough space on '%s' for '%s'",
+ d->name, s->name);
+ return (-1);
+ }
+
+ /* No size given, autosize it. */
+ if (s->size == -1) {
+ /* Find the largest available slot. */
+ LIST_FOREACH(fl, &d->freelist, freelist) {
+ if (fl->size > s->size) {
+ s->size = fl->size;
+ s->drive_offset = fl->offset;
+ fl2 = fl;
+ }
+ }
+
+ /* No good slot found? */
+ if (s->size == -1) {
+ snprintf(errstr, errlen, "couldn't autosize '%s' on "
+ "'%s'", s->name, d->name);
+ return (-1);
+ }
+
+ /*
+ * Check if we have a free slot that's large enough for the given size.
+ */
+ } else {
+ i = 0;
+ LIST_FOREACH(fl, &d->freelist, freelist) {
+ /* Yes, this subdisk fits. */
+ if (fl->size >= s->size) {
+ i++;
+ /* Override drive_offset, if given. */
+ s->drive_offset = fl->offset;
+ fl2 = fl;
+ break;
+ }
+ }
+
+ /* Couldn't find a good free slot. */
+ if (i == 0) {
+			snprintf(errstr, errlen, "free slots too small for '%s' "
+ "on '%s'", s->name, d->name);
+ return (-1);
+ }
+ }
+
+ /* No drive offset given, try to calculate it. */
+ if (s->drive_offset == -1) {
+
+		/* Place it after the end of the last subdisk on this drive. */
+ LIST_FOREACH(s2, &d->subdisks, from_drive) {
+ s->drive_offset = s2->drive_offset + s2->size;
+ }
+
+ /*
+ * If there are no other subdisks yet, then set the default
+ * offset to GV_DATA_START.
+ */
+ if (s->drive_offset == 0)
+ s->drive_offset = GV_DATA_START;
+
+ /* Check if we have a free slot at the given drive offset. */
+ } else {
+ i = 0;
+ LIST_FOREACH(fl, &d->freelist, freelist) {
+ /* Yes, this subdisk fits. */
+ if ((fl->offset <= s->drive_offset) &&
+ (fl->offset + fl->size >=
+ s->drive_offset + s->size)) {
+ i++;
+ fl2 = fl;
+ break;
+ }
+ }
+
+ /* Couldn't find a good free slot. */
+ if (i == 0) {
+ snprintf(errstr, errlen, "given drive_offset for '%s' "
+ "won't fit on '%s'", s->name, d->name);
+ return (-1);
+ }
+ }
+
+ /*
+ * Now that all parameters are checked and set up, we can give the
+ * subdisk to the drive and adjust the freelist.
+ */
+
+ /* First, adjust the freelist. */
+ LIST_FOREACH(fl, &d->freelist, freelist) {
+
+ /* This is the free slot that we have found before. */
+ if (fl == fl2) {
+
+ /*
+ * The subdisk starts at the beginning of the free
+ * slot.
+ */
+ if (fl->offset == s->drive_offset) {
+ fl->offset += s->size;
+ fl->size -= s->size;
+
+ /*
+ * The subdisk uses the whole slot, so remove
+ * it.
+ */
+ if (fl->size == 0) {
+ d->freelist_entries--;
+ LIST_REMOVE(fl, freelist);
+ }
+ /*
+ * The subdisk does not start at the beginning of the
+ * free slot.
+ */
+ } else {
+ tmp = fl->offset + fl->size;
+ fl->size = s->drive_offset - fl->offset;
+
+ /*
+ * The subdisk didn't use the complete rest of
+ * the free slot, so we need to split it.
+ */
+ if (s->drive_offset + s->size != tmp) {
+ fl2 = g_malloc(sizeof(*fl2),
+ M_WAITOK | M_ZERO);
+ fl2->offset = s->drive_offset + s->size;
+ fl2->size = tmp - fl2->offset;
+ LIST_INSERT_AFTER(fl, fl2, freelist);
+ d->freelist_entries++;
+ }
+ }
+ break;
+ }
+ }
+
+ /*
+ * This is the first subdisk on this drive, just insert it into the
+ * list.
+ */
+ if (LIST_EMPTY(&d->subdisks)) {
+ LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
+
+ /* There are other subdisks, so insert this one in correct order. */
+ } else {
+ LIST_FOREACH(s2, &d->subdisks, from_drive) {
+ if (s->drive_offset < s2->drive_offset) {
+ LIST_INSERT_BEFORE(s2, s, from_drive);
+ break;
+ } else if (LIST_NEXT(s2, from_drive) == NULL) {
+ LIST_INSERT_AFTER(s2, s, from_drive);
+ break;
+ }
+ }
+ }
+
+ d->sdcount++;
+ d->avail -= s->size;
+
+ /* Link back from the subdisk to this drive. */
+ s->drive_sc = d;
+
+ return (0);
+}
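A worked example of the freelist handling above (invented numbers): a fresh drive has one free slot starting at GV_DATA_START (byte 135680) and covering the rest of the disk. Giving it a 1 GB subdisk with no explicit offset places the subdisk at 135680 and shrinks the slot so it now starts 1 GB later. Giving it a second subdisk at an explicit drive_offset somewhere in the middle of the remaining slot splits that slot in two, one entry in front of the new subdisk and one behind it; the entry behind it is the g_malloc'd fl2 in the code above.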
+
+void
+gv_adjust_freespace(struct gv_sd *s, off_t remainder)
+{
+ struct gv_drive *d;
+ struct gv_freelist *fl, *fl2;
+
+ KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
+ d = s->drive_sc;
+ KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
+
+ /* First, find the free slot that's immediately after this subdisk. */
+ fl = NULL;
+ LIST_FOREACH(fl, &d->freelist, freelist) {
+ if (fl->offset == s->drive_offset + s->size)
+ break;
+ }
+
+	/* If there is no free slot behind this subdisk, create one. */
+ if (fl == NULL) {
+
+ fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
+ fl->size = remainder;
+ fl->offset = s->drive_offset + s->size - remainder;
+
+ if (d->freelist_entries == 0) {
+ LIST_INSERT_HEAD(&d->freelist, fl, freelist);
+ } else {
+ LIST_FOREACH(fl2, &d->freelist, freelist) {
+ if (fl->offset < fl2->offset) {
+ LIST_INSERT_BEFORE(fl2, fl, freelist);
+ break;
+ } else if (LIST_NEXT(fl2, freelist) == NULL) {
+ LIST_INSERT_AFTER(fl2, fl, freelist);
+ break;
+ }
+ }
+ }
+
+ d->freelist_entries++;
+
+ /* Expand the free slot we just found. */
+ } else {
+ fl->offset -= remainder;
+ fl->size += remainder;
+ }
+
+ s->size -= remainder;
+ d->avail += remainder;
+}
+
+/* Check if the given plex is a striped one. */
+int
+gv_is_striped(struct gv_plex *p)
+{
+ KASSERT(p != NULL, ("gv_is_striped: NULL p"));
+ switch(p->org) {
+ case GV_PLEX_STRIPED:
+ case GV_PLEX_RAID5:
+ return (1);
+ default:
+ return (0);
+ }
+}
+
+/* Find a volume by name. */
+struct gv_volume *
+gv_find_vol(struct gv_softc *sc, char *name)
+{
+ struct gv_volume *v;
+
+ LIST_FOREACH(v, &sc->volumes, volume) {
+ if (!strncmp(v->name, name, GV_MAXVOLNAME))
+ return (v);
+ }
+
+ return (NULL);
+}
+
+/* Find a plex by name. */
+struct gv_plex *
+gv_find_plex(struct gv_softc *sc, char *name)
+{
+ struct gv_plex *p;
+
+ LIST_FOREACH(p, &sc->plexes, plex) {
+ if (!strncmp(p->name, name, GV_MAXPLEXNAME))
+ return (p);
+ }
+
+ return (NULL);
+}
+
+/* Find a subdisk by name. */
+struct gv_sd *
+gv_find_sd(struct gv_softc *sc, char *name)
+{
+ struct gv_sd *s;
+
+ LIST_FOREACH(s, &sc->subdisks, sd) {
+ if (!strncmp(s->name, name, GV_MAXSDNAME))
+ return (s);
+ }
+
+ return (NULL);
+}
+
+/* Find a drive by name. */
+struct gv_drive *
+gv_find_drive(struct gv_softc *sc, char *name)
+{
+ struct gv_drive *d;
+
+ LIST_FOREACH(d, &sc->drives, drive) {
+ if (!strncmp(d->name, name, GV_MAXDRIVENAME))
+ return (d);
+ }
+
+ return (NULL);
+}
+
+/* Check if any consumer of the given geom is open. */
+int
+gv_is_open(struct g_geom *gp)
+{
+ struct g_consumer *cp;
+
+ KASSERT(gp != NULL, ("gv_is_open: NULL gp"));
+
+ LIST_FOREACH(cp, &gp->consumer, consumer) {
+ if (cp->acr || cp->acw || cp->ace)
+ return (1);
+ }
+
+ return (0);
+}
+
+/* Return the type of object identified by string 'name'. */
+int
+gv_object_type(struct gv_softc *sc, char *name)
+{
+ struct gv_drive *d;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct gv_volume *v;
+
+ LIST_FOREACH(v, &sc->volumes, volume) {
+ if (!strncmp(v->name, name, GV_MAXVOLNAME))
+ return (GV_TYPE_VOL);
+ }
+
+ LIST_FOREACH(p, &sc->plexes, plex) {
+ if (!strncmp(p->name, name, GV_MAXPLEXNAME))
+ return (GV_TYPE_PLEX);
+ }
+
+ LIST_FOREACH(s, &sc->subdisks, sd) {
+ if (!strncmp(s->name, name, GV_MAXSDNAME))
+ return (GV_TYPE_SD);
+ }
+
+ LIST_FOREACH(d, &sc->drives, drive) {
+ if (!strncmp(d->name, name, GV_MAXDRIVENAME))
+ return (GV_TYPE_DRIVE);
+ }
+
+ return (-1);
+}
+
+void
+gv_kill_thread(struct gv_plex *p)
+{
+ if ((p->org == GV_PLEX_RAID5) && (p->flags & GV_PLEX_THREAD_ACTIVE)) {
+ p->flags |= GV_PLEX_THREAD_DIE;
+ wakeup(p);
+ while (!(p->flags & GV_PLEX_THREAD_DEAD))
+ tsleep(p, PRIBIO, "gv_die", hz);
+ p->flags &= ~GV_PLEX_THREAD_ACTIVE;
+ }
+}
diff --git a/sys/geom/vinum/geom_vinum_var.h b/sys/geom/vinum/geom_vinum_var.h
new file mode 100644
index 0000000..4c38923
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_var.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 1997, 1998, 1999
+ * Nan Yang Computer Services Limited. All rights reserved.
+ *
+ * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
+ * Parts written by Greg Lehey.
+ *
+ * This software is distributed under the so-called ``Berkeley
+ * License'':
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Nan Yang Computer
+ * Services Limited.
+ * 4. Neither the name of the Company nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * This software is provided ``as is'', and any express or implied
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose are disclaimed.
+ * In no event shall the company or contributors be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential
+ * damages (including, but not limited to, procurement of substitute
+ * goods or services; loss of use, data, or profits; or business
+ * interruption) however caused and on any theory of liability, whether
+ * in contract, strict liability, or tort (including negligence or
+ * otherwise) arising in any way out of the use of this software, even if
+ * advised of the possibility of such damage.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_VINUM_VAR_H_
+#define _GEOM_VINUM_VAR_H_
+
+/*
+ * Slice header
+ *
+ * Vinum drives start with this structure:
+ *
+ *\ Sector
+ * |--------------------------------------|
+ * | PDP-11 memorial boot block | 0
+ * |--------------------------------------|
+ * | Disk label, maybe | 1
+ * |--------------------------------------|
+ * | Slice definition (vinum_hdr) | 8
+ * |--------------------------------------|
+ * | |
+ * | Configuration info, first copy | 9
+ * | |
+ * |--------------------------------------|
+ * | |
+ * | Configuration info, second copy | 9 + size of config
+ * | |
+ * |--------------------------------------|
+ */
+
+/* Sizes and offsets of our information. */
+#define GV_HDR_OFFSET 4096 /* Offset of vinum header. */
+#define GV_HDR_LEN 512 /* Size of vinum header. */
+#define GV_CFG_OFFSET 4608 /* Offset of first config copy. */
+#define GV_CFG_LEN 65536 /* Size of config copy. */
+
+/* This is where the actual data starts. */
+#define GV_DATA_START (GV_CFG_LEN * 2 + GV_CFG_OFFSET)
+/* #define GV_DATA_START (GV_CFG_LEN * 2 + GV_HDR_LEN) */
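Plugging in the numbers above: the vinum header sits at sector 8, the two config copies at sectors 9 and 137, and user data starts at byte 135680 (sector 265), which matches the layout diagram at the top of this file. A tiny stand-alone check (constants copied from the defines above):

#include <stdio.h>

#define HDR_OFFSET	4096
#define CFG_OFFSET	4608
#define CFG_LEN		65536

int
main(void)
{
	long data_start = CFG_LEN * 2 + CFG_OFFSET;

	printf("header at sector %d\n", HDR_OFFSET / 512);	/* 8 */
	printf("config copies at sectors %d and %d\n",
	    CFG_OFFSET / 512, (CFG_OFFSET + CFG_LEN) / 512);	/* 9 and 137 */
	printf("data starts at byte %ld (sector %ld)\n",
	    data_start, data_start / 512);			/* 135680, 265 */
	return (0);
}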
+
+#define GV_MAXDRIVENAME 32 /* Maximum length of a device name. */
+#define GV_MAXSDNAME 64 /* Maximum length of a subdisk name. */
+#define GV_MAXPLEXNAME 64 /* Maximum length of a plex name. */
+#define GV_MAXVOLNAME 64 /* Maximum length of a volume name. */
+
+/* Command line flags. */
+#define GV_FLAG_R 0x01
+#define GV_FLAG_S 0x02
+#define GV_FLAG_V 0x04
+#define GV_FLAG_VV 0x08
+#define GV_FLAG_F 0x10
+
+/* Object types. */
+#define GV_TYPE_VOL 1
+#define GV_TYPE_PLEX 2
+#define GV_TYPE_SD 3
+#define GV_TYPE_DRIVE 4
+
+/* State changing flags. */
+#define GV_SETSTATE_FORCE 0x1
+#define GV_SETSTATE_CONFIG 0x2
+
+/* Subdisk state bitmaps for plexes. */
+#define GV_SD_DOWNSTATE 0x01 /* Subdisk is down. */
+#define GV_SD_STALESTATE 0x02 /* Subdisk is stale. */
+#define GV_SD_INITSTATE 0x04 /* Subdisk is initializing. */
+#define GV_SD_UPSTATE 0x08 /* Subdisk is up. */
+
+/* Synchronization/initialization request sizes. */
+#define GV_MIN_SYNCSIZE 512
+#define GV_MAX_SYNCSIZE MAXPHYS
+#define GV_DFLT_SYNCSIZE 65536
+
+/*
+ * hostname is 256 bytes long, but we don't need to shlep multiple copies in
+ * vinum. We use the host name just to identify this system, and 32 bytes
+ * should be ample for that purpose.
+ */
+
+#define GV_HOSTNAME_LEN 32
+struct gv_label {
+ char sysname[GV_HOSTNAME_LEN]; /* System name at creation time. */
+ char name[GV_MAXDRIVENAME]; /* Our name of the drive. */
+ struct timeval date_of_birth; /* The time it was created ... */
+ struct timeval last_update; /* ... and the time of last update. */
+ off_t drive_size; /* Total size incl. headers. */
+};
+
+/* The 'header' of each valid vinum drive. */
+struct gv_hdr {
+ uint64_t magic;
+#define GV_MAGIC 22322600044678729LL
+#define GV_NOMAGIC 22322600044678990LL
+
+ int config_length;
+ struct gv_label label;
+};
+
+/* A single freelist entry of a drive. */
+struct gv_freelist {
+ off_t size; /* Size of this free slot. */
+ off_t offset; /* Offset on the drive. */
+ LIST_ENTRY(gv_freelist) freelist;
+};
+
+/* This struct contains the main vinum config. */
+struct gv_softc {
+ /*struct mtx config_mtx; XXX not yet */
+
+ /* Linked lists of all objects in our setup. */
+ LIST_HEAD(,gv_drive) drives; /* All drives. */
+ LIST_HEAD(,gv_plex) plexes; /* All plexes. */
+ LIST_HEAD(,gv_sd) subdisks; /* All subdisks. */
+ LIST_HEAD(,gv_volume) volumes; /* All volumes. */
+
+ struct g_geom *geom; /* Pointer to our VINUM geom. */
+};
+
+/* softc for a drive. */
+struct gv_drive {
+ char name[GV_MAXDRIVENAME]; /* The name of this drive. */
+ char device[GV_MAXDRIVENAME]; /* Associated device. */
+ int state; /* The state of this drive. */
+#define GV_DRIVE_DOWN 0
+#define GV_DRIVE_UP 1
+
+ off_t size; /* Size of this drive. */
+ off_t avail; /* Available space. */
+ int sdcount; /* Number of subdisks. */
+
+ struct gv_hdr *hdr; /* The drive header. */
+
+ int freelist_entries; /* Count of freelist entries. */
+ LIST_HEAD(,gv_freelist) freelist; /* List of freelist entries. */
+ LIST_HEAD(,gv_sd) subdisks; /* Subdisks on this drive. */
+ LIST_ENTRY(gv_drive) drive; /* Entry in the vinum config. */
+
+ struct g_geom *geom; /* The geom of this drive. */
+ struct gv_softc *vinumconf; /* Pointer to the vinum conf. */
+};
+
+/* softc for a subdisk. */
+struct gv_sd {
+ char name[GV_MAXSDNAME]; /* The name of this subdisk. */
+ off_t size; /* The size of this subdisk. */
+ off_t drive_offset; /* Offset in the underlying drive. */
+ off_t plex_offset; /* Offset in the associated plex. */
+ int state; /* The state of this subdisk. */
+#define GV_SD_DOWN 0
+#define GV_SD_STALE 1
+#define GV_SD_INITIALIZING 2
+#define GV_SD_REVIVING 3
+#define GV_SD_UP 4
+
+ off_t initialized; /* Count of initialized bytes. */
+
+ int init_size; /* Initialization read/write size. */
+ int init_error; /* Flag error on initialization. */
+
+ int flags;
+#define GV_SD_NEWBORN 0x01 /* Subdisk was just created. */
+#define GV_SD_INITCANCEL 0x02 /* Cancel initialization process. */
+
+ char drive[GV_MAXDRIVENAME]; /* Name of underlying drive. */
+ char plex[GV_MAXPLEXNAME]; /* Name of associated plex. */
+
+ struct gv_drive *drive_sc; /* Pointer to underlying drive. */
+ struct gv_plex *plex_sc; /* Pointer to associated plex. */
+
+ struct g_provider *provider; /* The provider this sd represents. */
+ struct g_consumer *consumer; /* Consumer attached to our provider. */
+
+ LIST_ENTRY(gv_sd) from_drive; /* Subdisk list of underlying drive. */
+ LIST_ENTRY(gv_sd) in_plex; /* Subdisk list of associated plex. */
+ LIST_ENTRY(gv_sd) sd; /* Entry in the vinum config. */
+
+ struct gv_softc *vinumconf; /* Pointer to the vinum config. */
+};
+
+/* softc for a plex. */
+struct gv_plex {
+ char name[GV_MAXPLEXNAME]; /* The name of the plex. */
+ off_t size; /* The size of the plex. */
+ int state; /* The plex state. */
+#define GV_PLEX_DOWN 0
+#define GV_PLEX_INITIALIZING 1
+#define GV_PLEX_DEGRADED 2
+#define GV_PLEX_UP 3
+
+ int org; /* The plex organisation. */
+#define GV_PLEX_DISORG 0
+#define GV_PLEX_CONCAT 1
+#define GV_PLEX_STRIPED 2
+#define GV_PLEX_RAID5 4
+
+ int stripesize; /* The stripe size of the plex. */
+
+ char volume[GV_MAXVOLNAME]; /* Name of associated volume. */
+ struct gv_volume *vol_sc; /* Pointer to associated volume. */
+
+ int sdcount; /* Number of subdisks in this plex. */
+ int sddown; /* Number of subdisks that are down. */
+ int flags;
+#define GV_PLEX_ADDED 0x01 /* Added to an existing volume. */
+#define GV_PLEX_SYNCING 0x02 /* Plex is syncing from another plex. */
+#define GV_PLEX_THREAD_ACTIVE 0x04 /* Plex has an active RAID5 thread. */
+#define GV_PLEX_THREAD_DIE 0x08 /* Signal the RAID5 thread to die. */
+#define GV_PLEX_THREAD_DEAD 0x10 /* The RAID5 thread has died. */
+#define GV_PLEX_NEWBORN 0x20 /* The plex was just created. */
+
+ off_t synced; /* Count of synced bytes. */
+
+ struct mtx worklist_mtx; /* Mutex for RAID5 worklist. */
+ TAILQ_HEAD(,gv_raid5_packet) worklist; /* List of RAID5 work packets. */
+
+ LIST_HEAD(,gv_sd) subdisks; /* List of attached subdisks. */
+ LIST_ENTRY(gv_plex) in_volume; /* Plex list of associated volume. */
+ LIST_ENTRY(gv_plex) plex; /* Entry in the vinum config. */
+
+ struct g_provider *provider; /* The provider this plex represents. */
+ struct g_consumer *consumer; /* Consumer attached to our provider. */
+
+ struct g_geom *geom; /* The geom of this plex. */
+ struct gv_softc *vinumconf; /* Pointer to the vinum config. */
+};
+
+/* softc for a volume. */
+struct gv_volume {
+ char name[GV_MAXVOLNAME]; /* The name of the volume. */
+ off_t size; /* The size of the volume. */
+ int plexcount; /* Number of plexes. */
+ int state; /* The state of the volume. */
+#define GV_VOL_DOWN 0
+#define GV_VOL_UP 1
+
+ LIST_HEAD(,gv_plex) plexes; /* List of attached plexes. */
+ LIST_ENTRY(gv_volume) volume; /* Entry in vinum config. */
+
+ struct g_geom *geom; /* The geom of this volume. */
+ struct gv_softc *vinumconf; /* Pointer to the vinum config. */
+};
+
+#endif /* !_GEOM_VINUM_VAR_H_ */
diff --git a/sys/geom/vinum/geom_vinum_volume.c b/sys/geom/vinum/geom_vinum_volume.c
new file mode 100644
index 0000000..c916af4
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_volume.c
@@ -0,0 +1,260 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum.h>
+
+static void
+gv_volume_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ int error;
+
+ g_topology_assert();
+ gp = cp->geom;
+ g_trace(G_T_TOPOLOGY, "gv_volume_orphan(%s)", gp->name);
+ if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
+ g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+ error = cp->provider->error;
+ if (error == 0)
+ error = ENXIO;
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (!LIST_EMPTY(&gp->consumer))
+ return;
+ g_free(gp->softc);
+ g_wither_geom(gp, error);
+}
+
+/* We end up here after the requests to our plexes are done. */
+static void
+gv_volume_done(struct bio *bp)
+{
+ struct g_consumer *cp;
+
+ /* The next plex in this volume. */
+ cp = LIST_NEXT(bp->bio_from, consumer);
+
+ switch (bp->bio_cmd) {
+ case BIO_READ:
+ /*
+		 * If no error occurred on this request, or if we have no plex
+ * left, finish here...
+ */
+ if ((bp->bio_error == 0) || (cp == NULL)) {
+ g_std_done(bp);
+ return;
+ }
+
+ /* ... or try to read from the next plex. */
+ g_io_request(bp, cp);
+ return;
+
+ case BIO_WRITE:
+ case BIO_DELETE:
+ /* No more plexes left. */
+ if (cp == NULL) {
+ /*
+ * Clear any errors if one of the previous writes
+ * succeeded.
+ */
+ if (bp->bio_caller1 == (int *)1)
+ bp->bio_error = 0;
+ g_std_done(bp);
+ return;
+ }
+
+ /* If this write request had no errors, remember that fact... */
+ if (bp->bio_error == 0)
+ bp->bio_caller1 = (int *)1;
+
+ /* ... and write to the next plex. */
+ g_io_request(bp, cp);
+ return;
+ }
+}
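Put concretely, for a volume with two plexes p0 and p1 (hypothetical names): a read that fails on p0 is re-issued to p1 from this completion handler, and only the final outcome is delivered to the caller; a write is issued to p0 by gv_volume_start() and then, from here, to p1, with bio_caller1 recording that at least one plex accepted the data so the request is not failed just because the last plex returned an error.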
+
+static void
+gv_volume_start(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct bio *bp2;
+ struct gv_volume *v;
+
+ gp = bp->bio_to->geom;
+ v = gp->softc;
+ if (v->state != GV_VOL_UP) {
+ g_io_deliver(bp, ENXIO);
+ return;
+ }
+ switch(bp->bio_cmd) {
+ case BIO_READ:
+ case BIO_WRITE:
+ case BIO_DELETE:
+ bp2 = g_clone_bio(bp);
+ if (bp2 == NULL) {
+ g_io_deliver(bp, ENOMEM);
+ return;
+ }
+ bp2->bio_done = gv_volume_done;
+ g_io_request(bp2, LIST_FIRST(&gp->consumer));
+ return;
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ return;
+ }
+}
+
+static int
+gv_volume_access(struct g_provider *pp, int dr, int dw, int de)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp, *cp2;
+ int error;
+
+ gp = pp->geom;
+
+ error = ENXIO;
+ LIST_FOREACH(cp, &gp->consumer, consumer) {
+ error = g_access(cp, dr, dw, de);
+ if (error) {
+ LIST_FOREACH(cp2, &gp->consumer, consumer) {
+ if (cp == cp2)
+ break;
+ g_access(cp2, -dr, -dw, -de);
+ }
+ return (error);
+ }
+ }
+ return (error);
+}
+
+static struct g_geom *
+gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+ struct g_geom *gp;
+ struct g_provider *pp2;
+ struct g_consumer *cp;
+ struct gv_softc *sc;
+ struct gv_volume *v;
+ struct gv_plex *p;
+ int first;
+
+ g_trace(G_T_TOPOLOGY, "gv_volume_taste(%s, %s)", mp->name, pp->name);
+ g_topology_assert();
+
+ /* First, find the VINUM class and its associated geom. */
+ gp = find_vinum_geom();
+ if (gp == NULL)
+ return (NULL);
+
+ sc = gp->softc;
+ KASSERT(sc != NULL, ("gv_volume_taste: NULL sc"));
+
+ gp = pp->geom;
+
+ /* We only want to attach to plexes. */
+ if (strcmp(gp->class->name, "VINUMPLEX"))
+ return (NULL);
+
+ first = 0;
+ p = gp->softc;
+ v = gv_find_vol(sc, p->volume);
+ if (v == NULL)
+ return (NULL);
+ if (v->geom == NULL) {
+ gp = g_new_geomf(mp, "%s", p->volume);
+ gp->start = gv_volume_start;
+ gp->orphan = gv_volume_orphan;
+ gp->access = gv_volume_access;
+ gp->softc = v;
+ first++;
+ } else
+ gp = v->geom;
+
+ cp = g_new_consumer(gp);
+ g_attach(cp, pp);
+ p->consumer = cp;
+
+ if (p->vol_sc != v) {
+ p->vol_sc = v;
+ v->plexcount++;
+ LIST_INSERT_HEAD(&v->plexes, p, in_volume);
+ }
+
+	/* We need to set up a new VINUMVOLUME geom. */
+ if (first) {
+ pp2 = g_new_providerf(gp, "gvinum/%s", v->name);
+ pp2->mediasize = pp->mediasize;
+ pp2->sectorsize = pp->sectorsize;
+ g_error_provider(pp2, 0);
+ v->size = pp2->mediasize;
+ v->geom = gp;
+ return (gp);
+ }
+
+ return (NULL);
+}
+
+static int
+gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp,
+ struct g_geom *gp)
+{
+ g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name);
+ g_topology_assert();
+/*
+ if (gp->softc != NULL)
+ g_free(gp->softc);
+ gp->softc = NULL;
+*/
+ g_wither_geom(gp, ENXIO);
+ return (0);
+}
+
+#define VINUMVOLUME_CLASS_NAME "VINUMVOLUME"
+
+static struct g_class g_vinum_volume_class = {
+ .name = VINUMVOLUME_CLASS_NAME,
+ .taste = gv_volume_taste,
+ .destroy_geom = gv_volume_destroy_geom,
+};
+
+DECLARE_GEOM_CLASS(g_vinum_volume_class, g_vinum_volume);