author	le <le@FreeBSD.org>	2004-06-12 21:16:10 +0000
committer	le <le@FreeBSD.org>	2004-06-12 21:16:10 +0000
commit	cf31d52b42bd2309bb855b34e8260283eabfc570 (patch)
tree	b37e9b83eff28125aba7f626ab2e3bea5b487658	/sys/geom/vinum/geom_vinum_plex.c
parent	f66d897510d4772f7c5efd834cd66203558e9cb5 (diff)
Add a first version of a GEOMified vinum.
Diffstat (limited to 'sys/geom/vinum/geom_vinum_plex.c')
-rw-r--r--	sys/geom/vinum/geom_vinum_plex.c	456
1 file changed, 456 insertions(+), 0 deletions(-)
diff --git a/sys/geom/vinum/geom_vinum_plex.c b/sys/geom/vinum/geom_vinum_plex.c
new file mode 100644
index 0000000..a7acf72
--- /dev/null
+++ b/sys/geom/vinum/geom_vinum_plex.c
@@ -0,0 +1,456 @@
+/*-
+ * Copyright (c) 2004 Lukas Ertl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+
+#include <geom/geom.h>
+#include <geom/vinum/geom_vinum_var.h>
+#include <geom/vinum/geom_vinum_raid5.h>
+#include <geom/vinum/geom_vinum.h>
+
+/* XXX: is this the place to catch dying subdisks? */
+static void
+gv_plex_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ struct gv_plex *p;
+ int error;
+
+ g_topology_assert();
+ gp = cp->geom;
+ g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);
+
+ if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
+ g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+ error = cp->provider->error;
+ if (error == 0)
+ error = ENXIO;
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ if (!LIST_EMPTY(&gp->consumer))
+ return;
+
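+	/* That was the last consumer going away; tear down the plex geom. */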
+	p = gp->softc;
+	gv_kill_thread(p);
+	/*
+	 * Clear the softc before freeing it, so nothing trips over a stale
+	 * pointer while the geom withers away.
+	 */
+	gp->softc = NULL;
+	g_free(p);
+	g_wither_geom(gp, error);
+}
+
+static void
+gv_plex_done(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct gv_sd *s;
+
+ gp = bp->bio_to->geom;
+
+ s = bp->bio_caller1;
+ KASSERT(s != NULL, ("gv_plex_done: NULL s"));
+
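+	/*
+	 * This completion handler is only used for writes that initialize
+	 * a stale subdisk (gv_plexbuffer passes the subdisk in bio_caller1),
+	 * so keep track of how much of it has been written and flag it up
+	 * once it has been covered completely.
+	 */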
+ if (bp->bio_error == 0)
+ s->initialized += bp->bio_length;
+
+ if (s->initialized >= s->size) {
+ gv_set_sd_state(s, GV_SD_UP, 0);
+ s->initialized = 0;
+ }
+
+ g_std_done(bp);
+}
+
+/* Find the correct subdisk to send the bio to and build a bio to send. */
+static int
+gv_plexbuffer(struct bio *bp, struct bio **bp2, struct g_consumer **cp,
+ caddr_t addr, long bcount, off_t boff)
+{
+ struct g_geom *gp;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct bio *cbp;
+ int i, sdno;
+ off_t len_left, real_len, real_off, stripeend, stripeno, stripestart;
+
+ s = NULL;
+
+ gp = bp->bio_to->geom;
+ p = gp->softc;
+
+ if (p == NULL || LIST_EMPTY(&p->subdisks))
+ return (ENXIO);
+
+ /*
+ * We only handle concatenated and striped plexes here. RAID5 plexes
+ * are handled in build_raid5_request().
+ */
+ switch (p->org) {
+ case GV_PLEX_CONCAT:
+ /*
+ * Find the subdisk where this request starts. The subdisks in
+ * this list must be ordered by plex_offset.
+ */
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->plex_offset <= boff &&
+ s->plex_offset + s->size > boff)
+ break;
+ }
+ /* Subdisk not found. */
+ if (s == NULL)
+ return (ENXIO);
+
+ /* Calculate corresponding offsets on disk. */
+ real_off = boff - s->plex_offset;
+ len_left = s->size - real_off;
+ real_len = (bcount > len_left) ? len_left : bcount;
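+		/*
+		 * Example (hypothetical numbers): with two 100 MB subdisks
+		 * concatenated, a request at plex offset 150 MB maps to the
+		 * second subdisk at real_off 50 MB, and real_len is clipped
+		 * to the 50 MB remaining on that subdisk.
+		 */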
+ break;
+
+ case GV_PLEX_STRIPED:
+ /* The number of the stripe where the request starts. */
+ stripeno = boff / p->stripesize;
+
+ /* The number of the subdisk where the stripe resides. */
+ sdno = stripeno % p->sdcount;
+
+ /* Find the right subdisk. */
+ i = 0;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (i == sdno)
+ break;
+ i++;
+ }
+
+ /* Subdisk not found. */
+ if (s == NULL)
+ return (ENXIO);
+
+ /* The offset of the stripe from the start of the subdisk. */
+		stripestart = (stripeno / p->sdcount) * p->stripesize;
+
+ /* The offset at the end of the stripe. */
+ stripeend = stripestart + p->stripesize;
+
+ /* The offset of the request on this subdisk. */
+		real_off = boff - (stripeno * p->stripesize) + stripestart;
+
+ /* The length left in this stripe. */
+ len_left = stripeend - real_off;
+
+ real_len = (bcount <= len_left) ? bcount : len_left;
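+		/*
+		 * Example (hypothetical numbers): with a stripesize of 64k
+		 * and three subdisks, boff 200k gives stripeno 3, sdno 0,
+		 * stripestart 64k and real_off 72k; len_left is then 56k,
+		 * so a larger request continues on the next subdisk.
+		 */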
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ /* Now check if we can handle the request on this subdisk. */
+ switch (s->state) {
+ case GV_SD_UP:
+ /* If the subdisk is up, just continue. */
+ break;
+
+ case GV_SD_STALE:
+ if (bp->bio_caller1 != p)
+ return (ENXIO);
+
+		printf("GEOM_VINUM: setting subdisk %s to initializing\n",
+		    s->name);
+ gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
+ break;
+
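+	/*
+	 * An initializing subdisk may be written to (that is how it gets
+	 * filled with data), but reads from it would return stale data, so
+	 * only writes may pass.
+	 */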
+ case GV_SD_INITIALIZING:
+ if (bp->bio_cmd == BIO_READ)
+ return (ENXIO);
+ break;
+
+ default:
+ /* All other subdisk states mean it's not accessible. */
+ return (ENXIO);
+ }
+
+ /* Clone the bio and adjust the offsets and sizes. */
+ cbp = g_clone_bio(bp);
+ if (cbp == NULL)
+ return (ENOMEM);
+ cbp->bio_offset = real_off;
+ cbp->bio_length = real_len;
+ cbp->bio_data = addr;
+ if (bp->bio_caller1 == p) {
+ cbp->bio_caller1 = s;
+ cbp->bio_done = gv_plex_done;
+ } else
+ cbp->bio_done = g_std_done;
+ *bp2 = cbp;
+ *cp = s->consumer;
+ return (0);
+}
+
+static void
+gv_plex_start(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct gv_plex *p;
+ struct gv_raid5_packet *wp;
+ struct bio *bp2;
+ caddr_t addr;
+ off_t boff;
+ long bcount, rcount;
+ int err;
+
+ gp = bp->bio_to->geom;
+ p = gp->softc;
+
+ /*
+ * We cannot handle this request if too many of our subdisks are
+ * inaccessible.
+ */
+ if ((p->state < GV_PLEX_DEGRADED) && (bp->bio_caller1 != p)) {
+ g_io_deliver(bp, ENXIO); /* XXX: correct way? */
+ return;
+ }
+
+	switch (bp->bio_cmd) {
+ case BIO_READ:
+ case BIO_WRITE:
+ case BIO_DELETE:
+ /*
+ * We split up the request in smaller packets and hand them
+ * down to our subdisks.
+ */
+ wp = NULL;
+ addr = bp->bio_data;
+ boff = bp->bio_offset;
+ for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
+ /*
+ * RAID5 requests usually need to be split up in
+ * several subrequests.
+ */
+ if (p->org == GV_PLEX_RAID5) {
+ wp = gv_new_raid5_packet();
+ wp->bio = bp;
+ err = gv_build_raid5_req(wp, bp, addr, bcount,
+ boff);
+ } else
+ err = gv_plexbuffer(bp, &bp2, &cp, addr, bcount,
+ boff);
+
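+			/*
+			 * On error, charge the untried remainder to the
+			 * request and deliver it with the error once all
+			 * subrequests already sent down have completed.
+			 */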
+ if (err) {
+ bp->bio_completed += bcount;
+ if (bp->bio_error == 0)
+ bp->bio_error = err;
+ if (bp->bio_completed == bp->bio_length)
+ g_io_deliver(bp, bp->bio_error);
+ return;
+ }
+
+ if (p->org != GV_PLEX_RAID5) {
+ rcount = bp2->bio_length;
+ g_io_request(bp2, cp);
+
+ /*
+ * RAID5 subrequests are queued on a worklist
+ * and picked up from the worker thread. This
+ * ensures correct order.
+ */
+			} else {
+				mtx_lock(&p->worklist_mtx);
+				TAILQ_INSERT_TAIL(&p->worklist, wp, list);
+				mtx_unlock(&p->worklist_mtx);
+				/*
+				 * Wake the worker on the plex itself, not on
+				 * the stack slot holding the pointer.
+				 */
+				wakeup(p);
+				rcount = wp->length;
+			}
+
+ boff += rcount;
+ addr += rcount;
+ }
+ return;
+
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ return;
+ }
+}
+
+static int
+gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp, *cp2;
+ int error;
+
+ gp = pp->geom;
+
+ error = ENXIO;
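+	/*
+	 * Pass the access request down to all of our consumers; if any of
+	 * them fails, roll back the counts granted so far so the request
+	 * stays all-or-nothing.
+	 */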
+ LIST_FOREACH(cp, &gp->consumer, consumer) {
+ error = g_access(cp, dr, dw, de);
+ if (error) {
+ LIST_FOREACH(cp2, &gp->consumer, consumer) {
+ if (cp == cp2)
+ break;
+ g_access(cp2, -dr, -dw, -de);
+ }
+ return (error);
+ }
+ }
+ return (error);
+}
+
+static struct g_geom *
+gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct g_provider *pp2;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct gv_softc *sc;
+
+ g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
+ g_topology_assert();
+
+ /* We only want to attach to subdisks. */
+ if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
+ return (NULL);
+
+ /* Find the VINUM class and its associated geom. */
+ gp = find_vinum_geom();
+ if (gp == NULL)
+ return (NULL);
+ sc = gp->softc;
+ KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));
+
+ /* Find out which subdisk the offered provider corresponds to. */
+ s = pp->private;
+ KASSERT(s != NULL, ("gv_plex_taste: NULL s"));
+
+ /* Now find the correct plex where this subdisk belongs to. */
+ p = gv_find_plex(sc, s->plex);
+ KASSERT(p != NULL, ("gv_plex_taste: NULL p"));
+
+ /*
+ * Add this subdisk to this plex. Since we trust the on-disk
+ * configuration, we don't check the given value (should we?).
+ * XXX: shouldn't be done here
+ */
+ gv_sd_to_plex(p, s, 0);
+
+ /* Now check if there's already a geom for this plex. */
+ gp = p->geom;
+
+ /* Yes, there is already a geom, so we just add the consumer. */
+ if (gp != NULL) {
+ /* Need to attach a new consumer to this subdisk. */
+ cp = g_new_consumer(gp);
+ g_attach(cp, pp);
+ s->consumer = cp;
+
+ /* Adjust the size of the providers this plex has. */
+ LIST_FOREACH(pp2, &gp->provider, provider)
+ pp2->mediasize = p->size;
+
+ return (NULL);
+
+ /* We need to create a new geom. */
+ } else {
+ gp = g_new_geomf(mp, "%s", p->name);
+ gp->start = gv_plex_start;
+ gp->orphan = gv_plex_orphan;
+ gp->access = gv_plex_access;
+ gp->softc = p;
+ p->geom = gp;
+
+ /* RAID5 plexes need a 'worker' thread, where IO is handled. */
+ if (p->org == GV_PLEX_RAID5) {
+ TAILQ_INIT(&p->worklist);
+ mtx_init(&p->worklist_mtx, "gvinum_worklist", NULL,
+ MTX_DEF);
+ p->flags &= ~GV_PLEX_THREAD_DIE;
+ kthread_create(gv_raid5_worker, gp, NULL, 0, 0,
+ "gv_raid5");
+ p->flags |= GV_PLEX_THREAD_ACTIVE;
+ }
+
+ /* Attach a consumer to this provider. */
+ cp = g_new_consumer(gp);
+ g_attach(cp, pp);
+ s->consumer = cp;
+
+ /* Create a provider for the outside world. */
+ pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
+ pp2->mediasize = p->size;
+ pp2->sectorsize = pp->sectorsize;
+ p->provider = pp2;
+ g_error_provider(pp2, 0);
+ return (gp);
+ }
+}
+
+static int
+gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
+ struct g_geom *gp)
+{
+ struct gv_plex *p;
+
+ g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
+ g_topology_assert();
+
+ p = gp->softc;
+
+ KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));
+
+ /*
+ * If this is a RAID5 plex, check if its worker thread is still active
+ * and signal it to self destruct.
+ */
+	gv_kill_thread(p);
+	/* The worklist mutex is only initialized on RAID5 plexes. */
+	if (p->org == GV_PLEX_RAID5)
+		mtx_destroy(&p->worklist_mtx);
+ /* g_free(sc); */
+ g_wither_geom(gp, ENXIO);
+ return (0);
+}
+
+#define VINUMPLEX_CLASS_NAME "VINUMPLEX"
+
+static struct g_class g_vinum_plex_class = {
+ .name = VINUMPLEX_CLASS_NAME,
+ .taste = gv_plex_taste,
+ .destroy_geom = gv_plex_destroy_geom,
+};
+
+DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);