summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormjacob <mjacob@FreeBSD.org>2007-02-27 04:01:58 +0000
committermjacob <mjacob@FreeBSD.org>2007-02-27 04:01:58 +0000
commit05b92097cb751ac3e6ba126eed272f954f9c7210 (patch)
tree3740487c6f9b60010343610543a779a7e9a63d31
parent502eb2ec0e1f3a8e4b78b14ccfaad6822c33bbb6 (diff)
downloadFreeBSD-src-05b92097cb751ac3e6ba126eed272f954f9c7210.zip
FreeBSD-src-05b92097cb751ac3e6ba126eed272f954f9c7210.tar.gz
First cut at GEOM based multipath. This is an active/passive{/passive...}
arrangement that has no intrinsic internal knowledge of whether devices it is given are truly multipath devices. As such, this is a simplistic approach, but still a useful one. The basic approach is to (at present; this will change soon) use camcontrol to find likely identical devices and label the trailing sector of the first one. This label contains both a full UUID and a name. The name is what is presented in /dev/multipath, but the UUID is used as a true distinguisher at g_taste time, thus making sure we don't have chaos on a shared SAN where everyone names their data multipath as "Fred". The first of N identical devices (and N *may* be 1!) becomes the active path until a BIO request is failed with EIO or ENXIO. When this occurs, the active disk is ripped away and the next in a list is picked to (retry and) continue with. During g_taste events new disks that meet the match criteria for existing multipath geoms get added to the tail end of the list. Thus, this active/passive setup actually does work for devices which go away and come back, as do (now) mpt(4) and isp(4) SAN based disks. There is still a lot to do to improve this, like about 5 of the 12 recommendations I've received about it, but it's been functional enough for a while that it deserves a broader test base. Reviewed by: pjd Sponsored by: IronPort Systems MFC: 2 months
-rw-r--r--etc/mtree/BSD.include.dist2
-rw-r--r--sbin/geom/class/Makefile1
-rw-r--r--sbin/geom/class/multipath/Makefile10
-rw-r--r--sbin/geom/class/multipath/geom_multipath.c230
-rw-r--r--sys/conf/NOTES1
-rw-r--r--sys/conf/files1
-rw-r--r--sys/geom/multipath/g_multipath.c768
-rw-r--r--sys/geom/multipath/g_multipath.h99
-rw-r--r--sys/modules/geom/Makefile1
-rw-r--r--sys/modules/geom/geom_multipath/Makefile8
10 files changed, 1121 insertions, 0 deletions
diff --git a/etc/mtree/BSD.include.dist b/etc/mtree/BSD.include.dist
index d1b2558..0ec9f8b 100644
--- a/etc/mtree/BSD.include.dist
+++ b/etc/mtree/BSD.include.dist
@@ -114,6 +114,8 @@
..
mirror
..
+ multipath
+ ..
nop
..
raid3
diff --git a/sbin/geom/class/Makefile b/sbin/geom/class/Makefile
index 7adfe5e..f52695b 100644
--- a/sbin/geom/class/Makefile
+++ b/sbin/geom/class/Makefile
@@ -10,6 +10,7 @@ SUBDIR+=eli
SUBDIR+=journal
SUBDIR+=label
SUBDIR+=mirror
+SUBDIR+=multipath
SUBDIR+=nop
SUBDIR+=raid3
SUBDIR+=shsec
diff --git a/sbin/geom/class/multipath/Makefile b/sbin/geom/class/multipath/Makefile
new file mode 100644
index 0000000..7b418e1
--- /dev/null
+++ b/sbin/geom/class/multipath/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../misc
+CLASS= multipath
+NO_MAN=true
+
+
+.include <bsd.lib.mk>
+
+CFLAGS+= -I${.CURDIR}/../../../../sys
diff --git a/sbin/geom/class/multipath/geom_multipath.c b/sbin/geom/class/multipath/geom_multipath.c
new file mode 100644
index 0000000..d729af7
--- /dev/null
+++ b/sbin/geom/class/multipath/geom_multipath.c
@@ -0,0 +1,230 @@
+/*-
+ * Copyright (c) 2006 Matthew Jacob <mjacob@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <uuid.h>
+#include <geom/multipath/g_multipath.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+/*
+ * Version words exported by this userland class module, initialised from
+ * G_LIB_VERSION and G_MULTIPATH_VERSION.
+ * NOTE(review): presumably read by the geom(8) front end when it loads this
+ * class library -- confirm against core/geom.h.
+ */
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_MULTIPATH_VERSION;
+
+/* Verb handlers; mp_main dispatches to mp_label / mp_clear by verb name. */
+static void mp_main(struct gctl_req *, unsigned int);
+static void mp_label(struct gctl_req *);
+static void mp_clear(struct gctl_req *);
+
+/*
+ * Verb table for the multipath class.  Both "label" and "clear" are routed
+ * through mp_main; only "label" carries G_FLAG_LOADKLD.  The final string
+ * of each entry is the usage text.
+ */
+struct g_command class_commands[] = {
+ {
+ "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main, G_NULL_OPTS,
+ "[-v] name prov ..."
+ },
+ {
+ "clear", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS,
+ "[-v] prov ..."
+ },
+ G_CMD_SENTINEL
+};
+
+/*
+ * Common entry point for every verb in class_commands: look up the "verb"
+ * request parameter and hand the request to the matching handler.  A
+ * missing or unrecognised verb is reported through gctl_error().
+ */
+static void
+mp_main(struct gctl_req *req, unsigned int flags __unused)
+{
+	const char *verb = gctl_get_ascii(req, "verb");
+
+	if (verb == NULL) {
+		gctl_error(req, "No '%s' argument.", "verb");
+		return;
+	}
+	if (strcmp(verb, "label") == 0) {
+		mp_label(req);
+		return;
+	}
+	if (strcmp(verb, "clear") == 0) {
+		mp_clear(req);
+		return;
+	}
+	gctl_error(req, "Unknown command: %s.", verb);
+}
+
+/*
+ * "label" verb: arg0 is the multipath name, arg1..arg(nargs-1) are the
+ * providers that are paths to the same device.
+ *
+ * Every provider must report the same media size and sector size.  A
+ * metadata sector (magic, version, freshly generated UUID, name, size and
+ * sector size) is then built and stored on each provider with
+ * g_metadata_store(), after first clearing whatever metadata was there.
+ * If storing fails on any provider, the metadata is cleared again on all
+ * of them so that no partially labelled set is left behind.
+ *
+ * Errors are reported through gctl_error() (or stderr for the store
+ * failure); the function returns nothing.
+ */
+static void
+mp_label(struct gctl_req *req)
+{
+	struct g_multipath_metadata md;
+	off_t disksiz = 0, msize;
+	uint8_t *sector = NULL;
+	char *ptr;
+	uuid_t uuid;
+	uint32_t secsize = 0, ssize, status;
+	const char *name;
+	int error, i, nargs;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 2) {
+		gctl_error(req, "wrong number of arguments.");
+		return;
+	}
+
+	/*
+	 * First, check each provider to make sure it's the same size.
+	 * This also gets us our size and sectorsize for the metadata.
+	 */
+	for (i = 1; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		msize = g_get_mediasize(name);
+		ssize = g_get_sectorsize(name);
+		if (msize == 0 || ssize == 0) {
+			gctl_error(req, "cannot get information about %s: %s.",
+			    name, strerror(errno));
+			return;
+		}
+		if (i == 1) {
+			secsize = ssize;
+			disksiz = msize;
+			continue;
+		}
+		if (secsize != ssize) {
+			gctl_error(req, "%s sector size %u different.",
+			    name, ssize);
+			return;
+		}
+		if (disksiz != msize) {
+			/* %ju takes an unsigned argument. */
+			gctl_error(req, "%s media size %ju different.",
+			    name, (uintmax_t)msize);
+			return;
+		}
+	}
+
+	/*
+	 * Allocate a zeroed sector to write as metadata.
+	 */
+	sector = calloc(1, secsize);
+	if (sector == NULL) {
+		gctl_error(req, "unable to allocate metadata buffer");
+		return;
+	}
+
+	/*
+	 * Generate metadata.  The structure is zeroed first: strlcpy() does
+	 * not pad, and the encoder copies the full fixed-width fields, so
+	 * without this the bytes after each terminator would be whatever
+	 * happened to be on the stack and would end up on disk.
+	 */
+	memset(&md, 0, sizeof(md));
+	strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic));
+	md.md_version = G_MULTIPATH_VERSION;
+	name = gctl_get_ascii(req, "arg0");
+	strlcpy(md.md_name, name, sizeof(md.md_name));
+	md.md_size = disksiz;
+	md.md_sectorsize = secsize;
+	uuid_create(&uuid, &status);
+	if (status != uuid_s_ok) {
+		gctl_error(req, "cannot create a UUID.");
+		goto out;
+	}
+	uuid_to_string(&uuid, &ptr, &status);
+	if (status != uuid_s_ok) {
+		gctl_error(req, "cannot stringify a UUID.");
+		goto out;
+	}
+	strlcpy(md.md_uuid, ptr, sizeof (md.md_uuid));
+	free(ptr);
+
+	/*
+	 * Clear last sector first for each provider to spoil anything extant
+	 */
+	for (i = 1; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		error = g_metadata_clear(name, NULL);
+		if (error != 0) {
+			gctl_error(req, "cannot clear metadata on %s: %s.",
+			    name, strerror(error));
+			goto out;
+		}
+	}
+
+	multipath_metadata_encode(&md, sector);
+
+	/*
+	 * Ok, store metadata.
+	 */
+	for (i = 1; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		error = g_metadata_store(name, sector, secsize);
+		if (error != 0) {
+			fprintf(stderr, "Can't store metadata on %s: %s.\n",
+			    name, strerror(error));
+			goto fail;
+		}
+	}
+	goto out;
+
+fail:
+	/*
+	 * Storing failed part-way through: wipe the metadata sector on every
+	 * provider again so no subset of them is left carrying the new label.
+	 * Keep going past individual failures to clean up as much as possible.
+	 */
+	for (i = 1; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		error = g_metadata_clear(name, NULL);
+		if (error != 0) {
+			gctl_error(req, "cannot clear metadata on %s: %s.",
+			    name, strerror(error));
+		}
+	}
+out:
+	/* Single release point for the metadata sector buffer. */
+	free(sector);
+}
+
+/*
+ * "clear" verb: remove multipath metadata from every provider named in
+ * arg0..arg(nargs-1).  Only metadata carrying G_MULTIPATH_MAGIC is passed
+ * to g_metadata_clear().  A failure on one provider is reported and the
+ * remaining providers are still processed.
+ */
+static void
+mp_clear(struct gctl_req *req)
+{
+	const char *prov;
+	int idx, count, rc;
+
+	count = gctl_get_int(req, "nargs");
+	if (count < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (idx = 0; idx < count; idx++) {
+		prov = gctl_get_ascii(req, "arg%d", idx);
+		rc = g_metadata_clear(prov, G_MULTIPATH_MAGIC);
+		if (rc == 0) {
+			continue;
+		}
+		fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+		    prov, strerror(rc));
+		gctl_error(req, "Not fully done.");
+	}
+}
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 3174ad4..e309655 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -147,6 +147,7 @@ options GEOM_JOURNAL # Journaling.
options GEOM_LABEL # Providers labelization.
options GEOM_MBR # DOS/MBR partitioning
options GEOM_MIRROR # Disk mirroring.
+options GEOM_MULTIPATH # Disk multipath
options GEOM_NOP # Test class.
options GEOM_PART_APM # Apple partitioning
options GEOM_PART_GPT # GPT partitioning
diff --git a/sys/conf/files b/sys/conf/files
index 9881214..d8d6c80 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1210,6 +1210,7 @@ geom/label/g_label_reiserfs.c optional geom_label
geom/label/g_label_ufs.c optional geom_label
geom/mirror/g_mirror.c optional geom_mirror
geom/mirror/g_mirror_ctl.c optional geom_mirror
+geom/multipath/g_multipath.c optional geom_multipath
geom/nop/g_nop.c optional geom_nop
geom/part/g_part.c standard
geom/part/g_part_if.m standard
diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c
new file mode 100644
index 0000000..0398628
--- /dev/null
+++ b/sys/geom/multipath/g_multipath.c
@@ -0,0 +1,768 @@
+/*-
+ * Copyright (c) 2006-2007 Matthew Jacob <mjacob@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Based upon work by Pawel Jakub Dawidek <pjd@FreeBSD.org> for all of the
+ * fine geom examples, and by Poul Henning Kamp <phk@FreeBSD.org> for GEOM
+ * itself, all of which is most gratefully acknowledged.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/bio.h>
+#include <sys/sysctl.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <geom/geom.h>
+#include <geom/multipath/g_multipath.h>
+
+
+/* kern.geom.multipath sysctl node and its "debug" knob. */
+SYSCTL_DECL(_kern_geom);
+SYSCTL_NODE(_kern_geom, OID_AUTO, multipath, CTLFLAG_RW, 0,
+ "GEOM_MULTIPATH tunables");
+/* Non-zero enables the extra printf()s in g_multipath_taste(). */
+static u_int g_multipath_debug = 0;
+SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, debug, CTLFLAG_RW,
+ &g_multipath_debug, 0, "Debug level");
+
+/*
+ * State of the error-handling kernel thread (g_multipath_kt).  The address
+ * of this variable doubles as the sleep/wakeup channel between the thread,
+ * g_multipath_done() and g_multipath_fini().
+ */
+static enum {
+ GKT_NIL,
+ GKT_RUN,
+ GKT_DIE
+} g_multipath_kt_state;
+/*
+ * Queue of bios that completed with ENXIO/EIO: filled by g_multipath_done(),
+ * drained by g_multipath_kt().  gmtbq_mtx protects the queue.
+ */
+static struct bio_queue_head gmtbq;
+static struct mtx gmtbq_mtx;
+
+static void g_multipath_orphan(struct g_consumer *);
+static void g_multipath_start(struct bio *);
+static void g_multipath_done(struct bio *);
+static void g_multipath_done_error(struct bio *);
+static void g_multipath_kt(void *);
+
+static int g_multipath_destroy(struct g_geom *);
+static int
+g_multipath_destroy_geom(struct gctl_req *, struct g_class *, struct g_geom *);
+
+static g_taste_t g_multipath_taste;
+static g_ctl_req_t g_multipath_config;
+static g_init_t g_multipath_init;
+static g_fini_t g_multipath_fini;
+
+/* GEOM class descriptor wiring the methods above into the framework. */
+struct g_class g_multipath_class = {
+ .name = G_MULTIPATH_CLASS_NAME,
+ .version = G_VERSION,
+ .ctlreq = g_multipath_config,
+ .taste = g_multipath_taste,
+ .destroy_geom = g_multipath_destroy_geom,
+ .init = g_multipath_init,
+ .fini = g_multipath_fini
+};
+
+/*
+ * Flag bits kept in g_consumer.index:
+ * MP_BAD    - set by g_multipath_done_error() on a consumer whose I/O failed;
+ *             such consumers are skipped when choosing a new active path.
+ * MP_POSTED - set once teardown of the consumer via g_mpd() has been
+ *             posted or started, so it is not initiated again from the
+ *             orphan method.
+ */
+#define MP_BAD 0x1
+#define MP_POSTED 0x2
+
+/*
+ * Tear down one consumer: drop any access counts it still holds, detach it
+ * from its provider (if attached) and destroy it.
+ *
+ * arg is the struct g_consumer to remove; flags is unused.  Called directly
+ * from g_multipath_orphan() and as a posted event from
+ * g_multipath_done_error().  Requires the topology lock.
+ */
+static void
+g_mpd(void *arg, int flags __unused)
+{
+ struct g_consumer *cp;
+
+ g_topology_assert();
+ cp = arg;
+ /* Release whatever read/write/exclusive counts are still open. */
+ if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
+ g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+ }
+ if (cp->provider) {
+ printf("GEOM_MULTIPATH: %s removed from %s\n",
+ cp->provider->name, cp->geom->name);
+ g_detach(cp);
+ }
+ g_destroy_consumer(cp);
+}
+
+/*
+ * GEOM orphan method: the provider under cp has gone away.  Unless a
+ * teardown has already been initiated for this consumer (MP_POSTED), mark
+ * it and remove the consumer immediately through g_mpd().
+ */
+static void
+g_multipath_orphan(struct g_consumer *cp)
+{
+	if ((cp->index & MP_POSTED) != 0) {
+		return;
+	}
+	cp->index |= MP_POSTED;
+	printf("GEOM_MULTIPATH: %s orphaned in %s\n",
+	    cp->provider->name, cp->geom->name);
+	g_mpd(cp, 0);
+}
+
+/*
+ * GEOM start method: forward bp to the currently active path.
+ *
+ * The request is cloned and the clone is issued on sc->cp_active with
+ * g_multipath_done() as its completion handler.  If there is no active
+ * path the request fails with ENXIO; if the clone cannot be allocated it
+ * fails with ENOMEM.
+ */
+static void
+g_multipath_start(struct bio *bp)
+{
+	struct g_multipath_softc *softc;
+	struct g_consumer *active;
+	struct bio *clone;
+
+	softc = bp->bio_to->geom->softc;
+	KASSERT(softc != NULL, ("NULL sc"));
+
+	active = softc->cp_active;
+	if (active == NULL) {
+		g_io_deliver(bp, ENXIO);
+		return;
+	}
+
+	clone = g_clone_bio(bp);
+	if (clone == NULL) {
+		g_io_deliver(bp, ENOMEM);
+		return;
+	}
+	clone->bio_done = g_multipath_done;
+	g_io_request(clone, active);
+}
+
+/*
+ * Completion handler for cloned bios.  Anything other than ENXIO/EIO is
+ * finished through g_std_done().  A bio that failed with ENXIO or EIO is
+ * appended to gmtbq and the error-handling thread is woken so the path
+ * switch happens in thread context.
+ */
+static void
+g_multipath_done(struct bio *bp)
+{
+	if (bp->bio_error != ENXIO && bp->bio_error != EIO) {
+		g_std_done(bp);
+		return;
+	}
+	mtx_lock(&gmtbq_mtx);
+	bioq_insert_tail(&gmtbq, bp);
+	wakeup(&g_multipath_kt_state);
+	mtx_unlock(&gmtbq_mtx);
+}
+
+/*
+ * Handle a cloned bio that failed with ENXIO/EIO (called from the
+ * g_multipath_kt thread, one bio at a time).
+ *
+ * The consumer the bio was issued on is marked MP_BAD and, if it has no
+ * I/O outstanding, its removal is posted as a GEOM event.  If that consumer
+ * was the active path, the first consumer not marked MP_BAD becomes the new
+ * active path.  Finally the parent request is either restarted on the new
+ * active path or, when no usable path remains, completed with the error
+ * through g_std_done().
+ */
+static void
+g_multipath_done_error(struct bio *bp)
+{
+	struct bio *pbp;
+	struct g_geom *gp;
+	struct g_multipath_softc *sc;
+	struct g_consumer *cp;
+	struct g_provider *pp;
+	int can_retry;
+
+	/*
+	 * If we had a failure, we have to check first to see
+	 * whether the consumer it failed on was the currently
+	 * active consumer (i.e., this is the first in perhaps
+	 * a number of failures). If so, we then switch consumers
+	 * to the next available consumer.
+	 */
+
+	g_topology_lock();
+	pbp = bp->bio_parent;
+	gp = pbp->bio_to->geom;
+	sc = gp->softc;
+	cp = bp->bio_from;
+	pp = cp->provider;
+
+	cp->index |= MP_BAD;
+	/* Only schedule removal once the failed path is idle. */
+	if (cp->nend == cp->nstart && pp->nend == pp->nstart) {
+		cp->index |= MP_POSTED;
+		g_post_event(g_mpd, cp, M_NOWAIT, NULL);
+	}
+	if (cp == sc->cp_active) {
+		struct g_consumer *lcp;
+		printf("GEOM_MULTIPATH: %s failed in %s\n",
+		    pp->name, sc->sc_name);
+		sc->cp_active = NULL;
+		LIST_FOREACH(lcp, &gp->consumer, consumer) {
+			if ((lcp->index & MP_BAD) == 0) {
+				sc->cp_active = lcp;
+				break;
+			}
+		}
+		/*
+		 * Do not return from here: the topology lock must still be
+		 * released below, and the failed bio must still be completed
+		 * so the parent request is not left hanging.
+		 */
+		if (sc->cp_active == NULL) {
+			printf("GEOM_MULTIPATH: out of providers for %s\n",
+			    sc->sc_name);
+		} else {
+			printf("GEOM_MULTIPATH: %s now active path in %s\n",
+			    sc->cp_active->provider->name, sc->sc_name);
+		}
+	}
+	/* Decide about the retry while the topology is still locked. */
+	can_retry = (sc->cp_active != NULL);
+	g_topology_unlock();
+
+	/*
+	 * If we can fruitfully restart the I/O, do so.
+	 */
+	if (can_retry) {
+		g_destroy_bio(bp);
+		pbp->bio_children--;
+		g_multipath_start(pbp);
+	} else {
+		g_std_done(bp);
+	}
+}
+
+/*
+ * Body of the error-handling kernel thread.
+ *
+ * While the state is GKT_RUN it drains gmtbq, calling
+ * g_multipath_done_error() on each queued bio with gmtbq_mtx dropped, then
+ * sleeps on g_multipath_kt_state until woken or until hz / 10 ticks pass.
+ * When the state changes (g_multipath_fini() sets GKT_DIE) it wakes any
+ * waiter on the same channel and exits.
+ */
+static void
+g_multipath_kt(void *arg)
+{
+ g_multipath_kt_state = GKT_RUN;
+ mtx_lock(&gmtbq_mtx);
+ while (g_multipath_kt_state == GKT_RUN) {
+ for (;;) {
+ struct bio *bp;
+ bp = bioq_takefirst(&gmtbq);
+ if (bp == NULL) {
+ break;
+ }
+ /* Drop the queue lock while the bio is being processed. */
+ mtx_unlock(&gmtbq_mtx);
+ g_multipath_done_error(bp);
+ mtx_lock(&gmtbq_mtx);
+ }
+ msleep(&g_multipath_kt_state, &gmtbq_mtx, PRIBIO,
+ "gkt:wait", hz / 10);
+ }
+ mtx_unlock(&gmtbq_mtx);
+ /* Let g_multipath_fini(), which sleeps on this channel, continue. */
+ wakeup(&g_multipath_kt_state);
+ kthread_exit(0);
+}
+
+
+/*
+ * GEOM access method: apply the access-count deltas (dr, dw, de) to every
+ * consumer of the geom.  If one consumer refuses, the deltas already
+ * applied to the consumers ahead of it in the list are reverted and the
+ * error from the refusing consumer is returned.  Returns 0 on success.
+ */
+static int
+g_multipath_access(struct g_provider *pp, int dr, int dw, int de)
+{
+	struct g_geom *geom = pp->geom;
+	struct g_consumer *cur, *failed;
+	int rc = 0;
+
+	LIST_FOREACH(cur, &geom->consumer, consumer) {
+		rc = g_access(cur, dr, dw, de);
+		if (rc != 0) {
+			break;
+		}
+	}
+	if (rc == 0) {
+		return (0);
+	}
+
+	/* cur is the consumer that refused; undo everything before it. */
+	failed = cur;
+	LIST_FOREACH(cur, &geom->consumer, consumer) {
+		if (cur == failed) {
+			break;
+		}
+		(void) g_access(cur, -dr, -dw, -de);
+	}
+	return (rc);
+}
+
+/*
+ * Create a new multipath geom and its "multipath/<name>" provider from the
+ * given metadata.
+ *
+ * Fails (returns NULL) if a geom of this class already carries the name in
+ * md->md_name or if any allocation fails.  On success the softc holds a
+ * copy of the UUID and name, and the provider is sized one sector smaller
+ * than md->md_size so the metadata sector is not exposed.  No consumers are
+ * attached here.  Requires the topology lock.
+ */
+static struct g_geom *
+g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md)
+{
+	struct g_multipath_softc *sc;
+	struct g_geom *gp;
+	struct g_provider *pp;
+
+	g_topology_assert();
+
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		if (strcmp(gp->name, md->md_name) == 0) {
+			printf("GEOM_MULTIPATH: name %s already exists\n",
+			    md->md_name);
+			return (NULL);
+		}
+	}
+
+	/*
+	 * The name may come straight from on-disk metadata, so it must be
+	 * passed as an argument and never as the format string itself.
+	 */
+	gp = g_new_geomf(mp, "%s", md->md_name);
+	if (gp == NULL) {
+		goto fail;
+	}
+
+	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
+	if (sc == NULL) {
+		goto fail;
+	}
+
+	gp->softc = sc;
+	gp->start = g_multipath_start;
+	gp->orphan = g_multipath_orphan;
+	gp->access = g_multipath_access;
+	memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid));
+	memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name));
+
+	pp = g_new_providerf(gp, "multipath/%s", md->md_name);
+	if (pp == NULL) {
+		goto fail;
+	}
+	/* limit the provider to not have it stomp on metadata */
+	pp->mediasize = md->md_size - md->md_sectorsize;
+	pp->sectorsize = md->md_sectorsize;
+	sc->pp = pp;
+	g_error_provider(pp, 0);
+	return (gp);
+fail:
+	if (gp != NULL) {
+		if (gp->softc != NULL) {
+			g_free(gp->softc);
+		}
+		g_destroy_geom(gp);
+	}
+	return (NULL);
+}
+
+/*
+ * Attach provider pp to multipath geom gp as an additional path.
+ *
+ * Returns EEXIST if pp is already attached to gp, ENOMEM if no consumer
+ * could be allocated, or the error from g_attach()/g_access().  The new
+ * consumer is opened with the same access counts as the consumer that was
+ * first in the list beforehand, and becomes the active path if the geom
+ * currently has none.  Requires the topology lock.
+ */
+static int
+g_multipath_add_disk(struct g_geom *gp, struct g_provider *pp)
+{
+	struct g_multipath_softc *sc;
+	struct g_consumer *cp, *nxtcp;
+	int error;
+
+	g_topology_assert();
+
+	sc = gp->softc;
+	KASSERT(sc, ("no softc"));
+
+	/*
+	 * Make sure that the passed provider isn't already attached
+	 */
+	LIST_FOREACH(cp, &gp->consumer, consumer) {
+		if (cp->provider == pp) {
+			break;
+		}
+	}
+	if (cp) {
+		printf("GEOM_MULTIPATH: provider %s already attached to %s\n",
+		    pp->name, gp->name);
+		return (EEXIST);
+	}
+	/* Remember an existing consumer before the new one is created. */
+	nxtcp = LIST_FIRST(&gp->consumer);
+	cp = g_new_consumer(gp);
+	if (cp == NULL) {
+		return (ENOMEM);
+	}
+	error = g_attach(cp, pp);
+	if (error != 0) {
+		/* Terminate the line so it does not run into the next message. */
+		printf("GEOM_MULTIPATH: cannot attach %s to %s\n",
+		    pp->name, sc->sc_name);
+		g_destroy_consumer(cp);
+		return (error);
+	}
+	cp->private = sc;
+	cp->index = 0;
+
+	/*
+	 * Set access permissions on new consumer to match other consumers
+	 */
+	if (nxtcp && (nxtcp->acr + nxtcp->acw + nxtcp->ace)) {
+		error = g_access(cp, nxtcp->acr, nxtcp->acw, nxtcp->ace);
+		if (error) {
+			printf("GEOM_MULTIPATH: cannot set access in "
+			    "attaching %s to %s/%s (%d)\n",
+			    pp->name, sc->sc_name, sc->sc_uuid, error);
+			g_detach(cp);
+			g_destroy_consumer(cp);
+			return (error);
+		}
+	}
+	printf("GEOM_MULTIPATH: adding %s to %s/%s\n",
+	    pp->name, sc->sc_name, sc->sc_uuid);
+	if (sc->cp_active == NULL) {
+		sc->cp_active = cp;
+		printf("GEOM_MULTIPATH: %s now active path in %s\n",
+		    pp->name, sc->sc_name);
+	}
+	return (0);
+}
+
+/*
+ * Destroy a multipath geom.  Returns ENXIO if the geom has no softc, EBUSY
+ * if its first provider still has any access count open, and 0 after the
+ * softc has been freed and the geom handed to g_wither_geom().  Requires
+ * the topology lock.
+ */
+static int
+g_multipath_destroy(struct g_geom *gp)
+{
+	struct g_provider *first;
+
+	g_topology_assert();
+	if (gp->softc == NULL) {
+		return (ENXIO);
+	}
+
+	first = LIST_FIRST(&gp->provider);
+	if (first != NULL) {
+		if (first->acr != 0 || first->acw != 0 || first->ace != 0) {
+			return (EBUSY);
+		}
+	}
+
+	printf("GEOM_MULTIPATH: destroying %s\n", gp->name);
+	g_free(gp->softc);
+	gp->softc = NULL;
+	g_wither_geom(gp, ENXIO);
+	return (0);
+}
+
+/*
+ * Class destroy_geom method: ignores the request and class arguments and
+ * returns whatever g_multipath_destroy() returns for gp.
+ */
+static int
+g_multipath_destroy_geom(struct gctl_req *req, struct g_class *mp,
+    struct g_geom *gp)
+{
+	int rc;
+
+	rc = g_multipath_destroy(gp);
+	return (rc);
+}
+
+/*
+ * Class init method: set up the failed-bio queue and its mutex, then start
+ * the error-handling thread.  The thread state is set to GKT_RUN only if
+ * the thread was created successfully.
+ */
+static void
+g_multipath_init(struct g_class *mp)
+{
+	int rc;
+
+	bioq_init(&gmtbq);
+	mtx_init(&gmtbq_mtx, "gmtbq", NULL, MTX_DEF);
+	rc = kthread_create(g_multipath_kt, mp, NULL, 0, 0, "g_mp_kt");
+	if (rc == 0) {
+		g_multipath_kt_state = GKT_RUN;
+	}
+}
+
+/*
+ * Class fini method: if the error-handling thread is running, ask it to
+ * exit (GKT_DIE), wake it, and sleep on the state channel until it issues
+ * its own wakeup on the way out.
+ */
+static void
+g_multipath_fini(struct g_class *mp)
+{
+	if (g_multipath_kt_state != GKT_RUN) {
+		return;
+	}
+	mtx_lock(&gmtbq_mtx);
+	g_multipath_kt_state = GKT_DIE;
+	wakeup(&g_multipath_kt_state);
+	msleep(&g_multipath_kt_state, &gmtbq_mtx, PRIBIO,
+	    "gmp:fini", 0);
+	mtx_unlock(&gmtbq_mtx);
+}
+
+/*
+ * Read the last sector of the provider attached to cp and decode it into
+ * *md.
+ *
+ * Returns 0 on success, or the error from g_access()/g_read_data().  No
+ * validation of the decoded contents is done here.  Must be called with
+ * the topology lock held; the lock is dropped around the read and
+ * re-acquired before returning.
+ */
+static int
+g_multipath_read_metadata(struct g_consumer *cp,
+ struct g_multipath_metadata *md)
+{
+ struct g_provider *pp;
+ u_char *buf;
+ int error;
+
+ g_topology_assert();
+ /* Open for reading for the duration of the read only. */
+ error = g_access(cp, 1, 0, 0);
+ if (error != 0) {
+ return (error);
+ }
+ pp = cp->provider;
+ /* The read is issued with the topology lock released. */
+ g_topology_unlock();
+ buf = g_read_data(cp, pp->mediasize - pp->sectorsize,
+ pp->sectorsize, &error);
+ g_topology_lock();
+ g_access(cp, -1, 0, 0);
+ if (buf == NULL) {
+ return (error);
+ }
+ multipath_metadata_decode(buf, md);
+ g_free(buf);
+ return (0);
+}
+
+/*
+ * Class taste method: decide whether provider pp carries multipath
+ * metadata and, if so, attach it to the matching multipath geom (creating
+ * the geom when this is the first path seen for that UUID).
+ *
+ * Returns the geom pp was added to, or NULL when pp has no valid multipath
+ * label, the label version differs from G_MULTIPATH_VERSION, or the geom
+ * could not be created / the disk could not be added.  Requires the
+ * topology lock.
+ */
+static struct g_geom *
+g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+	struct g_multipath_metadata md;
+	struct g_multipath_softc *sc;
+	struct g_consumer *cp;
+	struct g_geom *gp, *gp1;
+	int error, isnew;
+
+	g_topology_assert();
+
+	/* Throw-away geom and consumer used only to read the last sector. */
+	gp = g_new_geomf(mp, "multipath:taste");
+	gp->start = g_multipath_start;
+	gp->access = g_multipath_access;
+	gp->orphan = g_multipath_orphan;
+	cp = g_new_consumer(gp);
+	/* Only touch the provider through cp if the attach succeeded. */
+	error = g_attach(cp, pp);
+	if (error == 0) {
+		error = g_multipath_read_metadata(cp, &md);
+		g_detach(cp);
+	}
+	g_destroy_consumer(cp);
+	g_destroy_geom(gp);
+	if (error != 0) {
+		return (NULL);
+	}
+	gp = NULL;
+
+	/*
+	 * The sector contents are untrusted: bound the magic comparison to
+	 * the field width and force termination of the two string fields
+	 * that are printed and copied below.
+	 */
+	if (strncmp(md.md_magic, G_MULTIPATH_MAGIC,
+	    sizeof(md.md_magic)) != 0) {
+		if (g_multipath_debug) {
+			printf("%s is not MULTIPATH\n", pp->name);
+		}
+		return (NULL);
+	}
+	md.md_uuid[sizeof(md.md_uuid) - 1] = '\0';
+	md.md_name[sizeof(md.md_name) - 1] = '\0';
+	if (md.md_version != G_MULTIPATH_VERSION) {
+		printf("%s has version %d multipath id- this module is version "
+		    " %d: rejecting\n", pp->name, md.md_version,
+		    G_MULTIPATH_VERSION);
+		return (NULL);
+	}
+	if (g_multipath_debug) {
+		printf("MULTIPATH: %s/%s\n", md.md_name, md.md_uuid);
+	}
+
+	/*
+	 * Let's check if such a device already is present. We check against
+	 * uuid alone first because that's the true distinguisher. If that
+	 * passes, then we check for name conflicts. If there are conflicts,
+	 * modify the name.
+	 *
+	 * The whole purpose of this is to solve the problem that people don't
+	 * pick good unique names, but good unique names (like uuids) are a
+	 * pain to use. So, we allow people to build GEOMs with friendly names
+	 * and uuids, and modify the names in case there's a collision.
+	 */
+	sc = NULL;
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		sc = gp->softc;
+		if (sc == NULL) {
+			continue;
+		}
+		if (strncmp(md.md_uuid, sc->sc_uuid, sizeof(md.md_uuid)) == 0) {
+			break;
+		}
+	}
+
+	LIST_FOREACH(gp1, &mp->geom, geom) {
+		if (gp1 == gp) {
+			continue;
+		}
+		sc = gp1->softc;
+		if (sc == NULL) {
+			continue;
+		}
+		if (strncmp(md.md_name, sc->sc_name, sizeof(md.md_name)) == 0) {
+			break;
+		}
+	}
+
+	/*
+	 * If gp is NULL, we had no extant MULTIPATH geom with this uuid.
+	 *
+	 * If gp1 is *not* NULL, that means we have a MULTIPATH geom extant
+	 * with the same name (but a different UUID).
+	 *
+	 * If gp is NULL, then modify the name with a random number and
+	 * complain, but allow the creation of the geom to continue.
+	 *
+	 * If gp is *not* NULL, the disk is attached to that existing geom;
+	 * md.md_name is not consulted again on that path.
+	 */
+
+	if (gp1) {
+		sc = gp1->softc;
+		if (gp == NULL) {
+			char buf[16];
+			u_long rand = random();
+
+			snprintf(buf, sizeof (buf), "%s-%lu", md.md_name, rand);
+			printf("GEOM_MULTIPATH: geom %s/%s exists already\n",
+			    sc->sc_name, sc->sc_uuid);
+			printf("GEOM_MULTIPATH: %s will be (temporarily) %s\n",
+			    md.md_uuid, buf);
+			strlcpy(md.md_name, buf, sizeof (md.md_name));
+		} else {
+			strlcpy(md.md_name, sc->sc_name, sizeof (md.md_name));
+		}
+	}
+
+	if (gp == NULL) {
+		gp = g_multipath_create(mp, &md);
+		if (gp == NULL) {
+			printf("GEOM_MULTIPATH: cannot create geom %s/%s\n",
+			    md.md_name, md.md_uuid);
+			return (NULL);
+		}
+		isnew = 1;
+	} else {
+		isnew = 0;
+	}
+
+	sc = gp->softc;
+	KASSERT(sc != NULL, ("sc is NULL"));
+	error = g_multipath_add_disk(gp, pp);
+	if (error != 0) {
+		/* Only tear down a geom that was created just for this disk. */
+		if (isnew) {
+			g_multipath_destroy(gp);
+		}
+		return (NULL);
+	}
+	return (gp);
+}
+
+/*
+ * "create" control verb: build a multipath geom by hand from exactly four
+ * arguments -- arg0 the geom name, arg1 and arg2 the two providers (an
+ * optional "/dev/" prefix is stripped), arg3 a 36-character UUID string.
+ *
+ * The two providers must be distinct and agree on media size and sector
+ * size.  No on-disk metadata is read or written here; a metadata structure
+ * is filled in from the arguments only to drive g_multipath_create().
+ * Every failure is reported through gctl_error().  Requires the topology
+ * lock.
+ */
+static void
+g_multipath_ctl_create(struct gctl_req *req, struct g_class *mp)
+{
+	struct g_geom *gp;
+	struct g_provider *pp0, *pp1;
+	struct g_multipath_metadata md;
+	const char *name, *mpname, *uuid;
+	static const char devpf[6] = "/dev/";
+	int *nargs, error;
+
+	g_topology_assert();
+
+	mpname = gctl_get_asciiparam(req, "arg0");
+	if (mpname == NULL) {
+		gctl_error(req, "No 'arg0' argument");
+		return;
+	}
+	/*
+	 * The name has to fit, terminator included, in the fixed-width
+	 * md_name/sc_name fields; refuse it rather than truncate silently.
+	 */
+	if (strlen(mpname) >= sizeof(md.md_name)) {
+		gctl_error(req, "Name %s is too long", mpname);
+		return;
+	}
+
+	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
+	if (nargs == NULL) {
+		gctl_error(req, "No 'nargs' argument");
+		return;
+	}
+	if (*nargs != 4) {
+		gctl_error(req, "missing device or uuid arguments");
+		return;
+	}
+
+	name = gctl_get_asciiparam(req, "arg1");
+	if (name == NULL) {
+		gctl_error(req, "No 'arg1' argument");
+		return;
+	}
+	if (strncmp(name, devpf, 5) == 0) {
+		name += 5;
+	}
+	pp0 = g_provider_by_name(name);
+	if (pp0 == NULL) {
+		gctl_error(req, "Provider %s is invalid", name);
+		return;
+	}
+
+	name = gctl_get_asciiparam(req, "arg2");
+	if (name == NULL) {
+		gctl_error(req, "No 'arg2' argument");
+		return;
+	}
+	if (strncmp(name, devpf, 5) == 0) {
+		name += 5;
+	}
+	pp1 = g_provider_by_name(name);
+	if (pp1 == NULL) {
+		gctl_error(req, "Provider %s is invalid", name);
+		return;
+	}
+
+	uuid = gctl_get_asciiparam(req, "arg3");
+	if (uuid == NULL) {
+		gctl_error(req, "No uuid argument");
+		return;
+	}
+	if (strlen(uuid) != 36) {
+		gctl_error(req, "Malformed uuid argument");
+		return;
+	}
+
+	/*
+	 * Check to make sure parameters from the two providers are the same
+	 */
+	if (pp0 == pp1) {
+		gctl_error(req, "providers %s and %s are the same",
+		    pp0->name, pp1->name);
+		return;
+	}
+	if (pp0->mediasize != pp1->mediasize) {
+		gctl_error(req, "Provider %s is %jd; Provider %s is %jd",
+		    pp0->name, (intmax_t) pp0->mediasize,
+		    pp1->name, (intmax_t) pp1->mediasize);
+		return;
+	}
+	if (pp0->sectorsize != pp1->sectorsize) {
+		gctl_error(req, "Provider %s has sectorsize %u; Provider %s "
+		    "has sectorsize %u", pp0->name, pp0->sectorsize,
+		    pp1->name, pp1->sectorsize);
+		return;
+	}
+
+	/*
+	 * cons up enough of a metadata structure to use.  strlcpy() always
+	 * terminates the destination, unlike strncpy().
+	 */
+	memset(&md, 0, sizeof(md));
+	md.md_size = pp0->mediasize;
+	md.md_sectorsize = pp0->sectorsize;
+	strlcpy(md.md_name, mpname, sizeof (md.md_name));
+	strlcpy(md.md_uuid, uuid, sizeof (md.md_uuid));
+
+	gp = g_multipath_create(mp, &md);
+	if (gp == NULL) {
+		gctl_error(req, "cannot create geom %s", mpname);
+		return;
+	}
+	error = g_multipath_add_disk(gp, pp0);
+	if (error) {
+		gctl_error(req, "cannot add %s to %s (err=%d)",
+		    pp0->name, mpname, error);
+		g_multipath_destroy(gp);
+		return;
+	}
+	error = g_multipath_add_disk(gp, pp1);
+	if (error) {
+		gctl_error(req, "cannot add %s to %s (err=%d)",
+		    pp1->name, mpname, error);
+		g_multipath_destroy(gp);
+		return;
+	}
+}
+
+/*
+ * Return the geom of class mp whose name equals `name`, or NULL if the
+ * class has no such geom.
+ */
+static struct g_geom *
+g_multipath_find_geom(struct g_class *mp, const char *name)
+{
+	struct g_geom *cand;
+
+	LIST_FOREACH(cand, &mp->geom, geom) {
+		if (strcmp(cand->name, name) != 0) {
+			continue;
+		}
+		break;
+	}
+	/* LIST_FOREACH leaves cand NULL when the list was exhausted. */
+	return (cand);
+}
+
+/*
+ * "destroy" control verb: look up the geom named by arg0 and destroy it.
+ * A missing argument, an unknown name, or a failure from
+ * g_multipath_destroy() is reported through gctl_error().  Requires the
+ * topology lock.
+ */
+static void
+g_multipath_ctl_destroy(struct gctl_req *req, struct g_class *mp)
+{
+	struct g_geom *victim;
+	const char *name;
+	int rc;
+
+	g_topology_assert();
+
+	name = gctl_get_asciiparam(req, "arg0");
+	if (name == NULL) {
+		gctl_error(req, "No 'arg0' argument");
+		return;
+	}
+
+	victim = g_multipath_find_geom(mp, name);
+	if (victim == NULL) {
+		gctl_error(req, "Device %s is invalid", name);
+		return;
+	}
+
+	rc = g_multipath_destroy(victim);
+	if (rc == 0) {
+		return;
+	}
+	gctl_error(req, "failed to destroy %s (err=%d)", name, rc);
+}
+
+/*
+ * Class ctlreq method: check that the request carries a "version"
+ * parameter equal to G_MULTIPATH_VERSION, then dispatch the "create" and
+ * "destroy" verbs.  Anything else is reported through gctl_error().
+ */
+static void
+g_multipath_config(struct gctl_req *req, struct g_class *mp, const char *verb)
+{
+	uint32_t *uvers;
+
+	g_topology_assert();
+	uvers = gctl_get_paraml(req, "version", sizeof(*uvers));
+	if (uvers == NULL) {
+		gctl_error(req, "No 'version' argument");
+		return;
+	}
+	if (*uvers != G_MULTIPATH_VERSION) {
+		gctl_error(req, "Userland and kernel parts are out of sync");
+		return;
+	}
+	if (strcmp(verb, "create") == 0) {
+		g_multipath_ctl_create(req, mp);
+		return;
+	}
+	if (strcmp(verb, "destroy") == 0) {
+		g_multipath_ctl_destroy(req, mp);
+		return;
+	}
+	gctl_error(req, "Unknown verb %s", verb);
+}
+DECLARE_GEOM_CLASS(g_multipath_class, g_multipath);
diff --git a/sys/geom/multipath/g_multipath.h b/sys/geom/multipath/g_multipath.h
new file mode 100644
index 0000000..22d6157
--- /dev/null
+++ b/sys/geom/multipath/g_multipath.h
@@ -0,0 +1,99 @@
+/*-
+ * Copyright (c) 2006-2007 Matthew Jacob <mjacob@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/*
+ * Based upon work by Pawel Jakub Dawidek <pjd@FreeBSD.org> for all of the
+ * fine geom examples, and by Poul Henning Kamp <phk@FreeBSD.org> for GEOM
+ * itself, all of which is most gratefully acknowledged.
+ */
+
+#ifndef _G_MULTIPATH_H_
+#define _G_MULTIPATH_H_
+
+#define G_MULTIPATH_CLASS_NAME "MULTIPATH"
+#define G_MULTIPATH_VERSION 1
+#define G_MULTIPATH_MAGIC "GEOM::MULTIPATH"
+
+#include <sys/endian.h>
+
+#ifdef _KERNEL
+
+/*
+ * Per-geom state for a multipath device (kernel only).
+ * sc_name and sc_uuid have the same widths as md_name and md_uuid in
+ * struct g_multipath_metadata below.
+ */
+struct g_multipath_softc {
+ struct g_provider * pp; /* provider exported by this geom */
+ struct g_consumer * cp_active; /* path I/O is currently sent to, or NULL */
+ char sc_name[16]; /* friendly name */
+ char sc_uuid[40]; /* UUID string identifying the device */
+};
+#endif /* _KERNEL */
+
+/*
+ * In-memory form of the multipath label.  multipath_metadata_encode() and
+ * multipath_metadata_decode() convert it to and from the on-disk layout:
+ * the three character arrays at full width in this order, followed by the
+ * integers in little-endian byte order.
+ */
+struct g_multipath_metadata {
+ char md_magic[16]; /* Magic Value */
+ char md_uuid[40]; /* more magic */
+ char md_name[16]; /* a friendly name */
+ uint32_t md_version; /* version */
+ uint32_t md_sectorsize; /* sectorsize of provider */
+ uint64_t md_size; /* absolute size of provider */
+};
+
+static __inline void
+multipath_metadata_encode(const struct g_multipath_metadata *, u_char *);
+
+static __inline void
+multipath_metadata_decode(u_char *, struct g_multipath_metadata *);
+
+/*
+ * Serialise *md into the buffer at data: magic, uuid and name are copied
+ * at their full field widths, then version and sectorsize are written as
+ * 32-bit and size as 64-bit little-endian values.  The caller supplies a
+ * buffer large enough for all of the fields.
+ */
+static __inline void
+multipath_metadata_encode(const struct g_multipath_metadata *md, u_char *data)
+{
+	u_char *out = data;
+
+	/* Fixed-width character fields. */
+	bcopy(md->md_magic, out, sizeof(md->md_magic));
+	out += sizeof(md->md_magic);
+	bcopy(md->md_uuid, out, sizeof(md->md_uuid));
+	out += sizeof(md->md_uuid);
+	bcopy(md->md_name, out, sizeof(md->md_name));
+	out += sizeof(md->md_name);
+
+	/* Little-endian integers. */
+	le32enc(out, md->md_version);
+	out += sizeof(md->md_version);
+	le32enc(out, md->md_sectorsize);
+	out += sizeof(md->md_sectorsize);
+	le64enc(out, md->md_size);
+}
+
+/*
+ * Inverse of multipath_metadata_encode(): fill *md from the buffer at
+ * data.  The character fields are copied verbatim at full width (no
+ * terminator is added) and the integers are read as little-endian.
+ */
+static __inline void
+multipath_metadata_decode(u_char *data, struct g_multipath_metadata *md)
+{
+	u_char *in = data;
+
+	/* Fixed-width character fields. */
+	bcopy(in, md->md_magic, sizeof(md->md_magic));
+	in += sizeof(md->md_magic);
+	bcopy(in, md->md_uuid, sizeof(md->md_uuid));
+	in += sizeof(md->md_uuid);
+	bcopy(in, md->md_name, sizeof(md->md_name));
+	in += sizeof(md->md_name);
+
+	/* Little-endian integers. */
+	md->md_version = le32dec(in);
+	in += sizeof(md->md_version);
+	md->md_sectorsize = le32dec(in);
+	in += sizeof(md->md_sectorsize);
+	md->md_size = le64dec(in);
+}
+#endif /* _G_MULTIPATH_H_ */
diff --git a/sys/modules/geom/Makefile b/sys/modules/geom/Makefile
index 011ff4c..26e2209 100644
--- a/sys/modules/geom/Makefile
+++ b/sys/modules/geom/Makefile
@@ -12,6 +12,7 @@ SUBDIR= geom_bde \
geom_label \
geom_mbr \
geom_mirror \
+ geom_multipath \
geom_nop \
geom_pc98 \
geom_raid3 \
diff --git a/sys/modules/geom/geom_multipath/Makefile b/sys/modules/geom/geom_multipath/Makefile
new file mode 100644
index 0000000..d036fe0
--- /dev/null
+++ b/sys/modules/geom/geom_multipath/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../../geom/multipath
+
+KMOD= geom_multipath
+SRCS= g_multipath.c
+
+.include <bsd.kmod.mk>
OpenPOWER on IntegriCloud