summaryrefslogtreecommitdiffstats
path: root/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c')
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c583
1 files changed, 583 insertions, 0 deletions
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
new file mode 100644
index 0000000..7c87963
--- /dev/null
+++ b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
@@ -0,0 +1,583 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/bio.h>
+#include <sys/disk.h>
+#include <sys/spa.h>
+#include <sys/vdev_impl.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <geom/geom.h>
+#include <geom/geom_int.h>
+
+/*
+ * Virtual device vector for GEOM.
+ */
+
+struct g_class zfs_vdev_class = {
+ .name = "ZFS::VDEV",
+ .version = G_VERSION,
+};
+
+DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
+
+typedef struct vdev_geom_ctx {
+ struct g_consumer *gc_consumer;
+ int gc_state;
+ struct bio_queue_head gc_queue;
+ struct mtx gc_queue_mtx;
+} vdev_geom_ctx_t;
+
+static void
+vdev_geom_release(vdev_t *vd)
+{
+ vdev_geom_ctx_t *ctx;
+
+ ctx = vd->vdev_tsd;
+ vd->vdev_tsd = NULL;
+
+ mtx_lock(&ctx->gc_queue_mtx);
+ ctx->gc_state = 1;
+ wakeup_one(&ctx->gc_queue);
+ while (ctx->gc_state != 2)
+ msleep(&ctx->gc_state, &ctx->gc_queue_mtx, 0, "vgeom:w", 0);
+ mtx_unlock(&ctx->gc_queue_mtx);
+ mtx_destroy(&ctx->gc_queue_mtx);
+ kmem_free(ctx, sizeof(*ctx));
+}
+
+static void
+vdev_geom_orphan(struct g_consumer *cp)
+{
+ struct g_geom *gp;
+ vdev_t *vd;
+ int error;
+
+ g_topology_assert();
+
+ vd = cp->private;
+ gp = cp->geom;
+ error = cp->provider->error;
+
+ ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
+ if (cp->acr + cp->acw + cp->ace > 0)
+ g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+ ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ /* Destroy geom if there are no consumers left. */
+ if (LIST_EMPTY(&gp->consumer)) {
+ ZFS_LOG(1, "Destroyed geom %s.", gp->name);
+ g_wither_geom(gp, error);
+ }
+ vdev_geom_release(vd);
+ /* Both methods below work, but in a bit different way. */
+#if 0
+ vd->vdev_reopen_wanted = 1;
+#else
+ vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, vd->vdev_stat.vs_aux);
+#endif
+}
+
+static struct g_consumer *
+vdev_geom_attach(struct g_provider *pp, int write)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+
+ g_topology_assert();
+
+ ZFS_LOG(1, "Attaching to %s.", pp->name);
+ /* Do we have geom already? No? Create one. */
+ LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
+ if (gp->flags & G_GEOM_WITHER)
+ continue;
+ if (strcmp(gp->name, "zfs::vdev") != 0)
+ continue;
+ break;
+ }
+ if (gp == NULL) {
+ gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
+ gp->orphan = vdev_geom_orphan;
+ cp = g_new_consumer(gp);
+ if (g_attach(cp, pp) != 0) {
+ g_wither_geom(gp, ENXIO);
+ return (NULL);
+ }
+ if (g_access(cp, 1, write, 1) != 0) {
+ g_wither_geom(gp, ENXIO);
+ return (NULL);
+ }
+ ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
+ } else {
+ /* Check if we are already connected to this provider. */
+ LIST_FOREACH(cp, &gp->consumer, consumer) {
+ if (cp->provider == pp) {
+ ZFS_LOG(1, "Found consumer for %s.", pp->name);
+ break;
+ }
+ }
+ if (cp == NULL) {
+ cp = g_new_consumer(gp);
+ if (g_attach(cp, pp) != 0) {
+ g_destroy_consumer(cp);
+ return (NULL);
+ }
+ if (g_access(cp, 1, write, 1) != 0) {
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ return (NULL);
+ }
+ ZFS_LOG(1, "Created consumer for %s.", pp->name);
+ } else {
+ if (g_access(cp, 1, cp->acw > 0 ? 0 : write, 1) != 0)
+ return (NULL);
+ ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
+ }
+ }
+ return (cp);
+}
+
+static void
+vdev_geom_detach(void *arg, int flag __unused)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+
+ g_topology_assert();
+ cp = arg;
+ gp = cp->geom;
+
+ ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
+ g_access(cp, -1, 0, -1);
+ /* Destroy consumer on last close. */
+ if (cp->acr == 0 && cp->ace == 0) {
+ ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
+ if (cp->acw > 0)
+ g_access(cp, 0, -cp->acw, 0);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ }
+ /* Destroy geom if there are no consumers left. */
+ if (LIST_EMPTY(&gp->consumer)) {
+ ZFS_LOG(1, "Destroyed geom %s.", gp->name);
+ g_wither_geom(gp, ENXIO);
+ }
+}
+
+static void
+vdev_geom_worker(void *arg)
+{
+ vdev_geom_ctx_t *ctx;
+ zio_t *zio;
+ struct bio *bp;
+
+ ctx = arg;
+ for (;;) {
+ mtx_lock(&ctx->gc_queue_mtx);
+ bp = bioq_takefirst(&ctx->gc_queue);
+ if (bp == NULL) {
+ if (ctx->gc_state == 1) {
+ ctx->gc_state = 2;
+ wakeup_one(&ctx->gc_state);
+ mtx_unlock(&ctx->gc_queue_mtx);
+ kthread_exit(0);
+ }
+ msleep(&ctx->gc_queue, &ctx->gc_queue_mtx,
+ PRIBIO | PDROP, "vgeom:io", 0);
+ continue;
+ }
+ mtx_unlock(&ctx->gc_queue_mtx);
+ zio = bp->bio_caller1;
+ zio->io_error = bp->bio_error;
+ if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
+ vdev_t *vd;
+
+ /*
+ * If we get ENOTSUP, we know that no future
+ * attempts will ever succeed. In this case we
+ * set a persistent bit so that we don't bother
+ * with the ioctl in the future.
+ */
+ vd = zio->io_vd;
+ vd->vdev_nowritecache = B_TRUE;
+ }
+ g_destroy_bio(bp);
+ zio_next_stage_async(zio);
+ }
+}
+
+static char *
+vdev_geom_get_id(struct g_consumer *cp)
+{
+ char *id;
+ int len;
+
+ g_topology_assert_not();
+ len = DISK_IDENT_SIZE;
+ id = kmem_zalloc(len, KM_SLEEP);
+ if (g_io_getattr("GEOM::ident", cp, &len, id) != 0) {
+ kmem_free(id, DISK_IDENT_SIZE);
+ return (NULL);
+ }
+ return (id);
+}
+
+static void
+vdev_geom_free_id(char *id)
+{
+
+ if (id != NULL)
+ kmem_free(id, DISK_IDENT_SIZE);
+}
+
+struct vdev_geom_find {
+ const char *id;
+ int write;
+ struct g_consumer *cp;
+};
+
+static void
+vdev_geom_taste_orphan(struct g_consumer *cp)
+{
+
+ KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
+ cp->provider->name));
+}
+
+static void
+vdev_geom_attach_by_id_event(void *arg, int flags __unused)
+{
+ struct vdev_geom_find *ap;
+ struct g_class *mp;
+ struct g_geom *gp, *zgp;
+ struct g_provider *pp;
+ struct g_consumer *zcp;
+ char *id;
+
+ g_topology_assert();
+
+ ap = arg;
+
+ zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
+ /* This orphan function should be never called. */
+ zgp->orphan = vdev_geom_taste_orphan;
+ zcp = g_new_consumer(zgp);
+
+ LIST_FOREACH(mp, &g_classes, class) {
+ if (mp == &zfs_vdev_class)
+ continue;
+ LIST_FOREACH(gp, &mp->geom, geom) {
+ if (gp->flags & G_GEOM_WITHER)
+ continue;
+ LIST_FOREACH(pp, &gp->provider, provider) {
+ if (pp->flags & G_PF_WITHER)
+ continue;
+ g_attach(zcp, pp);
+ if (g_access(zcp, 1, 0, 0) != 0) {
+ g_detach(zcp);
+ continue;
+ }
+ g_topology_unlock();
+ id = vdev_geom_get_id(zcp);
+ g_topology_lock();
+ g_access(zcp, -1, 0, 0);
+ g_detach(zcp);
+ if (id == NULL || strcmp(id, ap->id) != 0) {
+ vdev_geom_free_id(id);
+ continue;
+ }
+ vdev_geom_free_id(id);
+ ap->cp = vdev_geom_attach(pp, ap->write);
+ if (ap->cp == NULL) {
+ printf("ZFS WARNING: Cannot open %s "
+ "for writting.\n", pp->name);
+ continue;
+ }
+ goto end;
+ }
+ }
+ }
+ ap->cp = NULL;
+end:
+ g_destroy_consumer(zcp);
+ g_destroy_geom(zgp);
+}
+
+static struct g_consumer *
+vdev_geom_attach_by_id(const char *id, int write)
+{
+ struct vdev_geom_find *ap;
+ struct g_consumer *cp;
+
+ ap = kmem_zalloc(sizeof(*ap), KM_SLEEP);
+ ap->id = id;
+ ap->write = write;
+ g_waitfor_event(vdev_geom_attach_by_id_event, ap, M_WAITOK, NULL);
+ cp = ap->cp;
+ kmem_free(ap, sizeof(*ap));
+ return (cp);
+}
+
+static int
+vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
+{
+ vdev_geom_ctx_t *ctx;
+ struct g_provider *pp;
+ struct g_consumer *cp;
+ char *id = NULL;
+ int owned;
+
+ /*
+ * We must have a pathname, and it must be absolute.
+ */
+ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
+ vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
+ return (EINVAL);
+ }
+
+ if ((owned = mtx_owned(&Giant)))
+ mtx_unlock(&Giant);
+ cp = NULL;
+ g_topology_lock();
+ pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
+ if (pp != NULL) {
+ ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
+ cp = vdev_geom_attach(pp, !!(spa_mode & FWRITE));
+ if (cp != NULL && vd->vdev_devid != NULL) {
+ g_topology_unlock();
+ id = vdev_geom_get_id(cp);
+ g_topology_lock();
+ if (id == NULL || strcmp(id, vd->vdev_devid) != 0) {
+ vdev_geom_detach(cp, 0);
+ cp = NULL;
+ ZFS_LOG(1, "ID mismatch for provider %s: "
+ "[%s]!=[%s].", vd->vdev_path,
+ vd->vdev_devid, id);
+ goto next;
+ }
+ ZFS_LOG(1, "ID match for provider %s.", vd->vdev_path);
+ }
+ }
+next:
+ g_topology_unlock();
+ vdev_geom_free_id(id);
+ if (cp == NULL && vd->vdev_devid != NULL) {
+ ZFS_LOG(1, "Searching by ID [%s].", vd->vdev_devid);
+ cp = vdev_geom_attach_by_id(vd->vdev_devid,
+ !!(spa_mode & FWRITE));
+ if (cp != NULL) {
+ size_t len = strlen(cp->provider->name) + 6; /* 6 == strlen("/dev/") + 1 */
+ char *buf = kmem_alloc(len, KM_SLEEP);
+
+ snprintf(buf, len, "/dev/%s", cp->provider->name);
+ spa_strfree(vd->vdev_path);
+ vd->vdev_path = buf;
+
+ ZFS_LOG(1, "Attach by ID [%s] succeeded, provider %s.",
+ vd->vdev_devid, vd->vdev_path);
+ }
+ }
+ if (owned)
+ mtx_lock(&Giant);
+ if (cp == NULL) {
+ ZFS_LOG(1, "Provider %s (id=[%s]) not found.", vd->vdev_path,
+ vd->vdev_devid);
+ vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+ return (EACCES);
+ }
+ pp = cp->provider;
+
+ /*
+ * Determine the actual size of the device.
+ */
+ *psize = pp->mediasize;
+
+ /*
+ * Determine the device's minimum transfer size.
+ */
+ *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
+
+ /*
+ * Clear the nowritecache bit, so that on a vdev_reopen() we will
+ * try again.
+ */
+ vd->vdev_nowritecache = B_FALSE;
+
+ cp->private = vd;
+
+ ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP);
+ bioq_init(&ctx->gc_queue);
+ mtx_init(&ctx->gc_queue_mtx, "zfs:vdev:geom:queue", NULL, MTX_DEF);
+ ctx->gc_consumer = cp;
+ ctx->gc_state = 0;
+
+ vd->vdev_tsd = ctx;
+
+ kthread_create(vdev_geom_worker, ctx, NULL, 0, 0, "vdev:worker %s",
+ pp->name);
+
+ return (0);
+}
+
+static void
+vdev_geom_close(vdev_t *vd)
+{
+ vdev_geom_ctx_t *ctx;
+ struct g_consumer *cp;
+
+ if ((ctx = vd->vdev_tsd) == NULL)
+ return;
+ if ((cp = ctx->gc_consumer) == NULL)
+ return;
+ vdev_geom_release(vd);
+ g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
+}
+
+static void
+vdev_geom_io_intr(struct bio *bp)
+{
+ vdev_geom_ctx_t *ctx;
+ zio_t *zio;
+
+ zio = bp->bio_caller1;
+ ctx = zio->io_vd->vdev_tsd;
+
+ mtx_lock(&ctx->gc_queue_mtx);
+ bioq_insert_tail(&ctx->gc_queue, bp);
+ wakeup_one(&ctx->gc_queue);
+ mtx_unlock(&ctx->gc_queue_mtx);
+}
+
+static void
+vdev_geom_io_start(zio_t *zio)
+{
+ vdev_t *vd;
+ vdev_geom_ctx_t *ctx;
+ struct g_consumer *cp;
+ struct bio *bp;
+ int error;
+
+ cp = NULL;
+
+ vd = zio->io_vd;
+ ctx = vd->vdev_tsd;
+ if (ctx != NULL)
+ cp = ctx->gc_consumer;
+
+ if (zio->io_type == ZIO_TYPE_IOCTL) {
+ zio_vdev_io_bypass(zio);
+
+ /* XXPOLICY */
+ if (vdev_is_dead(vd)) {
+ zio->io_error = ENXIO;
+ zio_next_stage_async(zio);
+ return;
+ }
+
+ switch (zio->io_cmd) {
+
+ case DKIOCFLUSHWRITECACHE:
+ if (vd->vdev_nowritecache) {
+ zio->io_error = ENOTSUP;
+ break;
+ }
+
+ goto sendreq;
+ default:
+ zio->io_error = ENOTSUP;
+ }
+
+ zio_next_stage_async(zio);
+ return;
+ }
+
+ if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
+ return;
+
+ if ((zio = vdev_queue_io(zio)) == NULL)
+ return;
+
+sendreq:
+
+ error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
+ if (error == 0 && cp == NULL)
+ error = ENXIO;
+ if (error) {
+ zio->io_error = error;
+ zio_next_stage_async(zio);
+ return;
+ }
+
+ bp = g_alloc_bio();
+ bp->bio_caller1 = zio;
+ switch (zio->io_type) {
+ case ZIO_TYPE_READ:
+ case ZIO_TYPE_WRITE:
+ bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
+ bp->bio_data = zio->io_data;
+ bp->bio_offset = zio->io_offset;
+ bp->bio_length = zio->io_size;
+ break;
+ case ZIO_TYPE_IOCTL:
+ bp->bio_cmd = BIO_FLUSH;
+ bp->bio_data = NULL;
+ bp->bio_offset = cp->provider->mediasize;
+ bp->bio_length = 0;
+ break;
+ }
+ bp->bio_done = vdev_geom_io_intr;
+
+ g_io_request(bp, cp);
+}
+
+static void
+vdev_geom_io_done(zio_t *zio)
+{
+ vdev_queue_io_done(zio);
+
+ if (zio->io_type == ZIO_TYPE_WRITE)
+ vdev_cache_write(zio);
+
+ if (zio_injection_enabled && zio->io_error == 0)
+ zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);
+
+ zio_next_stage(zio);
+}
+
+vdev_ops_t vdev_geom_ops = {
+ vdev_geom_open,
+ vdev_geom_close,
+ vdev_default_asize,
+ vdev_geom_io_start,
+ vdev_geom_io_done,
+ NULL,
+ VDEV_TYPE_DISK, /* name of this vdev type */
+ B_TRUE /* leaf vdev */
+};
OpenPOWER on IntegriCloud