diff options
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c | 171 |
1 files changed, 103 insertions, 68 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index fa42871..4d4b63c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -47,31 +47,39 @@ struct g_class zfs_vdev_class = { DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); +/* + * Don't send BIO_FLUSH. + */ +static int vdev_geom_bio_flush_disable = 0; +TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); +SYSCTL_DECL(_vfs_zfs_vdev); +SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, + &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); + static void vdev_geom_orphan(struct g_consumer *cp) { - struct g_geom *gp; vdev_t *vd; - int error; g_topology_assert(); vd = cp->private; - gp = cp->geom; - error = cp->provider->error; - ZFS_LOG(1, "Closing access to %s.", cp->provider->name); - if (cp->acr + cp->acw + cp->ace > 0) - g_access(cp, -cp->acr, -cp->acw, -cp->ace); - ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name); - g_detach(cp); - g_destroy_consumer(cp); - /* Destroy geom if there are no consumers left. */ - if (LIST_EMPTY(&gp->consumer)) { - ZFS_LOG(1, "Destroyed geom %s.", gp->name); - g_wither_geom(gp, error); - } - vd->vdev_tsd = NULL; + /* + * Orphan callbacks occur from the GEOM event thread. + * Concurrent with this call, new I/O requests may be + * working their way through GEOM about to find out + * (only once executed by the g_down thread) that we've + * been orphaned from our disk provider. These I/Os + * must be retired before we can detach our consumer. + * This is most easily achieved by acquiring the + * SPA ZIO configuration lock as a writer, but doing + * so with the GEOM topology lock held would cause + * a lock order reversal. Instead, rely on the SPA's + * async removal support to invoke a close on this + * vdev once it is safe to do so. + */ + zfs_post_remove(vd->vdev_spa, vd); vd->vdev_remove_wanted = B_TRUE; spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); } @@ -223,16 +231,12 @@ vdev_geom_read_guid(struct g_consumer *cp) uint64_t psize; off_t offset, size; uint64_t guid; - int error, l, len, iszvol; + int error, l, len; g_topology_assert_not(); pp = cp->provider; ZFS_LOG(1, "Reading guid from %s...", pp->name); - if (g_getattr("ZFS::iszvol", cp, &iszvol) == 0 && iszvol) { - ZFS_LOG(1, "Skipping ZVOL-based provider %s.", pp->name); - return (0); - } psize = pp->mediasize; psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t)); @@ -270,11 +274,6 @@ vdev_geom_read_guid(struct g_consumer *cp) return (guid); } -struct vdev_geom_find { - uint64_t guid; - struct g_consumer *cp; -}; - static void vdev_geom_taste_orphan(struct g_consumer *cp) { @@ -283,25 +282,23 @@ vdev_geom_taste_orphan(struct g_consumer *cp) cp->provider->name)); } -static void -vdev_geom_attach_by_guid_event(void *arg, int flags __unused) +static struct g_consumer * +vdev_geom_attach_by_guid(uint64_t guid) { - struct vdev_geom_find *ap; struct g_class *mp; struct g_geom *gp, *zgp; struct g_provider *pp; - struct g_consumer *zcp; - uint64_t guid; + struct g_consumer *cp, *zcp; + uint64_t pguid; g_topology_assert(); - ap = arg; - zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste"); /* This orphan function should be never called. */ zgp->orphan = vdev_geom_taste_orphan; zcp = g_new_consumer(zgp); + cp = NULL; LIST_FOREACH(mp, &g_classes, class) { if (mp == &zfs_vdev_class) continue; @@ -317,39 +314,29 @@ vdev_geom_attach_by_guid_event(void *arg, int flags __unused) continue; } g_topology_unlock(); - guid = vdev_geom_read_guid(zcp); + pguid = vdev_geom_read_guid(zcp); g_topology_lock(); g_access(zcp, -1, 0, 0); g_detach(zcp); - if (guid != ap->guid) + if (pguid != guid) continue; - ap->cp = vdev_geom_attach(pp); - if (ap->cp == NULL) { + cp = vdev_geom_attach(pp); + if (cp == NULL) { printf("ZFS WARNING: Unable to attach to %s.\n", pp->name); continue; } - goto end; + break; } + if (cp != NULL) + break; } + if (cp != NULL) + break; } - ap->cp = NULL; end: g_destroy_consumer(zcp); g_destroy_geom(zgp); -} - -static struct g_consumer * -vdev_geom_attach_by_guid(uint64_t guid) -{ - struct vdev_geom_find *ap; - struct g_consumer *cp; - - ap = kmem_zalloc(sizeof(*ap), KM_SLEEP); - ap->guid = guid; - g_waitfor_event(vdev_geom_attach_by_guid_event, ap, M_WAITOK, NULL); - cp = ap->cp; - kmem_free(ap, sizeof(*ap)); return (cp); } @@ -360,6 +347,8 @@ vdev_geom_open_by_guid(vdev_t *vd) char *buf; size_t len; + g_topology_assert(); + ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid); cp = vdev_geom_attach_by_guid(vd->vdev_guid); if (cp != NULL) { @@ -387,8 +376,9 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid) struct g_consumer *cp; uint64_t guid; + g_topology_assert(); + cp = NULL; - g_topology_lock(); pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); if (pp != NULL) { ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); @@ -410,7 +400,6 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid) } } } - g_topology_unlock(); return (cp); } @@ -420,7 +409,8 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { struct g_provider *pp; struct g_consumer *cp; - int error, owned; + size_t bufsize; + int error, lock; /* * We must have a pathname, and it must be absolute. @@ -432,15 +422,22 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) vd->vdev_tsd = NULL; - if ((owned = mtx_owned(&Giant))) - mtx_unlock(&Giant); + if (mutex_owned(&spa_namespace_lock)) { + mutex_exit(&spa_namespace_lock); + lock = 1; + } else { + lock = 0; + } + DROP_GIANT(); + g_topology_lock(); error = 0; /* - * If we're creating pool, just find GEOM provider by its name - * and ignore GUID mismatches. + * If we're creating or splitting a pool, just find the GEOM provider + * by its name and ignore GUID mismatches. */ - if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE) + if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE || + vd->vdev_spa->spa_splitting_newspa == B_TRUE) cp = vdev_geom_open_by_path(vd, 0); else { cp = vdev_geom_open_by_path(vd, 1); @@ -472,7 +469,6 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) { int i; - g_topology_lock(); for (i = 0; i < 5; i++) { error = g_access(cp, 0, 1, 0); if (error == 0) @@ -487,10 +483,11 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) vdev_geom_detach(cp, 0); cp = NULL; } - g_topology_unlock(); } - if (owned) - mtx_lock(&Giant); + g_topology_unlock(); + PICKUP_GIANT(); + if (lock) + mutex_enter(&spa_namespace_lock); if (cp == NULL) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); @@ -516,6 +513,12 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) */ vd->vdev_nowritecache = B_FALSE; + if (vd->vdev_physpath != NULL) + spa_strfree(vd->vdev_physpath); + bufsize = sizeof("/dev/") + strlen(pp->name); + vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP); + snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name); + return (0); } @@ -528,30 +531,50 @@ vdev_geom_close(vdev_t *vd) if (cp == NULL) return; vd->vdev_tsd = NULL; + vd->vdev_delayed_close = B_FALSE; g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL); } static void vdev_geom_io_intr(struct bio *bp) { + vdev_t *vd; zio_t *zio; zio = bp->bio_caller1; + vd = zio->io_vd; zio->io_error = bp->bio_error; if (zio->io_error == 0 && bp->bio_resid != 0) zio->io_error = EIO; if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) { - vdev_t *vd; - /* * If we get ENOTSUP, we know that no future * attempts will ever succeed. In this case we * set a persistent bit so that we don't bother * with the ioctl in the future. */ - vd = zio->io_vd; vd->vdev_nowritecache = B_TRUE; } + if (zio->io_error == EIO && !vd->vdev_remove_wanted) { + /* + * If provider's error is set we assume it is being + * removed. + */ + if (bp->bio_to->error != 0) { + /* + * We post the resource as soon as possible, instead of + * when the async removal actually happens, because the + * DE is using this information to discard previous I/O + * errors. + */ + /* XXX: zfs_post_remove() can sleep. */ + zfs_post_remove(zio->io_spa, vd); + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); + } else if (!vd->vdev_delayed_close) { + vd->vdev_delayed_close = B_TRUE; + } + } g_destroy_bio(bp); zio_interrupt(zio); } @@ -577,7 +600,7 @@ vdev_geom_io_start(zio_t *zio) case DKIOCFLUSHWRITECACHE: - if (zfs_nocacheflush) + if (zfs_nocacheflush || vdev_geom_bio_flush_disable) break; if (vd->vdev_nowritecache) { @@ -628,6 +651,16 @@ vdev_geom_io_done(zio_t *zio) { } +static void +vdev_geom_hold(vdev_t *vd) +{ +} + +static void +vdev_geom_rele(vdev_t *vd) +{ +} + vdev_ops_t vdev_geom_ops = { vdev_geom_open, vdev_geom_close, @@ -635,6 +668,8 @@ vdev_ops_t vdev_geom_ops = { vdev_geom_io_start, vdev_geom_io_done, NULL, + vdev_geom_hold, + vdev_geom_rele, VDEV_TYPE_DISK, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; |