Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c')
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c | 171
1 file changed, 103 insertions(+), 68 deletions(-)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
index fa42871..4d4b63c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
@@ -47,31 +47,39 @@ struct g_class zfs_vdev_class = {
DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
+/*
+ * Don't send BIO_FLUSH.
+ */
+static int vdev_geom_bio_flush_disable = 0;
+TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
+SYSCTL_DECL(_vfs_zfs_vdev);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
+ &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
+
static void
vdev_geom_orphan(struct g_consumer *cp)
{
- struct g_geom *gp;
vdev_t *vd;
- int error;
g_topology_assert();
vd = cp->private;
- gp = cp->geom;
- error = cp->provider->error;
- ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
- if (cp->acr + cp->acw + cp->ace > 0)
- g_access(cp, -cp->acr, -cp->acw, -cp->ace);
- ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
- g_detach(cp);
- g_destroy_consumer(cp);
- /* Destroy geom if there are no consumers left. */
- if (LIST_EMPTY(&gp->consumer)) {
- ZFS_LOG(1, "Destroyed geom %s.", gp->name);
- g_wither_geom(gp, error);
- }
- vd->vdev_tsd = NULL;
+ /*
+ * Orphan callbacks occur from the GEOM event thread.
+ * Concurrent with this call, new I/O requests may be
+ * working their way through GEOM, only to discover
+ * (once executed by the g_down thread) that we have
+ * been orphaned from our disk provider. These I/Os
+ * must be retired before we can detach our consumer.
+ * This is most easily achieved by acquiring the
+ * SPA ZIO configuration lock as a writer, but doing
+ * so with the GEOM topology lock held would cause
+ * a lock order reversal. Instead, rely on the SPA's
+ * async removal support to invoke a close on this
+ * vdev once it is safe to do so.
+ */
+ zfs_post_remove(vd->vdev_spa, vd);
vd->vdev_remove_wanted = B_TRUE;
spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}
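
The orphan handler above now defers all teardown to the SPA's async-removal machinery instead of detaching the consumer from the GEOM event thread. For context, a rough sketch of the consumer side of that handshake, simplified from the general shape of the SPA async handling rather than quoted from it:

    static void
    spa_async_remove_sketch(vdev_t *vd)
    {
        /*
         * Hypothetical: runs from the SPA async thread, where neither
         * the GEOM topology lock nor the event thread is involved, so
         * the ZIO configuration lock can be taken as a writer and all
         * in-flight I/O retired before the vdev is closed.
         */
        if (vd->vdev_remove_wanted) {
            vd->vdev_remove_wanted = B_FALSE;
            vdev_close(vd);
        }
    }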
@@ -223,16 +231,12 @@ vdev_geom_read_guid(struct g_consumer *cp)
uint64_t psize;
off_t offset, size;
uint64_t guid;
- int error, l, len, iszvol;
+ int error, l, len;
g_topology_assert_not();
pp = cp->provider;
ZFS_LOG(1, "Reading guid from %s...", pp->name);
- if (g_getattr("ZFS::iszvol", cp, &iszvol) == 0 && iszvol) {
- ZFS_LOG(1, "Skipping ZVOL-based provider %s.", pp->name);
- return (0);
- }
psize = pp->mediasize;
psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
@@ -270,11 +274,6 @@ vdev_geom_read_guid(struct g_consumer *cp)
return (guid);
}
-struct vdev_geom_find {
- uint64_t guid;
- struct g_consumer *cp;
-};
-
static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{
@@ -283,25 +282,23 @@ vdev_geom_taste_orphan(struct g_consumer *cp)
cp->provider->name));
}
-static void
-vdev_geom_attach_by_guid_event(void *arg, int flags __unused)
+static struct g_consumer *
+vdev_geom_attach_by_guid(uint64_t guid)
{
- struct vdev_geom_find *ap;
struct g_class *mp;
struct g_geom *gp, *zgp;
struct g_provider *pp;
- struct g_consumer *zcp;
- uint64_t guid;
+ struct g_consumer *cp, *zcp;
+ uint64_t pguid;
g_topology_assert();
- ap = arg;
-
zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
/* This orphan function should never be called. */
zgp->orphan = vdev_geom_taste_orphan;
zcp = g_new_consumer(zgp);
+ cp = NULL;
LIST_FOREACH(mp, &g_classes, class) {
if (mp == &zfs_vdev_class)
continue;
@@ -317,39 +314,29 @@ vdev_geom_attach_by_guid_event(void *arg, int flags __unused)
continue;
}
g_topology_unlock();
- guid = vdev_geom_read_guid(zcp);
+ pguid = vdev_geom_read_guid(zcp);
g_topology_lock();
g_access(zcp, -1, 0, 0);
g_detach(zcp);
- if (guid != ap->guid)
+ if (pguid != guid)
continue;
- ap->cp = vdev_geom_attach(pp);
- if (ap->cp == NULL) {
+ cp = vdev_geom_attach(pp);
+ if (cp == NULL) {
printf("ZFS WARNING: Unable to attach to %s.\n",
pp->name);
continue;
}
- goto end;
+ break;
}
+ if (cp != NULL)
+ break;
}
+ if (cp != NULL)
+ break;
}
- ap->cp = NULL;
end:
g_destroy_consumer(zcp);
g_destroy_geom(zgp);
-}
-
-static struct g_consumer *
-vdev_geom_attach_by_guid(uint64_t guid)
-{
- struct vdev_geom_find *ap;
- struct g_consumer *cp;
-
- ap = kmem_zalloc(sizeof(*ap), KM_SLEEP);
- ap->guid = guid;
- g_waitfor_event(vdev_geom_attach_by_guid_event, ap, M_WAITOK, NULL);
- cp = ap->cp;
- kmem_free(ap, sizeof(*ap));
return (cp);
}
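
Note the new calling convention: vdev_geom_attach_by_guid() now walks the GEOM tree inline instead of bouncing through g_waitfor_event(), so callers must already hold the topology lock (the next hunk makes this explicit with g_topology_assert()). A minimal, hypothetical caller:

    g_topology_lock();
    cp = vdev_geom_attach_by_guid(vd->vdev_guid);
    if (cp == NULL)
        printf("ZFS WARNING: no provider with guid %ju.\n",
            (uintmax_t)vd->vdev_guid);
    g_topology_unlock();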
@@ -360,6 +347,8 @@ vdev_geom_open_by_guid(vdev_t *vd)
char *buf;
size_t len;
+ g_topology_assert();
+
ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
cp = vdev_geom_attach_by_guid(vd->vdev_guid);
if (cp != NULL) {
@@ -387,8 +376,9 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid)
struct g_consumer *cp;
uint64_t guid;
+ g_topology_assert();
+
cp = NULL;
- g_topology_lock();
pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
if (pp != NULL) {
ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
@@ -410,7 +400,6 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid)
}
}
}
- g_topology_unlock();
return (cp);
}
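
With the lock/unlock pair removed here (and from the write-access retry loop below), all topology locking is consolidated in vdev_geom_open(). Pieced together from the following hunks, the protocol reads as follows; the elisions and comments are ours:

    if (mutex_owned(&spa_namespace_lock)) {
        mutex_exit(&spa_namespace_lock);  /* dropped while GEOM locks are held */
        lock = 1;
    } else
        lock = 0;
    DROP_GIANT();                         /* GEOM topology code runs Giant-free */
    g_topology_lock();
    /* ... open by path or by guid, g_access() retry loop ... */
    g_topology_unlock();
    PICKUP_GIANT();
    if (lock)
        mutex_enter(&spa_namespace_lock);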
@@ -420,7 +409,8 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
struct g_provider *pp;
struct g_consumer *cp;
- int error, owned;
+ size_t bufsize;
+ int error, lock;
/*
* We must have a pathname, and it must be absolute.
@@ -432,15 +422,22 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
vd->vdev_tsd = NULL;
- if ((owned = mtx_owned(&Giant)))
- mtx_unlock(&Giant);
+ if (mutex_owned(&spa_namespace_lock)) {
+ mutex_exit(&spa_namespace_lock);
+ lock = 1;
+ } else {
+ lock = 0;
+ }
+ DROP_GIANT();
+ g_topology_lock();
error = 0;
/*
- * If we're creating pool, just find GEOM provider by its name
- * and ignore GUID mismatches.
+ * If we're creating or splitting a pool, just find the GEOM provider
+ * by its name and ignore GUID mismatches.
*/
- if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)
+ if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
+ vd->vdev_spa->spa_splitting_newspa == B_TRUE)
cp = vdev_geom_open_by_path(vd, 0);
else {
cp = vdev_geom_open_by_path(vd, 1);
@@ -472,7 +469,6 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
int i;
- g_topology_lock();
for (i = 0; i < 5; i++) {
error = g_access(cp, 0, 1, 0);
if (error == 0)
@@ -487,10 +483,11 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
vdev_geom_detach(cp, 0);
cp = NULL;
}
- g_topology_unlock();
}
- if (owned)
- mtx_lock(&Giant);
+ g_topology_unlock();
+ PICKUP_GIANT();
+ if (lock)
+ mutex_enter(&spa_namespace_lock);
if (cp == NULL) {
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
return (error);
@@ -516,6 +513,12 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
*/
vd->vdev_nowritecache = B_FALSE;
+ if (vd->vdev_physpath != NULL)
+ spa_strfree(vd->vdev_physpath);
+ bufsize = sizeof("/dev/") + strlen(pp->name);
+ vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
+ snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);
+
return (0);
}
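
The buffer sizing above is exact: sizeof("/dev/") evaluates to 6 (five characters plus the terminating NUL), so bufsize already accounts for the terminator of the concatenated string. A standalone userland illustration with a hypothetical provider name:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int
    main(void)
    {
        const char *name = "ada0";  /* example provider name */
        size_t bufsize = sizeof("/dev/") + strlen(name);  /* 6 + 4 = 10 */
        char *physpath = malloc(bufsize);

        if (physpath == NULL)
            return (1);
        snprintf(physpath, bufsize, "/dev/%s", name);
        printf("%s (%zu bytes)\n", physpath, bufsize);  /* /dev/ada0 (10 bytes) */
        free(physpath);
        return (0);
    }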
@@ -528,30 +531,50 @@ vdev_geom_close(vdev_t *vd)
if (cp == NULL)
return;
vd->vdev_tsd = NULL;
+ vd->vdev_delayed_close = B_FALSE;
g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}
static void
vdev_geom_io_intr(struct bio *bp)
{
+ vdev_t *vd;
zio_t *zio;
zio = bp->bio_caller1;
+ vd = zio->io_vd;
zio->io_error = bp->bio_error;
if (zio->io_error == 0 && bp->bio_resid != 0)
zio->io_error = EIO;
if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
- vdev_t *vd;
-
/*
* If we get ENOTSUP, we know that no future
* attempts will ever succeed. In this case we
* set a persistent bit so that we don't bother
* with the ioctl in the future.
*/
- vd = zio->io_vd;
vd->vdev_nowritecache = B_TRUE;
}
+ if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
+ /*
+ * If the provider's error is set, we assume the
+ * provider is being removed.
+ */
+ if (bp->bio_to->error != 0) {
+ /*
+ * We post the resource as soon as possible, instead of
+ * when the async removal actually happens, because the
+ * DE is using this information to discard previous I/O
+ * errors.
+ */
+ /* XXX: zfs_post_remove() can sleep. */
+ zfs_post_remove(zio->io_spa, vd);
+ vd->vdev_remove_wanted = B_TRUE;
+ spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
+ } else if (!vd->vdev_delayed_close) {
+ vd->vdev_delayed_close = B_TRUE;
+ }
+ }
g_destroy_bio(bp);
zio_interrupt(zio);
}
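
The new EIO handling distinguishes a disappearing provider (its error field is set, so the removal event is posted immediately) from a transient failure (the vdev is only marked for a delayed close, to be reopened later). Restated as a self-contained helper, our paraphrase rather than code from the diff:

    #include <errno.h>

    enum eio_action { EIO_NONE, EIO_REMOVE, EIO_DELAYED_CLOSE };

    static enum eio_action
    classify_eio(int io_error, int provider_error, int remove_wanted,
        int delayed_close)
    {
        if (io_error != EIO || remove_wanted)
            return (EIO_NONE);
        if (provider_error != 0)
            return (EIO_REMOVE);         /* device is going away */
        if (!delayed_close)
            return (EIO_DELAYED_CLOSE);  /* transient error; reopen later */
        return (EIO_NONE);
    }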
@@ -577,7 +600,7 @@ vdev_geom_io_start(zio_t *zio)
case DKIOCFLUSHWRITECACHE:
- if (zfs_nocacheflush)
+ if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
break;
if (vd->vdev_nowritecache) {
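
Here the knob declared at the top of this diff takes effect, alongside the existing zfs_nocacheflush. Since it is registered both as a loader tunable (TUNABLE_INT) and as a CTLFLAG_RW sysctl, it can be set at boot or flipped at runtime, e.g.:

    # /boot/loader.conf
    vfs.zfs.vdev.bio_flush_disable=1

    # or at runtime
    sysctl vfs.zfs.vdev.bio_flush_disable=1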
@@ -628,6 +651,16 @@ vdev_geom_io_done(zio_t *zio)
{
}
+static void
+vdev_geom_hold(vdev_t *vd)
+{
+}
+
+static void
+vdev_geom_rele(vdev_t *vd)
+{
+}
+
vdev_ops_t vdev_geom_ops = {
vdev_geom_open,
vdev_geom_close,
@@ -635,6 +668,8 @@ vdev_ops_t vdev_geom_ops = {
vdev_geom_io_start,
vdev_geom_io_done,
NULL,
+ vdev_geom_hold,
+ vdev_geom_rele,
VDEV_TYPE_DISK, /* name of this vdev type */
B_TRUE /* leaf vdev */
};