summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: pjd <pjd@FreeBSD.org> 2010-05-16 11:56:42 +0000
committer: pjd <pjd@FreeBSD.org> 2010-05-16 11:56:42 +0000
commit: 4f5465212474337736d67366f6feaefd8199ca17 (patch)
tree: 8be7be1135510dcf415136018b2846675ba753b9
parent: 9100ea8887f05a87a102fb3749975a39b5117f2f (diff)
download: FreeBSD-src-4f5465212474337736d67366f6feaefd8199ca17.zip
FreeBSD-src-4f5465212474337736d67366f6feaefd8199ca17.tar.gz
The whole point of having a dedicated worker thread for each leaf VDEV was to
avoid calling zio_interrupt() from the geom_up thread context. It turns out
that when a provider is forcibly removed from the system and we kill the
worker thread, there can still be some ZIOs pending. To complete pending ZIOs
when there is no worker thread anymore, we still have to call zio_interrupt()
from the geom_up context.

To avoid this race, just remove the use of worker threads altogether. This
should be more or less fine: I had thought that zio_interrupt() does more
work, but it only makes a small UMA allocation with M_WAITOK. It also saves
one context switch per I/O request.

PR: kern/145339
Reported by: Alex Bakhtin <Alex.Bakhtin@gmail.com>
MFC after: 1 week
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c 127
1 files changed, 22 insertions, 105 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
index 3bd245e..b831a9c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
@@ -47,31 +47,6 @@ struct g_class zfs_vdev_class = {
DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
-typedef struct vdev_geom_ctx {
- struct g_consumer *gc_consumer;
- int gc_state;
- struct bio_queue_head gc_queue;
- struct mtx gc_queue_mtx;
-} vdev_geom_ctx_t;
-
-static void
-vdev_geom_release(vdev_t *vd)
-{
- vdev_geom_ctx_t *ctx;
-
- ctx = vd->vdev_tsd;
- vd->vdev_tsd = NULL;
-
- mtx_lock(&ctx->gc_queue_mtx);
- ctx->gc_state = 1;
- wakeup_one(&ctx->gc_queue);
- while (ctx->gc_state != 2)
- msleep(&ctx->gc_state, &ctx->gc_queue_mtx, 0, "vgeom:w", 0);
- mtx_unlock(&ctx->gc_queue_mtx);
- mtx_destroy(&ctx->gc_queue_mtx);
- kmem_free(ctx, sizeof(*ctx));
-}
-
static void
vdev_geom_orphan(struct g_consumer *cp)
{
@@ -96,8 +71,7 @@ vdev_geom_orphan(struct g_consumer *cp)
ZFS_LOG(1, "Destroyed geom %s.", gp->name);
g_wither_geom(gp, error);
}
- vdev_geom_release(vd);
-
+ vd->vdev_tsd = NULL;
vd->vdev_remove_wanted = B_TRUE;
spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}
@@ -188,52 +162,6 @@ vdev_geom_detach(void *arg, int flag __unused)
}
}
-static void
-vdev_geom_worker(void *arg)
-{
- vdev_geom_ctx_t *ctx;
- zio_t *zio;
- struct bio *bp;
-
- thread_lock(curthread);
- sched_prio(curthread, PRIBIO);
- thread_unlock(curthread);
-
- ctx = arg;
- for (;;) {
- mtx_lock(&ctx->gc_queue_mtx);
- bp = bioq_takefirst(&ctx->gc_queue);
- if (bp == NULL) {
- if (ctx->gc_state == 1) {
- ctx->gc_state = 2;
- wakeup_one(&ctx->gc_state);
- mtx_unlock(&ctx->gc_queue_mtx);
- kthread_exit();
- }
- msleep(&ctx->gc_queue, &ctx->gc_queue_mtx,
- PRIBIO | PDROP, "vgeom:io", 0);
- continue;
- }
- mtx_unlock(&ctx->gc_queue_mtx);
- zio = bp->bio_caller1;
- zio->io_error = bp->bio_error;
- if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
- vdev_t *vd;
-
- /*
- * If we get ENOTSUP, we know that no future
- * attempts will ever succeed. In this case we
- * set a persistent bit so that we don't bother
- * with the ioctl in the future.
- */
- vd = zio->io_vd;
- vd->vdev_nowritecache = B_TRUE;
- }
- g_destroy_bio(bp);
- zio_interrupt(zio);
- }
-}
-
static uint64_t
nvlist_get_guid(nvlist_t *list)
{
@@ -488,7 +416,6 @@ vdev_geom_open_by_path(vdev_t *vd, int check_guid)
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
- vdev_geom_ctx_t *ctx;
struct g_provider *pp;
struct g_consumer *cp;
int error, owned;
@@ -557,19 +484,9 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
}
cp->private = vd;
-
- ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP);
- bioq_init(&ctx->gc_queue);
- mtx_init(&ctx->gc_queue_mtx, "zfs:vdev:geom:queue", NULL, MTX_DEF);
- ctx->gc_consumer = cp;
- ctx->gc_state = 0;
-
- vd->vdev_tsd = ctx;
+ vd->vdev_tsd = cp;
pp = cp->provider;
- kproc_kthread_add(vdev_geom_worker, ctx, &zfsproc, NULL, 0, 0,
- "zfskern", "vdev %s", pp->name);
-
/*
* Determine the actual size of the device.
*/
@@ -592,50 +509,49 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
static void
vdev_geom_close(vdev_t *vd)
{
- vdev_geom_ctx_t *ctx;
struct g_consumer *cp;
- if ((ctx = vd->vdev_tsd) == NULL)
- return;
- if ((cp = ctx->gc_consumer) == NULL)
+ cp = vd->vdev_tsd;
+ if (cp == NULL)
return;
- vdev_geom_release(vd);
+ vd->vdev_tsd = NULL;
g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}
static void
vdev_geom_io_intr(struct bio *bp)
{
- vdev_geom_ctx_t *ctx;
zio_t *zio;
zio = bp->bio_caller1;
- ctx = zio->io_vd->vdev_tsd;
-
- if ((zio->io_error = bp->bio_error) == 0 && bp->bio_resid != 0)
+ zio->io_error = bp->bio_error;
+ if (zio->io_error == 0 && bp->bio_resid != 0)
zio->io_error = EIO;
-
- mtx_lock(&ctx->gc_queue_mtx);
- bioq_insert_tail(&ctx->gc_queue, bp);
- wakeup_one(&ctx->gc_queue);
- mtx_unlock(&ctx->gc_queue_mtx);
+ if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
+ vdev_t *vd;
+
+ /*
+ * If we get ENOTSUP, we know that no future
+ * attempts will ever succeed. In this case we
+ * set a persistent bit so that we don't bother
+ * with the ioctl in the future.
+ */
+ vd = zio->io_vd;
+ vd->vdev_nowritecache = B_TRUE;
+ }
+ g_destroy_bio(bp);
+ zio_interrupt(zio);
}
static int
vdev_geom_io_start(zio_t *zio)
{
vdev_t *vd;
- vdev_geom_ctx_t *ctx;
struct g_consumer *cp;
struct bio *bp;
int error;
- cp = NULL;
-
vd = zio->io_vd;
- ctx = vd->vdev_tsd;
- if (ctx != NULL)
- cp = ctx->gc_consumer;
if (zio->io_type == ZIO_TYPE_IOCTL) {
/* XXPOLICY */
@@ -664,6 +580,7 @@ vdev_geom_io_start(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
sendreq:
+ cp = vd->vdev_tsd;
if (cp == NULL) {
zio->io_error = ENXIO;
return (ZIO_PIPELINE_CONTINUE);
OpenPOWER on IntegriCloud