summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsmh <smh@FreeBSD.org>2014-04-30 17:46:29 +0000
committersmh <smh@FreeBSD.org>2014-04-30 17:46:29 +0000
commit14906a620b6524bb6bd3f17265e6c4464f7e31d1 (patch)
tree85e14bcb4f575216f44ab083c3402c7352507c9f
parent6429f5f6361b358fde4f2643463a38637e700f1b (diff)
downloadFreeBSD-src-14906a620b6524bb6bd3f17265e6c4464f7e31d1.zip
FreeBSD-src-14906a620b6524bb6bd3f17265e6c4464f7e31d1.tar.gz
Reintroduce priority for the TRIM ZIOs instead of using the "NOW" priority
The changes how TRIM requests are generated to use ZIO_TYPE_FREE + a priority instead of ZIO_TYPE_IOCTL, until processed by vdev_geom; only then is it translated the required geom values. This reduces the amount of changes required for FREE requests to be supported by the new IO scheduler. This also eliminates the need for a specific DKIOCTRIM. Also fixed FREE vdev child IO's from running ZIO_STAGE_VDEV_IO_DONE as part of their schedule. As the new IO scheduler can result in a request to execute one type of IO to actually run a different type of IO it requires that zio_trim requests are processed without holding the trim map lock (tm->tm_lock), as the free request execute call may result in write request running hence triggering a trim_map_write_start call, which takes the trim map lock and hence would result in recused on no-recursive sx lock. This is based off avg's original work, so credit to him. MFC after: 1 month
-rw-r--r--sys/cddl/compat/opensolaris/sys/dkio.h2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h5
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c15
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c42
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c50
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c79
7 files changed, 124 insertions, 73 deletions
diff --git a/sys/cddl/compat/opensolaris/sys/dkio.h b/sys/cddl/compat/opensolaris/sys/dkio.h
index 50cafab..a365c33 100644
--- a/sys/cddl/compat/opensolaris/sys/dkio.h
+++ b/sys/cddl/compat/opensolaris/sys/dkio.h
@@ -75,8 +75,6 @@ extern "C" {
*/
#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */
-#define DKIOCTRIM (DKIOC|35) /* TRIM a block */
-
struct dk_callback {
void (*dkc_callback)(void *dkc_cookie, int error);
void *dkc_cookie;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
index 8b4e834..2cce781 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
@@ -135,9 +135,10 @@ typedef enum zio_priority {
ZIO_PRIORITY_ASYNC_READ, /* prefetch */
ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
+ ZIO_PRIORITY_TRIM, /* free requests used for TRIM */
ZIO_PRIORITY_NUM_QUEUEABLE,
- ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
+ ZIO_PRIORITY_NOW /* non-queued I/Os (e.g. ioctl) */
} zio_priority_t;
#define ZIO_PIPELINE_CONTINUE 0x100
@@ -508,7 +509,7 @@ extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
uint64_t offset, uint64_t size, zio_done_func_t *done, void *priv,
- enum zio_flag flags);
+ zio_priority_t priority, enum zio_flag flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
index ad212b7..33b8edb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
@@ -215,6 +215,10 @@ enum zio_stage {
ZIO_STAGE_FREE_BP_INIT | \
ZIO_STAGE_DVA_FREE)
+#define ZIO_FREE_PHYS_PIPELINE \
+ (ZIO_INTERLOCK_STAGES | \
+ ZIO_VDEV_IO_STAGES)
+
#define ZIO_DDT_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_FREE_BP_INIT | \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
index fd7394a..08c8418 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
@@ -449,7 +449,7 @@ trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
trim_map_t *tm = vd->vdev_trimmap;
trim_seg_t *ts;
- uint64_t size, txgtarget, txgsafe;
+ uint64_t size, offset, txgtarget, txgsafe;
hrtime_t timelimit;
ASSERT(vd->vdev_ops->vdev_op_leaf);
@@ -477,9 +477,20 @@ trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
avl_remove(&tm->tm_queued_frees, ts);
avl_add(&tm->tm_inflight_frees, ts);
size = ts->ts_end - ts->ts_start;
- zio_nowait(zio_trim(zio, spa, vd, ts->ts_start, size));
+ offset = ts->ts_start;
TRIM_MAP_SDEC(tm, size);
TRIM_MAP_QDEC(tm);
+ /*
+ * We drop the lock while we call zio_nowait as the IO
+ * scheduler can result in a different IO being run e.g.
+ * a write which would result in a recursive lock.
+ */
+ mutex_exit(&tm->tm_lock);
+
+ zio_nowait(zio_trim(zio, spa, vd, offset, size));
+
+ mutex_enter(&tm->tm_lock);
+ ts = trim_map_first(tm, txgtarget, txgsafe, timelimit);
}
mutex_exit(&tm->tm_lock);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
index 1c30941..b64ebdd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
@@ -800,10 +800,11 @@ vdev_geom_io_start(zio_t *zio)
vd = zio->io_vd;
- if (zio->io_type == ZIO_TYPE_IOCTL) {
+ switch (zio->io_type) {
+ case ZIO_TYPE_IOCTL:
/* XXPOLICY */
if (!vdev_readable(vd)) {
- zio->io_error = ENXIO;
+ zio->io_error = SET_ERROR(ENXIO);
return (ZIO_PIPELINE_CONTINUE);
}
@@ -812,28 +813,28 @@ vdev_geom_io_start(zio_t *zio)
if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
break;
if (vd->vdev_nowritecache) {
- zio->io_error = ENOTSUP;
- break;
- }
- goto sendreq;
- case DKIOCTRIM:
- if (vdev_geom_bio_delete_disable)
- break;
- if (vd->vdev_notrim) {
- zio->io_error = ENOTSUP;
+ zio->io_error = SET_ERROR(ENOTSUP);
break;
}
goto sendreq;
default:
- zio->io_error = ENOTSUP;
+ zio->io_error = SET_ERROR(ENOTSUP);
}
return (ZIO_PIPELINE_CONTINUE);
+ case ZIO_TYPE_FREE:
+ if (vdev_geom_bio_delete_disable)
+ return (ZIO_PIPELINE_CONTINUE);
+
+ if (vd->vdev_notrim) {
+ zio->io_error = SET_ERROR(ENOTSUP);
+ return (ZIO_PIPELINE_CONTINUE);
+ }
}
sendreq:
cp = vd->vdev_tsd;
if (cp == NULL) {
- zio->io_error = ENXIO;
+ zio->io_error = SET_ERROR(ENXIO);
return (ZIO_PIPELINE_CONTINUE);
}
bp = g_alloc_bio();
@@ -846,21 +847,20 @@ sendreq:
bp->bio_offset = zio->io_offset;
bp->bio_length = zio->io_size;
break;
+ case ZIO_TYPE_FREE:
+ bp->bio_cmd = BIO_DELETE;
+ bp->bio_data = NULL;
+ bp->bio_offset = zio->io_offset;
+ bp->bio_length = zio->io_size;
+ break;
case ZIO_TYPE_IOCTL:
- switch (zio->io_cmd) {
- case DKIOCFLUSHWRITECACHE:
+ if (zio->io_cmd == DKIOCFLUSHWRITECACHE) {
bp->bio_cmd = BIO_FLUSH;
bp->bio_flags |= BIO_ORDERED;
bp->bio_data = NULL;
bp->bio_offset = cp->provider->mediasize;
bp->bio_length = 0;
break;
- case DKIOCTRIM:
- bp->bio_cmd = BIO_DELETE;
- bp->bio_data = NULL;
- bp->bio_offset = zio->io_offset;
- bp->bio_length = zio->io_size;
- break;
}
break;
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
index d7d0dee..5747aa4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
@@ -40,9 +40,9 @@
*
* ZFS issues I/O operations to leaf vdevs to satisfy and complete zios. The
* I/O scheduler determines when and in what order those operations are
- * issued. The I/O scheduler divides operations into five I/O classes
+ * issued. The I/O scheduler divides operations into six I/O classes
* prioritized in the following order: sync read, sync write, async read,
- * async write, and scrub/resilver. Each queue defines the minimum and
+ * async write, scrub/resilver and trim. Each queue defines the minimum and
* maximum number of concurrent operations that may be issued to the device.
* In addition, the device has an aggregate maximum. Note that the sum of the
* per-queue minimums must not exceed the aggregate maximum, and if the
@@ -61,7 +61,7 @@
* done in the order specified above. No further operations are issued if the
* aggregate maximum number of concurrent operations has been hit or if there
* are no operations queued for an I/O class that has not hit its maximum.
- * Every time an i/o is queued or an operation completes, the I/O scheduler
+ * Every time an I/O is queued or an operation completes, the I/O scheduler
* looks for new operations to issue.
*
* All I/O classes have a fixed maximum number of outstanding operations
@@ -70,7 +70,7 @@
* transaction groups (see txg.c). Transaction groups enter the syncing state
* periodically so the number of queued async writes will quickly burst up and
* then bleed down to zero. Rather than servicing them as quickly as possible,
- * the I/O scheduler changes the maximum number of active async write i/os
+ * the I/O scheduler changes the maximum number of active async write I/Os
* according to the amount of dirty data in the pool (see dsl_pool.c). Since
* both throughput and latency typically increase with the number of
* concurrent operations issued to physical devices, reducing the burstiness
@@ -113,14 +113,14 @@
*/
/*
- * The maximum number of i/os active to each device. Ideally, this will be >=
+ * The maximum number of I/Os active to each device. Ideally, this will be >=
* the sum of each queue's max_active. It must be at least the sum of each
* queue's min_active.
*/
uint32_t zfs_vdev_max_active = 1000;
/*
- * Per-queue limits on the number of i/os active to each device. If the
+ * Per-queue limits on the number of I/Os active to each device. If the
* sum of the queue's max_active is < zfs_vdev_max_active, then the
* min_active comes into play. We will send min_active from each queue,
* and then select from queues in the order defined by zio_priority_t.
@@ -145,6 +145,14 @@ uint32_t zfs_vdev_async_write_min_active = 1;
uint32_t zfs_vdev_async_write_max_active = 10;
uint32_t zfs_vdev_scrub_min_active = 1;
uint32_t zfs_vdev_scrub_max_active = 2;
+uint32_t zfs_vdev_trim_min_active = 1;
+/*
+ * TRIM max active is large in comparison to the other values due to the fact
+ * that TRIM IOs are coalesced at the device layer. This value is set such
+ * that a typical SSD can process the queued IOs in a single request.
+ */
+uint32_t zfs_vdev_trim_max_active = 64;
+
/*
* When the pool has less than zfs_vdev_async_write_active_min_dirty_percent
@@ -171,7 +179,7 @@ SYSCTL_DECL(_vfs_zfs_vdev);
TUNABLE_INT("vfs.zfs.vdev.max_active", &zfs_vdev_max_active);
SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, max_active, CTLFLAG_RW,
&zfs_vdev_max_active, 0,
- "The maximum number of i/os of all types active for each device.");
+ "The maximum number of I/Os of all types active for each device.");
#define ZFS_VDEV_QUEUE_KNOB_MIN(name) \
TUNABLE_INT("vfs.zfs.vdev." #name "_min_active", \
@@ -199,6 +207,8 @@ ZFS_VDEV_QUEUE_KNOB_MIN(async_write);
ZFS_VDEV_QUEUE_KNOB_MAX(async_write);
ZFS_VDEV_QUEUE_KNOB_MIN(scrub);
ZFS_VDEV_QUEUE_KNOB_MAX(scrub);
+ZFS_VDEV_QUEUE_KNOB_MIN(trim);
+ZFS_VDEV_QUEUE_KNOB_MAX(trim);
#undef ZFS_VDEV_QUEUE_KNOB
@@ -299,6 +309,7 @@ static void
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
+ ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
@@ -315,6 +326,7 @@ static void
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
+ ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
@@ -403,6 +415,8 @@ vdev_queue_class_min_active(zio_priority_t p)
return (zfs_vdev_async_write_min_active);
case ZIO_PRIORITY_SCRUB:
return (zfs_vdev_scrub_min_active);
+ case ZIO_PRIORITY_TRIM:
+ return (zfs_vdev_trim_min_active);
default:
panic("invalid priority %u", p);
return (0);
@@ -454,6 +468,8 @@ vdev_queue_class_max_active(spa_t *spa, zio_priority_t p)
spa->spa_dsl_pool->dp_dirty_total));
case ZIO_PRIORITY_SCRUB:
return (zfs_vdev_scrub_max_active);
+ case ZIO_PRIORITY_TRIM:
+ return (zfs_vdev_trim_max_active);
default:
panic("invalid priority %u", p);
return (0);
@@ -470,6 +486,8 @@ vdev_queue_class_to_issue(vdev_queue_t *vq)
spa_t *spa = vq->vq_vdev->vdev_spa;
zio_priority_t p;
+ ASSERT(MUTEX_HELD(&vq->vq_lock));
+
if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active)
return (ZIO_PRIORITY_NUM_QUEUEABLE);
@@ -511,10 +529,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
zio_t *first, *last, *aio, *dio, *mandatory, *nio;
uint64_t maxgap = 0;
uint64_t size;
- boolean_t stretch = B_FALSE;
- vdev_queue_class_t *vqc = &vq->vq_class[zio->io_priority];
- avl_tree_t *t = &vqc->vqc_queued_tree;
- enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+ boolean_t stretch;
+ avl_tree_t *t;
+ enum zio_flag flags;
+
+ ASSERT(MUTEX_HELD(&vq->vq_lock));
if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
return (NULL);
@@ -552,6 +571,8 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
* Walk backwards through sufficiently contiguous I/Os
* recording the last non-option I/O.
*/
+ flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+ t = &vq->vq_class[zio->io_priority].vqc_queued_tree;
while ((dio = AVL_PREV(t, first)) != NULL &&
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
@@ -591,6 +612,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
* non-optional I/O is close enough to make aggregation
* worthwhile.
*/
+ stretch = B_FALSE;
if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) {
zio_t *nio = last;
while ((dio = AVL_NEXT(t, nio)) != NULL &&
@@ -731,11 +753,13 @@ vdev_queue_io(zio_t *zio)
zio->io_priority != ZIO_PRIORITY_ASYNC_READ &&
zio->io_priority != ZIO_PRIORITY_SCRUB)
zio->io_priority = ZIO_PRIORITY_ASYNC_READ;
- } else {
- ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+ } else if (zio->io_type == ZIO_TYPE_WRITE) {
if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE &&
zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE)
zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE;
+ } else {
+ ASSERT(zio->io_type == ZIO_TYPE_FREE);
+ zio->io_priority = ZIO_PRIORITY_TRIM;
}
zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
index 95f6bbd..d0940bf 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -788,6 +788,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
else if (BP_IS_GANG(bp) || BP_GET_DEDUP(bp))
stage |= ZIO_STAGE_ISSUE_ASYNC;
+ flags |= ZIO_FLAG_DONT_QUEUE;
+
zio = zio_create(pio, spa, txg, bp, NULL, size,
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW, flags,
NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
@@ -827,14 +829,14 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio_t *
zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, uint64_t offset,
uint64_t size, zio_done_func_t *done, void *private,
- enum zio_flag flags)
+ zio_priority_t priority, enum zio_flag flags)
{
zio_t *zio;
int c;
if (vd->vdev_children == 0) {
zio = zio_create(pio, spa, 0, NULL, NULL, size, done, private,
- ZIO_TYPE_IOCTL, ZIO_PRIORITY_NOW, flags, vd, offset, NULL,
+ ZIO_TYPE_IOCTL, priority, flags, vd, offset, NULL,
ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
zio->io_cmd = cmd;
@@ -843,7 +845,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, uint64_t offset,
for (c = 0; c < vd->vdev_children; c++)
zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd,
- offset, size, done, private, flags));
+ offset, size, done, private, priority, flags));
}
return (zio);
@@ -928,6 +930,10 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
}
+ /* Not all IO types require vdev io done stage e.g. free */
+ if (!(pio->io_pipeline & ZIO_STAGE_VDEV_IO_DONE))
+ pipeline &= ~ZIO_STAGE_VDEV_IO_DONE;
+
if (vd->vdev_children == 0)
offset += VDEV_LABEL_START_SIZE;
@@ -973,7 +979,7 @@ void
zio_flush(zio_t *zio, vdev_t *vd)
{
zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE, 0, 0,
- NULL, NULL,
+ NULL, NULL, ZIO_PRIORITY_NOW,
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY));
}
@@ -983,9 +989,10 @@ zio_trim(zio_t *zio, spa_t *spa, vdev_t *vd, uint64_t offset, uint64_t size)
ASSERT(vd->vdev_ops->vdev_op_leaf);
- return zio_ioctl(zio, spa, vd, DKIOCTRIM, offset, size,
- NULL, NULL,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY);
+ return (zio_create(zio, spa, 0, NULL, NULL, size, NULL, NULL,
+ ZIO_TYPE_FREE, ZIO_PRIORITY_TRIM, ZIO_FLAG_DONT_AGGREGATE |
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY,
+ vd, offset, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PHYS_PIPELINE));
}
void
@@ -2530,7 +2537,8 @@ zio_vdev_io_start(zio_t **ziop)
return (vdev_mirror_ops.vdev_op_io_start(zio));
}
- if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE) {
+ if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE &&
+ zio->io_priority == ZIO_PRIORITY_NOW) {
trim_map_free(vd, zio->io_offset, zio->io_size, zio->io_txg);
return (ZIO_PIPELINE_CONTINUE);
}
@@ -2598,31 +2606,33 @@ zio_vdev_io_start(zio_t **ziop)
return (ZIO_PIPELINE_CONTINUE);
}
- if (vd->vdev_ops->vdev_op_leaf &&
- (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) {
-
- if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
- return (ZIO_PIPELINE_CONTINUE);
-
- if ((zio = vdev_queue_io(zio)) == NULL)
- return (ZIO_PIPELINE_STOP);
- *ziop = zio;
+ if (vd->vdev_ops->vdev_op_leaf) {
+ switch (zio->io_type) {
+ case ZIO_TYPE_READ:
+ if (vdev_cache_read(zio))
+ return (ZIO_PIPELINE_CONTINUE);
+ /* FALLTHROUGH */
+ case ZIO_TYPE_WRITE:
+ case ZIO_TYPE_FREE:
+ if ((zio = vdev_queue_io(zio)) == NULL)
+ return (ZIO_PIPELINE_STOP);
+ *ziop = zio;
- if (!vdev_accessible(vd, zio)) {
- zio->io_error = SET_ERROR(ENXIO);
- zio_interrupt(zio);
- return (ZIO_PIPELINE_STOP);
+ if (!vdev_accessible(vd, zio)) {
+ zio->io_error = SET_ERROR(ENXIO);
+ zio_interrupt(zio);
+ return (ZIO_PIPELINE_STOP);
+ }
+ break;
}
- }
-
- /*
- * Note that we ignore repair writes for TRIM because they can conflict
- * with normal writes. This isn't an issue because, by definition, we
- * only repair blocks that aren't freed.
- */
- if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_WRITE &&
- !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) {
- if (!trim_map_write_start(zio))
+ /*
+ * Note that we ignore repair writes for TRIM because they can
+ * conflict with normal writes. This isn't an issue because, by
+ * definition, we only repair blocks that aren't freed.
+ */
+ if (zio->io_type == ZIO_TYPE_WRITE &&
+ !(zio->io_flags & ZIO_FLAG_IO_REPAIR) &&
+ !trim_map_write_start(zio))
return (ZIO_PIPELINE_STOP);
}
@@ -2644,7 +2654,8 @@ zio_vdev_io_done(zio_t **ziop)
zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_FREE);
if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
- (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) {
+ (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE ||
+ zio->io_type == ZIO_TYPE_FREE)) {
if (zio->io_type == ZIO_TYPE_WRITE &&
!(zio->io_flags & ZIO_FLAG_IO_REPAIR))
@@ -2725,7 +2736,8 @@ zio_vdev_io_assess(zio_t **ziop)
if (zio_injection_enabled && zio->io_error == 0)
zio->io_error = zio_handle_fault_injection(zio, EIO);
- if (zio->io_type == ZIO_TYPE_IOCTL && zio->io_cmd == DKIOCTRIM)
+ if (zio->io_type == ZIO_TYPE_FREE &&
+ zio->io_priority != ZIO_PRIORITY_NOW) {
switch (zio->io_error) {
case 0:
ZIO_TRIM_STAT_INCR(bytes, zio->io_size);
@@ -2738,6 +2750,7 @@ zio_vdev_io_assess(zio_t **ziop)
ZIO_TRIM_STAT_BUMP(failed);
break;
}
+ }
/*
* If the I/O failed, determine whether we should attempt to retry it.
OpenPOWER on IntegriCloud