summary | refs | log | tree | commit | diff | stats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
diff options
context:
space:
mode:
author: mav <mav@FreeBSD.org> 2015-10-03 07:32:34 +0000
committer: mav <mav@FreeBSD.org> 2015-10-03 07:32:34 +0000
commit: 6eeac1fd52e44e90b32d7ba537b3d3def3e2e558 (patch)
tree: b903d5bfa0bcc1fc7b40a4cee8bfa8010812cc91 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
parent: 53e7b45f2e0fdd71ccf762ad374cf78c9d263d25 (diff)
download: FreeBSD-src-6eeac1fd52e44e90b32d7ba537b3d3def3e2e558.zip
download: FreeBSD-src-6eeac1fd52e44e90b32d7ba537b3d3def3e2e558.tar.gz
MFC r286579: 5313 Allow I/Os to be aggregated across ZIO priority classes
Reviewed by: Andriy Gapon <avg@FreeBSD.org>
Reviewed by: Will Andrews <willa@SpectraLogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Author: Justin T. Gibbs <justing@spectralogic.com>
illumos/illumos-gate@fe319232d24f4ae183730a5a24a09423d8ab4429
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c')
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c 68
1 files changed, 48 insertions, 20 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
index 1dba319..245f360 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
@@ -302,6 +302,22 @@ vdev_queue_offset_compare(const void *x1, const void *x2)
return (0);
}
+static inline avl_tree_t *
+vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
+{
+ return (&vq->vq_class[p].vqc_queued_tree);
+}
+
+static inline avl_tree_t *
+vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
+{
+ ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE);
+ if (t == ZIO_TYPE_READ)
+ return (&vq->vq_read_offset_tree);
+ else
+ return (&vq->vq_write_offset_tree);
+}
+
int
vdev_queue_timestamp_compare(const void *x1, const void *x2)
{
@@ -336,19 +352,27 @@ vdev_queue_init(vdev_t *vd)
avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
sizeof (zio_t), offsetof(struct zio, io_queue_node));
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
+ int (*compfn) (const void *, const void *);
+
/*
- * The synchronous i/o queues are FIFO rather than LBA ordered.
- * This provides more consistent latency for these i/os, and
- * they tend to not be tightly clustered anyway so there is
- * little to no throughput loss.
+ * The synchronous i/o queues are dispatched in FIFO rather
+ * than LBA order. This provides more consistent latency for
+ * these i/os.
*/
- boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
- p == ZIO_PRIORITY_SYNC_WRITE);
- avl_create(&vq->vq_class[p].vqc_queued_tree,
- fifo ? vdev_queue_timestamp_compare :
- vdev_queue_offset_compare,
+ if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE)
+ compfn = vdev_queue_timestamp_compare;
+ else
+ compfn = vdev_queue_offset_compare;
+
+ avl_create(vdev_queue_class_tree(vq, p), compfn,
sizeof (zio_t), offsetof(struct zio, io_queue_node));
}
@@ -361,8 +385,10 @@ vdev_queue_fini(vdev_t *vd)
vdev_queue_t *vq = &vd->vdev_queue;
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
- avl_destroy(&vq->vq_class[p].vqc_queued_tree);
+ avl_destroy(vdev_queue_class_tree(vq, p));
avl_destroy(&vq->vq_active_tree);
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
mutex_destroy(&vq->vq_lock);
}
@@ -373,7 +399,8 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
spa_t *spa = zio->io_spa;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+ avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
#ifdef illumos
mutex_enter(&spa->spa_iokstat_lock);
@@ -390,7 +417,8 @@ vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
spa_t *spa = zio->io_spa;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+ avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
#ifdef illumos
mutex_enter(&spa->spa_iokstat_lock);
@@ -563,7 +591,7 @@ vdev_queue_class_to_issue(vdev_queue_t *vq)
/* find a queue that has not reached its minimum # outstanding i/os */
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+ if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
vq->vq_class[p].vqc_active <
vdev_queue_class_min_active(p))
return (p);
@@ -574,7 +602,7 @@ vdev_queue_class_to_issue(vdev_queue_t *vq)
* maximum # outstanding i/os.
*/
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+ if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
vq->vq_class[p].vqc_active <
vdev_queue_class_max_active(spa, p))
return (p);
@@ -642,7 +670,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
* recording the last non-option I/O.
*/
flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
- t = &vq->vq_class[zio->io_priority].vqc_queued_tree;
+ t = vdev_queue_type_tree(vq, zio->io_type);
while ((dio = AVL_PREV(t, first)) != NULL &&
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
@@ -751,7 +779,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq)
zio_t *zio, *aio;
zio_priority_t p;
avl_index_t idx;
- vdev_queue_class_t *vqc;
+ avl_tree_t *tree;
zio_t search;
again:
@@ -770,13 +798,13 @@ again:
*
* For FIFO queues (sync), issue the i/o with the lowest timestamp.
*/
- vqc = &vq->vq_class[p];
+ tree = vdev_queue_class_tree(vq, p);
search.io_timestamp = 0;
search.io_offset = vq->vq_last_offset + 1;
- VERIFY3P(avl_find(&vqc->vqc_queued_tree, &search, &idx), ==, NULL);
- zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER);
+ VERIFY3P(avl_find(tree, &search, &idx), ==, NULL);
+ zio = avl_nearest(tree, idx, AVL_AFTER);
if (zio == NULL)
- zio = avl_first(&vqc->vqc_queued_tree);
+ zio = avl_first(tree);
ASSERT3U(zio->io_priority, ==, p);
aio = vdev_queue_aggregate(vq, zio);
OpenPOWER on IntegriCloud