summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2014-05-20 11:49:02 -0600
committerJens Axboe <axboe@fb.com>2014-05-20 11:49:02 -0600
commite3a2b3f931f59d5284abd13faf8bded726884ffd (patch)
treef5426a4745996e95afc2f01f826e846710929dc2
parent64b14519e5913e8d4de9f2e5d9ef59abba3ed83d (diff)
downloadop-kernel-dev-e3a2b3f931f59d5284abd13faf8bded726884ffd.zip
op-kernel-dev-e3a2b3f931f59d5284abd13faf8bded726884ffd.tar.gz
blk-mq: allow changing of queue depth through sysfs
For request_fn based devices, the block layer exports a 'nr_requests' file through sysfs to allow adjusting of queue depth on the fly. Currently this returns -EINVAL for blk-mq, since it's not wired up. Wire this up for blk-mq, so that it now also always dynamic adjustments of the allowed queue depth for any given block device managed by blk-mq. Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--block/blk-core.c41
-rw-r--r--block/blk-mq-tag.c80
-rw-r--r--block/blk-mq-tag.h1
-rw-r--r--block/blk-mq.c22
-rw-r--r--block/blk-mq.h1
-rw-r--r--block/blk-sysfs.c45
-rw-r--r--block/blk.h2
-rw-r--r--include/linux/blk-mq.h2
8 files changed, 134 insertions, 60 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index a6bd3e7..fe81e19 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
__freed_request(rl, sync ^ 1);
}
+int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+ struct request_list *rl;
+
+ spin_lock_irq(q->queue_lock);
+ q->nr_requests = nr;
+ blk_queue_congestion_threshold(q);
+
+ /* congestion isn't cgroup aware and follows root blkcg for now */
+ rl = &q->root_rl;
+
+ if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
+ blk_set_queue_congested(q, BLK_RW_SYNC);
+ else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
+ blk_clear_queue_congested(q, BLK_RW_SYNC);
+
+ if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
+ blk_set_queue_congested(q, BLK_RW_ASYNC);
+ else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
+ blk_clear_queue_congested(q, BLK_RW_ASYNC);
+
+ blk_queue_for_each_rl(rl, q) {
+ if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+ blk_set_rl_full(rl, BLK_RW_SYNC);
+ } else {
+ blk_clear_rl_full(rl, BLK_RW_SYNC);
+ wake_up(&rl->wait[BLK_RW_SYNC]);
+ }
+
+ if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+ blk_set_rl_full(rl, BLK_RW_ASYNC);
+ } else {
+ blk_clear_rl_full(rl, BLK_RW_ASYNC);
+ wake_up(&rl->wait[BLK_RW_ASYNC]);
+ }
+ }
+
+ spin_unlock_irq(q->queue_lock);
+ return 0;
+}
+
/*
* Determine if elevator data should be initialized when allocating the
* request associated with @bio.
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e6b3fba..f6dea96 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
}
/*
- * If a previously busy queue goes inactive, potential waiters could now
- * be allowed to queue. Wake them up and check.
+ * Wakeup all potentially sleeping on normal (non-reserved) tags
*/
-void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
{
- struct blk_mq_tags *tags = hctx->tags;
struct blk_mq_bitmap_tags *bt;
int i, wake_index;
- if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
- return;
-
- atomic_dec(&tags->active_queues);
-
- /*
- * Will only throttle depth on non-reserved tags
- */
bt = &tags->bitmap_tags;
wake_index = bt->wake_index;
for (i = 0; i < BT_WAIT_QUEUES; i++) {
@@ -87,6 +77,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
}
/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+ struct blk_mq_tags *tags = hctx->tags;
+
+ if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+ return;
+
+ atomic_dec(&tags->active_queues);
+
+ blk_mq_tag_wakeup_all(tags);
+}
+
+/*
* For shared tag users, we track the number of currently active users
* and attempt to provide a fair share of the tag depth for each of them.
*/
@@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
return bt->depth - used;
}
+static void bt_update_count(struct blk_mq_bitmap_tags *bt,
+ unsigned int depth)
+{
+ unsigned int tags_per_word = 1U << bt->bits_per_word;
+ unsigned int map_depth = depth;
+
+ if (depth) {
+ int i;
+
+ for (i = 0; i < bt->map_nr; i++) {
+ bt->map[i].depth = min(map_depth, tags_per_word);
+ map_depth -= bt->map[i].depth;
+ }
+ }
+
+ bt->wake_cnt = BT_WAIT_BATCH;
+ if (bt->wake_cnt > depth / 4)
+ bt->wake_cnt = max(1U, depth / 4);
+
+ bt->depth = depth;
+}
+
static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
int node, bool reserved)
{
@@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
* condition.
*/
if (depth) {
- unsigned int nr, i, map_depth, tags_per_word;
+ unsigned int nr, tags_per_word;
tags_per_word = (1 << bt->bits_per_word);
@@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
return -ENOMEM;
bt->map_nr = nr;
- map_depth = depth;
- for (i = 0; i < nr; i++) {
- bt->map[i].depth = min(map_depth, tags_per_word);
- map_depth -= tags_per_word;
- }
}
bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
@@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
for (i = 0; i < BT_WAIT_QUEUES; i++)
init_waitqueue_head(&bt->bs[i].wait);
- bt->wake_cnt = BT_WAIT_BATCH;
- if (bt->wake_cnt > depth / 4)
- bt->wake_cnt = max(1U, depth / 4);
-
- bt->depth = depth;
+ bt_update_count(bt, depth);
return 0;
}
@@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
*tag = prandom_u32() % depth;
}
+int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
+{
+ tdepth -= tags->nr_reserved_tags;
+ if (tdepth > tags->nr_tags)
+ return -EINVAL;
+
+ /*
+ * Don't need (or can't) update reserved tags here, they remain
+ * static and should never need resizing.
+ */
+ bt_update_count(&tags->bitmap_tags, tdepth);
+ blk_mq_tag_wakeup_all(tags);
+ return 0;
+}
+
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
char *orig_page = page;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index e144f68..e7ff5ce 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
+extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
enum {
BLK_MQ_TAG_CACHE_MIN = 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0fbef7e..7b71ab1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
}
EXPORT_SYMBOL(blk_mq_free_tag_set);
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+ struct blk_mq_tag_set *set = q->tag_set;
+ struct blk_mq_hw_ctx *hctx;
+ int i, ret;
+
+ if (!set || nr > set->queue_depth)
+ return -EINVAL;
+
+ ret = 0;
+ queue_for_each_hw_ctx(q, hctx, i) {
+ ret = blk_mq_tag_update_depth(hctx->tags, nr);
+ if (ret)
+ break;
+ }
+
+ if (!ret)
+ q->nr_requests = nr;
+
+ return ret;
+}
+
void blk_mq_disable_hotplug(void)
{
mutex_lock(&all_q_mutex);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5e5a378..7db4fe4 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
void blk_mq_clone_flush_request(struct request *flush_rq,
struct request *orig_rq);
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
/*
* CPU hotplug helpers
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7500f87..4d6811a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
static ssize_t
queue_requests_store(struct request_queue *q, const char *page, size_t count)
{
- struct request_list *rl;
unsigned long nr;
- int ret;
+ int ret, err;
- if (!q->request_fn)
+ if (!q->request_fn && !q->mq_ops)
return -EINVAL;
ret = queue_var_store(&nr, page, count);
@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ;
- spin_lock_irq(q->queue_lock);
- q->nr_requests = nr;
- blk_queue_congestion_threshold(q);
-
- /* congestion isn't cgroup aware and follows root blkcg for now */
- rl = &q->root_rl;
-
- if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
- blk_set_queue_congested(q, BLK_RW_SYNC);
- else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
- blk_clear_queue_congested(q, BLK_RW_SYNC);
-
- if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
- blk_set_queue_congested(q, BLK_RW_ASYNC);
- else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
- blk_clear_queue_congested(q, BLK_RW_ASYNC);
-
- blk_queue_for_each_rl(rl, q) {
- if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
- blk_set_rl_full(rl, BLK_RW_SYNC);
- } else {
- blk_clear_rl_full(rl, BLK_RW_SYNC);
- wake_up(&rl->wait[BLK_RW_SYNC]);
- }
-
- if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
- blk_set_rl_full(rl, BLK_RW_ASYNC);
- } else {
- blk_clear_rl_full(rl, BLK_RW_ASYNC);
- wake_up(&rl->wait[BLK_RW_ASYNC]);
- }
- }
+ if (q->request_fn)
+ err = blk_update_nr_requests(q, nr);
+ else
+ err = blk_mq_update_nr_requests(q, nr);
+
+ if (err)
+ return err;
- spin_unlock_irq(q->queue_lock);
return ret;
}
diff --git a/block/blk.h b/block/blk.h
index 95cab70..45385e9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
return q->nr_congestion_off;
}
+extern int blk_update_nr_requests(struct request_queue *, unsigned int);
+
/*
* Contribute to IO statistics IFF:
*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a06ca7b..f454244 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -63,7 +63,7 @@ struct blk_mq_hw_ctx {
struct blk_mq_tag_set {
struct blk_mq_ops *ops;
unsigned int nr_hw_queues;
- unsigned int queue_depth;
+ unsigned int queue_depth; /* max hw supported */
unsigned int reserved_tags;
unsigned int cmd_size; /* per-request extra data */
int numa_node;
OpenPOWER on IntegriCloud