From 9bddeb2a5b981507cbe2d7bdb545c32f204109c7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 May 2017 19:53:20 +0800 Subject: blk-mq: make per-sw-queue bio merge as default .bio_merge Because what the per-sw-queue bio merge does is basically same with scheduler's .bio_merge(), this patch makes per-sw-queue bio merge as the default .bio_merge if no scheduler is used or io scheduler doesn't provide .bio_merge(). Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 5 deletions(-) (limited to 'block/blk-mq-sched.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1f5b692..c4e2afb 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -221,19 +221,71 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, } EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); +/* + * Reverse check our software queue for entries that we could potentially + * merge with. Currently includes a hand-wavy stop count of 8, to not spend + * too much time checking for merges. + */ +static bool blk_mq_attempt_merge(struct request_queue *q, + struct blk_mq_ctx *ctx, struct bio *bio) +{ + struct request *rq; + int checked = 8; + + list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { + bool merged = false; + + if (!checked--) + break; + + if (!blk_rq_merge_ok(rq, bio)) + continue; + + switch (blk_try_merge(rq, bio)) { + case ELEVATOR_BACK_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_back_merge(q, rq, bio); + break; + case ELEVATOR_FRONT_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_front_merge(q, rq, bio); + break; + case ELEVATOR_DISCARD_MERGE: + merged = bio_attempt_discard_merge(q, rq, bio); + break; + default: + continue; + } + + if (merged) + ctx->rq_merged++; + return merged; + } + + return false; +} + bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) { struct elevator_queue *e = q->elevator; + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + bool ret = false; - if (e->type->ops.mq.bio_merge) { - struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); - + if (e && e->type->ops.mq.bio_merge) { blk_mq_put_ctx(ctx); return e->type->ops.mq.bio_merge(hctx, bio); } - return false; + if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) { + /* default per sw-queue merge */ + spin_lock(&ctx->lock); + ret = blk_mq_attempt_merge(q, ctx, bio); + spin_unlock(&ctx->lock); + } + + blk_mq_put_ctx(ctx); + return ret; } bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) -- cgit v1.1 From d2c0d3832469b947ca158e8977e66e8e2e64d8dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:19 +0200 Subject: blk-mq: move blk_mq_sched_{get,put}_request to blk-mq.c Having them out of line in blk-mq-sched.c just makes the code flow unnecessarily complicated. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 69 ++-------------------------------------------------- 1 file changed, 2 insertions(+), 67 deletions(-) (limited to 'block/blk-mq-sched.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c4e2afb..62db188 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -58,8 +58,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, rq->elv.icq = NULL; } -static void blk_mq_sched_assign_ioc(struct request_queue *q, - struct request *rq, struct bio *bio) +void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, + struct bio *bio) { struct io_context *ioc; @@ -68,71 +68,6 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q, __blk_mq_sched_assign_ioc(q, rq, bio, ioc); } -struct request *blk_mq_sched_get_request(struct request_queue *q, - struct bio *bio, - unsigned int op, - struct blk_mq_alloc_data *data) -{ - struct elevator_queue *e = q->elevator; - struct request *rq; - - blk_queue_enter_live(q); - data->q = q; - if (likely(!data->ctx)) - data->ctx = blk_mq_get_ctx(q); - if (likely(!data->hctx)) - data->hctx = blk_mq_map_queue(q, data->ctx->cpu); - - if (e) { - data->flags |= BLK_MQ_REQ_INTERNAL; - - /* - * Flush requests are special and go directly to the - * dispatch list. - */ - if (!op_is_flush(op) && e->type->ops.mq.get_request) { - rq = e->type->ops.mq.get_request(q, op, data); - if (rq) - rq->rq_flags |= RQF_QUEUED; - } else - rq = __blk_mq_alloc_request(data, op); - } else { - rq = __blk_mq_alloc_request(data, op); - } - - if (rq) { - if (!op_is_flush(op)) { - rq->elv.icq = NULL; - if (e && e->type->icq_cache) - blk_mq_sched_assign_ioc(q, rq, bio); - } - data->hctx->queued++; - return rq; - } - - blk_queue_exit(q); - return NULL; -} - -void blk_mq_sched_put_request(struct request *rq) -{ - struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; - - if (rq->rq_flags & RQF_ELVPRIV) { - blk_mq_sched_put_rq_priv(rq->q, rq); - if (rq->elv.icq) { - put_io_context(rq->elv.icq->ioc); - rq->elv.icq = NULL; - } - } - - if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request) - e->type->ops.mq.put_request(rq); - else - blk_mq_finish_request(rq); -} - void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; -- cgit v1.1 From ea511e3c28c892f689173c91662437c4ddb2ab38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:20 +0200 Subject: blk-mq: remove blk_mq_sched_{get,put}_rq_priv Having these as separate helpers in a header really does not help readability, or my chances to refactor this code sanely. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'block/blk-mq-sched.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 62db188..22601e5 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -36,6 +36,7 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, struct bio *bio, struct io_context *ioc) { + struct elevator_queue *e = q->elevator; struct io_cq *icq; spin_lock_irq(q->queue_lock); @@ -49,13 +50,14 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, } rq->elv.icq = icq; - if (!blk_mq_sched_get_rq_priv(q, rq, bio)) { - rq->rq_flags |= RQF_ELVPRIV; - get_io_context(icq->ioc); + if (e && e->type->ops.mq.get_rq_priv && + e->type->ops.mq.get_rq_priv(q, rq, bio)) { + rq->elv.icq = NULL; return; } - rq->elv.icq = NULL; + rq->rq_flags |= RQF_ELVPRIV; + get_io_context(icq->ioc); } void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, -- cgit v1.1 From 44e8c2bff80bb384a608406009948f90a78bf8a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:25 +0200 Subject: blk-mq: refactor blk_mq_sched_assign_ioc blk_mq_sched_assign_ioc now only handles the assigned of the ioc if the schedule needs it (bfq only at the moment). The caller to the per-request initializer is moved out so that it can be merged with a similar call for the kyber I/O scheduler. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) (limited to 'block/blk-mq-sched.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 22601e5..254d1c1 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -31,12 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); -static void __blk_mq_sched_assign_ioc(struct request_queue *q, - struct request *rq, - struct bio *bio, - struct io_context *ioc) +void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio) { - struct elevator_queue *e = q->elevator; + struct request_queue *q = rq->q; + struct io_context *ioc = rq_ioc(bio); struct io_cq *icq; spin_lock_irq(q->queue_lock); @@ -48,26 +46,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, if (!icq) return; } - - rq->elv.icq = icq; - if (e && e->type->ops.mq.get_rq_priv && - e->type->ops.mq.get_rq_priv(q, rq, bio)) { - rq->elv.icq = NULL; - return; - } - - rq->rq_flags |= RQF_ELVPRIV; get_io_context(icq->ioc); -} - -void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, - struct bio *bio) -{ - struct io_context *ioc; - - ioc = rq_ioc(bio); - if (ioc) - __blk_mq_sched_assign_ioc(q, rq, bio, ioc); + rq->elv.icq = icq; } void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) -- cgit v1.1 From f4560ffe8cec1361b1021d81aca6a4173f8e7c87 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 18 Jun 2017 14:24:27 -0600 Subject: blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue It is required that no dispatch can happen any more once blk_mq_quiesce_queue() returns, and we don't have such requirement on APIs of stopping queue. But blk_mq_quiesce_queue() still may not block/drain dispatch in the the case of BLK_MQ_S_START_ON_RUN, so use the new introduced flag of QUEUE_FLAG_QUIESCED and evaluate it inside RCU read-side critical sections for fixing this issue. Also blk_mq_quiesce_queue() is implemented via stopping queue, which limits its uses, and easy to cause race, because any queue restart in other paths may break blk_mq_quiesce_queue(). With the introduced flag of QUEUE_FLAG_QUIESCED, we don't need to depend on stopping queue for quiescing any more. Signed-off-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'block/blk-mq-sched.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 254d1c1..9f02528 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -58,7 +58,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) bool did_work = false; LIST_HEAD(rq_list); - if (unlikely(blk_mq_hctx_stopped(hctx))) + /* RCU or SRCU read lock is needed before checking quiesced flag */ + if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) return; hctx->run++; -- cgit v1.1 From 7b6078146ccbe9bd165d578586b10ea092ac489e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:47 -0700 Subject: blk-mq: Document locking assumptions Document the locking assumptions in functions that modify blk_mq_ctx.rq_list to make it easier for humans to verify this code. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'block/blk-mq-sched.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 9f02528..191bf82 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -150,6 +150,8 @@ static bool blk_mq_attempt_merge(struct request_queue *q, struct request *rq; int checked = 8; + lockdep_assert_held(&ctx->lock); + list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { bool merged = false; -- cgit v1.1