From 9bddeb2a5b981507cbe2d7bdb545c32f204109c7 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 26 May 2017 19:53:20 +0800
Subject: blk-mq: make per-sw-queue bio merge as default .bio_merge

Because what the per-sw-queue bio merge does is basically same with
scheduler's .bio_merge(), this patch makes per-sw-queue bio merge
as the default .bio_merge if no scheduler is used or io scheduler
doesn't provide .bio_merge().

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 5 deletions(-)

(limited to 'block/blk-mq-sched.c')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692..c4e2afb 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -221,19 +221,71 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
 
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_ctx *ctx, struct bio *bio)
+{
+	struct request *rq;
+	int checked = 8;
+
+	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+		bool merged = false;
+
+		if (!checked--)
+			break;
+
+		if (!blk_rq_merge_ok(rq, bio))
+			continue;
+
+		switch (blk_try_merge(rq, bio)) {
+		case ELEVATOR_BACK_MERGE:
+			if (blk_mq_sched_allow_merge(q, rq, bio))
+				merged = bio_attempt_back_merge(q, rq, bio);
+			break;
+		case ELEVATOR_FRONT_MERGE:
+			if (blk_mq_sched_allow_merge(q, rq, bio))
+				merged = bio_attempt_front_merge(q, rq, bio);
+			break;
+		case ELEVATOR_DISCARD_MERGE:
+			merged = bio_attempt_discard_merge(q, rq, bio);
+			break;
+		default:
+			continue;
+		}
+
+		if (merged)
+			ctx->rq_merged++;
+		return merged;
+	}
+
+	return false;
+}
+
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
 	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	bool ret = false;
 
-	if (e->type->ops.mq.bio_merge) {
-		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
-		struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
+	if (e && e->type->ops.mq.bio_merge) {
 		blk_mq_put_ctx(ctx);
 		return e->type->ops.mq.bio_merge(hctx, bio);
 	}
 
-	return false;
+	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
+		/* default per sw-queue merge */
+		spin_lock(&ctx->lock);
+		ret = blk_mq_attempt_merge(q, ctx, bio);
+		spin_unlock(&ctx->lock);
+	}
+
+	blk_mq_put_ctx(ctx);
+	return ret;
 }
 
 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
-- 
cgit v1.1


From d2c0d3832469b947ca158e8977e66e8e2e64d8dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 16 Jun 2017 18:15:19 +0200
Subject: blk-mq: move blk_mq_sched_{get,put}_request to blk-mq.c

Having them out of line in blk-mq-sched.c just makes the code flow
unnecessarily complicated.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 69 ++--------------------------------------------------
 1 file changed, 2 insertions(+), 67 deletions(-)

(limited to 'block/blk-mq-sched.c')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c4e2afb..62db188 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -58,8 +58,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
 	rq->elv.icq = NULL;
 }
 
-static void blk_mq_sched_assign_ioc(struct request_queue *q,
-				    struct request *rq, struct bio *bio)
+void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq,
+			     struct bio *bio)
 {
 	struct io_context *ioc;
 
@@ -68,71 +68,6 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
 		__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
 }
 
-struct request *blk_mq_sched_get_request(struct request_queue *q,
-					 struct bio *bio,
-					 unsigned int op,
-					 struct blk_mq_alloc_data *data)
-{
-	struct elevator_queue *e = q->elevator;
-	struct request *rq;
-
-	blk_queue_enter_live(q);
-	data->q = q;
-	if (likely(!data->ctx))
-		data->ctx = blk_mq_get_ctx(q);
-	if (likely(!data->hctx))
-		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
-
-	if (e) {
-		data->flags |= BLK_MQ_REQ_INTERNAL;
-
-		/*
-		 * Flush requests are special and go directly to the
-		 * dispatch list.
-		 */
-		if (!op_is_flush(op) && e->type->ops.mq.get_request) {
-			rq = e->type->ops.mq.get_request(q, op, data);
-			if (rq)
-				rq->rq_flags |= RQF_QUEUED;
-		} else
-			rq = __blk_mq_alloc_request(data, op);
-	} else {
-		rq = __blk_mq_alloc_request(data, op);
-	}
-
-	if (rq) {
-		if (!op_is_flush(op)) {
-			rq->elv.icq = NULL;
-			if (e && e->type->icq_cache)
-				blk_mq_sched_assign_ioc(q, rq, bio);
-		}
-		data->hctx->queued++;
-		return rq;
-	}
-
-	blk_queue_exit(q);
-	return NULL;
-}
-
-void blk_mq_sched_put_request(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
-
-	if (rq->rq_flags & RQF_ELVPRIV) {
-		blk_mq_sched_put_rq_priv(rq->q, rq);
-		if (rq->elv.icq) {
-			put_io_context(rq->elv.icq->ioc);
-			rq->elv.icq = NULL;
-		}
-	}
-
-	if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
-		e->type->ops.mq.put_request(rq);
-	else
-		blk_mq_finish_request(rq);
-}
-
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
-- 
cgit v1.1


From ea511e3c28c892f689173c91662437c4ddb2ab38 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 16 Jun 2017 18:15:20 +0200
Subject: blk-mq: remove blk_mq_sched_{get,put}_rq_priv

Having these as separate helpers in a header really does not help
readability, or my chances to refactor this code sanely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'block/blk-mq-sched.c')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 62db188..22601e5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -36,6 +36,7 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
 				      struct bio *bio,
 				      struct io_context *ioc)
 {
+	struct elevator_queue *e = q->elevator;
 	struct io_cq *icq;
 
 	spin_lock_irq(q->queue_lock);
@@ -49,13 +50,14 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
 	}
 
 	rq->elv.icq = icq;
-	if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
-		rq->rq_flags |= RQF_ELVPRIV;
-		get_io_context(icq->ioc);
+	if (e && e->type->ops.mq.get_rq_priv &&
+	    e->type->ops.mq.get_rq_priv(q, rq, bio)) {
+		rq->elv.icq = NULL;
 		return;
 	}
 
-	rq->elv.icq = NULL;
+	rq->rq_flags |= RQF_ELVPRIV;
+	get_io_context(icq->ioc);
 }
 
 void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq,
-- 
cgit v1.1


From 44e8c2bff80bb384a608406009948f90a78bf8a3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 16 Jun 2017 18:15:25 +0200
Subject: blk-mq: refactor blk_mq_sched_assign_ioc

blk_mq_sched_assign_ioc now only handles the assigned of the ioc if
the schedule needs it (bfq only at the moment).  The caller to the
per-request initializer is moved out so that it can be merged with
a similar call for the kyber I/O scheduler.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

(limited to 'block/blk-mq-sched.c')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 22601e5..254d1c1 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -31,12 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
 
-static void __blk_mq_sched_assign_ioc(struct request_queue *q,
-				      struct request *rq,
-				      struct bio *bio,
-				      struct io_context *ioc)
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
 {
-	struct elevator_queue *e = q->elevator;
+	struct request_queue *q = rq->q;
+	struct io_context *ioc = rq_ioc(bio);
 	struct io_cq *icq;
 
 	spin_lock_irq(q->queue_lock);
@@ -48,26 +46,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
 		if (!icq)
 			return;
 	}
-
-	rq->elv.icq = icq;
-	if (e && e->type->ops.mq.get_rq_priv &&
-	    e->type->ops.mq.get_rq_priv(q, rq, bio)) {
-		rq->elv.icq = NULL;
-		return;
-	}
-
-	rq->rq_flags |= RQF_ELVPRIV;
 	get_io_context(icq->ioc);
-}
-
-void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq,
-			     struct bio *bio)
-{
-	struct io_context *ioc;
-
-	ioc = rq_ioc(bio);
-	if (ioc)
-		__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
+	rq->elv.icq = icq;
 }
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
-- 
cgit v1.1


From f4560ffe8cec1361b1021d81aca6a4173f8e7c87 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sun, 18 Jun 2017 14:24:27 -0600
Subject: blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue

It is required that no dispatch can happen any more once
blk_mq_quiesce_queue() returns, and we don't have such requirement
on APIs of stopping queue.

But blk_mq_quiesce_queue() still may not block/drain dispatch in the
the case of BLK_MQ_S_START_ON_RUN, so use the new introduced flag of
QUEUE_FLAG_QUIESCED and evaluate it inside RCU read-side critical
sections for fixing this issue.

Also blk_mq_quiesce_queue() is implemented via stopping queue, which
limits its uses, and easy to cause race, because any queue restart in
other paths may break blk_mq_quiesce_queue(). With the introduced
flag of QUEUE_FLAG_QUIESCED, we don't need to depend on stopping queue
for quiescing any more.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'block/blk-mq-sched.c')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 254d1c1..9f02528 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -58,7 +58,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	bool did_work = false;
 	LIST_HEAD(rq_list);
 
-	if (unlikely(blk_mq_hctx_stopped(hctx)))
+	/* RCU or SRCU read lock is needed before checking quiesced flag */
+	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
 		return;
 
 	hctx->run++;
-- 
cgit v1.1


From 7b6078146ccbe9bd165d578586b10ea092ac489e Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 20 Jun 2017 11:15:47 -0700
Subject: blk-mq: Document locking assumptions

Document the locking assumptions in functions that modify
blk_mq_ctx.rq_list to make it easier for humans to verify
this code.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'block/blk-mq-sched.c')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 9f02528..191bf82 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -150,6 +150,8 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
 	struct request *rq;
 	int checked = 8;
 
+	lockdep_assert_held(&ctx->lock);
+
 	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
 		bool merged = false;
 
-- 
cgit v1.1