From 2940474af79744411da0cb63b041ad52c57bc443 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2014 13:49:23 +0200 Subject: block: remove elv_abort_queue and blk_abort_flushes elv_abort_queue has no callers, and blk_abort_flushes is only called by elv_abort_queue. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 38 -------------------------------------- block/blk.h | 1 - block/elevator.c | 20 -------------------- 3 files changed, 59 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 8ffee4b..3cb5e9e 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -422,44 +422,6 @@ void blk_insert_flush(struct request *rq) } /** - * blk_abort_flushes - @q is being aborted, abort flush requests - * @q: request_queue being aborted - * - * To be called from elv_abort_queue(). @q is being aborted. Prepare all - * FLUSH/FUA requests for abortion. - * - * CONTEXT: - * spin_lock_irq(q->queue_lock) - */ -void blk_abort_flushes(struct request_queue *q) -{ - struct request *rq, *n; - int i; - - /* - * Requests in flight for data are already owned by the dispatch - * queue or the device driver. Just restore for normal completion. - */ - list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) { - list_del_init(&rq->flush.list); - blk_flush_restore_request(rq); - } - - /* - * We need to give away requests on flush queues. Restore for - * normal completion and put them on the dispatch queue. - */ - for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) { - list_for_each_entry_safe(rq, n, &q->flush_queue[i], - flush.list) { - list_del_init(&rq->flush.list); - blk_flush_restore_request(rq); - list_add_tail(&rq->queuelist, &q->queue_head); - } - } -} - -/** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for * @gfp_mask: memory allocation flags (for bio_alloc) diff --git a/block/blk.h b/block/blk.h index 45385e9..6748c4f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -84,7 +84,6 @@ static inline void blk_clear_rq_complete(struct request *rq) #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED) void blk_insert_flush(struct request *rq); -void blk_abort_flushes(struct request_queue *q); static inline struct request *__elv_next_request(struct request_queue *q) { diff --git a/block/elevator.c b/block/elevator.c index f35eddd..34bded1 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -729,26 +729,6 @@ int elv_may_queue(struct request_queue *q, int rw) return ELV_MQUEUE_MAY; } -void elv_abort_queue(struct request_queue *q) -{ - struct request *rq; - - blk_abort_flushes(q); - - while (!list_empty(&q->queue_head)) { - rq = list_entry_rq(q->queue_head.next); - rq->cmd_flags |= REQ_QUIET; - trace_block_rq_abort(q, rq); - /* - * Mark this request as started so we don't trigger - * any debug logic in the end I/O path. - */ - blk_start_request(rq); - __blk_end_request_all(rq, -EIO); - } -} -EXPORT_SYMBOL(elv_abort_queue); - void elv_completed_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; -- cgit v1.1 From 28747fcd2211d0fccbe3d3c91a1067c3744db908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Wed, 11 Jun 2014 23:43:54 +0200 Subject: block: remove WQ_POWER_EFFICIENT from kblockd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blk-mq issues async requests through kblockd. To issue a work request on a specific CPU, kblockd_schedule_delayed_work_on is used. However, the specific CPU choice may not be honored, if the power_efficient option for workqueues is set. blk-mq requires that we have strict per-cpu scheduling, so it wont work properly if kblockd is marked POWER_EFFICIENT and power_efficient is set. Remove the kblockd WQ_POWER_EFFICIENT flag to prevent this behavior. This essentially reverts part of commit 695588f9454b, which added the WQ_POWER_EFFICIENT marker to kblockd. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 9aca8c7..55f91b8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3311,8 +3311,7 @@ int __init blk_dev_init(void) /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", - WQ_MEM_RECLAIM | WQ_HIGHPRI | - WQ_POWER_EFFICIENT, 0); + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); -- cgit v1.1 From 8f5280f4ee75333ca12bde99ef6280ff65a8af43 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jun 2014 19:43:04 +0200 Subject: blk-mq: properly drain stopped queues If we need to drain a queue we need to run all queues, even if they are marked stopped to make sure the driver has a chance to error out on all queued requests. This fixes surprise removal with scsi-mq. Reported-by: Bart Van Assche Tested-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index e11f5f8..fd8b485 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -120,7 +120,7 @@ static void __blk_mq_drain_queue(struct request_queue *q) if (count == 0) break; - blk_mq_run_queues(q, false); + blk_mq_start_hw_queues(q); msleep(10); } } -- cgit v1.1 From 95ed068165d8edf6a81f9a73df0bb05c38602c3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jun 2014 19:43:35 +0200 Subject: blk-mq: merge blk_mq_drain_queue and __blk_mq_drain_queue Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index fd8b485..0ef2dc7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -109,7 +109,7 @@ static void blk_mq_queue_exit(struct request_queue *q) __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); } -static void __blk_mq_drain_queue(struct request_queue *q) +void blk_mq_drain_queue(struct request_queue *q) { while (true) { s64 count; @@ -139,12 +139,7 @@ static void blk_mq_freeze_queue(struct request_queue *q) spin_unlock_irq(q->queue_lock); if (drain) - __blk_mq_drain_queue(q); -} - -void blk_mq_drain_queue(struct request_queue *q) -{ - __blk_mq_drain_queue(q); + blk_mq_drain_queue(q); } static void blk_mq_unfreeze_queue(struct request_queue *q) -- cgit v1.1 From 8537b12034cf1fd3fab3da2c859d71f76846fae9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 17 Jun 2014 22:12:35 -0700 Subject: blk-mq: bitmap tag: fix races on shared ::wake_index fields Fix racy updates of shared blk_mq_bitmap_tags::wake_index and blk_mq_hw_ctx::wake_index fields. Cc: Ming Lei Signed-off-by: Alexander Gordeev Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 32 +++++++++++++++++++++----------- block/blk-mq-tag.h | 2 +- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'block') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 1aab39f..6deb130 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -43,9 +43,16 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags) return bt_has_free_tags(&tags->bitmap_tags); } -static inline void bt_index_inc(unsigned int *index) +static inline int bt_index_inc(int index) { - *index = (*index + 1) & (BT_WAIT_QUEUES - 1); + return (index + 1) & (BT_WAIT_QUEUES - 1); +} + +static inline void bt_index_atomic_inc(atomic_t *index) +{ + int old = atomic_read(index); + int new = bt_index_inc(old); + atomic_cmpxchg(index, old, new); } /* @@ -69,14 +76,14 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags) int i, wake_index; bt = &tags->bitmap_tags; - wake_index = bt->wake_index; + wake_index = atomic_read(&bt->wake_index); for (i = 0; i < BT_WAIT_QUEUES; i++) { struct bt_wait_state *bs = &bt->bs[wake_index]; if (waitqueue_active(&bs->wait)) wake_up(&bs->wait); - bt_index_inc(&wake_index); + wake_index = bt_index_inc(wake_index); } } @@ -212,12 +219,14 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx) { struct bt_wait_state *bs; + int wait_index; if (!hctx) return &bt->bs[0]; - bs = &bt->bs[hctx->wait_index]; - bt_index_inc(&hctx->wait_index); + wait_index = atomic_read(&hctx->wait_index); + bs = &bt->bs[wait_index]; + bt_index_atomic_inc(&hctx->wait_index); return bs; } @@ -313,18 +322,19 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt) { int i, wake_index; - wake_index = bt->wake_index; + wake_index = atomic_read(&bt->wake_index); for (i = 0; i < BT_WAIT_QUEUES; i++) { struct bt_wait_state *bs = &bt->bs[wake_index]; if (waitqueue_active(&bs->wait)) { - if (wake_index != bt->wake_index) - bt->wake_index = wake_index; + int o = atomic_read(&bt->wake_index); + if (wake_index != o) + atomic_cmpxchg(&bt->wake_index, o, wake_index); return bs; } - bt_index_inc(&wake_index); + wake_index = bt_index_inc(wake_index); } return NULL; @@ -344,7 +354,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) bs = bt_wake_ptr(bt); if (bs && atomic_dec_and_test(&bs->wait_cnt)) { atomic_set(&bs->wait_cnt, bt->wake_cnt); - bt_index_inc(&bt->wake_index); + bt_index_atomic_inc(&bt->wake_index); wake_up(&bs->wait); } } diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 98696a6..6206ed1 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -24,7 +24,7 @@ struct blk_mq_bitmap_tags { unsigned int map_nr; struct blk_align_bitmap *map; - unsigned int wake_index; + atomic_t wake_index; struct bt_wait_state *bs; }; -- cgit v1.1 From 2971c35f35886b87af54675313a2afef937c1b0c Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 12 Jun 2014 17:05:37 +0200 Subject: blk-mq: bitmap tag: fix race on blk_mq_bitmap_tags::wake_cnt This piece of code in bt_clear_tag() function is racy: bs = bt_wake_ptr(bt); if (bs && atomic_dec_and_test(&bs->wait_cnt)) { atomic_set(&bs->wait_cnt, bt->wake_cnt); wake_up(&bs->wait); } Since nothing prevents bt_wake_ptr() from returning the very same 'bs' address on multiple CPUs, the following scenario is possible: CPU1 CPU2 ---- ---- 0. bs = bt_wake_ptr(bt); bs = bt_wake_ptr(bt); 1. atomic_dec_and_test(&bs->wait_cnt) 2. atomic_dec_and_test(&bs->wait_cnt) 3. atomic_set(&bs->wait_cnt, bt->wake_cnt); If the decrement in [1] yields zero then for some amount of time the decrement in [2] results in a negative/overflow value, which is not expected. The follow-up assignment in [3] overwrites the invalid value with the batch value (and likely prevents the issue from being severe) which is still incorrect and should be a lesser. Cc: Ming Lei Cc: Jens Axboe Signed-off-by: Alexander Gordeev Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 6deb130..08fc671 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -344,6 +344,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) { const int index = TAG_TO_INDEX(bt, tag); struct bt_wait_state *bs; + int wait_cnt; /* * The unlock memory barrier need to order access to req in free @@ -352,10 +353,19 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word); bs = bt_wake_ptr(bt); - if (bs && atomic_dec_and_test(&bs->wait_cnt)) { - atomic_set(&bs->wait_cnt, bt->wake_cnt); + if (!bs) + return; + + wait_cnt = atomic_dec_return(&bs->wait_cnt); + if (wait_cnt == 0) { +wake: + atomic_add(bt->wake_cnt, &bs->wait_cnt); bt_index_atomic_inc(&bt->wake_index); wake_up(&bs->wait); + } else if (wait_cnt < 0) { + wait_cnt = atomic_inc_return(&bs->wait_cnt); + if (!wait_cnt) + goto wake; } } -- cgit v1.1 From 86fb5c56cfa26de5e91c9a50e2767a695dff366e Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 17 Jun 2014 22:37:23 +0200 Subject: blk-mq: bitmap tag: fix races in bt_get() function This update fixes few issues in bt_get() function: - list_empty(&wait.task_list) check is not protected; - was_empty check is always true which results in *every* thread entering the loop resets bt_wait_state::wait_cnt counter rather than every bt->wake_cnt'th thread; - 'bt_wait_state::wait_cnt' counter update is redundant, since it also gets reset in bt_clear_tag() function; Cc: Christoph Hellwig Cc: Ming Lei Cc: Jens Axboe Signed-off-by: Alexander Gordeev Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'block') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 08fc671..c1b9242 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -248,18 +248,12 @@ static int bt_get(struct blk_mq_alloc_data *data, bs = bt_wait_ptr(bt, hctx); do { - bool was_empty; - - was_empty = list_empty(&wait.task_list); prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); tag = __bt_get(hctx, bt, last_tag); if (tag != -1) break; - if (was_empty) - atomic_set(&bs->wait_cnt, bt->wake_cnt); - blk_mq_put_ctx(data->ctx); io_schedule(); @@ -519,10 +513,13 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, return -ENOMEM; } - for (i = 0; i < BT_WAIT_QUEUES; i++) + bt_update_count(bt, depth); + + for (i = 0; i < BT_WAIT_QUEUES; i++) { init_waitqueue_head(&bt->bs[i].wait); + atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt); + } - bt_update_count(bt, depth); return 0; } -- cgit v1.1