diff options
-rw-r--r-- | block/Makefile | 6 | ||||
-rw-r--r-- | block/bio-integrity.c (renamed from fs/bio-integrity.c) | 0 | ||||
-rw-r--r-- | block/bio.c (renamed from fs/bio.c) | 0 | ||||
-rw-r--r-- | block/blk-core.c | 7 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 10 | ||||
-rw-r--r-- | block/blk-mq-tag.h | 9 | ||||
-rw-r--r-- | block/blk-mq.c | 125 | ||||
-rw-r--r-- | block/blk-mq.h | 9 | ||||
-rw-r--r-- | block/ioprio.c (renamed from fs/ioprio.c) | 0 | ||||
-rw-r--r-- | fs/Makefile | 3 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 10 |
11 files changed, 128 insertions, 51 deletions
diff --git a/block/Makefile b/block/Makefile index 20645e8..b4c4d3b 100644 --- a/block/Makefile +++ b/block/Makefile @@ -2,12 +2,13 @@ # Makefile for the kernel block layer # -obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ +obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ - genhd.o scsi_ioctl.o partition-generic.o partitions/ + genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ + partitions/ obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o @@ -20,3 +21,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o +obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o diff --git a/fs/bio-integrity.c b/block/bio-integrity.c index 9e24106..9e24106 100644 --- a/fs/bio-integrity.c +++ b/block/bio-integrity.c diff --git a/block/blk-core.c b/block/blk-core.c index c426970..a6bd3e7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1233,12 +1233,15 @@ static void add_acct_request(struct request_queue *q, struct request *rq, static void part_round_stats_single(int cpu, struct hd_struct *part, unsigned long now) { + int inflight; + if (now == part->stamp) return; - if (part_in_flight(part)) { + inflight = part_in_flight(part); + if (inflight) { __part_stat_add(cpu, part, time_in_queue, - part_in_flight(part) * (now - part->stamp)); + inflight * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c80086c..e6b3fba 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -20,7 +20,7 @@ static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt) int i; for (i = 0; i < bt->map_nr; i++) { - struct blk_mq_bitmap *bm = &bt->map[i]; + struct blk_align_bitmap *bm = &bt->map[i]; int ret; ret = find_first_zero_bit(&bm->word, bm->depth); @@ -117,7 +117,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, return atomic_read(&hctx->nr_active) < depth; } -static int __bt_get_word(struct blk_mq_bitmap *bm, unsigned int last_tag) +static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) { int tag, org_last_tag, end; @@ -360,7 +360,7 @@ static void bt_for_each_free(struct blk_mq_bitmap_tags *bt, int i; for (i = 0; i < bt->map_nr; i++) { - struct blk_mq_bitmap *bm = &bt->map[i]; + struct blk_align_bitmap *bm = &bt->map[i]; int bit = 0; do { @@ -400,7 +400,7 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt) unsigned int i, used; for (i = 0, used = 0; i < bt->map_nr; i++) { - struct blk_mq_bitmap *bm = &bt->map[i]; + struct blk_align_bitmap *bm = &bt->map[i]; used += bitmap_weight(&bm->word, bm->depth); } @@ -438,7 +438,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, } nr = ALIGN(depth, tags_per_word) / tags_per_word; - bt->map = kzalloc_node(nr * sizeof(struct blk_mq_bitmap), + bt->map = kzalloc_node(nr * sizeof(struct blk_align_bitmap), GFP_KERNEL, node); if (!bt->map) return -ENOMEM; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 0f5ec8b..e144f68 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -1,6 +1,8 @@ #ifndef INT_BLK_MQ_TAG_H #define INT_BLK_MQ_TAG_H +#include "blk-mq.h" + enum { BT_WAIT_QUEUES = 8, BT_WAIT_BATCH = 8, @@ -14,18 +16,13 @@ struct bt_wait_state { #define TAG_TO_INDEX(bt, tag) ((tag) >> (bt)->bits_per_word) #define TAG_TO_BIT(bt, tag) ((tag) & ((1 << (bt)->bits_per_word) - 1)) -struct blk_mq_bitmap { - unsigned long word; - unsigned long depth; -} ____cacheline_aligned_in_smp; - struct blk_mq_bitmap_tags { unsigned int depth; unsigned int wake_cnt; unsigned int bits_per_word; unsigned int map_nr; - struct blk_mq_bitmap *map; + struct blk_align_bitmap *map; unsigned int wake_index; struct bt_wait_state *bs; diff --git a/block/blk-mq.c b/block/blk-mq.c index 3c4f1fc..0fbef7e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -56,21 +56,40 @@ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) { unsigned int i; - for (i = 0; i < hctx->nr_ctx_map; i++) - if (hctx->ctx_map[i]) + for (i = 0; i < hctx->ctx_map.map_size; i++) + if (hctx->ctx_map.map[i].word) return true; return false; } +static inline struct blk_align_bitmap *get_bm(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx) +{ + return &hctx->ctx_map.map[ctx->index_hw / hctx->ctx_map.bits_per_word]; +} + +#define CTX_TO_BIT(hctx, ctx) \ + ((ctx)->index_hw & ((hctx)->ctx_map.bits_per_word - 1)) + /* * Mark this ctx as having pending work in this hardware queue */ static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { - if (!test_bit(ctx->index_hw, hctx->ctx_map)) - set_bit(ctx->index_hw, hctx->ctx_map); + struct blk_align_bitmap *bm = get_bm(hctx, ctx); + + if (!test_bit(CTX_TO_BIT(hctx, ctx), &bm->word)) + set_bit(CTX_TO_BIT(hctx, ctx), &bm->word); +} + +static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx) +{ + struct blk_align_bitmap *bm = get_bm(hctx, ctx); + + clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word); } static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, @@ -630,6 +649,40 @@ static bool blk_mq_attempt_merge(struct request_queue *q, } /* + * Process software queues that have been marked busy, splicing them + * to the for-dispatch + */ +static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) +{ + struct blk_mq_ctx *ctx; + int i; + + for (i = 0; i < hctx->ctx_map.map_size; i++) { + struct blk_align_bitmap *bm = &hctx->ctx_map.map[i]; + unsigned int off, bit; + + if (!bm->word) + continue; + + bit = 0; + off = i * hctx->ctx_map.bits_per_word; + do { + bit = find_next_bit(&bm->word, bm->depth, bit); + if (bit >= bm->depth) + break; + + ctx = hctx->ctxs[bit + off]; + clear_bit(bit, &bm->word); + spin_lock(&ctx->lock); + list_splice_tail_init(&ctx->rq_list, list); + spin_unlock(&ctx->lock); + + bit++; + } while (1); + } +} + +/* * Run this hardware queue, pulling any software queues mapped to it in. * Note that this function currently has various problems around ordering * of IO. In particular, we'd like FIFO behaviour on handling existing @@ -638,10 +691,9 @@ static bool blk_mq_attempt_merge(struct request_queue *q, static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; - struct blk_mq_ctx *ctx; struct request *rq; LIST_HEAD(rq_list); - int bit, queued; + int queued; WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)); @@ -653,14 +705,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) /* * Touch any software queue that has pending entries. */ - for_each_set_bit(bit, hctx->ctx_map, hctx->nr_ctx) { - clear_bit(bit, hctx->ctx_map); - ctx = hctx->ctxs[bit]; - - spin_lock(&ctx->lock); - list_splice_tail_init(&ctx->rq_list, &rq_list); - spin_unlock(&ctx->lock); - } + flush_busy_ctxs(hctx, &rq_list); /* * If we have previous entries on our dispatch list, grab them @@ -674,13 +719,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) } /* - * Delete and return all entries from our dispatch list - */ - queued = 0; - - /* * Now process all the entries, sending them to the driver. */ + queued = 0; while (!list_empty(&rq_list)) { int ret; @@ -1103,17 +1144,15 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) } if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) { - init_request_from_bio(rq, bio); - + blk_mq_bio_to_request(rq, bio); spin_lock(&ctx->lock); insert_rq: __blk_mq_insert_request(hctx, rq, false); spin_unlock(&ctx->lock); - blk_account_io_start(rq, 1); } else { spin_lock(&ctx->lock); if (!blk_mq_attempt_merge(q, ctx, bio)) { - init_request_from_bio(rq, bio); + blk_mq_bio_to_request(rq, bio); goto insert_rq; } @@ -1175,7 +1214,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, spin_lock(&ctx->lock); if (!list_empty(&ctx->rq_list)) { list_splice_init(&ctx->rq_list, &tmp); - clear_bit(ctx->index_hw, hctx->ctx_map); + blk_mq_hctx_clear_pending(hctx, ctx); } spin_unlock(&ctx->lock); @@ -1315,6 +1354,34 @@ fail: return NULL; } +static void blk_mq_free_bitmap(struct blk_mq_ctxmap *bitmap) +{ + kfree(bitmap->map); +} + +static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node) +{ + unsigned int bpw = 8, total, num_maps, i; + + bitmap->bits_per_word = bpw; + + num_maps = ALIGN(nr_cpu_ids, bpw) / bpw; + bitmap->map = kzalloc_node(num_maps * sizeof(struct blk_align_bitmap), + GFP_KERNEL, node); + if (!bitmap->map) + return -ENOMEM; + + bitmap->map_size = num_maps; + + total = nr_cpu_ids; + for (i = 0; i < num_maps; i++) { + bitmap->map[i].depth = min(total, bitmap->bits_per_word); + total -= bitmap->map[i].depth; + } + + return 0; +} + static int blk_mq_init_hw_queues(struct request_queue *q, struct blk_mq_tag_set *set) { @@ -1325,7 +1392,6 @@ static int blk_mq_init_hw_queues(struct request_queue *q, * Initialize hardware queues */ queue_for_each_hw_ctx(q, hctx, i) { - unsigned int num_maps; int node; node = hctx->numa_node; @@ -1356,13 +1422,9 @@ static int blk_mq_init_hw_queues(struct request_queue *q, if (!hctx->ctxs) break; - num_maps = ALIGN(nr_cpu_ids, BITS_PER_LONG) / BITS_PER_LONG; - hctx->ctx_map = kzalloc_node(num_maps * sizeof(unsigned long), - GFP_KERNEL, node); - if (!hctx->ctx_map) + if (blk_mq_alloc_bitmap(&hctx->ctx_map, node)) break; - hctx->nr_ctx_map = num_maps; hctx->nr_ctx = 0; if (set->ops->init_hctx && @@ -1385,7 +1447,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q, blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); kfree(hctx->ctxs); - kfree(hctx->ctx_map); + blk_mq_free_bitmap(&hctx->ctx_map); } return 1; @@ -1614,7 +1676,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_del_queue_tag_set(q); queue_for_each_hw_ctx(q, hctx, i) { - kfree(hctx->ctx_map); kfree(hctx->ctxs); blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); if (q->mq_ops->exit_hctx) diff --git a/block/blk-mq.h b/block/blk-mq.h index 97cfab9..5e5a378 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -52,4 +52,13 @@ void blk_mq_disable_hotplug(void); extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set); extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues); +/* + * Basic implementation of sparser bitmap, allowing the user to spread + * the bits over more cachelines. + */ +struct blk_align_bitmap { + unsigned long word; + unsigned long depth; +} ____cacheline_aligned_in_smp; + #endif diff --git a/fs/ioprio.c b/block/ioprio.c index e50170c..e50170c 100644 --- a/fs/ioprio.c +++ b/block/ioprio.c diff --git a/fs/Makefile b/fs/Makefile index f9cb987..4030cbf 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -14,14 +14,13 @@ obj-y := open.o read_write.o file_table.o super.o \ stack.o fs_struct.o statfs.o ifeq ($(CONFIG_BLOCK),y) -obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o +obj-y += buffer.o block_dev.o direct-io.o mpage.o else obj-y += no-block.o endif obj-$(CONFIG_PROC_FS) += proc_namespace.o -obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o obj-y += notify/ obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_ANON_INODES) += anon_inodes.o diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 379f88d..a06ca7b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -11,6 +11,12 @@ struct blk_mq_cpu_notifier { void (*notify)(void *data, unsigned long action, unsigned int cpu); }; +struct blk_mq_ctxmap { + unsigned int map_size; + unsigned int bits_per_word; + struct blk_align_bitmap *map; +}; + struct blk_mq_hw_ctx { struct { spinlock_t lock; @@ -31,8 +37,8 @@ struct blk_mq_hw_ctx { void *driver_data; - unsigned int nr_ctx_map; - unsigned long *ctx_map; + struct blk_mq_ctxmap ctx_map; + unsigned int nr_ctx; struct blk_mq_ctx **ctxs; |