diff options
author | Jens Axboe <axboe@fb.com> | 2017-02-17 14:08:19 -0700 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2017-02-17 14:08:19 -0700 |
commit | 818551e2b2c662a1b26de6b4f7d6b8411a838d18 (patch) | |
tree | f38b4c951df4d33db81ae7b7765a56bce491c2a8 /block | |
parent | 6010720da8aab51f33beee63b73cf88016e9b250 (diff) | |
parent | 7520872c0cf4d3df6d74242c6edfb9e70a47df4d (diff) | |
download | op-kernel-dev-818551e2b2c662a1b26de6b4f7d6b8411a838d18.zip op-kernel-dev-818551e2b2c662a1b26de6b4f7d6b8411a838d18.tar.gz |
Merge branch 'for-4.11/next' into for-4.11/linus-merge
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 5 | ||||
-rw-r--r-- | block/Makefile | 5 | ||||
-rw-r--r-- | block/bio.c | 10 | ||||
-rw-r--r-- | block/blk-cgroup.c | 10 | ||||
-rw-r--r-- | block/blk-core.c | 325 | ||||
-rw-r--r-- | block/blk-exec.c | 19 | ||||
-rw-r--r-- | block/blk-flush.c | 14 | ||||
-rw-r--r-- | block/blk-integrity.c | 4 | ||||
-rw-r--r-- | block/blk-ioc.c | 22 | ||||
-rw-r--r-- | block/blk-map.c | 13 | ||||
-rw-r--r-- | block/blk-merge.c | 58 | ||||
-rw-r--r-- | block/blk-mq-debugfs.c | 88 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 86 | ||||
-rw-r--r-- | block/blk-mq-sched.h | 9 | ||||
-rw-r--r-- | block/blk-mq-sysfs.c | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 73 | ||||
-rw-r--r-- | block/blk-mq.h | 5 | ||||
-rw-r--r-- | block/blk-settings.c | 22 | ||||
-rw-r--r-- | block/blk-sysfs.c | 68 | ||||
-rw-r--r-- | block/blk-wbt.c | 8 | ||||
-rw-r--r-- | block/blk.h | 21 | ||||
-rw-r--r-- | block/bsg-lib.c | 49 | ||||
-rw-r--r-- | block/bsg.c | 64 | ||||
-rw-r--r-- | block/cfq-iosched.c | 4 | ||||
-rw-r--r-- | block/compat_ioctl.c | 7 | ||||
-rw-r--r-- | block/deadline-iosched.c | 12 | ||||
-rw-r--r-- | block/elevator.c | 16 | ||||
-rw-r--r-- | block/genhd.c | 25 | ||||
-rw-r--r-- | block/ioctl.c | 7 | ||||
-rw-r--r-- | block/mq-deadline.c | 15 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 83 |
31 files changed, 640 insertions, 509 deletions
diff --git a/block/Kconfig b/block/Kconfig index 1aef809..a2a92e5 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -49,9 +49,13 @@ config LBDAF If unsure, say Y. +config BLK_SCSI_REQUEST + bool + config BLK_DEV_BSG bool "Block layer SG support v4" default y + select BLK_SCSI_REQUEST help Saying Y here will enable generic SG (SCSI generic) v4 support for any block device. @@ -71,6 +75,7 @@ config BLK_DEV_BSGLIB bool "Block layer SG support v4 helper lib" default n select BLK_DEV_BSG + select BLK_SCSI_REQUEST help Subsystems will normally enable this if needed. Users will not normally need to manually enable this. diff --git a/block/Makefile b/block/Makefile index 6ba1b1b..2ad7c30 100644 --- a/block/Makefile +++ b/block/Makefile @@ -7,10 +7,11 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ - genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ + genhd.o partition-generic.o ioprio.o \ badblocks.o partitions/ -obj-$(CONFIG_BOUNCE) += bounce.o +obj-$(CONFIG_BOUNCE) += bounce.o +obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o diff --git a/block/bio.c b/block/bio.c index d3c26d1..4b564d0 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1227,9 +1227,6 @@ struct bio *bio_copy_user_iov(struct request_queue *q, if (!bio) goto out_bmd; - if (iter->type & WRITE) - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - ret = 0; if (map_data) { @@ -1394,12 +1391,6 @@ struct bio *bio_map_user_iov(struct request_queue *q, kfree(pages); - /* - * set data direction, and check if mapped pages need bouncing - */ - if (iter->type & WRITE) - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - bio_set_flag(bio, BIO_USER_MAPPED); /* @@ -1590,7 +1581,6 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, bio->bi_private = data; } else { bio->bi_end_io = bio_copy_kern_endio; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); } return bio; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index fb59a3e..295e98c2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -184,7 +184,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, goto err_free_blkg; } - wb_congested = wb_congested_get_create(&q->backing_dev_info, + wb_congested = wb_congested_get_create(q->backing_dev_info, blkcg->css.id, GFP_NOWAIT | __GFP_NOWARN); if (!wb_congested) { @@ -469,8 +469,8 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, const char *blkg_dev_name(struct blkcg_gq *blkg) { /* some drivers (floppy) instantiate a queue w/o disk registered */ - if (blkg->q->backing_dev_info.dev) - return dev_name(blkg->q->backing_dev_info.dev); + if (blkg->q->backing_dev_info->dev) + return dev_name(blkg->q->backing_dev_info->dev); return NULL; } EXPORT_SYMBOL_GPL(blkg_dev_name); @@ -1079,10 +1079,8 @@ int blkcg_init_queue(struct request_queue *q) if (preloaded) radix_tree_preload_end(); - if (IS_ERR(blkg)) { - blkg_free(new_blkg); + if (IS_ERR(blkg)) return PTR_ERR(blkg); - } q->root_blkg = blkg; q->root_rl.blkg = blkg; diff --git a/block/blk-core.c b/block/blk-core.c index b2df55a..b9e857f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -33,6 +33,7 @@ #include <linux/ratelimit.h> #include <linux/pm_runtime.h> #include <linux/blk-cgroup.h> +#include <linux/debugfs.h> #define CREATE_TRACE_POINTS #include <trace/events/block.h> @@ -42,6 +43,10 @@ #include "blk-mq-sched.h" #include "blk-wbt.h" +#ifdef CONFIG_DEBUG_FS +struct dentry *blk_debugfs_root; +#endif + EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); @@ -75,7 +80,7 @@ static void blk_clear_congested(struct request_list *rl, int sync) * flip its congestion state for events on other blkcgs. */ if (rl == &rl->q->root_rl) - clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync); + clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync); #endif } @@ -86,7 +91,7 @@ static void blk_set_congested(struct request_list *rl, int sync) #else /* see blk_clear_congested() */ if (rl == &rl->q->root_rl) - set_wb_congested(rl->q->backing_dev_info.wb.congested, sync); + set_wb_congested(rl->q->backing_dev_info->wb.congested, sync); #endif } @@ -105,22 +110,6 @@ void blk_queue_congestion_threshold(struct request_queue *q) q->nr_congestion_off = nr; } -/** - * blk_get_backing_dev_info - get the address of a queue's backing_dev_info - * @bdev: device - * - * Locates the passed device's request queue and returns the address of its - * backing_dev_info. This function can only be called if @bdev is opened - * and the return value is never NULL. - */ -struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - return &q->backing_dev_info; -} -EXPORT_SYMBOL(blk_get_backing_dev_info); - void blk_rq_init(struct request_queue *q, struct request *rq) { memset(rq, 0, sizeof(*rq)); @@ -132,8 +121,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->__sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); - rq->cmd = rq->__cmd; - rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; rq->internal_tag = -1; rq->start_time = jiffies; @@ -160,10 +147,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio, void blk_dump_rq_flags(struct request *rq, char *msg) { - int bit; - - printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg, - rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, + printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg, + rq->rq_disk ? rq->rq_disk->disk_name : "?", (unsigned long long) rq->cmd_flags); printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", @@ -171,13 +156,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg) blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); printk(KERN_INFO " bio %p, biotail %p, len %u\n", rq->bio, rq->biotail, blk_rq_bytes(rq)); - - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { - printk(KERN_INFO " cdb: "); - for (bit = 0; bit < BLK_MAX_CDB; bit++) - printk("%02x ", rq->cmd[bit]); - printk("\n"); - } } EXPORT_SYMBOL(blk_dump_rq_flags); @@ -588,7 +566,7 @@ void blk_cleanup_queue(struct request_queue *q) blk_flush_integrity(); /* @q won't process any more request, flush async actions */ - del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); + del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); blk_sync_queue(q); if (q->mq_ops) @@ -600,7 +578,8 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - bdi_unregister(&q->backing_dev_info); + bdi_unregister(q->backing_dev_info); + put_disk_devt(q->disk_devt); /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); @@ -608,17 +587,41 @@ void blk_cleanup_queue(struct request_queue *q) EXPORT_SYMBOL(blk_cleanup_queue); /* Allocate memory local to the request queue */ -static void *alloc_request_struct(gfp_t gfp_mask, void *data) +static void *alloc_request_simple(gfp_t gfp_mask, void *data) { - int nid = (int)(long)data; - return kmem_cache_alloc_node(request_cachep, gfp_mask, nid); + struct request_queue *q = data; + + return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node); } -static void free_request_struct(void *element, void *unused) +static void free_request_simple(void *element, void *data) { kmem_cache_free(request_cachep, element); } +static void *alloc_request_size(gfp_t gfp_mask, void *data) +{ + struct request_queue *q = data; + struct request *rq; + + rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask, + q->node); + if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) { + kfree(rq); + rq = NULL; + } + return rq; +} + +static void free_request_size(void *element, void *data) +{ + struct request_queue *q = data; + + if (q->exit_rq_fn) + q->exit_rq_fn(q, element); + kfree(element); +} + int blk_init_rl(struct request_list *rl, struct request_queue *q, gfp_t gfp_mask) { @@ -631,10 +634,15 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct, - free_request_struct, - (void *)(long)q->node, gfp_mask, - q->node); + if (q->cmd_size) { + rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, + alloc_request_size, free_request_size, + q, gfp_mask, q->node); + } else { + rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, + alloc_request_simple, free_request_simple, + q, gfp_mask, q->node); + } if (!rl->rq_pool) return -ENOMEM; @@ -697,7 +705,6 @@ static void blk_rq_timed_out_timer(unsigned long data) struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { struct request_queue *q; - int err; q = kmem_cache_alloc_node(blk_requestq_cachep, gfp_mask | __GFP_ZERO, node_id); @@ -712,17 +719,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q->bio_split) goto fail_id; - q->backing_dev_info.ra_pages = + q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id); + if (!q->backing_dev_info) + goto fail_split; + + q->backing_dev_info->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; - q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK; - q->backing_dev_info.name = "block"; + q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; + q->backing_dev_info->name = "block"; q->node = node_id; - err = bdi_init(&q->backing_dev_info); - if (err) - goto fail_split; - - setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, + setup_timer(&q->backing_dev_info->laptop_mode_wb_timer, laptop_mode_timer_fn, (unsigned long) q); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->queue_head); @@ -772,7 +779,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) fail_ref: percpu_ref_exit(&q->q_usage_counter); fail_bdi: - bdi_destroy(&q->backing_dev_info); + bdi_put(q->backing_dev_info); fail_split: bioset_free(q->bio_split); fail_id: @@ -825,15 +832,19 @@ EXPORT_SYMBOL(blk_init_queue); struct request_queue * blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { - struct request_queue *uninit_q, *q; + struct request_queue *q; - uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); - if (!uninit_q) + q = blk_alloc_queue_node(GFP_KERNEL, node_id); + if (!q) return NULL; - q = blk_init_allocated_queue(uninit_q, rfn, lock); - if (!q) - blk_cleanup_queue(uninit_q); + q->request_fn = rfn; + if (lock) + q->queue_lock = lock; + if (blk_init_allocated_queue(q) < 0) { + blk_cleanup_queue(q); + return NULL; + } return q; } @@ -841,30 +852,22 @@ EXPORT_SYMBOL(blk_init_queue_node); static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); -struct request_queue * -blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, - spinlock_t *lock) -{ - if (!q) - return NULL; - q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0); +int blk_init_allocated_queue(struct request_queue *q) +{ + q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size); if (!q->fq) - return NULL; + return -ENOMEM; + + if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL)) + goto out_free_flush_queue; if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) - goto fail; + goto out_exit_flush_rq; INIT_WORK(&q->timeout_work, blk_timeout_work); - q->request_fn = rfn; - q->prep_rq_fn = NULL; - q->unprep_rq_fn = NULL; q->queue_flags |= QUEUE_FLAG_DEFAULT; - /* Override internal queue lock with supplied lock pointer */ - if (lock) - q->queue_lock = lock; - /* * This also sets hw/phys segments, boundary and size */ @@ -878,17 +881,19 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, /* init elevator */ if (elevator_init(q, NULL)) { mutex_unlock(&q->sysfs_lock); - goto fail; + goto out_exit_flush_rq; } mutex_unlock(&q->sysfs_lock); + return 0; - return q; - -fail: +out_exit_flush_rq: + if (q->exit_rq_fn) + q->exit_rq_fn(q, q->fq->flush_rq); +out_free_flush_queue: blk_free_flush_queue(q->fq); wbt_exit(q); - return NULL; + return -ENOMEM; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -1024,25 +1029,6 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr) return 0; } -/* - * Determine if elevator data should be initialized when allocating the - * request associated with @bio. - */ -static bool blk_rq_should_init_elevator(struct bio *bio) -{ - if (!bio) - return true; - - /* - * Flush requests do not use the elevator so skip initialization. - * This allows a request to share the flush and elevator data. - */ - if (op_is_flush(bio->bi_opf)) - return false; - - return true; -} - /** * __get_request - get a free request * @rl: request list to allocate from @@ -1121,10 +1107,13 @@ static struct request *__get_request(struct request_list *rl, unsigned int op, * request is freed. This guarantees icq's won't be destroyed and * makes creating new ones safe. * + * Flush requests do not use the elevator so skip initialization. + * This allows a request to share the flush and elevator data. + * * Also, lookup icq while holding queue_lock. If it doesn't exist, * it will be created after releasing queue_lock. */ - if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { + if (!op_is_flush(op) && !blk_queue_bypass(q)) { rq_flags |= RQF_ELVPRIV; q->nr_rqs_elvpriv++; if (et->icq_cache && ioc) @@ -1184,7 +1173,7 @@ fail_elvpriv: * disturb iosched and blkcg but weird is bettern than dead. */ printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", - __func__, dev_name(q->backing_dev_info.dev)); + __func__, dev_name(q->backing_dev_info->dev)); rq->rq_flags &= ~RQF_ELVPRIV; rq->elv.icq = NULL; @@ -1278,8 +1267,6 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, { struct request *rq; - BUG_ON(rw != READ && rw != WRITE); - /* create ioc upfront */ create_io_context(gfp_mask, q->node); @@ -1309,18 +1296,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) EXPORT_SYMBOL(blk_get_request); /** - * blk_rq_set_block_pc - initialize a request to type BLOCK_PC - * @rq: request to be initialized - * - */ -void blk_rq_set_block_pc(struct request *rq) -{ - rq->cmd_type = REQ_TYPE_BLOCK_PC; - memset(rq->__cmd, 0, sizeof(rq->__cmd)); -} -EXPORT_SYMBOL(blk_rq_set_block_pc); - -/** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted * @rq: request to be inserted @@ -1510,6 +1485,30 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, return true; } +bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, + struct bio *bio) +{ + unsigned short segments = blk_rq_nr_discard_segments(req); + + if (segments >= queue_max_discard_segments(q)) + goto no_merge; + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req, blk_rq_pos(req))) + goto no_merge; + + req->biotail->bi_next = bio; + req->biotail = bio; + req->__data_len += bio->bi_iter.bi_size; + req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); + req->nr_phys_segments = segments + 1; + + blk_account_io_start(req, false); + return true; +no_merge: + req_set_nomerge(q, req); + return false; +} + /** * blk_attempt_plug_merge - try to merge with %current's plugged list * @q: request_queue new bio is being queued at @@ -1538,12 +1537,11 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, { struct blk_plug *plug; struct request *rq; - bool ret = false; struct list_head *plug_list; plug = current->plug; if (!plug) - goto out; + return false; *request_count = 0; if (q->mq_ops) @@ -1552,7 +1550,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, plug_list = &plug->list; list_for_each_entry_reverse(rq, plug_list, queuelist) { - int el_ret; + bool merged = false; if (rq->q == q) { (*request_count)++; @@ -1568,19 +1566,25 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, if (rq->q != q || !blk_rq_merge_ok(rq, bio)) continue; - el_ret = blk_try_merge(rq, bio); - if (el_ret == ELEVATOR_BACK_MERGE) { - ret = bio_attempt_back_merge(q, rq, bio); - if (ret) - break; - } else if (el_ret == ELEVATOR_FRONT_MERGE) { - ret = bio_attempt_front_merge(q, rq, bio); - if (ret) - break; + switch (blk_try_merge(rq, bio)) { + case ELEVATOR_BACK_MERGE: + merged = bio_attempt_back_merge(q, rq, bio); + break; + case ELEVATOR_FRONT_MERGE: + merged = bio_attempt_front_merge(q, rq, bio); + break; + case ELEVATOR_DISCARD_MERGE: + merged = bio_attempt_discard_merge(q, rq, bio); + break; + default: + break; } + + if (merged) + return true; } -out: - return ret; + + return false; } unsigned int blk_plug_queued_count(struct request_queue *q) @@ -1609,7 +1613,6 @@ out: void init_request_from_bio(struct request *req, struct bio *bio) { - req->cmd_type = REQ_TYPE_FS; if (bio->bi_opf & REQ_RAHEAD) req->cmd_flags |= REQ_FAILFAST_MASK; @@ -1623,8 +1626,8 @@ void init_request_from_bio(struct request *req, struct bio *bio) static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) { struct blk_plug *plug; - int el_ret, where = ELEVATOR_INSERT_SORT; - struct request *req; + int where = ELEVATOR_INSERT_SORT; + struct request *req, *free; unsigned int request_count = 0; unsigned int wb_acct; @@ -1661,21 +1664,29 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - el_ret = elv_merge(q, &req, bio); - if (el_ret == ELEVATOR_BACK_MERGE) { - if (bio_attempt_back_merge(q, req, bio)) { - elv_bio_merged(q, req, bio); - if (!attempt_back_merge(q, req)) - elv_merged_request(q, req, el_ret); - goto out_unlock; - } - } else if (el_ret == ELEVATOR_FRONT_MERGE) { - if (bio_attempt_front_merge(q, req, bio)) { - elv_bio_merged(q, req, bio); - if (!attempt_front_merge(q, req)) - elv_merged_request(q, req, el_ret); - goto out_unlock; - } + switch (elv_merge(q, &req, bio)) { + case ELEVATOR_BACK_MERGE: + if (!bio_attempt_back_merge(q, req, bio)) + break; + elv_bio_merged(q, req, bio); + free = attempt_back_merge(q, req); + if (free) + __blk_put_request(q, free); + else + elv_merged_request(q, req, ELEVATOR_BACK_MERGE); + goto out_unlock; + case ELEVATOR_FRONT_MERGE: + if (!bio_attempt_front_merge(q, req, bio)) + break; + elv_bio_merged(q, req, bio); + free = attempt_front_merge(q, req); + if (free) + __blk_put_request(q, free); + else + elv_merged_request(q, req, ELEVATOR_FRONT_MERGE); + goto out_unlock; + default: + break; } get_rq: @@ -2452,14 +2463,6 @@ void blk_start_request(struct request *req) wbt_issue(req->q->rq_wb, &req->issue_stat); } - /* - * We are now handing the request to the hardware, initialize - * resid_len to full count and add the timeout handler. - */ - req->resid_len = blk_rq_bytes(req); - if (unlikely(blk_bidi_rq(req))) - req->next_rq->resid_len = blk_rq_bytes(req->next_rq); - BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); blk_add_timer(req); } @@ -2530,10 +2533,10 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) * TODO: tj: This is too subtle. It would be better to let * low level drivers do what they see fit. */ - if (req->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(req)) req->errors = 0; - if (error && req->cmd_type == REQ_TYPE_FS && + if (error && !blk_rq_is_passthrough(req) && !(req->rq_flags & RQF_QUIET)) { char *error_type; @@ -2605,7 +2608,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->__data_len -= total_bytes; /* update sector only for requests with clear definition of sector */ - if (req->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(req)) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ @@ -2683,8 +2686,8 @@ void blk_finish_request(struct request *req, int error) BUG_ON(blk_queued_rq(req)); - if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) - laptop_io_completion(&req->q->backing_dev_info); + if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req)) + laptop_io_completion(req->q->backing_dev_info); blk_delete_timer(req); @@ -3007,8 +3010,6 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = src->cmd_flags | REQ_NOMERGE; - dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); dst->nr_phys_segments = src->nr_phys_segments; @@ -3484,5 +3485,9 @@ int __init blk_dev_init(void) blk_requestq_cachep = kmem_cache_create("request_queue", sizeof(struct request_queue), 0, SLAB_PANIC, NULL); +#ifdef CONFIG_DEBUG_FS + blk_debugfs_root = debugfs_create_dir("block", NULL); +#endif + return 0; } diff --git a/block/blk-exec.c b/block/blk-exec.c index ed1f101..8cd0e9b 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -11,11 +11,6 @@ #include "blk.h" #include "blk-mq-sched.h" -/* - * for max sense size - */ -#include <scsi/scsi_cmnd.h> - /** * blk_end_sync_rq - executes a completion event on a request * @rq: request to complete @@ -56,7 +51,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; WARN_ON(irqs_disabled()); - WARN_ON(rq->cmd_type == REQ_TYPE_FS); + WARN_ON(!blk_rq_is_passthrough(rq)); rq->rq_disk = bd_disk; rq->end_io = done; @@ -101,16 +96,9 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, struct request *rq, int at_head) { DECLARE_COMPLETION_ONSTACK(wait); - char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; unsigned long hang_check; - if (!rq->sense) { - memset(sense, 0, sizeof(sense)); - rq->sense = sense; - rq->sense_len = 0; - } - rq->end_io_data = &wait; blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); @@ -124,11 +112,6 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, if (rq->errors) err = -EIO; - if (rq->sense == sense) { - rq->sense = NULL; - rq->sense_len = 0; - } - return err; } EXPORT_SYMBOL(blk_execute_rq); diff --git a/block/blk-flush.c b/block/blk-flush.c index 4427896..0d5a9c1 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -297,8 +297,14 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending)) return false; - /* C2 and C3 */ + /* C2 and C3 + * + * For blk-mq + scheduling, we can risk having all driver tags + * assigned to empty flushes, and we deadlock if we are expecting + * other requests to make progress. Don't defer for that case. + */ if (!list_empty(&fq->flush_data_in_flight) && + !(q->mq_ops && q->elevator) && time_before(jiffies, fq->flush_pending_since + FLUSH_PENDING_TIMEOUT)) return false; @@ -327,7 +333,6 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq); } - flush_rq->cmd_type = REQ_TYPE_FS; flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; @@ -547,11 +552,10 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, if (!fq) goto fail; - if (q->mq_ops) { + if (q->mq_ops) spin_lock_init(&fq->mq_flush_lock); - rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); - } + rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node); if (!fq->flush_rq) goto fail_rq; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index d69c5c7..9f0ff5b 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -443,10 +443,10 @@ void blk_integrity_revalidate(struct gendisk *disk) return; if (bi->profile) - disk->queue->backing_dev_info.capabilities |= + disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; else - disk->queue->backing_dev_info.capabilities &= + disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index fe186a9..b12f9c8 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -35,7 +35,10 @@ static void icq_free_icq_rcu(struct rcu_head *head) kmem_cache_free(icq->__rcu_icq_cache, icq); } -/* Exit an icq. Called with both ioc and q locked. */ +/* + * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for + * mq. + */ static void ioc_exit_icq(struct io_cq *icq) { struct elevator_type *et = icq->q->elevator->type; @@ -166,6 +169,7 @@ EXPORT_SYMBOL(put_io_context); */ void put_io_context_active(struct io_context *ioc) { + struct elevator_type *et; unsigned long flags; struct io_cq *icq; @@ -184,13 +188,19 @@ retry: hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) { if (icq->flags & ICQ_EXITED) continue; - if (spin_trylock(icq->q->queue_lock)) { + + et = icq->q->elevator->type; + if (et->uses_mq) { ioc_exit_icq(icq); - spin_unlock(icq->q->queue_lock); } else { - spin_unlock_irqrestore(&ioc->lock, flags); - cpu_relax(); - goto retry; + if (spin_trylock(icq->q->queue_lock)) { + ioc_exit_icq(icq); + spin_unlock(icq->q->queue_lock); + } else { + spin_unlock_irqrestore(&ioc->lock, flags); + cpu_relax(); + goto retry; + } } } spin_unlock_irqrestore(&ioc->lock, flags); diff --git a/block/blk-map.c b/block/blk-map.c index 0acb664..2f18c2a 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -16,8 +16,6 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio) { if (!rq->bio) { - rq->cmd_flags &= REQ_OP_MASK; - rq->cmd_flags |= (bio->bi_opf & REQ_OP_MASK); blk_rq_bio_prep(rq->q, rq, bio); } else { if (!ll_back_merge_fn(rq->q, rq, bio)) @@ -62,6 +60,9 @@ static int __blk_rq_map_user_iov(struct request *rq, if (IS_ERR(bio)) return PTR_ERR(bio); + bio->bi_opf &= ~REQ_OP_MASK; + bio->bi_opf |= req_op(rq); + if (map_data && map_data->null_mapped) bio_set_flag(bio, BIO_NULL_MAPPED); @@ -90,7 +91,7 @@ static int __blk_rq_map_user_iov(struct request *rq, } /** - * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage + * blk_rq_map_user_iov - map user data to a request, for passthrough requests * @q: request queue where request should be inserted * @rq: request to map data to * @map_data: pointer to the rq_map_data holding pages (if necessary) @@ -199,7 +200,7 @@ int blk_rq_unmap_user(struct bio *bio) EXPORT_SYMBOL(blk_rq_unmap_user); /** - * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage + * blk_rq_map_kern - map kernel data to a request, for passthrough requests * @q: request queue where request should be inserted * @rq: request to fill * @kbuf: the kernel buffer @@ -234,8 +235,8 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (IS_ERR(bio)) return PTR_ERR(bio); - if (!reading) - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf &= ~REQ_OP_MASK; + bio->bi_opf |= req_op(rq); if (do_copy) rq->rq_flags |= RQF_COPY_USER; diff --git a/block/blk-merge.c b/block/blk-merge.c index 6aa43de..2afa262 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -482,13 +482,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_sg); -static void req_set_nomerge(struct request_queue *q, struct request *req) -{ - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; -} - static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) @@ -659,31 +652,32 @@ static void blk_account_io_merge(struct request *req) } /* - * Has to be called with the request spinlock acquired + * For non-mq, this has to be called with the request spinlock acquired. + * For mq with scheduling, the appropriate queue wide lock should be held. */ -static int attempt_merge(struct request_queue *q, struct request *req, - struct request *next) +static struct request *attempt_merge(struct request_queue *q, + struct request *req, struct request *next) { if (!rq_mergeable(req) || !rq_mergeable(next)) - return 0; + return NULL; if (req_op(req) != req_op(next)) - return 0; + return NULL; /* * not contiguous */ if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) - return 0; + return NULL; if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk || req_no_special_merge(next)) - return 0; + return NULL; if (req_op(req) == REQ_OP_WRITE_SAME && !blk_write_same_mergeable(req->bio, next->bio)) - return 0; + return NULL; /* * If we are allowed to merge, then append bio list @@ -692,7 +686,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, * counts here. */ if (!ll_merge_requests_fn(q, req, next)) - return 0; + return NULL; /* * If failfast settings disagree or any of the two is already @@ -732,42 +726,51 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (blk_rq_cpu_valid(next)) req->cpu = next->cpu; - /* owner-ship of bio passed from next to req */ + /* + * ownership of bio passed from next to req, return 'next' for + * the caller to free + */ next->bio = NULL; - __blk_put_request(q, next); - return 1; + return next; } -int attempt_back_merge(struct request_queue *q, struct request *rq) +struct request *attempt_back_merge(struct request_queue *q, struct request *rq) { struct request *next = elv_latter_request(q, rq); if (next) return attempt_merge(q, rq, next); - return 0; + return NULL; } -int attempt_front_merge(struct request_queue *q, struct request *rq) +struct request *attempt_front_merge(struct request_queue *q, struct request *rq) { struct request *prev = elv_former_request(q, rq); if (prev) return attempt_merge(q, prev, rq); - return 0; + return NULL; } int blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next) { struct elevator_queue *e = q->elevator; + struct request *free; if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn) if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) return 0; - return attempt_merge(q, rq, next); + free = attempt_merge(q, rq, next); + if (free) { + __blk_put_request(q, free); + return 1; + } + + return 0; } bool blk_rq_merge_ok(struct request *rq, struct bio *bio) @@ -798,9 +801,12 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) return true; } -int blk_try_merge(struct request *rq, struct bio *bio) +enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) { - if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) + if (req_op(rq) == REQ_OP_DISCARD && + queue_max_discard_segments(rq->q) > 1) + return ELEVATOR_DISCARD_MERGE; + else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) return ELEVATOR_BACK_MERGE; else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) return ELEVATOR_FRONT_MERGE; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 5cd2b43..f6d9179 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -19,6 +19,7 @@ #include <linux/debugfs.h> #include <linux/blk-mq.h> +#include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" @@ -28,8 +29,6 @@ struct blk_mq_debugfs_attr { const struct file_operations *fops; }; -static struct dentry *block_debugfs_root; - static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { @@ -88,13 +87,14 @@ static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) { struct request *rq = list_entry_rq(v); - seq_printf(m, "%p {.cmd_type=%u, .cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n", - rq, rq->cmd_type, rq->cmd_flags, (unsigned int)rq->rq_flags, + seq_printf(m, "%p {.cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n", + rq, rq->cmd_flags, (__force unsigned int)rq->rq_flags, rq->tag, rq->internal_tag); return 0; } static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) + __acquires(&hctx->lock) { struct blk_mq_hw_ctx *hctx = m->private; @@ -110,6 +110,7 @@ static void *hctx_dispatch_next(struct seq_file *m, void *v, loff_t *pos) } static void hctx_dispatch_stop(struct seq_file *m, void *v) + __releases(&hctx->lock) { struct blk_mq_hw_ctx *hctx = m->private; @@ -176,13 +177,17 @@ static int hctx_tags_show(struct seq_file *m, void *v) { struct blk_mq_hw_ctx *hctx = m->private; struct request_queue *q = hctx->queue; + int res; - mutex_lock(&q->sysfs_lock); + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; if (hctx->tags) blk_mq_debugfs_tags_show(m, hctx->tags); mutex_unlock(&q->sysfs_lock); - return 0; +out: + return res; } static int hctx_tags_open(struct inode *inode, struct file *file) @@ -201,12 +206,17 @@ static int hctx_tags_bitmap_show(struct seq_file *m, void *v) { struct blk_mq_hw_ctx *hctx = m->private; struct request_queue *q = hctx->queue; + int res; - mutex_lock(&q->sysfs_lock); + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; if (hctx->tags) sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); mutex_unlock(&q->sysfs_lock); - return 0; + +out: + return res; } static int hctx_tags_bitmap_open(struct inode *inode, struct file *file) @@ -225,13 +235,17 @@ static int hctx_sched_tags_show(struct seq_file *m, void *v) { struct blk_mq_hw_ctx *hctx = m->private; struct request_queue *q = hctx->queue; + int res; - mutex_lock(&q->sysfs_lock); + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; if (hctx->sched_tags) blk_mq_debugfs_tags_show(m, hctx->sched_tags); mutex_unlock(&q->sysfs_lock); - return 0; +out: + return res; } static int hctx_sched_tags_open(struct inode *inode, struct file *file) @@ -250,12 +264,17 @@ static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v) { struct blk_mq_hw_ctx *hctx = m->private; struct request_queue *q = hctx->queue; + int res; - mutex_lock(&q->sysfs_lock); + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; if (hctx->sched_tags) sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); mutex_unlock(&q->sysfs_lock); - return 0; + +out: + return res; } static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file) @@ -482,6 +501,7 @@ static const struct file_operations hctx_active_fops = { }; static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos) + __acquires(&ctx->lock) { struct blk_mq_ctx *ctx = m->private; @@ -497,6 +517,7 @@ static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos) } static void ctx_rq_list_stop(struct seq_file *m, void *v) + __releases(&ctx->lock) { struct blk_mq_ctx *ctx = m->private; @@ -630,6 +651,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { {"queued", 0600, &hctx_queued_fops}, {"run", 0600, &hctx_run_fops}, {"active", 0400, &hctx_active_fops}, + {}, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { @@ -637,14 +659,15 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { {"dispatched", 0600, &ctx_dispatched_fops}, {"merged", 0600, &ctx_merged_fops}, {"completed", 0600, &ctx_completed_fops}, + {}, }; int blk_mq_debugfs_register(struct request_queue *q, const char *name) { - if (!block_debugfs_root) + if (!blk_debugfs_root) return -ENOENT; - q->debugfs_dir = debugfs_create_dir(name, block_debugfs_root); + q->debugfs_dir = debugfs_create_dir(name, blk_debugfs_root); if (!q->debugfs_dir) goto err; @@ -665,27 +688,31 @@ void blk_mq_debugfs_unregister(struct request_queue *q) q->debugfs_dir = NULL; } +static bool debugfs_create_files(struct dentry *parent, void *data, + const struct blk_mq_debugfs_attr *attr) +{ + for (; attr->name; attr++) { + if (!debugfs_create_file(attr->name, attr->mode, parent, + data, attr->fops)) + return false; + } + return true; +} + static int blk_mq_debugfs_register_ctx(struct request_queue *q, struct blk_mq_ctx *ctx, struct dentry *hctx_dir) { struct dentry *ctx_dir; char name[20]; - int i; snprintf(name, sizeof(name), "cpu%u", ctx->cpu); ctx_dir = debugfs_create_dir(name, hctx_dir); if (!ctx_dir) return -ENOMEM; - for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_ctx_attrs); i++) { - const struct blk_mq_debugfs_attr *attr; - - attr = &blk_mq_debugfs_ctx_attrs[i]; - if (!debugfs_create_file(attr->name, attr->mode, ctx_dir, ctx, - attr->fops)) - return -ENOMEM; - } + if (!debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs)) + return -ENOMEM; return 0; } @@ -703,14 +730,8 @@ static int blk_mq_debugfs_register_hctx(struct request_queue *q, if (!hctx_dir) return -ENOMEM; - for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_hctx_attrs); i++) { - const struct blk_mq_debugfs_attr *attr; - - attr = &blk_mq_debugfs_hctx_attrs[i]; - if (!debugfs_create_file(attr->name, attr->mode, hctx_dir, hctx, - attr->fops)) - return -ENOMEM; - } + if (!debugfs_create_files(hctx_dir, hctx, blk_mq_debugfs_hctx_attrs)) + return -ENOMEM; hctx_for_each_ctx(hctx, ctx, i) { if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir)) @@ -749,8 +770,3 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) debugfs_remove_recursive(q->mq_debugfs_dir); q->mq_debugfs_dir = NULL; } - -void blk_mq_debugfs_init(void) -{ - block_debugfs_root = debugfs_create_dir("block", NULL); -} diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 114814e..9e8d679 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -68,7 +68,9 @@ error: EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data); static void __blk_mq_sched_assign_ioc(struct request_queue *q, - struct request *rq, struct io_context *ioc) + struct request *rq, + struct bio *bio, + struct io_context *ioc) { struct io_cq *icq; @@ -83,7 +85,7 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, } rq->elv.icq = icq; - if (!blk_mq_sched_get_rq_priv(q, rq)) { + if (!blk_mq_sched_get_rq_priv(q, rq, bio)) { rq->rq_flags |= RQF_ELVPRIV; get_io_context(icq->ioc); return; @@ -99,7 +101,7 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q, ioc = rq_ioc(bio); if (ioc) - __blk_mq_sched_assign_ioc(q, rq, ioc); + __blk_mq_sched_assign_ioc(q, rq, bio, ioc); } struct request *blk_mq_sched_get_request(struct request_queue *q, @@ -173,6 +175,8 @@ void blk_mq_sched_put_request(struct request *rq) void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { struct elevator_queue *e = hctx->queue->elevator; + const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request; + bool did_work = false; LIST_HEAD(rq_list); if (unlikely(blk_mq_hctx_stopped(hctx))) @@ -202,11 +206,18 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) */ if (!list_empty(&rq_list)) { blk_mq_sched_mark_restart(hctx); - blk_mq_dispatch_rq_list(hctx, &rq_list); - } else if (!e || !e->type->ops.mq.dispatch_request) { + did_work = blk_mq_dispatch_rq_list(hctx, &rq_list); + } else if (!has_sched_dispatch) { blk_mq_flush_busy_ctxs(hctx, &rq_list); blk_mq_dispatch_rq_list(hctx, &rq_list); - } else { + } + + /* + * We want to dispatch from the scheduler if we had no work left + * on the dispatch list, OR if we did have work but weren't able + * to make progress. + */ + if (!did_work && has_sched_dispatch) { do { struct request *rq; @@ -234,31 +245,33 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, } EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch); -bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio) +bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, + struct request **merged_request) { struct request *rq; - int ret; - ret = elv_merge(q, &rq, bio); - if (ret == ELEVATOR_BACK_MERGE) { + switch (elv_merge(q, &rq, bio)) { + case ELEVATOR_BACK_MERGE: if (!blk_mq_sched_allow_merge(q, rq, bio)) return false; - if (bio_attempt_back_merge(q, rq, bio)) { - if (!attempt_back_merge(q, rq)) - elv_merged_request(q, rq, ret); - return true; - } - } else if (ret == ELEVATOR_FRONT_MERGE) { + if (!bio_attempt_back_merge(q, rq, bio)) + return false; + *merged_request = attempt_back_merge(q, rq); + if (!*merged_request) + elv_merged_request(q, rq, ELEVATOR_BACK_MERGE); + return true; + case ELEVATOR_FRONT_MERGE: if (!blk_mq_sched_allow_merge(q, rq, bio)) return false; - if (bio_attempt_front_merge(q, rq, bio)) { - if (!attempt_front_merge(q, rq)) - elv_merged_request(q, rq, ret); - return true; - } + if (!bio_attempt_front_merge(q, rq, bio)) + return false; + *merged_request = attempt_front_merge(q, rq); + if (!*merged_request) + elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE); + return true; + default: + return false; } - - return false; } EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); @@ -289,7 +302,8 @@ void blk_mq_sched_request_inserted(struct request *rq) } EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); -bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq) +static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, + struct request *rq) { if (rq->tag == -1) { rq->rq_flags |= RQF_SORTED; @@ -305,7 +319,6 @@ bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq) spin_unlock(&hctx->lock); return true; } -EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert); static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) { @@ -347,7 +360,7 @@ static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx, blk_insert_flush(rq); blk_mq_run_hw_queue(hctx, true); } else - blk_mq_add_to_requeue_list(rq, true, true); + blk_mq_add_to_requeue_list(rq, false, true); } void blk_mq_sched_insert_request(struct request *rq, bool at_head, @@ -363,6 +376,9 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, return; } + if (e && blk_mq_sched_bypass_insert(hctx, rq)) + goto run; + if (e && e->type->ops.mq.insert_requests) { LIST_HEAD(list); @@ -374,6 +390,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, spin_unlock(&ctx->lock); } +run: if (run_queue) blk_mq_run_hw_queue(hctx, async); } @@ -385,6 +402,23 @@ void blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); struct elevator_queue *e = hctx->queue->elevator; + if (e) { + struct request *rq, *next; + + /* + * We bypass requests that already have a driver tag assigned, + * which should only be flushes. Flushes are only ever inserted + * as single requests, so we shouldn't ever hit the + * WARN_ON_ONCE() below (but let's handle it just in case). + */ + list_for_each_entry_safe(rq, next, list, queuelist) { + if (WARN_ON_ONCE(rq->tag != -1)) { + list_del_init(&rq->queuelist); + blk_mq_sched_bypass_insert(hctx, rq); + } + } + } + if (e && e->type->ops.mq.insert_requests) e->type->ops.mq.insert_requests(hctx, list, false); else diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 9478aae..7b5f3b9 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -15,8 +15,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bi void blk_mq_sched_put_request(struct request *rq); void blk_mq_sched_request_inserted(struct request *rq); -bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq); -bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio); +bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, + struct request **merged_request); bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx); @@ -49,12 +49,13 @@ blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) } static inline int blk_mq_sched_get_rq_priv(struct request_queue *q, - struct request *rq) + struct request *rq, + struct bio *bio) { struct elevator_queue *e = q->elevator; if (e && e->type->ops.mq.get_rq_priv) - return e->type->ops.mq.get_rq_priv(q, rq); + return e->type->ops.mq.get_rq_priv(q, rq, bio); return 0; } diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 308b3f4..295e696 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -254,7 +254,7 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) kobject_put(&hctx->kobj); } - blk_mq_debugfs_unregister(q); + blk_mq_debugfs_unregister_hctxs(q); kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); diff --git a/block/blk-mq.c b/block/blk-mq.c index 489076e..b29e7dc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -199,13 +199,7 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->special = NULL; /* tag was already set */ rq->errors = 0; - - rq->cmd = rq->__cmd; - rq->extra_len = 0; - rq->sense_len = 0; - rq->resid_len = 0; - rq->sense = NULL; INIT_LIST_HEAD(&rq->timeout_list); rq->timeout = 0; @@ -487,10 +481,6 @@ void blk_mq_start_request(struct request *rq) trace_block_rq_issue(q, rq); - rq->resid_len = blk_rq_bytes(rq); - if (unlikely(blk_bidi_rq(rq))) - rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq); - if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { blk_stat_set_issue_time(&rq->issue_stat); rq->rq_flags |= RQF_STATS; @@ -773,7 +763,7 @@ static bool blk_mq_attempt_merge(struct request_queue *q, int checked = 8; list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { - int el_ret; + bool merged = false; if (!checked--) break; @@ -781,26 +771,25 @@ static bool blk_mq_attempt_merge(struct request_queue *q, if (!blk_rq_merge_ok(rq, bio)) continue; - el_ret = blk_try_merge(rq, bio); - if (el_ret == ELEVATOR_NO_MERGE) - continue; - - if (!blk_mq_sched_allow_merge(q, rq, bio)) + switch (blk_try_merge(rq, bio)) { + case ELEVATOR_BACK_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_back_merge(q, rq, bio); break; - - if (el_ret == ELEVATOR_BACK_MERGE) { - if (bio_attempt_back_merge(q, rq, bio)) { - ctx->rq_merged++; - return true; - } + case ELEVATOR_FRONT_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_front_merge(q, rq, bio); break; - } else if (el_ret == ELEVATOR_FRONT_MERGE) { - if (bio_attempt_front_merge(q, rq, bio)) { - ctx->rq_merged++; - return true; - } + case ELEVATOR_DISCARD_MERGE: + merged = bio_attempt_discard_merge(q, rq, bio); break; + default: + continue; } + + if (merged) + ctx->rq_merged++; + return merged; } return false; @@ -1013,7 +1002,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) blk_mq_run_hw_queue(hctx, true); } - return ret != BLK_MQ_RQ_QUEUE_BUSY; + return queued != 0; } static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) @@ -1442,12 +1431,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) cookie = request_to_qc_t(data.hctx, rq); if (unlikely(is_flush_fua)) { - blk_mq_put_ctx(data.ctx); + if (q->elevator) + goto elv_insert; blk_mq_bio_to_request(rq, bio); - blk_mq_get_driver_tag(rq, NULL, true); blk_insert_flush(rq); - blk_mq_run_hw_queue(data.hctx, true); - goto done; + goto run_queue; } plug = current->plug; @@ -1497,6 +1485,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } if (q->elevator) { +elv_insert: blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); blk_mq_sched_insert_request(rq, false, true, @@ -1510,6 +1499,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) * latter allows for merging opportunities and more efficient * dispatching. */ +run_queue: blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); } blk_mq_put_ctx(data.ctx); @@ -1565,12 +1555,11 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) cookie = request_to_qc_t(data.hctx, rq); if (unlikely(is_flush_fua)) { - blk_mq_put_ctx(data.ctx); + if (q->elevator) + goto elv_insert; blk_mq_bio_to_request(rq, bio); - blk_mq_get_driver_tag(rq, NULL, true); blk_insert_flush(rq); - blk_mq_run_hw_queue(data.hctx, true); - goto done; + goto run_queue; } /* @@ -1608,6 +1597,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) } if (q->elevator) { +elv_insert: blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); blk_mq_sched_insert_request(rq, false, true, @@ -1621,6 +1611,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) * latter allows for merging opportunities and more efficient * dispatching. */ +run_queue: blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); } @@ -2637,10 +2628,14 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); + /* + * Manually set the make_request_fn as blk_queue_make_request + * resets a lot of the queue settings. + */ if (q->nr_hw_queues > 1) - blk_queue_make_request(q, blk_mq_make_request); + q->make_request_fn = blk_mq_make_request; else - blk_queue_make_request(q, blk_sq_make_request); + q->make_request_fn = blk_sq_make_request; blk_mq_queue_reinit(q, cpu_online_mask); } @@ -2824,8 +2819,6 @@ void blk_mq_enable_hotplug(void) static int __init blk_mq_init(void) { - blk_mq_debugfs_init(); - cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, blk_mq_hctx_notify_dead); diff --git a/block/blk-mq.h b/block/blk-mq.h index b52abd6..24b2256 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -85,16 +85,11 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); * debugfs helpers */ #ifdef CONFIG_BLK_DEBUG_FS -void blk_mq_debugfs_init(void); int blk_mq_debugfs_register(struct request_queue *q, const char *name); void blk_mq_debugfs_unregister(struct request_queue *q); int blk_mq_debugfs_register_hctxs(struct request_queue *q); void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); #else -static inline void blk_mq_debugfs_init(void) -{ -} - static inline int blk_mq_debugfs_register(struct request_queue *q, const char *name) { diff --git a/block/blk-settings.c b/block/blk-settings.c index 529e55f..1e7174f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -88,6 +88,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); void blk_set_default_limits(struct queue_limits *lim) { lim->max_segments = BLK_MAX_SEGMENTS; + lim->max_discard_segments = 1; lim->max_integrity_segments = 0; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->virt_boundary_mask = 0; @@ -128,6 +129,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) /* Inherit limits from component devices */ lim->discard_zeroes_data = 1; lim->max_segments = USHRT_MAX; + lim->max_discard_segments = 1; lim->max_hw_sectors = UINT_MAX; lim->max_segment_size = UINT_MAX; lim->max_sectors = UINT_MAX; @@ -253,7 +255,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors); max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS); limits->max_sectors = max_sectors; - q->backing_dev_info.io_pages = max_sectors >> (PAGE_SHIFT - 9); + q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); @@ -337,6 +339,22 @@ void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments EXPORT_SYMBOL(blk_queue_max_segments); /** + * blk_queue_max_discard_segments - set max segments for discard requests + * @q: the request queue for the device + * @max_segments: max number of segments + * + * Description: + * Enables a low level driver to set an upper limit on the number of + * segments in a discard request. + **/ +void blk_queue_max_discard_segments(struct request_queue *q, + unsigned short max_segments) +{ + q->limits.max_discard_segments = max_segments; +} +EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments); + +/** * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg * @q: the request queue for the device * @max_size: max size of segment in bytes @@ -553,6 +571,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->virt_boundary_mask); t->max_segments = min_not_zero(t->max_segments, b->max_segments); + t->max_discard_segments = min_not_zero(t->max_discard_segments, + b->max_discard_segments); t->max_integrity_segments = min_not_zero(t->max_integrity_segments, b->max_integrity_segments); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 1dbce05..002af83 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -89,7 +89,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) static ssize_t queue_ra_show(struct request_queue *q, char *page) { - unsigned long ra_kb = q->backing_dev_info.ra_pages << + unsigned long ra_kb = q->backing_dev_info->ra_pages << (PAGE_SHIFT - 10); return queue_var_show(ra_kb, (page)); @@ -104,7 +104,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) if (ret < 0) return ret; - q->backing_dev_info.ra_pages = ra_kb >> (PAGE_SHIFT - 10); + q->backing_dev_info->ra_pages = ra_kb >> (PAGE_SHIFT - 10); return ret; } @@ -121,6 +121,12 @@ static ssize_t queue_max_segments_show(struct request_queue *q, char *page) return queue_var_show(queue_max_segments(q), (page)); } +static ssize_t queue_max_discard_segments_show(struct request_queue *q, + char *page) +{ + return queue_var_show(queue_max_discard_segments(q), (page)); +} + static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page) { return queue_var_show(q->limits.max_integrity_segments, (page)); @@ -236,7 +242,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) spin_lock_irq(q->queue_lock); q->limits.max_sectors = max_sectors_kb << 1; - q->backing_dev_info.io_pages = max_sectors_kb >> (PAGE_SHIFT - 10); + q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10); spin_unlock_irq(q->queue_lock); return ret; @@ -545,6 +551,11 @@ static struct queue_sysfs_entry queue_max_segments_entry = { .show = queue_max_segments_show, }; +static struct queue_sysfs_entry queue_max_discard_segments_entry = { + .attr = {.name = "max_discard_segments", .mode = S_IRUGO }, + .show = queue_max_discard_segments_show, +}; + static struct queue_sysfs_entry queue_max_integrity_segments_entry = { .attr = {.name = "max_integrity_segments", .mode = S_IRUGO }, .show = queue_max_integrity_segments_show, @@ -697,6 +708,7 @@ static struct attribute *default_attrs[] = { &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_max_segments_entry.attr, + &queue_max_discard_segments_entry.attr, &queue_max_integrity_segments_entry.attr, &queue_max_segment_size_entry.attr, &queue_iosched_entry.attr, @@ -799,7 +811,7 @@ static void blk_release_queue(struct kobject *kobj) container_of(kobj, struct request_queue, kobj); wbt_exit(q); - bdi_exit(&q->backing_dev_info); + bdi_put(q->backing_dev_info); blkcg_exit_queue(q); if (q->elevator) { @@ -814,13 +826,19 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); - if (!q->mq_ops) + if (!q->mq_ops) { + if (q->exit_rq_fn) + q->exit_rq_fn(q, q->fq->flush_rq); blk_free_flush_queue(q->fq); - else + } else { blk_mq_release(q); + } blk_trace_shutdown(q); + if (q->mq_ops) + blk_mq_debugfs_unregister(q); + if (q->bio_split) bioset_free(q->bio_split); @@ -884,32 +902,36 @@ int blk_register_queue(struct gendisk *disk) if (ret) return ret; + if (q->mq_ops) + blk_mq_register_dev(dev, q); + + /* Prevent changes through sysfs until registration is completed. */ + mutex_lock(&q->sysfs_lock); + ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) { blk_trace_remove_sysfs(dev); - return ret; + goto unlock; } kobject_uevent(&q->kobj, KOBJ_ADD); - if (q->mq_ops) - blk_mq_register_dev(dev, q); - blk_wb_init(q); - if (!q->request_fn) - return 0; - - ret = elv_register_queue(q); - if (ret) { - kobject_uevent(&q->kobj, KOBJ_REMOVE); - kobject_del(&q->kobj); - blk_trace_remove_sysfs(dev); - kobject_put(&dev->kobj); - return ret; + if (q->request_fn || (q->mq_ops && q->elevator)) { + ret = elv_register_queue(q); + if (ret) { + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); + blk_trace_remove_sysfs(dev); + kobject_put(&dev->kobj); + goto unlock; + } } - - return 0; + ret = 0; +unlock: + mutex_unlock(&q->sysfs_lock); + return ret; } void blk_unregister_queue(struct gendisk *disk) @@ -922,7 +944,7 @@ void blk_unregister_queue(struct gendisk *disk) if (q->mq_ops) blk_mq_unregister_dev(disk_to_dev(disk), q); - if (q->request_fn) + if (q->request_fn || (q->mq_ops && q->elevator)) elv_unregister_queue(q); kobject_uevent(&q->kobj, KOBJ_REMOVE); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index f0a9c07..1aedb1f 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -96,7 +96,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) */ static bool wb_recent_wait(struct rq_wb *rwb) { - struct bdi_writeback *wb = &rwb->queue->backing_dev_info.wb; + struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb; return time_before(jiffies, wb->dirty_sleep + HZ); } @@ -279,7 +279,7 @@ enum { static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) { - struct backing_dev_info *bdi = &rwb->queue->backing_dev_info; + struct backing_dev_info *bdi = rwb->queue->backing_dev_info; u64 thislat; /* @@ -339,7 +339,7 @@ static int latency_exceeded(struct rq_wb *rwb) static void rwb_trace_step(struct rq_wb *rwb, const char *msg) { - struct backing_dev_info *bdi = &rwb->queue->backing_dev_info; + struct backing_dev_info *bdi = rwb->queue->backing_dev_info; trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec, rwb->wb_background, rwb->wb_normal, rwb->wb_max); @@ -423,7 +423,7 @@ static void wb_timer_fn(unsigned long data) status = latency_exceeded(rwb); - trace_wbt_timer(&rwb->queue->backing_dev_info, status, rwb->scale_step, + trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step, inflight); /* diff --git a/block/blk.h b/block/blk.h index 9a716b5..d1ea4bd9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -14,6 +14,10 @@ /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) +#ifdef CONFIG_DEBUG_FS +extern struct dentry *blk_debugfs_root; +#endif + struct blk_flush_queue { unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; @@ -96,6 +100,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, struct bio *bio); bool bio_attempt_back_merge(struct request_queue *q, struct request *req, struct bio *bio); +bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, + struct bio *bio); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int *request_count, struct request **same_queue_rq); @@ -204,14 +210,14 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio); int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio); -int attempt_back_merge(struct request_queue *q, struct request *rq); -int attempt_front_merge(struct request_queue *q, struct request *rq); +struct request *attempt_back_merge(struct request_queue *q, struct request *rq); +struct request *attempt_front_merge(struct request_queue *q, struct request *rq); int blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next); void blk_recalc_rq_segments(struct request *rq); void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); -int blk_try_merge(struct request *rq, struct bio *bio); +enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); void blk_queue_congestion_threshold(struct request_queue *q); @@ -249,7 +255,14 @@ static inline int blk_do_io_stat(struct request *rq) { return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT) && - (rq->cmd_type == REQ_TYPE_FS); + !blk_rq_is_passthrough(rq); +} + +static inline void req_set_nomerge(struct request_queue *q, struct request *req) +{ + req->cmd_flags |= REQ_NOMERGE; + if (req == q->last_merge) + q->last_merge = NULL; } /* diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 9d652a9..cd15f9d 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -71,22 +71,24 @@ void bsg_job_done(struct bsg_job *job, int result, { struct request *req = job->req; struct request *rsp = req->next_rq; + struct scsi_request *rq = scsi_req(req); int err; err = job->req->errors = result; if (err < 0) /* we're only returning the result field in the reply */ - job->req->sense_len = sizeof(u32); + rq->sense_len = sizeof(u32); else - job->req->sense_len = job->reply_len; + rq->sense_len = job->reply_len; /* we assume all request payload was transferred, residual == 0 */ - req->resid_len = 0; + rq->resid_len = 0; if (rsp) { - WARN_ON(reply_payload_rcv_len > rsp->resid_len); + WARN_ON(reply_payload_rcv_len > scsi_req(rsp)->resid_len); /* set reply (bidi) residual */ - rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); + scsi_req(rsp)->resid_len -= + min(reply_payload_rcv_len, scsi_req(rsp)->resid_len); } blk_complete_request(req); } @@ -113,6 +115,7 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) if (!buf->sg_list) return -ENOMEM; sg_init_table(buf->sg_list, req->nr_phys_segments); + scsi_req(req)->resid_len = blk_rq_bytes(req); buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); buf->payload_len = blk_rq_bytes(req); return 0; @@ -127,6 +130,7 @@ static int bsg_create_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; struct request_queue *q = req->q; + struct scsi_request *rq = scsi_req(req); struct bsg_job *job; int ret; @@ -140,9 +144,9 @@ static int bsg_create_job(struct device *dev, struct request *req) job->req = req; if (q->bsg_job_size) job->dd_data = (void *)&job[1]; - job->request = req->cmd; - job->request_len = req->cmd_len; - job->reply = req->sense; + job->request = rq->cmd; + job->request_len = rq->cmd_len; + job->reply = rq->sense; job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer * allocated */ if (req->bio) { @@ -177,7 +181,7 @@ failjob_rls_job: * * Drivers/subsys should pass this to the queue init function. */ -void bsg_request_fn(struct request_queue *q) +static void bsg_request_fn(struct request_queue *q) __releases(q->queue_lock) __acquires(q->queue_lock) { @@ -214,24 +218,30 @@ void bsg_request_fn(struct request_queue *q) put_device(dev); spin_lock_irq(q->queue_lock); } -EXPORT_SYMBOL_GPL(bsg_request_fn); /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to - * @q: request queue setup by caller * @name: device to give bsg device * @job_fn: bsg job handler * @dd_job_size: size of LLD data needed for each job - * - * The caller should have setup the reuqest queue with bsg_request_fn - * as the request_fn. */ -int bsg_setup_queue(struct device *dev, struct request_queue *q, - char *name, bsg_job_fn *job_fn, int dd_job_size) +struct request_queue *bsg_setup_queue(struct device *dev, char *name, + bsg_job_fn *job_fn, int dd_job_size) { + struct request_queue *q; int ret; + q = blk_alloc_queue(GFP_KERNEL); + if (!q) + return ERR_PTR(-ENOMEM); + q->cmd_size = sizeof(struct scsi_request); + q->request_fn = bsg_request_fn; + + ret = blk_init_allocated_queue(q); + if (ret) + goto out_cleanup_queue; + q->queuedata = dev; q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; @@ -243,9 +253,12 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q, if (ret) { printk(KERN_ERR "%s: bsg interface failed to " "initialize - register queue\n", dev->kobj.name); - return ret; + goto out_cleanup_queue; } - return 0; + return q; +out_cleanup_queue: + blk_cleanup_queue(q); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(bsg_setup_queue); diff --git a/block/bsg.c b/block/bsg.c index a57046d..a9a8b8e 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -85,7 +85,6 @@ struct bsg_command { struct bio *bidi_bio; int err; struct sg_io_v4 hdr; - char sense[SCSI_SENSE_BUFFERSIZE]; }; static void bsg_free_command(struct bsg_command *bc) @@ -140,18 +139,20 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, struct sg_io_v4 *hdr, struct bsg_device *bd, fmode_t has_write_perm) { + struct scsi_request *req = scsi_req(rq); + if (hdr->request_len > BLK_MAX_CDB) { - rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL); - if (!rq->cmd) + req->cmd = kzalloc(hdr->request_len, GFP_KERNEL); + if (!req->cmd) return -ENOMEM; } - if (copy_from_user(rq->cmd, (void __user *)(unsigned long)hdr->request, + if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request, hdr->request_len)) return -EFAULT; if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { - if (blk_verify_command(rq->cmd, has_write_perm)) + if (blk_verify_command(req->cmd, has_write_perm)) return -EPERM; } else if (!capable(CAP_SYS_RAWIO)) return -EPERM; @@ -159,7 +160,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, /* * fill in request structure */ - rq->cmd_len = hdr->request_len; + req->cmd_len = hdr->request_len; rq->timeout = msecs_to_jiffies(hdr->timeout); if (!rq->timeout) @@ -176,7 +177,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, * Check if sg_io_v4 from user is allowed and valid */ static int -bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw) +bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op) { int ret = 0; @@ -197,7 +198,7 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw) ret = -EINVAL; } - *rw = hdr->dout_xfer_len ? WRITE : READ; + *op = hdr->dout_xfer_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN; return ret; } @@ -205,13 +206,12 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw) * map sg_io_v4 to a request. */ static struct request * -bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, - u8 *sense) +bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) { struct request_queue *q = bd->queue; struct request *rq, *next_rq = NULL; - int ret, rw; - unsigned int dxfer_len; + int ret; + unsigned int op, dxfer_len; void __user *dxferp = NULL; struct bsg_class_device *bcd = &q->bsg_dev; @@ -226,36 +226,35 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp, hdr->din_xfer_len); - ret = bsg_validate_sgv4_hdr(hdr, &rw); + ret = bsg_validate_sgv4_hdr(hdr, &op); if (ret) return ERR_PTR(ret); /* * map scatter-gather elements separately and string them to request */ - rq = blk_get_request(q, rw, GFP_KERNEL); + rq = blk_get_request(q, op, GFP_KERNEL); if (IS_ERR(rq)) return rq; - blk_rq_set_block_pc(rq); + scsi_req_init(rq); ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); if (ret) goto out; - if (rw == WRITE && hdr->din_xfer_len) { + if (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len) { if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) { ret = -EOPNOTSUPP; goto out; } - next_rq = blk_get_request(q, READ, GFP_KERNEL); + next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(next_rq)) { ret = PTR_ERR(next_rq); next_rq = NULL; goto out; } rq->next_rq = next_rq; - next_rq->cmd_type = rq->cmd_type; dxferp = (void __user *)(unsigned long)hdr->din_xferp; ret = blk_rq_map_user(q, next_rq, NULL, dxferp, @@ -280,13 +279,9 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, goto out; } - rq->sense = sense; - rq->sense_len = 0; - return rq; out: - if (rq->cmd != rq->__cmd) - kfree(rq->cmd); + scsi_req_free_cmd(scsi_req(rq)); blk_put_request(rq); if (next_rq) { blk_rq_unmap_user(next_rq->bio); @@ -393,6 +388,7 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd) static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, struct bio *bio, struct bio *bidi_bio) { + struct scsi_request *req = scsi_req(rq); int ret = 0; dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors); @@ -407,12 +403,12 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, hdr->info |= SG_INFO_CHECK; hdr->response_len = 0; - if (rq->sense_len && hdr->response) { + if (req->sense_len && hdr->response) { int len = min_t(unsigned int, hdr->max_response_len, - rq->sense_len); + req->sense_len); ret = copy_to_user((void __user *)(unsigned long)hdr->response, - rq->sense, len); + req->sense, len); if (!ret) hdr->response_len = len; else @@ -420,14 +416,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, } if (rq->next_rq) { - hdr->dout_resid = rq->resid_len; - hdr->din_resid = rq->next_rq->resid_len; + hdr->dout_resid = req->resid_len; + hdr->din_resid = scsi_req(rq->next_rq)->resid_len; blk_rq_unmap_user(bidi_bio); blk_put_request(rq->next_rq); } else if (rq_data_dir(rq) == READ) - hdr->din_resid = rq->resid_len; + hdr->din_resid = req->resid_len; else - hdr->dout_resid = rq->resid_len; + hdr->dout_resid = req->resid_len; /* * If the request generated a negative error number, return it @@ -439,8 +435,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, ret = rq->errors; blk_rq_unmap_user(bio); - if (rq->cmd != rq->__cmd) - kfree(rq->cmd); + scsi_req_free_cmd(req); blk_put_request(rq); return ret; @@ -625,7 +620,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf, /* * get a request, fill in the blanks, and add to request queue */ - rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm, bc->sense); + rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm); if (IS_ERR(rq)) { ret = PTR_ERR(rq); rq = NULL; @@ -911,12 +906,11 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct bio *bio, *bidi_bio = NULL; struct sg_io_v4 hdr; int at_head; - u8 sense[SCSI_SENSE_BUFFERSIZE]; if (copy_from_user(&hdr, uarg, sizeof(hdr))) return -EFAULT; - rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE, sense); + rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index f0f29ee..9212627 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2528,7 +2528,7 @@ static void cfq_remove_request(struct request *rq) } } -static int cfq_merge(struct request_queue *q, struct request **req, +static enum elv_merge cfq_merge(struct request_queue *q, struct request **req, struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; @@ -2544,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req, } static void cfq_merged_request(struct request_queue *q, struct request *req, - int type) + enum elv_merge type) { if (type == ELEVATOR_FRONT_MERGE) { struct cfq_queue *cfqq = RQ_CFQQ(req); diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 556826a..570021a 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -661,7 +661,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct block_device *bdev = inode->i_bdev; struct gendisk *disk = bdev->bd_disk; fmode_t mode = file->f_mode; - struct backing_dev_info *bdi; loff_t size; unsigned int max_sectors; @@ -708,9 +707,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKFRAGET: if (!arg) return -EINVAL; - bdi = blk_get_backing_dev_info(bdev); return compat_put_long(arg, - (bdi->ra_pages * PAGE_SIZE) / 512); + (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512); case BLKROGET: /* compatible */ return compat_put_int(arg, bdev_read_only(bdev) != 0); case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ @@ -728,8 +726,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKFRASET: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - bdi = blk_get_backing_dev_info(bdev); - bdi->ra_pages = (arg * 512) / PAGE_SIZE; + bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKGETSIZE: size = i_size_read(bdev->bd_inode); diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 05fc0ea..c68f6bb 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -120,12 +120,11 @@ static void deadline_remove_request(struct request_queue *q, struct request *rq) deadline_del_rq_rb(dd, rq); } -static int +static enum elv_merge deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) { struct deadline_data *dd = q->elevator->elevator_data; struct request *__rq; - int ret; /* * check for front merge @@ -138,20 +137,17 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) BUG_ON(sector != blk_rq_pos(__rq)); if (elv_bio_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; + *req = __rq; + return ELEVATOR_FRONT_MERGE; } } } return ELEVATOR_NO_MERGE; -out: - *req = __rq; - return ret; } static void deadline_merged_request(struct request_queue *q, - struct request *req, int type) + struct request *req, enum elv_merge type) { struct deadline_data *dd = q->elevator->elevator_data; diff --git a/block/elevator.c b/block/elevator.c index b2a5516..699d10f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -428,11 +428,11 @@ void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(elv_dispatch_add_tail); -int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) +enum elv_merge elv_merge(struct request_queue *q, struct request **req, + struct bio *bio) { struct elevator_queue *e = q->elevator; struct request *__rq; - int ret; /* * Levels of merges: @@ -447,7 +447,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) * First try one-hit cache. */ if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) { - ret = blk_try_merge(q->last_merge, bio); + enum elv_merge ret = blk_try_merge(q->last_merge, bio); + if (ret != ELEVATOR_NO_MERGE) { *req = q->last_merge; return ret; @@ -515,7 +516,8 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) return ret; } -void elv_merged_request(struct request_queue *q, struct request *rq, int type) +void elv_merged_request(struct request_queue *q, struct request *rq, + enum elv_merge type) { struct elevator_queue *e = q->elevator; @@ -539,7 +541,7 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, if (e->uses_mq && e->type->ops.mq.requests_merged) e->type->ops.mq.requests_merged(q, rq, next); else if (e->type->ops.sq.elevator_merge_req_fn) { - next_sorted = next->rq_flags & RQF_SORTED; + next_sorted = (__force bool)(next->rq_flags & RQF_SORTED); if (next_sorted) e->type->ops.sq.elevator_merge_req_fn(q, rq, next); } @@ -635,7 +637,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (rq->rq_flags & RQF_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ - if (rq->cmd_type == REQ_TYPE_FS) { + if (!blk_rq_is_passthrough(rq)) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } @@ -677,7 +679,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (elv_attempt_insert_merge(q, rq)) break; case ELEVATOR_INSERT_SORT: - BUG_ON(rq->cmd_type != REQ_TYPE_FS); + BUG_ON(blk_rq_is_passthrough(rq)); rq->rq_flags |= RQF_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { diff --git a/block/genhd.c b/block/genhd.c index fcd6d4f..3631cd4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -572,6 +572,20 @@ exit: disk_part_iter_exit(&piter); } +void put_disk_devt(struct disk_devt *disk_devt) +{ + if (disk_devt && atomic_dec_and_test(&disk_devt->count)) + disk_devt->release(disk_devt); +} +EXPORT_SYMBOL(put_disk_devt); + +void get_disk_devt(struct disk_devt *disk_devt) +{ + if (disk_devt) + atomic_inc(&disk_devt->count); +} +EXPORT_SYMBOL(get_disk_devt); + /** * device_add_disk - add partitioning information to kernel list * @parent: parent device for the disk @@ -612,8 +626,15 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk_alloc_events(disk); + /* + * Take a reference on the devt and assign it to queue since it + * must not be reallocated while the bdi is registered + */ + disk->queue->disk_devt = disk->disk_devt; + get_disk_devt(disk->disk_devt); + /* Register BDI before referencing it from bdev */ - bdi = &disk->queue->backing_dev_info; + bdi = disk->queue->backing_dev_info; bdi_register_owner(bdi, disk_to_dev(disk)); blk_register_region(disk_devt(disk), disk->minors, NULL, @@ -648,6 +669,8 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { + bdev_unhash_inode(MKDEV(disk->major, + disk->first_minor + part->partno)); invalidate_partition(disk, part->partno); delete_partition(disk, part->partno); } diff --git a/block/ioctl.c b/block/ioctl.c index be7f4de..7b88820 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -505,7 +505,6 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode, int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { - struct backing_dev_info *bdi; void __user *argp = (void __user *)arg; loff_t size; unsigned int max_sectors; @@ -532,8 +531,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKFRAGET: if (!arg) return -EINVAL; - bdi = blk_get_backing_dev_info(bdev); - return put_long(arg, (bdi->ra_pages * PAGE_SIZE) / 512); + return put_long(arg, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ @@ -560,8 +558,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - bdi = blk_get_backing_dev_info(bdev); - bdi->ra_pages = (arg * 512) / PAGE_SIZE; + bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKBSZSET: return blkdev_bszset(bdev, mode, argp); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index d93ec71..23612163 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -121,7 +121,7 @@ static void deadline_remove_request(struct request_queue *q, struct request *rq) } static void dd_request_merged(struct request_queue *q, struct request *req, - int type) + enum elv_merge type) { struct deadline_data *dd = q->elevator->elevator_data; @@ -371,12 +371,16 @@ static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) { struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; - int ret; + struct request *free = NULL; + bool ret; spin_lock(&dd->lock); - ret = blk_mq_sched_try_merge(q, bio); + ret = blk_mq_sched_try_merge(q, bio, &free); spin_unlock(&dd->lock); + if (free) + blk_mq_free_request(free); + return ret; } @@ -395,10 +399,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_mq_sched_request_inserted(rq); - if (blk_mq_sched_bypass_insert(hctx, rq)) - return; - - if (at_head || rq->cmd_type != REQ_TYPE_FS) { + if (at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &dd->dispatch); else diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index c2b6492..2a2fc76 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -230,15 +230,17 @@ EXPORT_SYMBOL(blk_verify_command); static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, struct sg_io_hdr *hdr, fmode_t mode) { - if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) + struct scsi_request *req = scsi_req(rq); + + if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; - if (blk_verify_command(rq->cmd, mode & FMODE_WRITE)) + if (blk_verify_command(req->cmd, mode & FMODE_WRITE)) return -EPERM; /* * fill in request structure */ - rq->cmd_len = hdr->cmd_len; + req->cmd_len = hdr->cmd_len; rq->timeout = msecs_to_jiffies(hdr->timeout); if (!rq->timeout) @@ -254,6 +256,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, struct bio *bio) { + struct scsi_request *req = scsi_req(rq); int r, ret = 0; /* @@ -267,13 +270,13 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, hdr->info = 0; if (hdr->masked_status || hdr->host_status || hdr->driver_status) hdr->info |= SG_INFO_CHECK; - hdr->resid = rq->resid_len; + hdr->resid = req->resid_len; hdr->sb_len_wr = 0; - if (rq->sense_len && hdr->sbp) { - int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len); + if (req->sense_len && hdr->sbp) { + int len = min((unsigned int) hdr->mx_sb_len, req->sense_len); - if (!copy_to_user(hdr->sbp, rq->sense, len)) + if (!copy_to_user(hdr->sbp, req->sense, len)) hdr->sb_len_wr = len; else ret = -EFAULT; @@ -294,7 +297,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, int writing = 0; int at_head = 0; struct request *rq; - char sense[SCSI_SENSE_BUFFERSIZE]; + struct scsi_request *req; struct bio *bio; if (hdr->interface_id != 'S') @@ -318,14 +321,16 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, at_head = 1; ret = -ENOMEM; - rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); + rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + GFP_KERNEL); if (IS_ERR(rq)) return PTR_ERR(rq); - blk_rq_set_block_pc(rq); + req = scsi_req(rq); + scsi_req_init(rq); if (hdr->cmd_len > BLK_MAX_CDB) { - rq->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL); - if (!rq->cmd) + req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL); + if (!req->cmd) goto out_put_request; } @@ -357,9 +362,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, goto out_free_cdb; bio = rq->bio; - memset(sense, 0, sizeof(sense)); - rq->sense = sense; - rq->sense_len = 0; rq->retries = 0; start_time = jiffies; @@ -375,8 +377,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, ret = blk_complete_sghdr_rq(rq, hdr, bio); out_free_cdb: - if (rq->cmd != rq->__cmd) - kfree(rq->cmd); + scsi_req_free_cmd(req); out_put_request: blk_put_request(rq); return ret; @@ -420,9 +421,10 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, struct scsi_ioctl_command __user *sic) { struct request *rq; + struct scsi_request *req; int err; unsigned int in_len, out_len, bytes, opcode, cmdlen; - char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; + char *buffer = NULL; if (!sic) return -EINVAL; @@ -447,12 +449,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } - rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_RECLAIM); + rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + __GFP_RECLAIM); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto error_free_buffer; } - blk_rq_set_block_pc(rq); + req = scsi_req(rq); + scsi_req_init(rq); cmdlen = COMMAND_SIZE(opcode); @@ -460,14 +464,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, * get command and data to send to device, if any */ err = -EFAULT; - rq->cmd_len = cmdlen; - if (copy_from_user(rq->cmd, sic->data, cmdlen)) + req->cmd_len = cmdlen; + if (copy_from_user(req->cmd, sic->data, cmdlen)) goto error; if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; - err = blk_verify_command(rq->cmd, mode & FMODE_WRITE); + err = blk_verify_command(req->cmd, mode & FMODE_WRITE); if (err) goto error; @@ -503,18 +507,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, goto error; } - memset(sense, 0, sizeof(sense)); - rq->sense = sense; - rq->sense_len = 0; - blk_execute_rq(q, disk, rq, 0); err = rq->errors & 0xff; /* only 8 bit SCSI status */ if (err) { - if (rq->sense_len && rq->sense) { - bytes = (OMAX_SB_LEN > rq->sense_len) ? - rq->sense_len : OMAX_SB_LEN; - if (copy_to_user(sic->data, rq->sense, bytes)) + if (req->sense_len && req->sense) { + bytes = (OMAX_SB_LEN > req->sense_len) ? + req->sense_len : OMAX_SB_LEN; + if (copy_to_user(sic->data, req->sense, bytes)) err = -EFAULT; } } else { @@ -539,14 +539,14 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, struct request *rq; int err; - rq = blk_get_request(q, WRITE, __GFP_RECLAIM); + rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); - blk_rq_set_block_pc(rq); + scsi_req_init(rq); rq->timeout = BLK_DEFAULT_SG_TIMEOUT; - rq->cmd[0] = cmd; - rq->cmd[4] = data; - rq->cmd_len = 6; + scsi_req(rq)->cmd[0] = cmd; + scsi_req(rq)->cmd[4] = data; + scsi_req(rq)->cmd_len = 6; err = blk_execute_rq(q, bd_disk, rq, 0); blk_put_request(rq); @@ -743,6 +743,17 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode, } EXPORT_SYMBOL(scsi_cmd_blk_ioctl); +void scsi_req_init(struct request *rq) +{ + struct scsi_request *req = scsi_req(rq); + + memset(req->__cmd, 0, sizeof(req->__cmd)); + req->cmd = req->__cmd; + req->cmd_len = BLK_MAX_CDB; + req->sense_len = 0; +} +EXPORT_SYMBOL(scsi_req_init); + static int __init blk_scsi_ioctl_init(void) { blk_set_cmd_filter_defaults(&blk_default_cmd_filter); |