From 33879d4512c021ae65be9706608dacb36b4687b1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 22:33:32 -0800 Subject: block: submit_bio_wait() conversions It was being open coded in a few places. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Joern Engel Cc: Prasad Joshi Cc: Neil Brown Cc: Chris Mason Acked-by: NeilBrown --- block/blk-flush.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 331e627..fb6f3c0 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -502,15 +502,6 @@ void blk_abort_flushes(struct request_queue *q) } } -static void bio_end_flush(struct bio *bio, int err) -{ - if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - if (bio->bi_private) - complete(bio->bi_private); - bio_put(bio); -} - /** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for @@ -526,7 +517,6 @@ static void bio_end_flush(struct bio *bio, int err) int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, sector_t *error_sector) { - DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q; struct bio *bio; int ret = 0; @@ -548,13 +538,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, return -ENXIO; bio = bio_alloc(gfp_mask, 0); - bio->bi_end_io = bio_end_flush; bio->bi_bdev = bdev; - bio->bi_private = &wait; - bio_get(bio); - submit_bio(WRITE_FLUSH, bio); - wait_for_completion_io(&wait); + ret = submit_bio_wait(WRITE_FLUSH, bio); /* * The driver must store the error location in ->bi_sector, if @@ -564,9 +550,6 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, if (error_sector) *error_sector = bio->bi_sector; - if (!bio_flagged(bio, BIO_UPTODATE)) - ret = -EIO; - bio_put(bio); return ret; } -- cgit v1.1 From 4f024f3797c43cb4b73cd2c50cec728842d0e49e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Oct 2013 15:44:27 -0700 Subject: block: Abstract out bvec iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Geert Uytterhoeven Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Ed L. Cashin" Cc: Nick Piggin Cc: Lars Ellenberg Cc: Jiri Kosina Cc: Matthew Wilcox Cc: Geoff Levand Cc: Yehuda Sadeh Cc: Sage Weil Cc: Alex Elder Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris Cc: Philip Kelleher Cc: Rusty Russell Cc: "Michael S. Tsirkin" Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Neil Brown Cc: Alasdair Kergon Cc: Mike Snitzer Cc: dm-devel@redhat.com Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux390@de.ibm.com Cc: Boaz Harrosh Cc: Benny Halevy Cc: "James E.J. Bottomley" Cc: Greg Kroah-Hartman Cc: "Nicholas A. Bellinger" Cc: Alexander Viro Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Jaegeuk Kim Cc: Steven Whitehouse Cc: Dave Kleikamp Cc: Joern Engel Cc: Prasad Joshi Cc: Trond Myklebust Cc: KONISHI Ryusuke Cc: Mark Fasheh Cc: Joel Becker Cc: Ben Myers Cc: xfs@oss.sgi.com Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Len Brown Cc: Pavel Machek Cc: "Rafael J. 
Wysocki" Cc: Herton Ronaldo Krzesinski Cc: Ben Hutchings Cc: Andrew Morton Cc: Guo Chao Cc: Tejun Heo Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Wei Yongjun Cc: "Roger Pau Monné" Cc: Jan Beulich Cc: Stefano Stabellini Cc: Ian Campbell Cc: Sebastian Ott Cc: Christian Borntraeger Cc: Minchan Kim Cc: Jiang Liu Cc: Nitin Gupta Cc: Jerome Marchand Cc: Joe Perches Cc: Peng Tao Cc: Andy Adamson Cc: fanchaoting Cc: Jie Liu Cc: Sunil Mushran Cc: "Martin K. Petersen" Cc: Namjae Jeon Cc: Pankaj Kumar Cc: Dan Magenheimer Cc: Mel Gorman 6 --- block/blk-core.c | 36 ++++++++++++++++++------------------ block/blk-flush.c | 2 +- block/blk-lib.c | 12 ++++++------ block/blk-map.c | 6 +++--- block/blk-merge.c | 4 ++-- block/blk-mq.c | 2 +- block/blk-throttle.c | 14 +++++++------- block/elevator.c | 2 +- 8 files changed, 39 insertions(+), 39 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 8bdd012..5c2ab2c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -130,7 +130,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ - if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) + if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) bio_endio(bio, error); } @@ -1326,7 +1326,7 @@ void blk_add_request_payload(struct request *rq, struct page *page, bio->bi_io_vec->bv_offset = 0; bio->bi_io_vec->bv_len = len; - bio->bi_size = len; + bio->bi_iter.bi_size = len; bio->bi_vcnt = 1; bio->bi_phys_segments = 1; @@ -1351,7 +1351,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req, req->biotail->bi_next = bio; req->biotail = bio; - req->__data_len += bio->bi_size; + req->__data_len += bio->bi_iter.bi_size; req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); blk_account_io_start(req, false); @@ -1380,8 +1380,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, * not touch req->buffer either... */ req->buffer = bio_data(bio); - req->__sector = bio->bi_sector; - req->__data_len += bio->bi_size; + req->__sector = bio->bi_iter.bi_sector; + req->__data_len += bio->bi_iter.bi_size; req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); blk_account_io_start(req, false); @@ -1459,7 +1459,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_FAILFAST_MASK; req->errors = 0; - req->__sector = bio->bi_sector; + req->__sector = bio->bi_iter.bi_sector; req->ioprio = bio_prio(bio); blk_rq_bio_prep(req->q, req, bio); } @@ -1583,12 +1583,12 @@ static inline void blk_partition_remap(struct bio *bio) if (bio_sectors(bio) && bdev != bdev->bd_contains) { struct hd_struct *p = bdev->bd_part; - bio->bi_sector += p->start_sect; + bio->bi_iter.bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, bdev->bd_dev, - bio->bi_sector - p->start_sect); + bio->bi_iter.bi_sector - p->start_sect); } } @@ -1654,7 +1654,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) /* Test device or partition size, when known. 
*/ maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; if (maxsector) { - sector_t sector = bio->bi_sector; + sector_t sector = bio->bi_iter.bi_sector; if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { /* @@ -1690,7 +1690,7 @@ generic_make_request_checks(struct bio *bio) "generic_make_request: Trying to access " "nonexistent block-device %s (%Lu)\n", bdevname(bio->bi_bdev, b), - (long long) bio->bi_sector); + (long long) bio->bi_iter.bi_sector); goto end_io; } @@ -1704,9 +1704,9 @@ generic_make_request_checks(struct bio *bio) } part = bio->bi_bdev->bd_part; - if (should_fail_request(part, bio->bi_size) || + if (should_fail_request(part, bio->bi_iter.bi_size) || should_fail_request(&part_to_disk(part)->part0, - bio->bi_size)) + bio->bi_iter.bi_size)) goto end_io; /* @@ -1865,7 +1865,7 @@ void submit_bio(int rw, struct bio *bio) if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { - task_io_account_read(bio->bi_size); + task_io_account_read(bio->bi_iter.bi_size); count_vm_events(PGPGIN, count); } @@ -1874,7 +1874,7 @@ void submit_bio(int rw, struct bio *bio) printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", current->comm, task_pid_nr(current), (rw & WRITE) ? "WRITE" : "READ", - (unsigned long long)bio->bi_sector, + (unsigned long long)bio->bi_iter.bi_sector, bdevname(bio->bi_bdev, b), count); } @@ -2007,7 +2007,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq) for (bio = rq->bio; bio; bio = bio->bi_next) { if ((bio->bi_rw & ff) != ff) break; - bytes += bio->bi_size; + bytes += bio->bi_iter.bi_size; } /* this could lead to infinite loop */ @@ -2378,9 +2378,9 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) total_bytes = 0; while (req->bio) { struct bio *bio = req->bio; - unsigned bio_bytes = min(bio->bi_size, nr_bytes); + unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); - if (bio_bytes == bio->bi_size) + if (bio_bytes == bio->bi_iter.bi_size) req->bio = bio->bi_next; req_bio_endio(req, bio, bio_bytes, error); @@ -2728,7 +2728,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->nr_phys_segments = bio_phys_segments(q, bio); rq->buffer = bio_data(bio); } - rq->__data_len = bio->bi_size; + rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; if (bio->bi_bdev) diff --git a/block/blk-flush.c b/block/blk-flush.c index fb6f3c0..9288aaf 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -548,7 +548,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, * copied from blk_rq_pos(rq). 
*/ if (error_sector) - *error_sector = bio->bi_sector; + *error_sector = bio->bi_iter.bi_sector; bio_put(bio); return ret; diff --git a/block/blk-lib.c b/block/blk-lib.c index 9b5b561..2da76c9 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -108,12 +108,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, req_sects = end_sect - sector; } - bio->bi_sector = sector; + bio->bi_iter.bi_sector = sector; bio->bi_end_io = bio_batch_end_io; bio->bi_bdev = bdev; bio->bi_private = &bb; - bio->bi_size = req_sects << 9; + bio->bi_iter.bi_size = req_sects << 9; nr_sects -= req_sects; sector = end_sect; @@ -174,7 +174,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, break; } - bio->bi_sector = sector; + bio->bi_iter.bi_sector = sector; bio->bi_end_io = bio_batch_end_io; bio->bi_bdev = bdev; bio->bi_private = &bb; @@ -184,11 +184,11 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); if (nr_sects > max_write_same_sectors) { - bio->bi_size = max_write_same_sectors << 9; + bio->bi_iter.bi_size = max_write_same_sectors << 9; nr_sects -= max_write_same_sectors; sector += max_write_same_sectors; } else { - bio->bi_size = nr_sects << 9; + bio->bi_iter.bi_size = nr_sects << 9; nr_sects = 0; } @@ -240,7 +240,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, break; } - bio->bi_sector = sector; + bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; bio->bi_end_io = bio_batch_end_io; bio->bi_private = &bb; diff --git a/block/blk-map.c b/block/blk-map.c index 623e1cd..ae4ae10 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -20,7 +20,7 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, rq->biotail->bi_next = bio; rq->biotail = bio; - rq->__data_len += bio->bi_size; + rq->__data_len += bio->bi_iter.bi_size; } return 0; } @@ -76,7 +76,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, ret = blk_rq_append_bio(q, rq, bio); if (!ret) - return bio->bi_size; + return bio->bi_iter.bi_size; /* if it was boucned we must call the end io function */ bio_endio(bio, 0); @@ -220,7 +220,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, if (IS_ERR(bio)) return PTR_ERR(bio); - if (bio->bi_size != len) { + if (bio->bi_iter.bi_size != len) { /* * Grab an extra reference to this bio, as bio_unmap_user() * expects to be able to drop it twice as it happens on the diff --git a/block/blk-merge.c b/block/blk-merge.c index 1ffc589..03bc083 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -543,9 +543,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) int blk_try_merge(struct request *rq, struct bio *bio) { - if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector) + if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) return ELEVATOR_BACK_MERGE; - else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector) + else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) return ELEVATOR_FRONT_MERGE; return ELEVATOR_NO_MERGE; } diff --git a/block/blk-mq.c b/block/blk-mq.c index cdc629c..e4fbcc3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -301,7 +301,7 @@ void blk_mq_complete_request(struct request *rq, int error) struct bio *next = bio->bi_next; bio->bi_next = NULL; - bytes += bio->bi_size; + bytes += bio->bi_iter.bi_size; blk_mq_bio_endio(rq, bio, error); bio = next; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 0653404..20f82003 100644 
--- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -877,14 +877,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, do_div(tmp, HZ); bytes_allowed = tmp; - if (tg->bytes_disp[rw] + bio->bi_size <= bytes_allowed) { + if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) { if (wait) *wait = 0; return 1; } /* Calc approx time to dispatch */ - extra_bytes = tg->bytes_disp[rw] + bio->bi_size - bytes_allowed; + extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed; jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]); if (!jiffy_wait) @@ -987,7 +987,7 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) bool rw = bio_data_dir(bio); /* Charge the bio to the group */ - tg->bytes_disp[rw] += bio->bi_size; + tg->bytes_disp[rw] += bio->bi_iter.bi_size; tg->io_disp[rw]++; /* @@ -1003,8 +1003,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) */ if (!(bio->bi_rw & REQ_THROTTLED)) { bio->bi_rw |= REQ_THROTTLED; - throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, - bio->bi_rw); + throtl_update_dispatch_stats(tg_to_blkg(tg), + bio->bi_iter.bi_size, bio->bi_rw); } } @@ -1508,7 +1508,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) if (tg) { if (!tg->has_rules[rw]) { throtl_update_dispatch_stats(tg_to_blkg(tg), - bio->bi_size, bio->bi_rw); + bio->bi_iter.bi_size, bio->bi_rw); goto out_unlock_rcu; } } @@ -1564,7 +1564,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) /* out-of-limit, queue to @tg */ throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d", rw == READ ? 'R' : 'W', - tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], + tg->bytes_disp[rw], bio->bi_iter.bi_size, tg->bps[rw], tg->io_disp[rw], tg->iops[rw], sq->nr_queued[READ], sq->nr_queued[WRITE]); diff --git a/block/elevator.c b/block/elevator.c index b7ff286..42c45a7 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -440,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) /* * See if our hash lookup can find a potential backmerge. */ - __rq = elv_rqhash_find(q, bio->bi_sector); + __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); if (__rq && elv_rq_merge_ok(__rq, bio)) { *req = __rq; return ELEVATOR_BACK_MERGE; -- cgit v1.1 From 7988613b0e5b2638caf6cd493cc78e9595eba19c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 17:19:00 -0800 Subject: block: Convert bio_for_each_segment() to bvec_iter More prep work for immutable biovecs - with immutable bvecs drivers won't be able to use the biovec directly, they'll need to use helpers that take into account bio->bi_iter.bi_bvec_done. This updates callers for the new usage without changing the implementation yet. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Geert Uytterhoeven Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Ed L. Cashin" Cc: Nick Piggin Cc: Lars Ellenberg Cc: Jiri Kosina Cc: Paul Clements Cc: Jim Paris Cc: Geoff Levand Cc: Yehuda Sadeh Cc: Sage Weil Cc: Alex Elder Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris Cc: Philip Kelleher Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Neil Brown Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux390@de.ibm.com Cc: Nagalakshmi Nandigama Cc: Sreekanth Reddy Cc: support@lsi.com Cc: "James E.J. 
Bottomley" Cc: Greg Kroah-Hartman Cc: Alexander Viro Cc: Steven Whitehouse Cc: Herton Ronaldo Krzesinski Cc: Tejun Heo Cc: Andrew Morton Cc: Guo Chao Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Matthew Wilcox Cc: Keith Busch Cc: Stephen Hemminger Cc: Quoc-Son Anh Cc: Sebastian Ott Cc: Nitin Gupta Cc: Minchan Kim Cc: Jerome Marchand Cc: Seth Jennings Cc: "Martin K. Petersen" Cc: Mike Snitzer Cc: Vivek Goyal Cc: "Darrick J. Wong" Cc: Chris Metcalf Cc: Jan Kara Cc: linux-m68k@lists.linux-m68k.org Cc: linuxppc-dev@lists.ozlabs.org Cc: drbd-user@lists.linbit.com Cc: nbd-general@lists.sourceforge.net Cc: cbe-oss-dev@lists.ozlabs.org Cc: xen-devel@lists.xensource.com Cc: virtualization@lists.linux-foundation.org Cc: linux-raid@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: DL-MPTFusionLinux@lsi.com Cc: linux-scsi@vger.kernel.org Cc: devel@driverdev.osuosl.org Cc: linux-fsdevel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: linux-mm@kvack.org Acked-by: Geoff Levand --- block/blk-core.c | 4 ++-- block/blk-merge.c | 49 +++++++++++++++++++++++-------------------------- 2 files changed, 25 insertions(+), 28 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 5c2ab2c..5da8e90 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2746,10 +2746,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, void rq_flush_dcache_pages(struct request *rq) { struct req_iterator iter; - struct bio_vec *bvec; + struct bio_vec bvec; rq_for_each_segment(bvec, rq, iter) - flush_dcache_page(bvec->bv_page); + flush_dcache_page(bvec.bv_page); } EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); #endif diff --git a/block/blk-merge.c b/block/blk-merge.c index 03bc083..a1ead90 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -12,10 +12,11 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, struct bio *bio) { - struct bio_vec *bv, *bvprv = NULL; - int cluster, i, high, highprv = 1; + struct bio_vec bv, bvprv = { NULL }; + int cluster, high, highprv = 1; unsigned int seg_size, nr_phys_segs; struct bio *fbio, *bbio; + struct bvec_iter iter; if (!bio) return 0; @@ -25,25 +26,23 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, seg_size = 0; nr_phys_segs = 0; for_each_bio(bio) { - bio_for_each_segment(bv, bio, i) { + bio_for_each_segment(bv, bio, iter) { /* * the trick here is making sure that a high page is * never considered part of another segment, since that * might change with the bounce page. 
*/ - high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q); - if (high || highprv) - goto new_segment; - if (cluster) { - if (seg_size + bv->bv_len + high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q); + if (!high && !highprv && cluster) { + if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) + if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) goto new_segment; - seg_size += bv->bv_len; + seg_size += bv.bv_len; bvprv = bv; continue; } @@ -54,7 +53,7 @@ new_segment: nr_phys_segs++; bvprv = bv; - seg_size = bv->bv_len; + seg_size = bv.bv_len; highprv = high; } bbio = bio; @@ -110,21 +109,21 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, return 0; } -static void +static inline void __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, - struct scatterlist *sglist, struct bio_vec **bvprv, + struct scatterlist *sglist, struct bio_vec *bvprv, struct scatterlist **sg, int *nsegs, int *cluster) { int nbytes = bvec->bv_len; - if (*bvprv && *cluster) { + if (*sg && *cluster) { if ((*sg)->length + nbytes > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(*bvprv, bvec)) + if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, *bvprv, bvec)) + if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) goto new_segment; (*sg)->length += nbytes; @@ -150,7 +149,7 @@ new_segment: sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); (*nsegs)++; } - *bvprv = bvec; + *bvprv = *bvec; } /* @@ -160,7 +159,7 @@ new_segment: int blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sglist) { - struct bio_vec *bvec, *bvprv; + struct bio_vec bvec, bvprv; struct req_iterator iter; struct scatterlist *sg; int nsegs, cluster; @@ -171,10 +170,9 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, /* * for each bio in rq */ - bvprv = NULL; sg = NULL; rq_for_each_segment(bvec, rq, iter) { - __blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg, + __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, &nsegs, &cluster); } /* segments in rq */ @@ -223,18 +221,17 @@ EXPORT_SYMBOL(blk_rq_map_sg); int blk_bio_map_sg(struct request_queue *q, struct bio *bio, struct scatterlist *sglist) { - struct bio_vec *bvec, *bvprv; + struct bio_vec bvec, bvprv; struct scatterlist *sg; int nsegs, cluster; - unsigned long i; + struct bvec_iter iter; nsegs = 0; cluster = blk_queue_cluster(q); - bvprv = NULL; sg = NULL; - bio_for_each_segment(bvec, bio, i) { - __blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg, + bio_for_each_segment(bvec, bio, iter) { + __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, &nsegs, &cluster); } /* segments in bio */ -- cgit v1.1 From d57a5f7c6605f15f3b5134837e68b448a7cea88e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 17:20:16 -0800 Subject: bio-integrity: Convert to bvec_iter The bio integrity is also stored in a bvec array, so if we use the bvec iter code we just added, the integrity code won't need to implement its own iteration stuff (bio_integrity_mark_head(), bio_integrity_mark_tail()) Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: "Martin K. Petersen" Cc: "James E.J. 
Bottomley" --- block/blk-integrity.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'block') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 03cf717..7fbab84 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -43,30 +43,32 @@ static const char *bi_unsupported_name = "unsupported"; */ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) { - struct bio_vec *iv, *ivprv = NULL; + struct bio_vec iv, ivprv = { NULL }; unsigned int segments = 0; unsigned int seg_size = 0; - unsigned int i = 0; + struct bvec_iter iter; + int prev = 0; - bio_for_each_integrity_vec(iv, bio, i) { + bio_for_each_integrity_vec(iv, bio, iter) { - if (ivprv) { - if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + if (prev) { + if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) goto new_segment; - if (seg_size + iv->bv_len > queue_max_segment_size(q)) + if (seg_size + iv.bv_len > queue_max_segment_size(q)) goto new_segment; - seg_size += iv->bv_len; + seg_size += iv.bv_len; } else { new_segment: segments++; - seg_size = iv->bv_len; + seg_size = iv.bv_len; } + prev = 1; ivprv = iv; } @@ -87,24 +89,25 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg); int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, struct scatterlist *sglist) { - struct bio_vec *iv, *ivprv = NULL; + struct bio_vec iv, ivprv = { NULL }; struct scatterlist *sg = NULL; unsigned int segments = 0; - unsigned int i = 0; + struct bvec_iter iter; + int prev = 0; - bio_for_each_integrity_vec(iv, bio, i) { + bio_for_each_integrity_vec(iv, bio, iter) { - if (ivprv) { - if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + if (prev) { + if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) goto new_segment; - if (sg->length + iv->bv_len > queue_max_segment_size(q)) + if (sg->length + iv.bv_len > queue_max_segment_size(q)) goto new_segment; - sg->length += iv->bv_len; + sg->length += iv.bv_len; } else { new_segment: if (!sg) @@ -114,10 +117,11 @@ new_segment: sg = sg_next(sg); } - sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset); + sg_set_page(sg, iv.bv_page, iv.bv_len, iv.bv_offset); segments++; } + prev = 1; ivprv = iv; } -- cgit v1.1 From f619d25460473788944e3b71b030398681e8809b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2013 14:30:33 -0700 Subject: block: Kill bio_iovec_idx(), __bio_iovec() bio_iovec_idx() and __bio_iovec() don't have any valid uses anymore - previous users have been converted to bio_iovec_iter() or other methods. __BVEC_END() has to go too - the bvec array can't be used directly for the last biovec because we might only be using the first portion of it, we have to iterate over the bvec array with bio_for_each_segment() which checks against the current value of bi_iter.bi_size. 
Signed-off-by: Kent Overstreet Cc: Jens Axboe --- block/blk-merge.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index a1ead90..05c17be 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -86,6 +86,9 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { + struct bio_vec end_bv, nxt_bv; + struct bvec_iter iter; + if (!blk_queue_cluster(q)) return 0; @@ -96,14 +99,20 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, if (!bio_has_data(bio)) return 1; - if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) + bio_for_each_segment(end_bv, bio, iter) + if (end_bv.bv_len == iter.bi_size) + break; + + nxt_bv = bio_iovec(nxt); + + if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) return 0; /* * bio and nxt are contiguous in memory; check if the queue allows * these two to be merged into one */ - if (BIO_SEG_BOUNDARY(q, bio, nxt)) + if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) return 1; return 0; -- cgit v1.1 From 3f273d301b535ef46f9c689e5b2828b741e81050 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 26 Nov 2013 16:36:49 -0800 Subject: block: Silence spurious compiler warnings Signed-off-by: Kent Overstreet Signed-off-by: Jens Axboe --- block/blk-merge.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index 05c17be..0b097f6 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -89,6 +89,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio_vec end_bv, nxt_bv; struct bvec_iter iter; + uninitialized_var(end_bv); + if (!blk_queue_cluster(q)) return 0; @@ -173,6 +175,8 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sg; int nsegs, cluster; + uninitialized_var(bvprv); + nsegs = 0; cluster = blk_queue_cluster(q); @@ -235,6 +239,8 @@ int blk_bio_map_sg(struct request_queue *q, struct bio *bio, int nsegs, cluster; struct bvec_iter iter; + uninitialized_var(bvprv); + nsegs = 0; cluster = blk_queue_cluster(q); -- cgit v1.1 From 2b8221e181c128ac3bc7a9cdc80db04884951e89 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 3 Dec 2013 14:29:09 -0700 Subject: block: Really silence spurious compiler warnings The uninitialized_var() macro appears to not work on structs... Get rid of it, and manually initialize instead. 
Signed-off-by: Kent Overstreet Signed-off-by: Jens Axboe --- block/blk-merge.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index 0b097f6..8f8adaa 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -86,11 +86,9 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { - struct bio_vec end_bv, nxt_bv; + struct bio_vec end_bv = { NULL }, nxt_bv; struct bvec_iter iter; - uninitialized_var(end_bv); - if (!blk_queue_cluster(q)) return 0; @@ -170,13 +168,11 @@ new_segment: int blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sglist) { - struct bio_vec bvec, bvprv; + struct bio_vec bvec, bvprv = { NULL }; struct req_iterator iter; struct scatterlist *sg; int nsegs, cluster; - uninitialized_var(bvprv); - nsegs = 0; cluster = blk_queue_cluster(q); @@ -234,13 +230,11 @@ EXPORT_SYMBOL(blk_rq_map_sg); int blk_bio_map_sg(struct request_queue *q, struct bio *bio, struct scatterlist *sglist) { - struct bio_vec bvec, bvprv; + struct bio_vec bvec, bvprv = { NULL }; struct scatterlist *sg; int nsegs, cluster; struct bvec_iter iter; - uninitialized_var(bvprv); - nsegs = 0; cluster = blk_queue_cluster(q); -- cgit v1.1 From 2da8ca822d49c8b8781800ad155aaa00e7bb5f1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Dec 2013 12:28:04 -0500 Subject: cgroup: replace cftype->read_seq_string() with cftype->seq_show() In preparation of conversion to kernfs, cgroup file handling is updated so that it can be easily mapped to kernfs. This patch replaces cftype->read_seq_string() with cftype->seq_show() which is not limited to single_open() operation and will map directcly to kernfs seq_file interface. The conversions are mechanical. As ->seq_show() doesn't have @css and @cft, the functions which make use of them are converted to use seq_css() and seq_cft() respectively. In several occassions, e.f. if it has seq_string in its name, the function name is updated to fit the new method better. This patch does not introduce any behavior changes. 
Signed-off-by: Tejun Heo Acked-by: Aristeu Rozanski Acked-by: Vivek Goyal Acked-by: Michal Hocko Acked-by: Daniel Wagner Acked-by: Li Zefan Cc: Jens Axboe Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Neil Horman --- block/blk-throttle.c | 35 ++++++-------- block/cfq-iosched.c | 131 +++++++++++++++++++++++---------------------------- 2 files changed, 73 insertions(+), 93 deletions(-) (limited to 'block') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 0653404..a760857 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1303,13 +1303,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, return __blkg_prfill_rwstat(sf, pd, &rwstat); } -static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int tg_print_cpu_rwstat(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl, - cft->private, true); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_cpu_rwstat, + &blkcg_policy_throtl, seq_cft(sf)->private, true); return 0; } @@ -1335,19 +1332,17 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, return __blkg_prfill_u64(sf, pd, v); } -static int tg_print_conf_u64(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int tg_print_conf_u64(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64, - &blkcg_policy_throtl, cft->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64, + &blkcg_policy_throtl, seq_cft(sf)->private, false); return 0; } -static int tg_print_conf_uint(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int tg_print_conf_uint(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint, - &blkcg_policy_throtl, cft->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint, + &blkcg_policy_throtl, seq_cft(sf)->private, false); return 0; } @@ -1428,40 +1423,40 @@ static struct cftype throtl_files[] = { { .name = "throttle.read_bps_device", .private = offsetof(struct throtl_grp, bps[READ]), - .read_seq_string = tg_print_conf_u64, + .seq_show = tg_print_conf_u64, .write_string = tg_set_conf_u64, .max_write_len = 256, }, { .name = "throttle.write_bps_device", .private = offsetof(struct throtl_grp, bps[WRITE]), - .read_seq_string = tg_print_conf_u64, + .seq_show = tg_print_conf_u64, .write_string = tg_set_conf_u64, .max_write_len = 256, }, { .name = "throttle.read_iops_device", .private = offsetof(struct throtl_grp, iops[READ]), - .read_seq_string = tg_print_conf_uint, + .seq_show = tg_print_conf_uint, .write_string = tg_set_conf_uint, .max_write_len = 256, }, { .name = "throttle.write_iops_device", .private = offsetof(struct throtl_grp, iops[WRITE]), - .read_seq_string = tg_print_conf_uint, + .seq_show = tg_print_conf_uint, .write_string = tg_set_conf_uint, .max_write_len = 256, }, { .name = "throttle.io_service_bytes", .private = offsetof(struct tg_stats_cpu, service_bytes), - .read_seq_string = tg_print_cpu_rwstat, + .seq_show = tg_print_cpu_rwstat, }, { .name = "throttle.io_serviced", .private = offsetof(struct tg_stats_cpu, serviced), - .read_seq_string = tg_print_cpu_rwstat, + .seq_show = tg_print_cpu_rwstat, }, { } /* terminate */ }; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 
4d5cec1..744833b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1632,11 +1632,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf, return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); } -static int cfqg_print_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_weight_device(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device, - &blkcg_policy_cfq, 0, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_weight_device, &blkcg_policy_cfq, + 0, false); return 0; } @@ -1650,26 +1650,23 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf, return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); } -static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, - struct seq_file *sf) +static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device, - &blkcg_policy_cfq, 0, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, + 0, false); return 0; } -static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft, - struct seq_file *sf) +static int cfq_print_weight(struct seq_file *sf, void *v) { - seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight); + seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight); return 0; } -static int cfq_print_leaf_weight(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfq_print_leaf_weight(struct seq_file *sf, void *v) { - seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight); + seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight); return 0; } @@ -1762,23 +1759,17 @@ static int cfq_set_leaf_weight(struct cgroup_subsys_state *css, return __cfq_set_weight(css, cft, val, true); } -static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft, - struct seq_file *sf) +static int cfqg_print_stat(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq, - cft->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, + &blkcg_policy_cfq, seq_cft(sf)->private, false); return 0; } -static int cfqg_print_rwstat(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_rwstat(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq, - cft->private, true); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, + &blkcg_policy_cfq, seq_cft(sf)->private, true); return 0; } @@ -1798,23 +1789,19 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf, return __blkg_prfill_rwstat(sf, pd, &sum); } -static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_stat_recursive(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive, - &blkcg_policy_cfq, cft->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_stat_recursive, &blkcg_policy_cfq, + seq_cft(sf)->private, false); return 0; } -static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static 
int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive, - &blkcg_policy_cfq, cft->private, true); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq, + seq_cft(sf)->private, true); return 0; } @@ -1835,13 +1822,11 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, } /* print avg_queue_size */ -static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, - &blkcg_policy_cfq, 0, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_avg_queue_size, &blkcg_policy_cfq, + 0, false); return 0; } #endif /* CONFIG_DEBUG_BLK_CGROUP */ @@ -1851,14 +1836,14 @@ static struct cftype cfq_blkcg_files[] = { { .name = "weight_device", .flags = CFTYPE_ONLY_ON_ROOT, - .read_seq_string = cfqg_print_leaf_weight_device, + .seq_show = cfqg_print_leaf_weight_device, .write_string = cfqg_set_leaf_weight_device, .max_write_len = 256, }, { .name = "weight", .flags = CFTYPE_ONLY_ON_ROOT, - .read_seq_string = cfq_print_leaf_weight, + .seq_show = cfq_print_leaf_weight, .write_u64 = cfq_set_leaf_weight, }, @@ -1866,26 +1851,26 @@ static struct cftype cfq_blkcg_files[] = { { .name = "weight_device", .flags = CFTYPE_NOT_ON_ROOT, - .read_seq_string = cfqg_print_weight_device, + .seq_show = cfqg_print_weight_device, .write_string = cfqg_set_weight_device, .max_write_len = 256, }, { .name = "weight", .flags = CFTYPE_NOT_ON_ROOT, - .read_seq_string = cfq_print_weight, + .seq_show = cfq_print_weight, .write_u64 = cfq_set_weight, }, { .name = "leaf_weight_device", - .read_seq_string = cfqg_print_leaf_weight_device, + .seq_show = cfqg_print_leaf_weight_device, .write_string = cfqg_set_leaf_weight_device, .max_write_len = 256, }, { .name = "leaf_weight", - .read_seq_string = cfq_print_leaf_weight, + .seq_show = cfq_print_leaf_weight, .write_u64 = cfq_set_leaf_weight, }, @@ -1893,114 +1878,114 @@ static struct cftype cfq_blkcg_files[] = { { .name = "time", .private = offsetof(struct cfq_group, stats.time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "sectors", .private = offsetof(struct cfq_group, stats.sectors), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "io_service_bytes", .private = offsetof(struct cfq_group, stats.service_bytes), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_serviced", .private = offsetof(struct cfq_group, stats.serviced), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_service_time", .private = offsetof(struct cfq_group, stats.service_time), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_wait_time", .private = offsetof(struct cfq_group, stats.wait_time), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_merged", .private = offsetof(struct cfq_group, stats.merged), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_queued", .private = offsetof(struct cfq_group, stats.queued), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, /* the same statictics which cover the cfqg and its descendants */ { 
.name = "time_recursive", .private = offsetof(struct cfq_group, stats.time), - .read_seq_string = cfqg_print_stat_recursive, + .seq_show = cfqg_print_stat_recursive, }, { .name = "sectors_recursive", .private = offsetof(struct cfq_group, stats.sectors), - .read_seq_string = cfqg_print_stat_recursive, + .seq_show = cfqg_print_stat_recursive, }, { .name = "io_service_bytes_recursive", .private = offsetof(struct cfq_group, stats.service_bytes), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_serviced_recursive", .private = offsetof(struct cfq_group, stats.serviced), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_service_time_recursive", .private = offsetof(struct cfq_group, stats.service_time), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_wait_time_recursive", .private = offsetof(struct cfq_group, stats.wait_time), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_merged_recursive", .private = offsetof(struct cfq_group, stats.merged), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_queued_recursive", .private = offsetof(struct cfq_group, stats.queued), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, #ifdef CONFIG_DEBUG_BLK_CGROUP { .name = "avg_queue_size", - .read_seq_string = cfqg_print_avg_queue_size, + .seq_show = cfqg_print_avg_queue_size, }, { .name = "group_wait_time", .private = offsetof(struct cfq_group, stats.group_wait_time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "idle_time", .private = offsetof(struct cfq_group, stats.idle_time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "empty_time", .private = offsetof(struct cfq_group, stats.empty_time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "dequeue", .private = offsetof(struct cfq_group, stats.dequeue), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "unaccounted_time", .private = offsetof(struct cfq_group, stats.unaccounted_time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, #endif /* CONFIG_DEBUG_BLK_CGROUP */ { } /* terminate */ -- cgit v1.1 From 8515736604941334bd9e8fc01edea685a228acd5 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 6 Dec 2013 09:06:41 +0400 Subject: block: fix memory leaks on unplugging block device All objects, which are allocated in blk_mq_register_disk, must be released in blk_mq_unregister_disk. I use a KVM virtual machine and virtio disk to reproduce this issue. kmemleak: 18 new suspected memory leaks (see /sys/kernel/debug/kmemleak) $ cat /sys/kernel/debug/kmemleak | head -n 30 unreferenced object 0xffff8800b6636150 (size 8): comm "kworker/0:2", pid 65, jiffies 4294809903 (age 86.358s) hex dump (first 8 bytes): 76 69 72 74 69 6f 34 00 virtio4. 
backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __kmalloc_track_caller+0xf5/0x260 [] kstrdup+0x31/0x60 [] sysfs_new_dirent+0x2e/0x140 [] create_dir+0x38/0xe0 [] sysfs_create_dir_ns+0x73/0xc0 [] kobject_add_internal+0xc9/0x340 [] kobject_add+0x65/0xb0 [] device_add+0x128/0x660 [] device_register+0x1a/0x20 [] register_virtio_device+0x98/0xe0 [] virtio_pci_probe+0x12e/0x1c0 [] local_pci_probe+0x45/0xa0 [] pci_device_probe+0x121/0x130 [] driver_probe_device+0x87/0x390 [] __device_attach+0x3b/0x40 unreferenced object 0xffff8800b65aa1d8 (size 144): Fixes: 320ae51feed5 (blk-mq: new multi-queue block IO queueing mechanism) Cc: Jens Axboe Signed-off-by: Andrey Vagin Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'block') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index ba6cf8e..b91ce75 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -335,9 +335,22 @@ static struct kobj_type blk_mq_hw_ktype = { void blk_mq_unregister_disk(struct gendisk *disk) { struct request_queue *q = disk->queue; + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + int i, j; + + queue_for_each_hw_ctx(q, hctx, i) { + hctx_for_each_ctx(hctx, ctx, j) { + kobject_del(&ctx->kobj); + kobject_put(&ctx->kobj); + } + kobject_del(&hctx->kobj); + kobject_put(&hctx->kobj); + } kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); + kobject_put(&q->mq_kobj); kobject_put(&disk_to_dev(disk)->kobj); } -- cgit v1.1 From 43a5e4e21964a6efb4d14a34644ec7109d0ae891 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 26 Dec 2013 21:31:35 +0800 Subject: block: blk-mq: support draining mq queue blk_mq_drain_queue() is introduced so that we can drain mq queue inside blk_cleanup_queue(). Also don't accept new requests any more if queue is marked as dying. Cc: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 10 ++++++++-- block/blk-exec.c | 4 ++++ block/blk-mq.c | 43 +++++++++++++++++++++++++++---------------- block/blk-mq.h | 1 + 4 files changed, 40 insertions(+), 18 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 5da8e90..accb7fc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -38,6 +38,7 @@ #include "blk.h" #include "blk-cgroup.h" +#include "blk-mq.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -497,8 +498,13 @@ void blk_cleanup_queue(struct request_queue *q) * Drain all requests queued before DYING marking. Set DEAD flag to * prevent that q->request_fn() gets invoked after draining finished. 
*/ - spin_lock_irq(lock); - __blk_drain_queue(q, true); + if (q->mq_ops) { + blk_mq_drain_queue(q); + spin_lock_irq(lock); + } else { + spin_lock_irq(lock); + __blk_drain_queue(q, true); + } queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); diff --git a/block/blk-exec.c b/block/blk-exec.c index c3edf9d..bbfc072 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -60,6 +60,10 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, rq->rq_disk = bd_disk; rq->end_io = done; + /* + * don't check dying flag for MQ because the request won't + * be resued after dying flag is set + */ if (q->mq_ops) { blk_mq_insert_request(q, rq, true); return; diff --git a/block/blk-mq.c b/block/blk-mq.c index 3929f43..e2f811c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -106,10 +106,13 @@ static int blk_mq_queue_enter(struct request_queue *q) spin_lock_irq(q->queue_lock); ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !blk_queue_bypass(q), *q->queue_lock); + !blk_queue_bypass(q) || blk_queue_dying(q), + *q->queue_lock); /* inc usage with lock hold to avoid freeze_queue runs here */ - if (!ret) + if (!ret && !blk_queue_dying(q)) __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); + else if (blk_queue_dying(q)) + ret = -ENODEV; spin_unlock_irq(q->queue_lock); return ret; @@ -120,6 +123,22 @@ static void blk_mq_queue_exit(struct request_queue *q) __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); } +static void __blk_mq_drain_queue(struct request_queue *q) +{ + while (true) { + s64 count; + + spin_lock_irq(q->queue_lock); + count = percpu_counter_sum(&q->mq_usage_counter); + spin_unlock_irq(q->queue_lock); + + if (count == 0) + break; + blk_mq_run_queues(q, false); + msleep(10); + } +} + /* * Guarantee no request is in use, so we can change any data structure of * the queue afterward. @@ -133,21 +152,13 @@ static void blk_mq_freeze_queue(struct request_queue *q) queue_flag_set(QUEUE_FLAG_BYPASS, q); spin_unlock_irq(q->queue_lock); - if (!drain) - return; - - while (true) { - s64 count; - - spin_lock_irq(q->queue_lock); - count = percpu_counter_sum(&q->mq_usage_counter); - spin_unlock_irq(q->queue_lock); + if (drain) + __blk_mq_drain_queue(q); +} - if (count == 0) - break; - blk_mq_run_queues(q, false); - msleep(10); - } +void blk_mq_drain_queue(struct request_queue *q) +{ + __blk_mq_drain_queue(q); } static void blk_mq_unfreeze_queue(struct request_queue *q) diff --git a/block/blk-mq.h b/block/blk-mq.h index 52bf1f9..caa614f 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -27,6 +27,7 @@ void blk_mq_complete_request(struct request *rq, int error); void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); +void blk_mq_drain_queue(struct request_queue *q); /* * CPU hotplug helpers -- cgit v1.1 From f04c1fe7619b2a60ee9e209cf3f9fcba2ce8f2a2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 26 Dec 2013 21:31:36 +0800 Subject: block: blk-mq: make blk_sync_queue support mq This patch moves synchronization on mq->delay_work from blk_mq_free_queue() to blk_sync_queue(), so that blk_sync_queue can work on mq. 
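Read as a whole rather than as a diff, the helper now quiesces either kind of queue: legacy queues have a single q->delay_work, while blk-mq queues carry one delayed work item per hardware context. The sketch below is simply the post-patch blk_sync_queue() reconstructed from the hunk that follows, with comments added:

void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->timeout);

	if (q->mq_ops) {
		/* blk-mq: every hardware context owns a delayed work item */
		struct blk_mq_hw_ctx *hctx;
		int i;

		queue_for_each_hw_ctx(q, hctx, i)
			cancel_delayed_work_sync(&hctx->delayed_work);
	} else {
		/* legacy request_fn path: a single per-queue delayed work */
		cancel_delayed_work_sync(&q->delay_work);
	}
}
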
Cc: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++++++++++- block/blk-mq.c | 1 - 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index accb7fc..c00e0bd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -246,7 +246,16 @@ EXPORT_SYMBOL(blk_stop_queue); void blk_sync_queue(struct request_queue *q) { del_timer_sync(&q->timeout); - cancel_delayed_work_sync(&q->delay_work); + + if (q->mq_ops) { + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + cancel_delayed_work_sync(&hctx->delayed_work); + } else { + cancel_delayed_work_sync(&q->delay_work); + } } EXPORT_SYMBOL(blk_sync_queue); diff --git a/block/blk-mq.c b/block/blk-mq.c index e2f811c..edbd253 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1440,7 +1440,6 @@ void blk_mq_free_queue(struct request_queue *q) int i; queue_for_each_hw_ctx(q, hctx, i) { - cancel_delayed_work_sync(&hctx->delayed_work); kfree(hctx->ctx_map); kfree(hctx->ctxs); blk_mq_free_rq_map(hctx); -- cgit v1.1 From 3edcc0ce85c59d45d6dfc6a36a6b3f8b31ba9887 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 26 Dec 2013 21:31:38 +0800 Subject: block: blk-mq: don't export blk_mq_free_queue() blk_mq_free_queue() is called from release handler of queue kobject, so it needn't be called from drivers. Cc: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 1 - block/blk-mq.h | 1 + block/blk-sysfs.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index edbd253..6914f9b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1461,7 +1461,6 @@ void blk_mq_free_queue(struct request_queue *q) list_del_init(&q->all_q_node); mutex_unlock(&all_q_mutex); } -EXPORT_SYMBOL(blk_mq_free_queue); /* Basically redo blk_mq_init_queue with queue frozen */ static void blk_mq_queue_reinit(struct request_queue *q) diff --git a/block/blk-mq.h b/block/blk-mq.h index caa614f..e151a2f 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,6 +28,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q); +void blk_mq_free_queue(struct request_queue *q); /* * CPU hotplug helpers diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9777952..8095c4a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -11,6 +11,7 @@ #include "blk.h" #include "blk-cgroup.h" +#include "blk-mq.h" struct queue_sysfs_entry { struct attribute attr; -- cgit v1.1 From 0fec08b4ecfc36fd8a64432343b2964fb86d2675 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 3 Jan 2014 10:00:08 -0700 Subject: blk-mq: fix initializing request's start time blk_rq_init() is called in req's complete handler to initialize the request, so the members of start_time and start_time_ns might become inaccurate when it is allocated in future. The patch initializes the two members in blk_mq_rq_ctx_init() to fix the problem. 
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 6914f9b..473ce40 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -190,6 +190,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->mq_ctx = ctx; rq->cmd_flags = rw_flags; + rq->start_time = jiffies; + set_start_time_ns(rq); ctx->rq_dispatched[rw_is_sync(rw_flags)]++; } -- cgit v1.1 From c78afc6261b09f74abff8c0719b80692a4959768 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 22:39:53 -0700 Subject: bcache/md: Use raid stripe size Now that we've got code for raid5/6 stripe awareness, bcache just needs to know about the stripes and when writing partial stripes is expensive - we probably don't want to enable this optimization for raid1 or 10, even though they have stripes. So add a flag to queue_limits. Signed-off-by: Kent Overstreet --- block/blk-settings.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'block') diff --git a/block/blk-settings.c b/block/blk-settings.c index 05e8267..5d21239 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -592,6 +592,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, ret = -1; } + t->raid_partial_stripes_expensive = + max(t->raid_partial_stripes_expensive, + b->raid_partial_stripes_expensive); + /* Find lowest common alignment_offset */ t->alignment_offset = lcm(t->alignment_offset, alignment) & (max(t->physical_block_size, t->io_min) - 1); -- cgit v1.1 From 3d6efbf62c797a2924785f482e4ce8aa8820ec72 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Jan 2014 09:33:37 -0800 Subject: blk-mq: use __smp_call_function_single directly __smp_call_function_single already avoids multiple IPIs by internally queing up the items, and now also is available for non-SMP builds as a trivially correct stub, so there is no need to wrap it. If the additional lock roundtrip cause problems my patch to convert the generic IPI code to llists is waiting to get merged will fix it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-cpu.c | 31 ------------------------- block/blk-mq.c | 68 +++++++++--------------------------------------------- block/blk-mq.h | 1 - 3 files changed, 11 insertions(+), 89 deletions(-) (limited to 'block') diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c index 0045ace..20576e3 100644 --- a/block/blk-mq-cpu.c +++ b/block/blk-mq-cpu.c @@ -28,32 +28,6 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static void blk_mq_cpu_notify(void *data, unsigned long action, - unsigned int cpu) -{ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - /* - * If the CPU goes away, ensure that we run any pending - * completions. 
- */ - struct llist_node *node; - struct request *rq; - - local_irq_disable(); - - node = llist_del_all(&per_cpu(ipi_lists, cpu)); - while (node) { - struct llist_node *next = node->next; - - rq = llist_entry(node, struct request, ll_list); - __blk_mq_end_io(rq, rq->errors); - node = next; - } - - local_irq_enable(); - } -} - static struct notifier_block __cpuinitdata blk_mq_main_cpu_notifier = { .notifier_call = blk_mq_main_cpu_notify, }; @@ -82,12 +56,7 @@ void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, notifier->data = data; } -static struct blk_mq_cpu_notifier __cpuinitdata cpu_notifier = { - .notify = blk_mq_cpu_notify, -}; - void __init blk_mq_cpu_init(void) { register_hotcpu_notifier(&blk_mq_main_cpu_notifier); - blk_mq_register_cpu_notifier(&cpu_notifier); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 473ce40..68734f8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -27,8 +27,6 @@ static LIST_HEAD(all_q_list); static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); -DEFINE_PER_CPU(struct llist_head, ipi_lists); - static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, unsigned int cpu) { @@ -339,55 +337,12 @@ void __blk_mq_end_io(struct request *rq, int error) blk_mq_complete_request(rq, error); } -#if defined(CONFIG_SMP) - -/* - * Called with interrupts disabled. - */ -static void ipi_end_io(void *data) +static void blk_mq_end_io_remote(void *data) { - struct llist_head *list = &per_cpu(ipi_lists, smp_processor_id()); - struct llist_node *entry, *next; - struct request *rq; - - entry = llist_del_all(list); - - while (entry) { - next = entry->next; - rq = llist_entry(entry, struct request, ll_list); - __blk_mq_end_io(rq, rq->errors); - entry = next; - } -} - -static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu, - struct request *rq, const int error) -{ - struct call_single_data *data = &rq->csd; - - rq->errors = error; - rq->ll_list.next = NULL; - - /* - * If the list is non-empty, an existing IPI must already - * be "in flight". If that is the case, we need not schedule - * a new one. - */ - if (llist_add(&rq->ll_list, &per_cpu(ipi_lists, ctx->cpu))) { - data->func = ipi_end_io; - data->flags = 0; - __smp_call_function_single(ctx->cpu, data, 0); - } + struct request *rq = data; - return true; + __blk_mq_end_io(rq, rq->errors); } -#else /* CONFIG_SMP */ -static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu, - struct request *rq, const int error) -{ - return false; -} -#endif /* * End IO on this request on a multiqueue enabled driver. 
We'll either do @@ -403,11 +358,15 @@ void blk_mq_end_io(struct request *rq, int error) return __blk_mq_end_io(rq, error); cpu = get_cpu(); - - if (cpu == ctx->cpu || !cpu_online(ctx->cpu) || - !ipi_remote_cpu(ctx, cpu, rq, error)) + if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { + rq->errors = error; + rq->csd.func = blk_mq_end_io_remote; + rq->csd.info = rq; + rq->csd.flags = 0; + __smp_call_function_single(ctx->cpu, &rq->csd, 0); + } else { __blk_mq_end_io(rq, error); - + } put_cpu(); } EXPORT_SYMBOL(blk_mq_end_io); @@ -1506,11 +1465,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, static int __init blk_mq_init(void) { - unsigned int i; - - for_each_possible_cpu(i) - init_llist_head(&per_cpu(ipi_lists, i)); - blk_mq_cpu_init(); /* Must be called after percpu_counter_hotcpu_callback() */ diff --git a/block/blk-mq.h b/block/blk-mq.h index e151a2f..5c39179 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -40,7 +40,6 @@ void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); void blk_mq_cpu_init(void); -DECLARE_PER_CPU(struct llist_head, ipi_lists); /* * CPU -> queue mappings -- cgit v1.1 From 6753471c0cb4562aebb9c70beb74ccd392d49ee8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 8 Jan 2014 20:17:46 -0700 Subject: blk-mq: uses page->list incorrectly 'struct page' has two list_head fields: 'lru' and 'list'. Conveniently, they are unioned together. This means that code can use them interchangably, which gets horribly confusing. The blk-mq made the logical decision to try to use page->list. But, that field was actually introduced just for the slub code. ->lru is the right field to use outside of slab/slub. Signed-off-by: Dave Hansen Acked-by: David Rientjes Acked-by: Kirill A. Shutemov Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 68734f8..57039fc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1063,8 +1063,8 @@ static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx) struct page *page; while (!list_empty(&hctx->page_list)) { - page = list_first_entry(&hctx->page_list, struct page, list); - list_del_init(&page->list); + page = list_first_entry(&hctx->page_list, struct page, lru); + list_del_init(&page->lru); __free_pages(page, page->private); } @@ -1128,7 +1128,7 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, break; page->private = this_order; - list_add_tail(&page->list, &hctx->page_list); + list_add_tail(&page->lru, &hctx->page_list); p = page_address(page); entries_per_page = order_to_size(this_order) / rq_size; -- cgit v1.1 From bca266b3886b8bc587db044750c8a00d674b4d09 Mon Sep 17 00:00:00 2001 From: CaiZhiyong Date: Tue, 21 Jan 2014 14:39:25 -0800 Subject: block: remove unrelated header files and export symbol Fix up the following items: - remove unrelated header files. - export interface function. - modify function cmdline_parts_parse return value, this will make it more friendly for the caller. 
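[Editorial note] For orientation, a caller of the now-exported cmdline partition interface might look roughly like the sketch below. The header name, the starting slot value and the empty add_part() body are illustrative assumptions; only the four function signatures come from the diff that follows.

    #include <linux/cmdline-parser.h>   /* assumed header for the exported API */

    static int add_part(int slot, struct cmdline_subpart *subpart, void *param)
    {
            /* Register one partition; the body is driver specific and omitted. */
            return 0;
    }

    static int apply_cmdline_parts(const char *cmdline, const char *bdev_name,
                                   sector_t disk_size)
    {
            struct cmdline_parts *parts, *matched;
            int ret;

            /* With this patch the parse routine reports failure to the caller. */
            ret = cmdline_parts_parse(&parts, cmdline);
            if (ret)
                    return ret;

            matched = cmdline_parts_find(parts, bdev_name);
            if (matched)
                    /* cmdline_parts_set() now returns the next free slot;
                     * starting at slot 0 is a hypothetical choice. */
                    cmdline_parts_set(matched, disk_size, 0, add_part, NULL);

            cmdline_parts_free(&parts);
            return 0;
    }
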
Signed-off-by: CaiZhiyong Cc: Ezequiel Garcia CC: Brian Norris Cc: "Wanglin (Albert)" Cc: Artem Bityutskiy Cc: Karel Zak Cc: Shmulik Ladkani Cc: David Woodhouse Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/cmdline-parser.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/cmdline-parser.c b/block/cmdline-parser.c index cc2637f..9dbc67e 100644 --- a/block/cmdline-parser.c +++ b/block/cmdline-parser.c @@ -4,8 +4,7 @@ * Written by Cai Zhiyong * */ -#include -#include +#include #include static int parse_subpart(struct cmdline_subpart **subpart, char *partdef) @@ -159,6 +158,7 @@ void cmdline_parts_free(struct cmdline_parts **parts) *parts = next_parts; } } +EXPORT_SYMBOL(cmdline_parts_free); int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline) { @@ -206,6 +206,7 @@ fail: cmdline_parts_free(parts); goto done; } +EXPORT_SYMBOL(cmdline_parts_parse); struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, const char *bdev) @@ -214,17 +215,17 @@ struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, parts = parts->next_parts; return parts; } +EXPORT_SYMBOL(cmdline_parts_find); /* * add_part() * 0 success. * 1 can not add so many partitions. */ -void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, - int slot, - int (*add_part)(int, struct cmdline_subpart *, void *), - void *param) - +int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, + int slot, + int (*add_part)(int, struct cmdline_subpart *, void *), + void *param) { sector_t from = 0; struct cmdline_subpart *subpart; @@ -247,4 +248,7 @@ void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, if (add_part(slot, subpart, param)) break; } + + return slot; } +EXPORT_SYMBOL(cmdline_parts_set); -- cgit v1.1 From 17a05cca99d952f5b4766fa48a2703548966636a Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Sun, 19 Jan 2014 02:08:49 +0100 Subject: block: Fix memory leak in rw_copy_check_uvector() handling Fix a memory leak in the error handling path of function sg_io() that is used during the processing of scsi ioctl. Memory already allocated by rw_copy_check_uvector() needs to be freed correctly. Detected by Coverity: CID 1128953. Signed-off-by: Christian Engelmayer Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 625e3e4..2648797 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -323,12 +323,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, if (hdr->iovec_count) { size_t iov_data_len; - struct iovec *iov; + struct iovec *iov = NULL; ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count, 0, NULL, &iov); - if (ret < 0) + if (ret < 0) { + kfree(iov); goto out; + } iov_data_len = ret; ret = 0; -- cgit v1.1 From 6f6b5d1ec56acdeab0503d2b823f6f88a0af493e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 19 Jan 2014 08:26:37 +0000 Subject: percpu_ida: Make percpu_ida_alloc + callers accept task state bitmask This patch changes percpu_ida_alloc() + callers to accept task state bitmask for prepare_to_wait() for code like target/iscsi that needs it for interruptible sleep, that is provided in a subsequent patch. 
It now expects TASK_UNINTERRUPTIBLE when the caller is able to sleep waiting for a new tag, or TASK_RUNNING when the caller cannot sleep, and is forced to return a negative value when no tags are available. v2 changes: - Include blk-mq + tcm_fc + vhost/scsi + target/iscsi changes - Drop signal_pending_state() call v3 changes: - Only call prepare_to_wait() + finish_wait() when != TASK_RUNNING (PeterZ) Reported-by: Linus Torvalds Cc: Linus Torvalds Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Jens Axboe Signed-off-by: Kent Overstreet Cc: #3.12+ Signed-off-by: Nicholas Bellinger --- block/blk-mq-tag.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index d64a02f..5d70edc 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -36,7 +36,8 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp) { int tag; - tag = percpu_ida_alloc(&tags->free_tags, gfp); + tag = percpu_ida_alloc(&tags->free_tags, (gfp & __GFP_WAIT) ? + TASK_UNINTERRUPTIBLE : TASK_RUNNING); if (tag < 0) return BLK_MQ_TAG_FAIL; return tag + tags->nr_reserved_tags; @@ -52,7 +53,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags, return BLK_MQ_TAG_FAIL; } - tag = percpu_ida_alloc(&tags->reserved_tags, gfp); + tag = percpu_ida_alloc(&tags->reserved_tags, (gfp & __GFP_WAIT) ? + TASK_UNINTERRUPTIBLE : TASK_RUNNING); if (tag < 0) return BLK_MQ_TAG_FAIL; return tag; -- cgit v1.1 From 381d3ee33b9ccbe93404dbeae5be435922254f71 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 28 Jan 2014 09:52:01 -0700 Subject: block/blk-mq-cpu.c: use hotcpu_notifier() Cleaner, reduces text size when cpu hotplug is disabled. Cc: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/blk-mq-cpu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'block') diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c index 20576e3..3146bef 100644 --- a/block/blk-mq-cpu.c +++ b/block/blk-mq-cpu.c @@ -28,10 +28,6 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata blk_mq_main_cpu_notifier = { - .notifier_call = blk_mq_main_cpu_notify, -}; - void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) { BUG_ON(!notifier->notify); @@ -58,5 +54,5 @@ void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, void __init blk_mq_cpu_init(void) { - register_hotcpu_notifier(&blk_mq_main_cpu_notifier); + hotcpu_notifier(blk_mq_main_cpu_notify, 0); } -- cgit v1.1 From f0276924fa35a3607920a58cf5d878212824b951 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 31 Dec 2013 11:38:50 +0800 Subject: blk-mq: Don't reserve a tag for flush request Reserving a tag (request) for flush to avoid dead lock is a overkill. A tag is valuable resource. We can track the number of flush requests and disallow having too many pending flush requests allocated. With this patch, blk_mq_alloc_request_pinned() could do a busy nop (but not a dead loop) if too many pending requests are allocated and new flush request is allocated. But this should not be a problem, too many pending flush requests are very rare case. I verified this can fix the deadlock caused by too many pending flush requests. 
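[Editorial note] Pulled out of the allocation path below for readability, the flush throttle amounts to the guard sketched here; the names and the limit are exactly those introduced by the diff that follows.

    /*
     * A flush needs a request of its own; keep at least one tag free
     * for non-flush IO so the flush machinery cannot deadlock.
     */
    if ((rw & REQ_FLUSH) && !(rw & REQ_FLUSH_SEQ)) {
            if (atomic_inc_return(&hctx->pending_flush) >=
                hctx->queue_depth - hctx->reserved_tags - 1) {
                    atomic_dec(&hctx->pending_flush);
                    return NULL;    /* caller backs off and retries */
            }
    }
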
Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-flush.c | 8 +++++--- block/blk-mq.c | 46 ++++++++++++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 19 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 9288aaf..9143e85 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -284,9 +284,8 @@ static void mq_flush_work(struct work_struct *work) q = container_of(work, struct request_queue, mq_flush_work); - /* We don't need set REQ_FLUSH_SEQ, it's for consistency */ rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ, - __GFP_WAIT|GFP_ATOMIC, true); + __GFP_WAIT|GFP_ATOMIC, false); rq->cmd_type = REQ_TYPE_FS; rq->end_io = flush_end_io; @@ -408,8 +407,11 @@ void blk_insert_flush(struct request *rq) /* * @policy now records what operations need to be done. Adjust * REQ_FLUSH and FUA for the driver. + * We keep REQ_FLUSH for mq to track flush requests. For !FUA, + * we never dispatch the request directly. */ - rq->cmd_flags &= ~REQ_FLUSH; + if (rq->cmd_flags & REQ_FUA) + rq->cmd_flags &= ~REQ_FLUSH; if (!(fflags & REQ_FUA)) rq->cmd_flags &= ~REQ_FUA; diff --git a/block/blk-mq.c b/block/blk-mq.c index 57039fc..9072d0a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -194,9 +194,27 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, } static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, - gfp_t gfp, bool reserved) + gfp_t gfp, bool reserved, + int rw) { - return blk_mq_alloc_rq(hctx, gfp, reserved); + struct request *req; + bool is_flush = false; + /* + * flush need allocate a request, leave at least one request for + * non-flush IO to avoid deadlock + */ + if ((rw & REQ_FLUSH) && !(rw & REQ_FLUSH_SEQ)) { + if (atomic_inc_return(&hctx->pending_flush) >= + hctx->queue_depth - hctx->reserved_tags - 1) { + atomic_dec(&hctx->pending_flush); + return NULL; + } + is_flush = true; + } + req = blk_mq_alloc_rq(hctx, gfp, reserved); + if (!req && is_flush) + atomic_dec(&hctx->pending_flush); + return req; } static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, @@ -209,7 +227,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved); + rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved, rw); if (rq) { blk_mq_rq_ctx_init(q, ctx, rq, rw); break; @@ -272,6 +290,9 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, const int tag = rq->tag; struct request_queue *q = rq->q; + if ((rq->cmd_flags & REQ_FLUSH) && !(rq->cmd_flags & REQ_FLUSH_SEQ)) + atomic_dec(&hctx->pending_flush); + blk_mq_rq_init(hctx, rq); blk_mq_put_tag(hctx->tags, tag); @@ -900,14 +921,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) hctx = q->mq_ops->map_queue(q, ctx->cpu); trace_block_getrq(q, bio, rw); - rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false); + rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false, bio->bi_rw); if (likely(rq)) - blk_mq_rq_ctx_init(q, ctx, rq, rw); + blk_mq_rq_ctx_init(q, ctx, rq, bio->bi_rw); else { blk_mq_put_ctx(ctx); trace_block_sleeprq(q, bio, rw); - rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC, - false); + rq = blk_mq_alloc_request_pinned(q, bio->bi_rw, + __GFP_WAIT|GFP_ATOMIC, false); ctx = rq->mq_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); } @@ -1184,7 +1205,9 @@ static int 
blk_mq_init_hw_queues(struct request_queue *q, hctx->queue_num = i; hctx->flags = reg->flags; hctx->queue_depth = reg->queue_depth; + hctx->reserved_tags = reg->reserved_tags; hctx->cmd_size = reg->cmd_size; + atomic_set(&hctx->pending_flush, 0); blk_mq_init_cpu_notifier(&hctx->cpu_notifier, blk_mq_hctx_notify, hctx); @@ -1309,15 +1332,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, reg->queue_depth = BLK_MQ_MAX_DEPTH; } - /* - * Set aside a tag for flush requests. It will only be used while - * another flush request is in progress but outside the driver. - * - * TODO: only allocate if flushes are supported - */ - reg->queue_depth++; - reg->reserved_tags++; - if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) return ERR_PTR(-EINVAL); -- cgit v1.1 From 556ee818c06f37b2e583af0363e6b16d0e0270de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 29 Jan 2014 14:56:16 -0700 Subject: block: __elv_next_request() shouldn't call into the elevator if bypassing request_queue bypassing is used to suppress higher-level function of a request_queue so that they can be switched, reconfigured and shut down. A request_queue does the followings while bypassing. * bypasses elevator and io_cq association and queues requests directly to the FIFO dispatch queue. * bypasses block cgroup request_list lookup and always uses the root request_list. Once confirmed to be bypassing, specific elevator and block cgroup policy implementations can assume that nothing is in flight for them and perform various operations which would be dangerous otherwise. Such confirmation is acheived by short-circuiting all new requests directly to the dispatch queue and waiting for all the requests which were issued before to finish. Unfortunately, while the request allocating and draining sides were properly handled, we forgot to actually plug the request dispatch path. Even after bypassing mode is confirmed, if the attached driver tries to fetch a request and the dispatch queue is empty, __elv_next_request() would invoke the current elevator's elevator_dispatch_fn() callback. As all in-flight requests were drained, the elevator wouldn't contain any request but once bypass is confirmed we don't even know whether the elevator is even there. It might be in the process of being switched and half torn down. Frank Mayhar reports that this actually happened while switching elevators, leading to an oops. Let's fix it by making __elv_next_request() avoid invoking the elevator_dispatch_fn() callback if the queue is bypassing. It already avoids invoking the callback if the queue is dying. As a dying queue is guaranteed to be bypassing, we can simply replace blk_queue_dying() check with blk_queue_bypass(). 
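[Editorial note] For readers unfamiliar with bypass mode: an elevator switch brackets its work in a bypass window, roughly as outlined in this simplified sketch. It is not the literal elevator_switch() code and skips all error handling; the bypass and elevator helpers themselves are real.

    #include <linux/blkdev.h>
    #include <linux/elevator.h>

    /* Rough outline of an elevator switch; the real code is elevator_switch(). */
    static void switch_elevator_sketch(struct request_queue *q)
    {
            blk_queue_bypass_start(q);      /* new requests go straight to the
                                             * dispatch FIFO; previously issued
                                             * elevator requests are drained */

            elv_unregister_queue(q);        /* tear down the old elevator */
            /* ... allocate, initialize and register the new elevator ... */

            blk_queue_bypass_end(q);        /* normal elevator dispatch resumes */
    }
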
Reported-by: Frank Mayhar References: http://lkml.kernel.org/g/1390319905.20232.38.camel@bobble.lax.corp.google.com Cc: stable@vger.kernel.org Tested-by: Frank Mayhar Signed-off-by: Jens Axboe --- block/blk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk.h b/block/blk.h index c90e1d8..d23b415 100644 --- a/block/blk.h +++ b/block/blk.h @@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) q->flush_queue_delayed = 1; return NULL; } - if (unlikely(blk_queue_dying(q)) || + if (unlikely(blk_queue_bypass(q)) || !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) return NULL; } -- cgit v1.1 From 72a0a36e2854a6eadb4cf2561858f613f9cd4639 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Feb 2014 10:22:36 -0800 Subject: blk-mq: support at_head inserations for blk_execute_rq This is neede for proper SG_IO operation as well as various uses of blk_execute_rq from the SCSI midlayer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-exec.c | 2 +- block/blk-mq.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'block') diff --git a/block/blk-exec.c b/block/blk-exec.c index bbfc072..c68613b 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be resued after dying flag is set */ if (q->mq_ops) { - blk_mq_insert_request(q, rq, true); + blk_mq_insert_request(q, rq, at_head, true); return; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 9072d0a..c9306e3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -714,13 +714,16 @@ static void blk_mq_work_fn(struct work_struct *work) } static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, - struct request *rq) + struct request *rq, bool at_head) { struct blk_mq_ctx *ctx = rq->mq_ctx; trace_block_rq_insert(hctx->queue, rq); - list_add_tail(&rq->queuelist, &ctx->rq_list); + if (at_head) + list_add(&rq->queuelist, &ctx->rq_list); + else + list_add_tail(&rq->queuelist, &ctx->rq_list); blk_mq_hctx_mark_pending(hctx, ctx); /* @@ -730,7 +733,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, } void blk_mq_insert_request(struct request_queue *q, struct request *rq, - bool run_queue) + bool at_head, bool run_queue) { struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx, *current_ctx; @@ -749,7 +752,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq, rq->mq_ctx = ctx; } spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq); + __blk_mq_insert_request(hctx, rq, at_head); spin_unlock(&ctx->lock); blk_mq_put_ctx(current_ctx); @@ -781,7 +784,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async) /* ctx->cpu might be offline */ spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq); + __blk_mq_insert_request(hctx, rq, false); spin_unlock(&ctx->lock); blk_mq_put_ctx(current_ctx); @@ -819,7 +822,7 @@ static void blk_mq_insert_requests(struct request_queue *q, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); rq->mq_ctx = ctx; - __blk_mq_insert_request(hctx, rq); + __blk_mq_insert_request(hctx, rq, false); } spin_unlock(&ctx->lock); @@ -971,7 +974,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) __blk_mq_free_request(hctx, ctx, rq); else { blk_mq_bio_to_request(rq, bio); - __blk_mq_insert_request(hctx, rq); + __blk_mq_insert_request(hctx, rq, false); } spin_unlock(&ctx->lock); -- 
cgit v1.1 From 6f5ba581c0d3ba0a76fe138123c1c2817ffcbeb1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Feb 2014 10:22:37 -0800 Subject: blk-mq: divert __blk_put_request for MQ ops __blk_put_request needs to call into the blk-mq code just like blk_put_request. As we don't have the queue lock in this case both end up calling the same function. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index c00e0bd..06636f3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1278,6 +1278,11 @@ void __blk_put_request(struct request_queue *q, struct request *req) if (unlikely(!q)) return; + if (q->mq_ops) { + blk_mq_free_request(req); + return; + } + blk_pm_put_request(req); elv_completed_request(q, req); -- cgit v1.1 From 4f7f418c4835d3ce1b66d00502df41f324d13ec0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Feb 2014 10:22:38 -0800 Subject: blk-mq: handle dma_drain_size Make blk-mq handle the dma_drain_size field the same way as the old request path. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index c9306e3..a99bea4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -582,6 +582,16 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) list_del_init(&rq->queuelist); blk_mq_start_request(rq); + if (q->dma_drain_size && blk_rq_bytes(rq)) { + /* + * make sure space for the drain appears we + * know we can do this because max_hw_segments + * has been adjusted to be one fewer than the + * device can handle + */ + rq->nr_phys_segments++; + } + /* * Last request in the series. Flag it as such, this * enables drivers to know when IO should be kicked off, -- cgit v1.1 From 1be036e9464032362def6b3c13f57bfceefe2dab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Feb 2014 10:22:39 -0800 Subject: blk-mq: initialize sg_reserved_size To behave the same way as the old request path. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index a99bea4..f1e63c2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1387,6 +1387,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, q->mq_ops = reg->ops; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; + q->sg_reserved_size = INT_MAX; + blk_queue_make_request(q, blk_mq_make_request); blk_queue_rq_timed_out(q, reg->ops->timeout); if (reg->timeout) -- cgit v1.1 From 14ec77f352cb00ab8425ec2af03bd7e529eefe24 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 7 Feb 2014 13:45:39 -0700 Subject: blk-mq: Add bio_integrity setup to blk_mq_make_request This patch adds the missing bio_integrity_enabled() + bio_integrity_prep() setup into blk_mq_make_request() in order to use DIF protection with scsi-mq. Cc: Martin K. 
Petersen Signed-off-by: Nicholas Bellinger Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index f1e63c2..cee9623 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -922,6 +922,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + bio_endio(bio, -EIO); + return; + } + if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) return; -- cgit v1.1 From 5cb8850c9c4a7605f74f5c9c7ecadd0b02e87a25 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 7 Feb 2014 13:53:46 -0700 Subject: block: Explicitly handle discard/write same segments Immutable biovecs changed the way biovecs are interpreted - drivers no longer use bi_vcnt, they have to go by bi_iter.bi_size (to allow for using part of an existing segment without modifying it). This breaks with discards and write_same bios, since for those bi_size has nothing to do with segments in the biovec. So for now, we need a fairly gross hack - we fortunately know that there will never be more than one segment for the entire request, so we can special case discard/write_same. Signed-off-by: Kent Overstreet Tested-by: Hugh Dickins Signed-off-by: Jens Axboe --- block/blk-merge.c | 91 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 29 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index 8f8adaa..6c583f9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, if (!bio) return 0; + /* + * This should probably be returning 0, but blk_add_request_payload() + * (Christoph!!!!) + */ + if (bio->bi_rw & REQ_DISCARD) + return 1; + + if (bio->bi_rw & REQ_WRITE_SAME) + return 1; + fbio = bio; cluster = blk_queue_cluster(q); seg_size = 0; @@ -161,30 +171,60 @@ new_segment: *bvprv = *bvec; } -/* - * map a request to scatterlist, return number of sg entries setup. Caller - * must make sure sg can hold rq->nr_phys_segments entries - */ -int blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist) +static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, + struct scatterlist *sglist, + struct scatterlist **sg) { struct bio_vec bvec, bvprv = { NULL }; - struct req_iterator iter; - struct scatterlist *sg; + struct bvec_iter iter; int nsegs, cluster; nsegs = 0; cluster = blk_queue_cluster(q); - /* - * for each bio in rq - */ - sg = NULL; - rq_for_each_segment(bvec, rq, iter) { - __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, - &nsegs, &cluster); - } /* segments in rq */ + if (bio->bi_rw & REQ_DISCARD) { + /* + * This is a hack - drivers should be neither modifying the + * biovec, nor relying on bi_vcnt - but because of + * blk_add_request_payload(), a discard bio may or may not have + * a payload we need to set up here (thank you Christoph) and + * bi_vcnt is really the only way of telling if we need to. 
+ */ + + if (bio->bi_vcnt) + goto single_segment; + + return 0; + } + + if (bio->bi_rw & REQ_WRITE_SAME) { +single_segment: + *sg = sglist; + bvec = bio_iovec(bio); + sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); + return 1; + } + + for_each_bio(bio) + bio_for_each_segment(bvec, bio, iter) + __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, + &nsegs, &cluster); + return nsegs; +} + +/* + * map a request to scatterlist, return number of sg entries setup. Caller + * must make sure sg can hold rq->nr_phys_segments entries + */ +int blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist) +{ + struct scatterlist *sg = NULL; + int nsegs = 0; + + if (rq->bio) + nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); if (unlikely(rq->cmd_flags & REQ_COPY_USER) && (blk_rq_bytes(rq) & q->dma_pad_mask)) { @@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg); int blk_bio_map_sg(struct request_queue *q, struct bio *bio, struct scatterlist *sglist) { - struct bio_vec bvec, bvprv = { NULL }; - struct scatterlist *sg; - int nsegs, cluster; - struct bvec_iter iter; - - nsegs = 0; - cluster = blk_queue_cluster(q); - - sg = NULL; - bio_for_each_segment(bvec, bio, iter) { - __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, - &nsegs, &cluster); - } /* segments in bio */ + struct scatterlist *sg = NULL; + int nsegs; + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + nsegs = __blk_bios_map_sg(q, bio, sglist, &sg); + bio->bi_next = next; if (sg) sg_mark_end(sg); -- cgit v1.1 From 30a91cb4ef385fe1b260df204ef314d86fff2850 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Feb 2014 03:24:38 -0800 Subject: blk-mq: rework I/O completions Rework I/O completions to work more like the old code path. blk_mq_end_io now stays out of the business of deferring completions to others CPUs and calling blk_mark_rq_complete. The latter is very important to allow completing requests that have timed out and thus are already marked completed, the former allows using the IPI callout even for driver specific completions instead of having to reimplement them. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 52 +++++++++++++++++++++++++++++++--------------------- block/blk-mq.h | 3 +-- block/blk-timeout.c | 2 +- 3 files changed, 33 insertions(+), 24 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index cee9623..14c8f35 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -326,7 +326,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error) bio_endio(bio, error); } -void blk_mq_complete_request(struct request *rq, int error) +void blk_mq_end_io(struct request *rq, int error) { struct bio *bio = rq->bio; unsigned int bytes = 0; @@ -351,46 +351,53 @@ void blk_mq_complete_request(struct request *rq, int error) else blk_mq_free_request(rq); } +EXPORT_SYMBOL(blk_mq_end_io); -void __blk_mq_end_io(struct request *rq, int error) -{ - if (!blk_mark_rq_complete(rq)) - blk_mq_complete_request(rq, error); -} - -static void blk_mq_end_io_remote(void *data) +static void __blk_mq_complete_request_remote(void *data) { struct request *rq = data; - __blk_mq_end_io(rq, rq->errors); + rq->q->softirq_done_fn(rq); } -/* - * End IO on this request on a multiqueue enabled driver. We'll either do - * it directly inline, or punt to a local IPI handler on the matching - * remote CPU. 
- */ -void blk_mq_end_io(struct request *rq, int error) +void __blk_mq_complete_request(struct request *rq) { struct blk_mq_ctx *ctx = rq->mq_ctx; int cpu; - if (!ctx->ipi_redirect) - return __blk_mq_end_io(rq, error); + if (!ctx->ipi_redirect) { + rq->q->softirq_done_fn(rq); + return; + } cpu = get_cpu(); if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { - rq->errors = error; - rq->csd.func = blk_mq_end_io_remote; + rq->csd.func = __blk_mq_complete_request_remote; rq->csd.info = rq; rq->csd.flags = 0; __smp_call_function_single(ctx->cpu, &rq->csd, 0); } else { - __blk_mq_end_io(rq, error); + rq->q->softirq_done_fn(rq); } put_cpu(); } -EXPORT_SYMBOL(blk_mq_end_io); + +/** + * blk_mq_complete_request - end I/O on a request + * @rq: the request being processed + * + * Description: + * Ends all I/O on a request. It does not handle partial completions. + * The actual completion happens out-of-order, through a IPI handler. + **/ +void blk_mq_complete_request(struct request *rq) +{ + if (unlikely(blk_should_fake_timeout(rq->q))) + return; + if (!blk_mark_rq_complete(rq)) + __blk_mq_complete_request(rq); +} +EXPORT_SYMBOL(blk_mq_complete_request); static void blk_mq_start_request(struct request *rq) { @@ -1399,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, if (reg->timeout) blk_queue_rq_timeout(q, reg->timeout); + if (reg->ops->complete) + blk_queue_softirq_done(q, reg->ops->complete); + blk_mq_init_flush(q); blk_mq_init_cpu_queues(q, reg->nr_hw_queues); diff --git a/block/blk-mq.h b/block/blk-mq.h index 5c39179..f29b645 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -22,8 +22,7 @@ struct blk_mq_ctx { struct kobject kobj; }; -void __blk_mq_end_io(struct request *rq, int error); -void blk_mq_complete_request(struct request *rq, int error); +void __blk_mq_complete_request(struct request *rq); void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index bba81c9..d96f7061 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req) case BLK_EH_HANDLED: /* Can we use req->errors here? */ if (q->mq_ops) - blk_mq_complete_request(req, req->errors); + __blk_mq_complete_request(req); else __blk_complete_request(req); break; -- cgit v1.1 From 18741986a4b1dc4b1f171634c4191abc3b0fa023 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Feb 2014 09:29:00 -0700 Subject: blk-mq: rework flush sequencing logic Witch to using a preallocated flush_rq for blk-mq similar to what's done with the old request path. This allows us to set up the request properly with a tag from the actually allowed range and ->rq_disk as needed by some drivers. To make life easier we also switch to dynamic allocation of ->flush_rq for the old path. 
This effectively reverts most of "blk-mq: fix for flush deadlock" and "blk-mq: Don't reserve a tag for flush request" Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 15 ++++++-- block/blk-flush.c | 105 ++++++++++++++++++++---------------------------------- block/blk-mq.c | 54 ++++++++++------------------ block/blk-mq.h | 1 + block/blk-sysfs.c | 2 ++ 5 files changed, 72 insertions(+), 105 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 06636f3..853f927 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) if (!uninit_q) return NULL; + uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); + if (!uninit_q->flush_rq) + goto out_cleanup_queue; + q = blk_init_allocated_queue(uninit_q, rfn, lock); if (!q) - blk_cleanup_queue(uninit_q); - + goto out_free_flush_rq; return q; + +out_free_flush_rq: + kfree(uninit_q->flush_rq); +out_cleanup_queue: + blk_cleanup_queue(uninit_q); + return NULL; } EXPORT_SYMBOL(blk_init_queue_node); @@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) { if (q->mq_ops) - return blk_mq_alloc_request(q, rw, gfp_mask, false); + return blk_mq_alloc_request(q, rw, gfp_mask); else return blk_old_get_request(q, rw, gfp_mask); } diff --git a/block/blk-flush.c b/block/blk-flush.c index 9143e85..66e2b69 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq) blk_clear_rq_complete(rq); } -static void mq_flush_data_run(struct work_struct *work) +static void mq_flush_run(struct work_struct *work) { struct request *rq; - rq = container_of(work, struct request, mq_flush_data); + rq = container_of(work, struct request, mq_flush_work); memset(&rq->csd, 0, sizeof(rq->csd)); blk_mq_run_request(rq, true, false); } -static void blk_mq_flush_data_insert(struct request *rq) +static bool blk_flush_queue_rq(struct request *rq) { - INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); - kblockd_schedule_work(rq->q, &rq->mq_flush_data); + if (rq->q->mq_ops) { + INIT_WORK(&rq->mq_flush_work, mq_flush_run); + kblockd_schedule_work(rq->q, &rq->mq_flush_work); + return false; + } else { + list_add_tail(&rq->queuelist, &rq->q->queue_head); + return true; + } } /** @@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, case REQ_FSEQ_DATA: list_move_tail(&rq->flush.list, &q->flush_data_in_flight); - if (q->mq_ops) - blk_mq_flush_data_insert(rq); - else { - list_add(&rq->queuelist, &q->queue_head); - queued = true; - } + queued = blk_flush_queue_rq(rq); break; case REQ_FSEQ_DONE: @@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, } kicked = blk_kick_flush(q); - /* blk_mq_run_flush will run queue */ - if (q->mq_ops) - return queued; return kicked | queued; } @@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error) struct request *rq, *n; unsigned long flags = 0; - if (q->mq_ops) { - blk_mq_free_request(flush_rq); + if (q->mq_ops) spin_lock_irqsave(&q->mq_flush_lock, flags); - } + running = &q->flush_queue[q->flush_running_idx]; BUG_ON(q->flush_pending_idx == q->flush_running_idx); @@ -263,48 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error) * kblockd. 
*/ if (queued || q->flush_queue_delayed) { - if (!q->mq_ops) - blk_run_queue_async(q); - else - /* - * This can be optimized to only run queues with requests - * queued if necessary. - */ - blk_mq_run_queues(q, true); + WARN_ON(q->mq_ops); + blk_run_queue_async(q); } q->flush_queue_delayed = 0; if (q->mq_ops) spin_unlock_irqrestore(&q->mq_flush_lock, flags); } -static void mq_flush_work(struct work_struct *work) -{ - struct request_queue *q; - struct request *rq; - - q = container_of(work, struct request_queue, mq_flush_work); - - rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ, - __GFP_WAIT|GFP_ATOMIC, false); - rq->cmd_type = REQ_TYPE_FS; - rq->end_io = flush_end_io; - - blk_mq_run_request(rq, true, false); -} - -/* - * We can't directly use q->flush_rq, because it doesn't have tag and is not in - * hctx->rqs[]. so we must allocate a new request, since we can't sleep here, - * so offload the work to workqueue. - * - * Note: we assume a flush request finished in any hardware queue will flush - * the whole disk cache. - */ -static void mq_run_flush(struct request_queue *q) -{ - kblockd_schedule_work(q, &q->mq_flush_work); -} - /** * blk_kick_flush - consider issuing flush request * @q: request_queue being kicked @@ -339,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q) * different from running_idx, which means flush is in flight. */ q->flush_pending_idx ^= 1; + if (q->mq_ops) { - mq_run_flush(q); - return true; + struct blk_mq_ctx *ctx = first_rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); + + blk_mq_rq_init(hctx, q->flush_rq); + q->flush_rq->mq_ctx = ctx; + + /* + * Reuse the tag value from the fist waiting request, + * with blk-mq the tag is generated during request + * allocation and drivers can rely on it being inside + * the range they asked for. + */ + q->flush_rq->tag = first_rq->tag; + } else { + blk_rq_init(q, q->flush_rq); } - blk_rq_init(q, &q->flush_rq); - q->flush_rq.cmd_type = REQ_TYPE_FS; - q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; - q->flush_rq.rq_disk = first_rq->rq_disk; - q->flush_rq.end_io = flush_end_io; + q->flush_rq->cmd_type = REQ_TYPE_FS; + q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; + q->flush_rq->rq_disk = first_rq->rq_disk; + q->flush_rq->end_io = flush_end_io; - list_add_tail(&q->flush_rq.queuelist, &q->queue_head); - return true; + return blk_flush_queue_rq(q->flush_rq); } static void flush_data_end_io(struct request *rq, int error) @@ -407,11 +382,8 @@ void blk_insert_flush(struct request *rq) /* * @policy now records what operations need to be done. Adjust * REQ_FLUSH and FUA for the driver. - * We keep REQ_FLUSH for mq to track flush requests. For !FUA, - * we never dispatch the request directly. 
*/ - if (rq->cmd_flags & REQ_FUA) - rq->cmd_flags &= ~REQ_FLUSH; + rq->cmd_flags &= ~REQ_FLUSH; if (!(fflags & REQ_FUA)) rq->cmd_flags &= ~REQ_FUA; @@ -560,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush); void blk_mq_init_flush(struct request_queue *q) { spin_lock_init(&q->mq_flush_lock); - INIT_WORK(&q->mq_flush_work, mq_flush_work); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 14c8f35..a59b056 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -194,27 +194,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, } static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, - gfp_t gfp, bool reserved, - int rw) + gfp_t gfp, bool reserved) { - struct request *req; - bool is_flush = false; - /* - * flush need allocate a request, leave at least one request for - * non-flush IO to avoid deadlock - */ - if ((rw & REQ_FLUSH) && !(rw & REQ_FLUSH_SEQ)) { - if (atomic_inc_return(&hctx->pending_flush) >= - hctx->queue_depth - hctx->reserved_tags - 1) { - atomic_dec(&hctx->pending_flush); - return NULL; - } - is_flush = true; - } - req = blk_mq_alloc_rq(hctx, gfp, reserved); - if (!req && is_flush) - atomic_dec(&hctx->pending_flush); - return req; + return blk_mq_alloc_rq(hctx, gfp, reserved); } static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, @@ -227,7 +209,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved, rw); + rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved); if (rq) { blk_mq_rq_ctx_init(q, ctx, rq, rw); break; @@ -244,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, return rq; } -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, - gfp_t gfp, bool reserved) +struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp) { struct request *rq; if (blk_mq_queue_enter(q)) return NULL; - rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); + rq = blk_mq_alloc_request_pinned(q, rw, gfp, false); if (rq) blk_mq_put_ctx(rq->mq_ctx); return rq; @@ -276,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request); /* * Re-init and set pdu, if we have it */ -static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) +void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) { blk_rq_init(hctx->queue, rq); @@ -290,9 +271,6 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, const int tag = rq->tag; struct request_queue *q = rq->q; - if ((rq->cmd_flags & REQ_FLUSH) && !(rq->cmd_flags & REQ_FLUSH_SEQ)) - atomic_dec(&hctx->pending_flush); - blk_mq_rq_init(hctx, rq); blk_mq_put_tag(hctx->tags, tag); @@ -946,14 +924,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) hctx = q->mq_ops->map_queue(q, ctx->cpu); trace_block_getrq(q, bio, rw); - rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false, bio->bi_rw); + rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false); if (likely(rq)) - blk_mq_rq_ctx_init(q, ctx, rq, bio->bi_rw); + blk_mq_rq_ctx_init(q, ctx, rq, rw); else { blk_mq_put_ctx(ctx); trace_block_sleeprq(q, bio, rw); - rq = blk_mq_alloc_request_pinned(q, bio->bi_rw, - __GFP_WAIT|GFP_ATOMIC, false); + rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC, + false); ctx = rq->mq_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); } @@ -1230,9 +1208,7 @@ static int 
blk_mq_init_hw_queues(struct request_queue *q, hctx->queue_num = i; hctx->flags = reg->flags; hctx->queue_depth = reg->queue_depth; - hctx->reserved_tags = reg->reserved_tags; hctx->cmd_size = reg->cmd_size; - atomic_set(&hctx->pending_flush, 0); blk_mq_init_cpu_notifier(&hctx->cpu_notifier, blk_mq_hctx_notify, hctx); @@ -1412,9 +1388,14 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, blk_mq_init_flush(q); blk_mq_init_cpu_queues(q, reg->nr_hw_queues); - if (blk_mq_init_hw_queues(q, reg, driver_data)) + q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size, + cache_line_size()), GFP_KERNEL); + if (!q->flush_rq) goto err_hw; + if (blk_mq_init_hw_queues(q, reg, driver_data)) + goto err_flush_rq; + blk_mq_map_swqueue(q); mutex_lock(&all_q_mutex); @@ -1422,6 +1403,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, mutex_unlock(&all_q_mutex); return q; + +err_flush_rq: + kfree(q->flush_rq); err_hw: kfree(q->mq_map); err_map: diff --git a/block/blk-mq.h b/block/blk-mq.h index f29b645..ed0035c 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,6 +28,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); +void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq); /* * CPU hotplug helpers diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8095c4a..7500f87 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj) if (q->mq_ops) blk_mq_free_queue(q); + kfree(q->flush_rq); + blk_trace_shutdown(q); bdi_destroy(&q->backing_dev_info); -- cgit v1.1 From 11c94444074f40b479a05f6657d935204e992f2e Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 10 Feb 2014 10:39:18 -0700 Subject: block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show cppcheck detected following format string mismatch. [blk-mq-tag.c:201]: (warning) %u in format string (no. 1) requires 'unsigned int' but the argument type is 'int'. Change "cpu" from int to unsigned int, because the cpu never become minus value. Signed-off-by: Masanari Iida Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index d64a02f..4025050 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -182,7 +182,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags) ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) { char *orig_page = page; - int cpu; + unsigned int cpu; if (!tags) return 0; -- cgit v1.1 From 1e93b8c274268038c93763dca65a73b42a081e10 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Feb 2014 08:27:13 -0800 Subject: blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq rq->errors never has been part of the communication protocol between drivers and the block stack and most drivers will not have initialized it. Return -EIO to upper layers when the driver returns BLK_MQ_RQ_QUEUE_ERROR unconditionally. If a driver want to return a different error it can easily do so by returning success after calling blk_mq_end_io itself. 
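[Editorial note] From a driver's point of view the resulting contract can be sketched as below. my_driver_submit() and the -ENOSPC case are hypothetical stand-ins; the return constants and blk_mq_end_io() are the real blk-mq interface of this era.

    static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
    {
            int err = my_driver_submit(rq);         /* placeholder submission */

            if (err == -ENOSPC)
                    return BLK_MQ_RQ_QUEUE_BUSY;    /* blk-mq retries later */

            if (err) {
                    /* Want something other than -EIO reported?  Complete the
                     * request yourself and return success to blk-mq. */
                    blk_mq_end_io(rq, err);
                    return BLK_MQ_RQ_QUEUE_OK;
            }

            return BLK_MQ_RQ_QUEUE_OK;              /* BLK_MQ_RQ_QUEUE_ERROR
                                                     * now always means -EIO */
    }
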
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index a59b056..0480710 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -605,8 +605,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) break; default: pr_err("blk-mq: bad return on queue: %d\n", ret); - rq->errors = -EIO; case BLK_MQ_RQ_QUEUE_ERROR: + rq->errors = -EIO; blk_mq_end_io(rq, rq->errors); break; } -- cgit v1.1 From 49f5baa5109897b8cee491e8a7c4d74052b6bc1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Feb 2014 08:27:14 -0800 Subject: blk-mq: pair blk_mq_start_request / blk_mq_requeue_request Make sure we have a proper pairing between starting and requeueing requests. Move the dma drain and REQ_END setup into blk_mq_start_request, and make sure blk_mq_requeue_request properly undoes them, giving us a pair of function to prepare and unprepare a request without leaving side effects. Together this ensures we always clean up properly after BLK_MQ_RQ_QUEUE_BUSY returns from ->queue_rq. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 0480710..1fa9dd1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -377,7 +377,7 @@ void blk_mq_complete_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_complete_request); -static void blk_mq_start_request(struct request *rq) +static void blk_mq_start_request(struct request *rq, bool last) { struct request_queue *q = rq->q; @@ -390,6 +390,25 @@ static void blk_mq_start_request(struct request *rq) */ rq->deadline = jiffies + q->rq_timeout; set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); + + if (q->dma_drain_size && blk_rq_bytes(rq)) { + /* + * Make sure space for the drain appears. We know we can do + * this because max_hw_segments has been adjusted to be one + * fewer than the device can handle. + */ + rq->nr_phys_segments++; + } + + /* + * Flag the last request in the series so that drivers know when IO + * should be kicked off, if they don't do it on a per-request basis. + * + * Note: the flag isn't the only condition drivers should do kick off. + * If drive is busy, the last request might not have the bit set. + */ + if (last) + rq->cmd_flags |= REQ_END; } static void blk_mq_requeue_request(struct request *rq) @@ -398,6 +417,11 @@ static void blk_mq_requeue_request(struct request *rq) trace_block_rq_requeue(q, rq); clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); + + rq->cmd_flags &= ~REQ_END; + + if (q->dma_drain_size && blk_rq_bytes(rq)) + rq->nr_phys_segments--; } struct blk_mq_timeout_data { @@ -565,29 +589,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) rq = list_first_entry(&rq_list, struct request, queuelist); list_del_init(&rq->queuelist); - blk_mq_start_request(rq); - if (q->dma_drain_size && blk_rq_bytes(rq)) { - /* - * make sure space for the drain appears we - * know we can do this because max_hw_segments - * has been adjusted to be one fewer than the - * device can handle - */ - rq->nr_phys_segments++; - } - - /* - * Last request in the series. Flag it as such, this - * enables drivers to know when IO should be kicked off, - * if they don't do it on a per-request basis. - * - * Note: the flag isn't the only condition drivers - * should do kick off. 
If drive is busy, the last - * request might not have the bit set. - */ - if (list_empty(&rq_list)) - rq->cmd_flags |= REQ_END; + blk_mq_start_request(rq, list_empty(&rq_list)); ret = q->mq_ops->queue_rq(hctx, rq); switch (ret) { -- cgit v1.1 From c8123f8c9cb517403b51aa41c3c46ff5e10b2c17 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 12 Feb 2014 09:34:01 -0700 Subject: block: add cond_resched() to potentially long running ioctl discard loop When mkfs issues a full device discard and the device only supports discards of a smallish size, we can loop in blkdev_issue_discard() for a long time. If preempt isn't enabled, this can turn into a softlock situation and the kernel will start complaining. Add an explicit cond_resched() at the end of the loop to avoid that. Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-lib.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'block') diff --git a/block/blk-lib.c b/block/blk-lib.c index 2da76c9..97a733c 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, atomic_inc(&bb.done); submit_bio(type, bio); + + /* + * We can loop for a long time in here, if someone does + * full device discards (like mkfs). Be nice and allow + * us to schedule out to avoid softlocking if preempt + * is disabled. + */ + cond_resched(); } blk_finish_plug(&plug); -- cgit v1.1
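
[Editorial note] For reference, the kind of caller that triggers this long-running loop is a full-device discard from userspace, which is effectively what mkfs does via the BLKDISCARD ioctl. A minimal sketch; the device path is a placeholder.

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>           /* BLKDISCARD, BLKGETSIZE64 */

    int main(void)
    {
            uint64_t range[2], size;
            int fd = open("/dev/sdX", O_WRONLY);    /* placeholder device */

            if (fd < 0 || ioctl(fd, BLKGETSIZE64, &size))
                    return 1;

            range[0] = 0;           /* offset in bytes */
            range[1] = size;        /* length in bytes: the whole device */

            /* Ends up in blkdev_issue_discard(), which may loop for a long
             * time if the device only accepts small discards. */
            if (ioctl(fd, BLKDISCARD, range))
                    perror("BLKDISCARD");
            return 0;
    }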