From 288dab8a35a0bde426a09870943c8d3ee3a50dab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Jun 2016 16:00:36 +0200 Subject: block: add a separate operation type for secure erase Instead of overloading the discard support with the REQ_SECURE flag. Use the opportunity to rename the queue flag as well, and remove the dead checks for this flag in the RAID 1 and RAID 10 drivers that don't claim support for secure erase. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 27 +++++++++++++++++---------- block/blk-lib.c | 25 ++++++++++++++----------- block/blk-merge.c | 6 ++---- 3 files changed, 33 insertions(+), 25 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 32a283e..db31a29 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1977,16 +1977,21 @@ generic_make_request_checks(struct bio *bio) } } - if ((bio_op(bio) == REQ_OP_DISCARD) && - (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { - err = -EOPNOTSUPP; - goto end_io; - } - - if (bio_op(bio) == REQ_OP_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { - err = -EOPNOTSUPP; - goto end_io; + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + if (!blk_queue_discard(q)) + goto not_supported; + break; + case REQ_OP_SECURE_ERASE: + if (!blk_queue_secure_erase(q)) + goto not_supported; + break; + case REQ_OP_WRITE_SAME: + if (!bdev_write_same(bio->bi_bdev)) + goto not_supported; + break; + default: + break; } /* @@ -2003,6 +2008,8 @@ generic_make_request_checks(struct bio *bio) trace_block_bio_queue(q, bio); return true; +not_supported: + err = -EOPNOTSUPP; end_io: bio->bi_error = err; bio_endio(bio); diff --git a/block/blk-lib.c b/block/blk-lib.c index ff2a7f0..78626c2 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -23,20 +23,27 @@ static struct bio *next_bio(struct bio *bio, unsigned int nr_pages, } int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, 
gfp_t gfp_mask, int op_flags, + sector_t nr_sects, gfp_t gfp_mask, int flags, struct bio **biop) { struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; unsigned int granularity; + enum req_op op; int alignment; if (!q) return -ENXIO; - if (!blk_queue_discard(q)) - return -EOPNOTSUPP; - if ((op_flags & REQ_SECURE) && !blk_queue_secdiscard(q)) - return -EOPNOTSUPP; + + if (flags & BLKDEV_DISCARD_SECURE) { + if (!blk_queue_secure_erase(q)) + return -EOPNOTSUPP; + op = REQ_OP_SECURE_ERASE; + } else { + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + op = REQ_OP_DISCARD; + } /* Zero-sector (unknown) and one-sector granularities are the same. */ granularity = max(q->limits.discard_granularity >> 9, 1U); @@ -66,7 +73,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio = next_bio(bio, 1, gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; - bio_set_op_attrs(bio, REQ_OP_DISCARD, op_flags); + bio_set_op_attrs(bio, op, 0); bio->bi_iter.bi_size = req_sects << 9; nr_sects -= req_sects; @@ -100,16 +107,12 @@ EXPORT_SYMBOL(__blkdev_issue_discard); int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) { - int op_flags = 0; struct bio *bio = NULL; struct blk_plug plug; int ret; - if (flags & BLKDEV_DISCARD_SECURE) - op_flags |= REQ_SECURE; - blk_start_plug(&plug); - ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, op_flags, + ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags, &bio); if (!ret && bio) { ret = submit_bio_wait(bio); diff --git a/block/blk-merge.c b/block/blk-merge.c index c265348..9772308 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -649,8 +649,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (!rq_mergeable(req) || !rq_mergeable(next)) return 0; - if (!blk_check_merge_flags(req->cmd_flags, req_op(req), next->cmd_flags, - req_op(next))) + if (req_op(req) != 
req_op(next)) return 0; /* @@ -752,8 +751,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (!rq_mergeable(rq) || !bio_mergeable(bio)) return false; - if (!blk_check_merge_flags(rq->cmd_flags, req_op(rq), bio->bi_rw, - bio_op(bio))) + if (req_op(rq) != bio_op(bio)) return false; /* different data direction or already started, don't merge */ -- cgit v1.1 From e63a46bef01ff3064f44dba145833284fb6adeec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 15 Jun 2016 18:17:27 -0700 Subject: block: introduce device_add_disk() In preparation for removing the ->driverfs_dev member of a gendisk, add an api that takes the parent device as a parameter to add_disk(). For now this maintains the status quo of WARN()ing on failure, but does not return an error code. Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Signed-off-by: Dan Williams --- block/genhd.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index 9f42526..fb2d9ae 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -506,7 +506,7 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void register_disk(struct gendisk *disk) +static void register_disk(struct device *parent, struct gendisk *disk) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -514,7 +514,7 @@ static void register_disk(struct gendisk *disk) struct hd_struct *part; int err; - ddev->parent = disk->driverfs_dev; + ddev->parent = parent; dev_set_name(ddev, "%s", disk->disk_name); @@ -573,7 +573,8 @@ exit: } /** - * add_disk - add partitioning information to kernel list + * device_add_disk - add partitioning information to kernel list + * @parent: parent device for the disk * @disk: per-device partitioning information * * This function registers the partitioning information in @disk @@ -581,7 +582,7 @@ exit: * * FIXME: error handling */ -void add_disk(struct gendisk *disk) +void 
device_add_disk(struct device *parent, struct gendisk *disk) { struct backing_dev_info *bdi; dev_t devt; @@ -617,7 +618,11 @@ void add_disk(struct gendisk *disk) blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); - register_disk(disk); + + /* temporary while we convert usages to use disk_to_dev(disk)->parent */ + disk->driverfs_dev = parent; + + register_disk(parent, disk); blk_register_queue(disk); /* @@ -633,7 +638,7 @@ void add_disk(struct gendisk *disk) disk_add_events(disk); blk_integrity_add(disk); } -EXPORT_SYMBOL(add_disk); +EXPORT_SYMBOL(device_add_disk); void del_gendisk(struct gendisk *disk) { -- cgit v1.1 From 52c44d93c26f5a76068c0a8cc83bb8f56f38043d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 15 Jun 2016 19:43:07 -0700 Subject: block: remove ->driverfs_dev Now that all drivers that specify a ->driverfs_dev have been converted to device_add_disk(), the pointer can be removed from struct gendisk. Cc: Jens Axboe Cc: Ross Zwisler Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- block/genhd.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index fb2d9ae..a18d353 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -618,10 +618,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk) blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); - - /* temporary while we convert usages to use disk_to_dev(disk)->parent */ - disk->driverfs_dev = parent; - register_disk(parent, disk); blk_register_queue(disk); @@ -804,10 +800,9 @@ void __init printk_all_partitions(void) , disk_name(disk, part->partno, name_buf), part->info ? 
part->info->uuid : ""); if (is_part0) { - if (disk->driverfs_dev != NULL && - disk->driverfs_dev->driver != NULL) + if (dev->parent && dev->parent->driver) printk(" driver: %s\n", - disk->driverfs_dev->driver->name); + dev->parent->driver->name); else printk(" (driver?)\n"); } else -- cgit v1.1 From 1f5bd336b9150560458b03460cbcfcfbcf8995b1 Mon Sep 17 00:00:00 2001 From: Ming Lin Date: Mon, 13 Jun 2016 16:45:21 +0200 Subject: blk-mq: add blk_mq_alloc_request_hctx For some protocols like NVMe over Fabrics we need to be able to send initialization commands to a specific queue. Based on an earlier patch from Christoph Hellwig . Signed-off-by: Ming Lin [hch: disallow sleeping allocation, req_op fixes] Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 13f4603..7aa60c4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -267,6 +267,45 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, } EXPORT_SYMBOL(blk_mq_alloc_request); +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, + unsigned int flags, unsigned int hctx_idx) +{ + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + struct request *rq; + struct blk_mq_alloc_data alloc_data; + int ret; + + /* + * If the tag allocator sleeps we could get an allocation for a + * different hardware context. No need to complicate the low level + * allocator for this for the rare use case of a command tied to + * a specific queue. 
+ */ + if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT))) + return ERR_PTR(-EINVAL); + + if (hctx_idx >= q->nr_hw_queues) + return ERR_PTR(-EIO); + + ret = blk_queue_enter(q, true); + if (ret) + return ERR_PTR(ret); + + hctx = q->queue_hw_ctx[hctx_idx]; + ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); + + blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); + rq = __blk_mq_alloc_request(&alloc_data, rw, 0); + if (!rq) { + blk_queue_exit(q); + return ERR_PTR(-EWOULDBLOCK); + } + + return rq; +} +EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); + static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq) { -- cgit v1.1 From 9645c1a2336bb92751a04454e7565c09c9a06f3c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 21 Jun 2016 18:04:19 +0200 Subject: block: Export blk_poll The new NVMe over fabrics target will make use of this outside from a module. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Steve Wise Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index db31a29..dd32563 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3385,6 +3385,7 @@ bool blk_poll(struct request_queue *q, blk_qc_t cookie) return false; } +EXPORT_SYMBOL_GPL(blk_poll); #ifdef CONFIG_PM /** -- cgit v1.1 From 486cf9899e311838b6ab95d19ff87c4da44d6508 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 6 Jul 2016 21:55:48 +0900 Subject: blk-mq: Introduce blk_mq_reinit_tagset The new nvme-rdma driver will need to reinitialize all the tags as part of the error recovery procedure (realloc the tag memory region). Add a helper in blk-mq for it that can iterate over all requests in a tagset to make this easier. 
Signed-off-by: Sagi Grimberg Tested-by: Ming Lin Reviewed-by: Stephen Bates Signed-off-by: Christoph Hellwig Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'block') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 56a0c37..729bac3 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -485,6 +485,32 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); +int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) +{ + int i, j, ret = 0; + + if (!set->ops->reinit_request) + goto out; + + for (i = 0; i < set->nr_hw_queues; i++) { + struct blk_mq_tags *tags = set->tags[i]; + + for (j = 0; j < tags->nr_tags; j++) { + if (!tags->rqs[j]) + continue; + + ret = set->ops->reinit_request(set->driver_data, + tags->rqs[j]); + if (ret) + goto out; + } + } + +out: + return ret; +} +EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset); + void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { -- cgit v1.1 From e950fdf71c9b4a6b63b58fed78956a96cc907402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:23:33 +0200 Subject: block: introduce BLKDEV_DISCARD_ZERO to fix zeroout Currently blkdev_issue_zeroout cascades down from discards (if the driver guarantees that discards zero data), to WRITE SAME and then to a loop writing zeroes. Unfortunately we ignore run-time EOPNOTSUPP errors in the block layer blkdev_issue_discard helper to work around DM volumes that may have mixed discard support underneath. This patch introduces a new BLKDEV_DISCARD_ZERO flag to blkdev_issue_discard that indicates we are called for a zeroing operation. This allows both ignoring the EOPNOTSUPP hack and actually consolidating the discard_zeroes_data check into the function. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/blk-lib.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/blk-lib.c b/block/blk-lib.c index 78626c2..45b35b1 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -36,12 +36,17 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return -ENXIO; if (flags & BLKDEV_DISCARD_SECURE) { + if (flags & BLKDEV_DISCARD_ZERO) + return -EOPNOTSUPP; if (!blk_queue_secure_erase(q)) return -EOPNOTSUPP; op = REQ_OP_SECURE_ERASE; } else { if (!blk_queue_discard(q)) return -EOPNOTSUPP; + if ((flags & BLKDEV_DISCARD_ZERO) && + !q->limits.discard_zeroes_data) + return -EOPNOTSUPP; op = REQ_OP_DISCARD; } @@ -116,7 +121,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, &bio); if (!ret && bio) { ret = submit_bio_wait(bio); - if (ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO)) ret = 0; } blk_finish_plug(&plug); @@ -241,11 +246,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, bool discard) { - struct request_queue *q = bdev_get_queue(bdev); - - if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data && - blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0) - return 0; + if (discard) { + if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, + BLKDEV_DISCARD_ZERO)) + return 0; + } if (bdev_write_same(bdev) && blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, -- cgit v1.1 From 3f40bf2c89223192535a72a4d49ce7d68d8ed9dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:23:34 +0200 Subject: block: don't ignore -EOPNOTSUPP blkdev_issue_write_same WRITE SAME is a data integrity operation and we can't simply ignore errors. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/blk-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-lib.c b/block/blk-lib.c index 45b35b1..e371f83 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -178,7 +178,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, if (bio) ret = submit_bio_wait(bio); - return ret != -EOPNOTSUPP ? ret : 0; + return ret; } EXPORT_SYMBOL(blkdev_issue_write_same); -- cgit v1.1 From ed996a52c868b62c4e5bf529cb4ccb44bcfa2f8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:28:42 +0200 Subject: block: simplify and cleanup bvec pool handling Instead of a flag and an index just make sure an index of 0 means no need to free the bvec array. Also move the constants related to the bvec pools together and use a consistent naming scheme for them. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Mike Christie Signed-off-by: Jens Axboe --- block/bio-integrity.c | 9 ++++----- block/bio.c | 32 ++++++++++++++++++-------------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 711e4d8d..11633fc 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -53,7 +53,6 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, { struct bio_integrity_payload *bip; struct bio_set *bs = bio->bi_pool; - unsigned long idx = BIO_POOL_NONE; unsigned inline_vecs; if (!bs || !bs->bio_integrity_pool) { @@ -71,17 +70,19 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, memset(bip, 0, sizeof(*bip)); if (nr_vecs > inline_vecs) { + unsigned long idx = 0; + bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; bip->bip_max_vcnt = bvec_nr_vecs(idx); + bip->bip_slab = idx; } else { bip->bip_vec = bip->bip_inline_vecs; bip->bip_max_vcnt = inline_vecs; } - bip->bip_slab = 
idx; bip->bip_bio = bio; bio->bi_integrity = bip; bio->bi_rw |= REQ_INTEGRITY; @@ -110,9 +111,7 @@ void bio_integrity_free(struct bio *bio) bip->bip_vec->bv_offset); if (bs && bs->bio_integrity_pool) { - if (bip->bip_slab != BIO_POOL_NONE) - bvec_free(bs->bvec_integrity_pool, bip->bip_vec, - bip->bip_slab); + bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); mempool_free(bip, bs->bio_integrity_pool); } else { diff --git a/block/bio.c b/block/bio.c index 848cd35..882b50a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -43,7 +43,7 @@ * unsigned short */ #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { +static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV @@ -160,11 +160,15 @@ unsigned int bvec_nr_vecs(unsigned short idx) void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) { - BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); + if (!idx) + return; + idx--; + + BIO_BUG_ON(idx >= BVEC_POOL_NR); - if (idx == BIOVEC_MAX_IDX) + if (idx == BVEC_POOL_MAX) { mempool_free(bv, pool); - else { + } else { struct biovec_slab *bvs = bvec_slabs + idx; kmem_cache_free(bvs->slab, bv); @@ -206,7 +210,7 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, * idx now points to the pool we want to allocate from. only the * 1-vec entry pool is mempool backed. 
*/ - if (*idx == BIOVEC_MAX_IDX) { + if (*idx == BVEC_POOL_MAX) { fallback: bvl = mempool_alloc(pool, gfp_mask); } else { @@ -226,11 +230,12 @@ fallback: */ bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) { - *idx = BIOVEC_MAX_IDX; + *idx = BVEC_POOL_MAX; goto fallback; } } + (*idx)++; return bvl; } @@ -250,8 +255,7 @@ static void bio_free(struct bio *bio) __bio_free(bio); if (bs) { - if (bio_flagged(bio, BIO_OWNS_VEC)) - bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); + bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); /* * If we have front padding, adjust the bio pointer before freeing @@ -420,7 +424,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) gfp_t saved_gfp = gfp_mask; unsigned front_pad; unsigned inline_vecs; - unsigned long idx = BIO_POOL_NONE; struct bio_vec *bvl = NULL; struct bio *bio; void *p; @@ -480,6 +483,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (nr_iovecs > inline_vecs) { + unsigned long idx = 0; + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); @@ -490,13 +495,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (unlikely(!bvl)) goto err_free; - bio_set_flag(bio, BIO_OWNS_VEC); + bio->bi_flags |= idx << BVEC_POOL_OFFSET; } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } bio->bi_pool = bs; - bio->bi_flags |= idx << BIO_POOL_OFFSET; bio->bi_max_vecs = nr_iovecs; bio->bi_io_vec = bvl; return bio; @@ -568,7 +572,7 @@ EXPORT_SYMBOL(bio_phys_segments); */ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) { - BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE); + BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio)); /* * most users will be overriding ->bi_bdev with a new target, @@ -1832,7 +1836,7 @@ EXPORT_SYMBOL_GPL(bio_trim); */ mempool_t 
*biovec_create_pool(int pool_entries) { - struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; + struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX; return mempool_create_slab_pool(pool_entries, bp->slab); } @@ -2009,7 +2013,7 @@ static void __init biovec_init_slabs(void) { int i; - for (i = 0; i < BIOVEC_NR_POOLS; i++) { + for (i = 0; i < BVEC_POOL_NR; i++) { int size; struct biovec_slab *bvs = bvec_slabs + i; -- cgit v1.1 From 0c4de0f33b0a86a426c0c3958cd40d8c82ede8d2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:31:50 +0200 Subject: block: ensure bios return from blk_get_request are properly initialized blk_get_request is used for BLOCK_PC and similar passthrough requests. Currently we always need to call blk_rq_set_block_pc or an open coded version of it to allow appending bios using the request mapping helpers later on, which is a somewhat awkward API. Instead move the initialization part of blk_rq_set_block_pc into blk_get_request, so that we always have a safe to use request. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 12 +++++++----- block/blk-mq.c | 4 ++++ 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index dd32563..4d87949 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1294,10 +1294,15 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, spin_lock_irq(q->queue_lock); rq = get_request(q, rw, 0, NULL, gfp_mask); - if (IS_ERR(rq)) + if (IS_ERR(rq)) { spin_unlock_irq(q->queue_lock); - /* q->queue_lock is unlocked at this point */ + return rq; + } + /* q->queue_lock is unlocked at this point */ + rq->__data_len = 0; + rq->__sector = (sector_t) -1; + rq->bio = rq->biotail = NULL; return rq; } @@ -1377,9 +1382,6 @@ EXPORT_SYMBOL(blk_make_request); void blk_rq_set_block_pc(struct request *rq) { rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->__data_len = 0; - rq->__sector = (sector_t) -1; - rq->bio = rq->biotail = NULL; memset(rq->__cmd, 0, sizeof(rq->__cmd)); } EXPORT_SYMBOL(blk_rq_set_block_pc); diff --git a/block/blk-mq.c b/block/blk-mq.c index 7aa60c4..8f4fac8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -263,6 +263,10 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, blk_queue_exit(q); return ERR_PTR(-EWOULDBLOCK); } + + rq->__data_len = 0; + rq->__sector = (sector_t) -1; + rq->bio = rq->biotail = NULL; return rq; } EXPORT_SYMBOL(blk_mq_alloc_request); -- cgit v1.1 From 98d61d5b1a65a9df7cb3d9605f5d37d3dbbb4b5e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:31:51 +0200 Subject: block: simplify and export blk_rq_append_bio The target SCSI passthrough backend is much better served with the low-level blk_rq_append_bio construct than the helpers built on top of it, so export it. Also use the opportunity to remove the pointless request_queue argument and make the code flow a little more readable. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-map.c | 25 +++++++++++++++---------- block/blk.h | 2 -- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 4d87949..a223018 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1363,7 +1363,7 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, int ret; blk_queue_bounce(q, &bounce_bio); - ret = blk_rq_append_bio(q, rq, bounce_bio); + ret = blk_rq_append_bio(rq, bounce_bio); if (unlikely(ret)) { blk_put_request(rq); return ERR_PTR(ret); diff --git a/block/blk-map.c b/block/blk-map.c index 61733a6..b8657fa 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -9,21 +9,26 @@ #include "blk.h" -int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio) +/* + * Append a bio to a passthrough request. Only works can be merged into + * the request based on the driver constraints. 
+ */ +int blk_rq_append_bio(struct request *rq, struct bio *bio) { - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) - return -EINVAL; - else { + if (!rq->bio) { + blk_rq_bio_prep(rq->q, rq, bio); + } else { + if (!ll_back_merge_fn(rq->q, rq, bio)) + return -EINVAL; + rq->biotail->bi_next = bio; rq->biotail = bio; - rq->__data_len += bio->bi_iter.bi_size; } + return 0; } +EXPORT_SYMBOL(blk_rq_append_bio); static int __blk_rq_unmap_user(struct bio *bio) { @@ -71,7 +76,7 @@ static int __blk_rq_map_user_iov(struct request *rq, */ bio_get(bio); - ret = blk_rq_append_bio(q, rq, bio); + ret = blk_rq_append_bio(rq, bio); if (ret) { bio_endio(bio); __blk_rq_unmap_user(orig_bio); @@ -229,7 +234,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (do_copy) rq->cmd_flags |= REQ_COPY_USER; - ret = blk_rq_append_bio(q, rq, bio); + ret = blk_rq_append_bio(rq, bio); if (unlikely(ret)) { /* request is too big */ bio_put(bio); diff --git a/block/blk.h b/block/blk.h index 70e4aee..c37492f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -64,8 +64,6 @@ void blk_exit_rl(struct request_list *rl); void init_request_from_bio(struct request *req, struct bio *bio); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); -int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio); void blk_queue_bypass_start(struct request_queue *q); void blk_queue_bypass_end(struct request_queue *q); void blk_dequeue_request(struct request *rq); -- cgit v1.1 From 4613c5f1df92f3cb5a8f89c7dfefc37402c16bd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:31:53 +0200 Subject: scsi/osd: open code blk_make_request I wish the OSD code could simply use blk_rq_map_* helpers like everyone else, but the complex nature of deciding if we have DATA IN and/or DATA OUT buffers might make this impossible (at least for a mere human like me). 
But using blk_rq_append_bio at least allows sharing the setup code between requests with or without data buffers, and given that this is the last user of blk_make_request it allows getting rid of that somewhat awkward interface. Signed-off-by: Christoph Hellwig Acked-by: Boaz Harrosh Signed-off-by: Jens Axboe --- block/blk-core.c | 57 -------------------------------------------------------- 1 file changed, 57 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index a223018..91b339f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1318,63 +1318,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) EXPORT_SYMBOL(blk_get_request); /** - * blk_make_request - given a bio, allocate a corresponding struct request. - * @q: target request queue - * @bio: The bio describing the memory mappings that will be submitted for IO. - * It may be a chained-bio properly constructed by block/bio layer. - * @gfp_mask: gfp flags to be used for memory allocation - * - * blk_make_request is the parallel of generic_make_request for BLOCK_PC - * type commands. Where the struct request needs to be farther initialized by - * the caller. It is passed a &struct bio, which describes the memory info of - * the I/O transfer. - * - * The caller of blk_make_request must make sure that bi_io_vec - * are set to describe the memory buffers. That bio_data_dir() will return - * the needed direction of the request. (And all bio's in the passed bio-chain - * are properly set accordingly) - * - * If called under none-sleepable conditions, mapped bio buffers must not - * need bouncing, by calling the appropriate masked or flagged allocator, - * suitable for the target device. Otherwise the call to blk_queue_bounce will - * BUG. - * - * WARNING: When allocating/cloning a bio-chain, careful consideration should be - * given to how you allocate bios. 
In particular, you cannot use - * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise - * you risk waiting for IO completion of a bio that hasn't been submitted yet, - * thus resulting in a deadlock. Alternatively bios should be allocated using - * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock. - * If possible a big IO should be split into smaller parts when allocation - * fails. Partial allocation should not be an error, or you risk a live-lock. - */ -struct request *blk_make_request(struct request_queue *q, struct bio *bio, - gfp_t gfp_mask) -{ - struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); - - if (IS_ERR(rq)) - return rq; - - blk_rq_set_block_pc(rq); - - for_each_bio(bio) { - struct bio *bounce_bio = bio; - int ret; - - blk_queue_bounce(q, &bounce_bio); - ret = blk_rq_append_bio(rq, bounce_bio); - if (unlikely(ret)) { - blk_put_request(rq); - return ERR_PTR(ret); - } - } - - return rq; -} -EXPORT_SYMBOL(blk_make_request); - -/** * blk_rq_set_block_pc - initialize a request to type BLOCK_PC * @rq: request to be initialized * -- cgit v1.1 From f9cc4472c963e64493261d834155974c48c0ff88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:31:54 +0200 Subject: block: unexport various bio mapping helpers They are unused and potential new users really should use the blk_rq_map* versions. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index 882b50a..54ee384 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1101,7 +1101,6 @@ int bio_uncopy_user(struct bio *bio) bio_put(bio); return ret; } -EXPORT_SYMBOL(bio_uncopy_user); /** * bio_copy_user_iov - copy user data to bio @@ -1396,7 +1395,6 @@ void bio_unmap_user(struct bio *bio) __bio_unmap_user(bio); bio_put(bio); } -EXPORT_SYMBOL(bio_unmap_user); static void bio_map_kern_endio(struct bio *bio) { @@ -1542,7 +1540,6 @@ cleanup: bio_put(bio); return ERR_PTR(-ENOMEM); } -EXPORT_SYMBOL(bio_copy_kern); /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions -- cgit v1.1