From c4cf5261f8bffd9de132b50660a69148e7575bd6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Apr 2015 16:15:18 -0600 Subject: bio: skip atomic inc/dec of ->bi_remaining for non-chains Struct bio has an atomic ref count for chained bio's, and we use this to know when to end IO on the bio. However, most bio's are not chained, so we don't need to always introduce this atomic operation as part of ending IO. Add a helper to elevate the bi_remaining count, and flag the bio as now actually needing the decrement at end_io time. Rename the field to __bi_remaining to catch any current users of this doing the incrementing manually. For high IOPS workloads, this reduces the overhead of bio_endio() substantially. Tested-by: Robert Elliott Acked-by: Kent Overstreet Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++++ include/linux/blk_types.h | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index da3a127..8bfe9ee 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -645,6 +645,17 @@ static inline struct bio *bio_list_get(struct bio_list *bl) } /* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. + */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + +/* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. * These memory pools in turn all allocate from the bio_slab diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1b25e3..8b07e06 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -65,7 +65,7 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - atomic_t bi_remaining; + atomic_t __bi_remaining; bio_end_io_t *bi_end_io; @@ -122,6 +122,7 @@ struct bio { #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ +#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.1 From dac56212e8127dbc0bff7be35c508bc280213309 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Apr 2015 16:23:59 -0600 Subject: bio: skip atomic inc/dec of ->bi_cnt for most use cases Struct bio has a reference count that controls when it can be freed. Most uses cases is allocating the bio, which then returns with a single reference to it, doing IO, and then dropping that single reference. We can remove this atomic_dec_and_test() in the completion path, if nobody else is holding a reference to the bio. If someone does call bio_get() on the bio, then we flag the bio as now having valid count and that we must properly honor the reference count when it's being put. Tested-by: Robert Elliott Signed-off-by: Jens Axboe --- include/linux/bio.h | 16 +++++++++++++++- include/linux/blk_types.h | 3 ++- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 8bfe9ee..7486ea1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -290,7 +290,21 @@ static inline unsigned bio_segments(struct bio *bio) * returns. and then bio would be freed memory when if (bio->bi_flags ...) * runs */ -#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +static inline void bio_get(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_cnt); +} + +static inline void bio_cnt_set(struct bio *bio, unsigned int count) +{ + if (count != 1) { + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + } + atomic_set(&bio->__bi_cnt, count); +} enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8b07e06..93d2e71 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -92,7 +92,7 @@ struct bio { unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ - atomic_t bi_cnt; /* pin count */ + atomic_t __bi_cnt; /* pin count */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -123,6 +123,7 @@ struct bio { #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ #define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.1 From 4f8c9510ba71bb54477841bebb90154ef140860f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:16 +0200 Subject: block: rename REQ_TYPE_SPECIAL to REQ_TYPE_DRV_PRIV Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516..98c9027 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,10 +79,10 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_SPECIAL, /* driver defined type */ + REQ_TYPE_DRV_PRIV, /* driver defined type */ /* * for ATA/ATAPI devices. this really doesn't belong here, ide should - * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver + * use REQ_TYPE_DRV_PRIV and use rq->cmd[0] with the range of driver * private REQ_LB opcodes to differentiate what type of request this is */ REQ_TYPE_ATA_TASKFILE, -- cgit v1.1 From b42171ef7d938a66fa52e66a3d911ed63770b5ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:17 +0200 Subject: block: move REQ_TYPE_ATA_TASKFILE and REQ_TYPE_ATA_PC to ide.h These values are only used by the IDE driver, so move them into it by allowing drivers to take cmd_type values after the first private one. Note that we have to turn cmd_type into a plain unsigned integer so that gcc doesn't complain about mismatching enum types. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 ++--------- include/linux/ide.h | 7 +++++++ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 98c9027..9cb4d80 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,14 +79,7 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_DRV_PRIV, /* driver defined type */ - /* - * for ATA/ATAPI devices. this really doesn't belong here, ide should - * use REQ_TYPE_DRV_PRIV and use rq->cmd[0] with the range of driver - * private REQ_LB opcodes to differentiate what type of request this is - */ - REQ_TYPE_ATA_TASKFILE, - REQ_TYPE_ATA_PC, + REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; #define BLK_MAX_CDB 16 @@ -108,7 +101,7 @@ struct request { struct blk_mq_ctx *mq_ctx; u64 cmd_flags; - enum rq_cmd_type_bits cmd_type; + unsigned cmd_type; unsigned long atomic_flags; int cpu; diff --git a/include/linux/ide.h b/include/linux/ide.h index 93b5ca7..62ac399 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -39,6 +39,12 @@ struct device; +/* IDE-specific values for req->cmd_type */ +enum ata_cmd_type_bits { + REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, + REQ_TYPE_ATA_PC, +}; + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, @@ -1551,4 +1557,5 @@ static inline void ide_set_drivedata(ide_drive_t *drive, void *data) #define ide_host_for_each_port(i, port, host) \ for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) + #endif /* _IDE_H */ -- cgit v1.1 From b0b93b48a30e809240ddd7449a6ad60a5ddf7b4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:18 +0200 Subject: block: move REQ_TYPE_SENSE to the ide driver Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - include/linux/ide.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9cb4d80..6076b9e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -75,7 +75,6 @@ struct request_list { enum rq_cmd_type_bits { REQ_TYPE_FS = 1, /* fs request */ REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_SENSE, /* sense request */ REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 62ac399..9856b7d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -43,6 +43,7 @@ struct device; enum ata_cmd_type_bits { REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, REQ_TYPE_ATA_PC, + REQ_TYPE_ATA_SENSE, /* sense request */ }; /* Error codes returned in rq->errors to the higher part of the driver. */ -- cgit v1.1 From ac7cdff00a33d48d27217560fa3b16d802e5f535 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:19 +0200 Subject: block: remove REQ_TYPE_PM_SHUTDOWN Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6076b9e..c2829ba 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -77,7 +77,6 @@ enum rq_cmd_type_bits { REQ_TYPE_BLOCK_PC, /* scsi command */ REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ - REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; -- cgit v1.1 From a7928c1578c550bd6f4dec62d65132e6db226c57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:20 +0200 Subject: block: move PM request support to IDE This removes the request types and hacks from the block code and into the old IDE driver. There is a small amunt of code duplication due to this, but it's not too bad. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 21 +-------------------- include/linux/ide.h | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c2829ba..2da818a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -30,7 +30,6 @@ struct scsi_ioctl_command; struct request_queue; struct elevator_queue; -struct request_pm_state; struct blk_trace; struct request; struct sg_io_hdr; @@ -75,8 +74,6 @@ struct request_list { enum rq_cmd_type_bits { REQ_TYPE_FS = 1, /* fs request */ REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_PM_SUSPEND, /* suspend request */ - REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; @@ -207,19 +204,6 @@ static inline unsigned short req_get_ioprio(struct request *req) return req->ioprio; } -/* - * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME - * requests. Some step values could eventually be made generic. - */ -struct request_pm_state -{ - /* PM state machine step value, currently driver specific */ - int pm_step; - /* requested PM state value (S1, S2, S3, S4, ...) */ - u32 pm_state; - void* data; /* for driver use */ -}; - #include struct blk_queue_ctx; @@ -601,10 +585,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) (((rq)->cmd_flags & REQ_STARTED) && \ ((rq)->cmd_type == REQ_TYPE_FS)) -#define blk_pm_request(rq) \ - ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ - (rq)->cmd_type == REQ_TYPE_PM_RESUME) - #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) /* rq->queuelist of dequeued request must be list_empty() */ @@ -838,6 +818,7 @@ extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); +extern void __blk_run_queue_uncond(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, diff --git a/include/linux/ide.h b/include/linux/ide.h index 9856b7d..a633898 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -44,8 +44,14 @@ enum ata_cmd_type_bits { REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, REQ_TYPE_ATA_PC, REQ_TYPE_ATA_SENSE, /* sense request */ + REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */ + REQ_TYPE_ATA_PM_RESUME, /* resume request */ }; +#define ata_pm_request(rq) \ + ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ + (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, @@ -1321,6 +1327,19 @@ struct ide_port_info { u8 udma_mask; }; +/* + * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME + * requests. + */ +struct ide_pm_state { + /* PM state machine step value, currently driver specific */ + int pm_step; + /* requested PM state value (S1, S2, S3, S4, ...) */ + u32 pm_state; + void* data; /* for driver use */ +}; + + int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *); int ide_pci_init_two(struct pci_dev *, struct pci_dev *, const struct ide_port_info *, void *); -- cgit v1.1 From 4ecd4fef3a074c8bb43c391a57742c422469ebbd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2015 09:38:13 +0200 Subject: block: use an atomic_t for mq_freeze_depth lockdep gets unhappy about the not disabling irqs when using the queue_lock around it. Instead of trying to fix that up just switch to an atomic_t and get rid of the lock. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2da818a..bc91795 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -444,7 +444,7 @@ struct request_queue { struct mutex sysfs_lock; int bypass_depth; - int mq_freeze_depth; + atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; -- cgit v1.1 From b25de9d6da49b1a8760a89672283128aa8c78345 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2015 21:41:01 +0200 Subject: block: remove BIO_EOPNOTSUPP Since the big barrier rewrite/removal in 2007 we never fail FLUSH or FUA requests, which means we can remove the magic BIO_EOPNOTSUPP flag to help propagating those to the buffer_head layer. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 93d2e71..daf9591 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -118,7 +118,6 @@ struct bio { #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ -- cgit v1.1 From 97ca223c3b37ed12a5b67a5dc6247e5a4799d337 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2015 21:41:02 +0200 Subject: block: remove unused BIO_RW_BLOCK and BIO_EOF flags Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index daf9591..09c7a2c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -112,8 +112,6 @@ struct bio { * bio flags */ #define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ -#define BIO_EOF 2 /* out-out-bounds error */ #define BIO_SEG_VALID 3 /* bi_phys_segments valid */ #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ -- cgit v1.1 From b2dbe0a60f1bcf4db5c701f1577b3135c3159eb5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 May 2015 09:18:28 -0600 Subject: block: collapse bio bit space Various previous patches removed bits and left holes, collapse them all. Leave the reset start bit where it is, we don't need to change that. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 09c7a2c..3f4ded0 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -112,15 +112,15 @@ struct bio { * bio flags */ #define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ -#define BIO_CLONED 4 /* doesn't own data */ -#define BIO_BOUNCED 5 /* bio is a bounce bio */ -#define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_NULL_MAPPED 8 /* contains invalid user pages */ -#define BIO_QUIET 9 /* Make BIO Quiet */ -#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ -#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ -#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */ +#define BIO_SEG_VALID 1 /* bi_phys_segments valid */ +#define BIO_CLONED 2 /* doesn't own data */ +#define BIO_BOUNCED 3 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 4 /* contains user pages */ +#define BIO_NULL_MAPPED 5 /* contains invalid user pages */ +#define BIO_QUIET 6 /* Make BIO Quiet */ +#define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */ +#define BIO_CHAIN 8 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 9 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.1 From 343df3c79c62b644ce6ff5dff96c9e0be1ecb242 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2015 09:23:23 +0200 Subject: suspend: simplify block I/O handling Stop abusing struct page functionality and the swap end_io handler, and instead add a modified version of the blk-lib.c bio_batch helpers. Also move the block I/O code into swap.c as they are directly tied into each other. Signed-off-by: Christoph Hellwig Tested-by: Pavel Machek Tested-by: Ming Lin Acked-by: Pavel Machek Acked-by: Rafael J. Wysocki Signed-off-by: Jens Axboe --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index cee108c..3887472 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, void (*end_write_func)(struct bio *, int)); extern int swap_set_page_dirty(struct page *page); -extern void end_swap_bio_read(struct bio *bio, int err); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); -- cgit v1.1 From be32417796c2b8a83fe4cbece83bea96ab9e378f Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Wed, 6 May 2015 12:26:22 +0800 Subject: block: export blkdev_reread_part() and __blkdev_reread_part() This patch exports blkdev_reread_part() for block drivers, also introduce __blkdev_reread_part(). For some drivers, such as loop, reread of partitions can be run from the release path, and bd_mutex may already be held prior to calling ioctl_by_bdev(bdev, BLKRRPART, 0), so introduce __blkdev_reread_part for use in such cases. CC: Christoph Hellwig CC: Jens Axboe CC: Tejun Heo CC: Alexander Viro CC: Markus Pargmann CC: Stefan Weinhuber CC: Stefan Haberland CC: Sebastian Ott CC: Fabian Frederick CC: Ming Lei CC: David Herrmann CC: Andrew Morton CC: Peter Zijlstra CC: nbd-general@lists.sourceforge.net CC: linux-s390@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jarod Wilson Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e..1ef6390 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2279,6 +2279,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); +extern int __blkdev_reread_part(struct block_device *bdev); +extern int blkdev_reread_part(struct block_device *bdev); + #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); extern void bd_unlink_disk_holder(struct block_device *bdev, -- cgit v1.1 From 326e1dbb57368087a36607aaebe9795b8d5453e5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 22 May 2015 09:14:03 -0400 Subject: block: remove management of bi_remaining when restoring original bi_end_io Commit c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains") regressed all existing callers that followed this pattern: 1) saving a bio's original bi_end_io 2) wiring up an intermediate bi_end_io 3) restoring the original bi_end_io from intermediate bi_end_io 4) calling bio_endio() to execute the restored original bi_end_io The regression was due to BIO_CHAIN only ever getting set if bio_inc_remaining() is called. For the above pattern it isn't set until step 3 above (step 2 would've needed to establish BIO_CHAIN). As such the first bio_endio(), in step 2 above, never decremented __bi_remaining before calling the intermediate bi_end_io -- leaving __bi_remaining with the value 1 instead of 0. When bio_inc_remaining() occurred during step 3 it brought it to a value of 2. When the second bio_endio() was called, in step 4 above, it should've called the original bi_end_io but it didn't because there was an extra reference that wasn't dropped (due to atomic operations being optimized away since BIO_CHAIN wasn't set upfront). Fix this issue by removing the __bi_remaining management complexity for all callers that use the above pattern -- bio_chain() is the only interface that _needs_ to be concerned with __bi_remaining. For the above pattern callers just expect the bi_end_io they set to get called! Remove bio_endio_nodec() and also remove all bio_inc_remaining() calls that aren't associated with the bio_chain() interface. Also, the bio_inc_remaining() interface has been moved local to bio.c. Fixes: c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains") Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7486ea1..f0291cf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -427,7 +427,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } extern void bio_endio(struct bio *, int); -extern void bio_endio_nodec(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -659,17 +658,6 @@ static inline struct bio *bio_list_get(struct bio_list *bl) } /* - * Increment chain count for the bio. Make sure the CHAIN flag update - * is visible before the raised count. - */ -static inline void bio_inc_remaining(struct bio *bio) -{ - bio->bi_flags |= (1 << BIO_CHAIN); - smp_mb__before_atomic(); - atomic_inc(&bio->__bi_remaining); -} - -/* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. * These memory pools in turn all allocate from the bio_slab -- cgit v1.1 From 5f1b670d0bef508a5554d92525f5f6d00d640b38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 09:14:04 -0400 Subject: block, dm: don't copy bios for request clones Currently dm-multipath has to clone the bios for every request sent to the lower devices, which wastes cpu cycles and ties down memory. This patch instead adds a new REQ_CLONE flag that instructs req_bio_endio to not complete bios attached to a request, which we set on clone requests similar to bios in a flush sequence. With this change I/O errors on a path failure only get propagated to dm-multipath, which can then either resubmit the I/O or complete the bios on the original request. I've done some basic testing of this on a Linux target with ALUA support, and it survives path failures during I/O nicely. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 6 +----- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3f4ded0..45a6be8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -192,6 +192,7 @@ enum rq_flag_bits { __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NO_TIMEOUT, /* requests may never expire */ + __REQ_CLONE, /* cloned bios */ __REQ_NR_BITS, /* stops here */ }; @@ -246,5 +247,6 @@ enum rq_flag_bits { #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) +#define REQ_CLONE (1ULL << __REQ_CLONE) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc91795..9ded80d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -775,11 +775,7 @@ extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); -extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, - struct bio_set *bs, gfp_t gfp_mask, - int (*bio_ctr)(struct bio *, struct bio *, void *), - void *data); -extern void blk_rq_unprep_clone(struct request *rq); +extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_delay_queue(struct request_queue *, unsigned long); -- cgit v1.1