From dc5ff2b1d66f21c27a4c37236636dff6946437e4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 26 Jul 2016 11:38:20 +0200 Subject: writeback: Write dirty times for WB_SYNC_ALL writeback Currently we take care to handle I_DIRTY_TIME in vfs_fsync() and queue_io() so that inodes which have only dirty timestamps are properly written on fsync(2) and sync(2). However there are other call sites - most notably going through write_inode_now() - which expect inode to be clean after WB_SYNC_ALL writeback. This is not currently true as we do not clear I_DIRTY_TIME in __writeback_single_inode() even for WB_SYNC_ALL writeback in all the cases. This then resulted in the following oops because bdev_write_inode() did not clean the inode and writeback code later stumbled over a dirty inode with detached wb. general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Modules linked in: CPU: 3 PID: 32 Comm: kworker/u10:1 Not tainted 4.6.0-rc3+ #349 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: writeback wb_workfn (flush-11:0) task: ffff88006ccf1840 ti: ffff88006cda8000 task.ti: ffff88006cda8000 RIP: 0010:[] [] locked_inode_to_wb_and_lock_list+0xa2/0x750 RSP: 0018:ffff88006cdaf7d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88006ccf2050 RDX: 0000000000000000 RSI: 000000114c8a8484 RDI: 0000000000000286 RBP: ffff88006cdaf820 R08: ffff88006ccf1840 R09: 0000000000000000 R10: 000229915090805f R11: 0000000000000001 R12: ffff88006a72f5e0 R13: dffffc0000000000 R14: ffffed000d4e5eed R15: ffffffff8830cf40 FS: 0000000000000000(0000) GS:ffff88006d500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000003301bf8 CR3: 000000006368f000 CR4: 00000000000006e0 DR0: 0000000000001ec9 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Stack: ffff88006a72f680 ffff88006a72f768 ffff8800671230d8 03ff88006cdaf948 ffff88006a72f668 ffff88006a72f5e0 ffff8800671230d8 ffff88006cdaf948 ffff880065b90cc8 ffff880067123100 ffff88006cdaf970 ffffffff8188e12e Call Trace: [< inline >] inode_to_wb_and_lock_list fs/fs-writeback.c:309 [] writeback_sb_inodes+0x4de/0x1250 fs/fs-writeback.c:1554 [] __writeback_inodes_wb+0x104/0x1e0 fs/fs-writeback.c:1600 [] wb_writeback+0x7ce/0xc90 fs/fs-writeback.c:1709 [< inline >] wb_do_writeback fs/fs-writeback.c:1844 [] wb_workfn+0x2f9/0x1000 fs/fs-writeback.c:1884 [] process_one_work+0x78e/0x15c0 kernel/workqueue.c:2094 [] worker_thread+0xdb/0xfc0 kernel/workqueue.c:2228 [] kthread+0x23f/0x2d0 drivers/block/aoe/aoecmd.c:1303 [] ret_from_fork+0x22/0x50 arch/x86/entry/entry_64.S:392 Code: 05 94 4a a8 06 85 c0 0f 85 03 03 00 00 e8 07 15 d0 ff 41 80 3e 00 0f 85 64 06 00 00 49 8b 9c 24 88 01 00 00 48 89 d8 48 c1 e8 03 <42> 80 3c 28 00 0f 85 17 06 00 00 48 8b 03 48 83 c0 50 48 39 c3 RIP [< inline >] wb_get include/linux/backing-dev-defs.h:212 RIP [] locked_inode_to_wb_and_lock_list+0xa2/0x750 fs/fs-writeback.c:281 RSP ---[ end trace 986a4d314dcb2694 ]--- Fix the problem by making sure __writeback_single_inode() writes inode only with dirty times in WB_SYNC_ALL mode. Reported-by: Dmitry Vyukov Tested-by: Laurent Dufour Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 56c8fda..4d09d44 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1327,6 +1327,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) dirty = inode->i_state & I_DIRTY; if (inode->i_state & I_DIRTY_TIME) { if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + wbc->sync_mode == WB_SYNC_ALL || unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || unlikely(time_after(jiffies, (inode->dirtied_time_when + -- cgit v1.1 From 20bd723ec6a3261df5e02250cd3a1fbb09a343f2 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 27 Jul 2016 07:22:05 +0200 Subject: block: add missing group association in bio-cloning functions When a bio is cloned, the newly created bio must be associated with the same blkcg as the original bio (if BLK_CGROUP is enabled). If this operation is not performed, then the new bio is not associated with any group, and the group of the current task is returned when the group of the bio is requested. Depending on the cloning frequency, this may cause a large percentage of the bios belonging to a given group to be treated as if belonging to other groups (in most cases as if belonging to the root group). The expected group isolation may thereby be broken. This commit adds the missing association in bio-cloning functions. Fixes: da2f0f74cf7d ("Btrfs: add support for blkio controllers") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Paolo Valente Reviewed-by: Nikolay Borisov Reviewed-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/btrfs/extent_io.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cee4cb9..5850d79 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2697,12 +2697,6 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) btrfs_bio->csum = NULL; btrfs_bio->csum_allocated = NULL; btrfs_bio->end_io = NULL; - -#ifdef CONFIG_BLK_CGROUP - /* FIXME, put this into bio_clone_bioset */ - if (bio->bi_css) - bio_associate_blkcg(new, bio->bi_css); -#endif } return new; } -- cgit v1.1 From 1aee6b9a7d947d42ed84baa4cf461e9d943b80f0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Jul 2016 12:52:21 -0600 Subject: f2fs: drop bio->bi_rw manual assignment Merge 4fc29c1aa375 included this extra line, but it's not needed (or useful) since we'll bio_set_op_attrs() right after to properly set the op and flags for the bio. Signed-off-by: Jens Axboe --- fs/f2fs/data.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e262427..d64d2a5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -245,7 +245,6 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_put(bio); return -EFAULT; } - bio->bi_rw = fio->op_flags; bio_set_op_attrs(bio, fio->op, fio->op_flags); __submit_bio(fio->sbi, bio, fio->type); -- cgit v1.1 From b571bc606e714e448b00920987d77b384a6a9570 Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sat, 30 Jul 2016 16:45:48 -0500 Subject: Fixup direct bi_rw modifiers bi_rw should be using bio_set_op_attrs to set bi_rw. Signed-off-by: Shaun Tancheff Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: Mike Christie Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5850d79..881eb46 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2049,7 +2049,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, return -EIO; } bio->bi_bdev = dev->bdev; - bio->bi_rw = WRITE_SYNC; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); bio_add_page(bio, page, length, pg_offset); if (btrfsic_submit_bio_wait(bio)) { -- cgit v1.1 From abf545484d31b68777a85c5c8f5b4bcde08283eb Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 4 Aug 2016 14:23:34 -0600 Subject: mm/block: convert rw_page users to bio op use The rw_page users were not converted to use bio/req ops. As a result bdev_write_page is not passing down REQ_OP_WRITE and the IOs will be sent down as reads. Signed-off-by: Mike Christie Fixes: 4e1b2d52a80d ("block, fs, drivers: remove REQ_OP compat defs and related code") Modified by me to: 1) Drop op_flags passing into ->rw_page(), as we don't use it. 2) Make op_is_write() and friends safe to use for !CONFIG_BLOCK Signed-off-by: Jens Axboe --- fs/block_dev.c | 7 ++++--- fs/mpage.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 2033a3f..d402899 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -416,7 +416,8 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, result = blk_queue_enter(bdev->bd_queue, false); if (result) return result; - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_READ); blk_queue_exit(bdev->bd_queue); return result; } @@ -445,7 +446,6 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, struct page *page, struct writeback_control *wbc) { int result; - int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; const struct block_device_operations *ops = bdev->bd_disk->fops; if (!ops->rw_page || bdev_get_integrity(bdev)) @@ -455,7 +455,8 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, return result; set_page_writeback(page); - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_WRITE); if (result) end_page_writeback(page); else diff --git a/fs/mpage.c b/fs/mpage.c index 2ca1f39..7a09c55 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -50,7 +50,7 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - page_endio(page, bio_data_dir(bio), bio->bi_error); + page_endio(page, bio_op(bio), bio->bi_error); } bio_put(bio); -- cgit v1.1