From c404e0dc2c843b154f9a36c3aec10d0a715d88eb Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 30 Jan 2014 16:46:55 +0800 Subject: Btrfs: fix use-after-free in the finishing procedure of the device replace During device replace test, we hit a null pointer deference (It was very easy to reproduce it by running xfstests' btrfs/011 on the devices with the virtio scsi driver). There were two bugs that caused this problem: - We might allocate new chunks on the replaced device after we updated the mapping tree. And we forgot to replace the source device in those mapping of the new chunks. - We might get the mapping information which including the source device before the mapping information update. And then submit the bio which was based on that mapping information after we freed the source device. For the first bug, we can fix it by doing mapping tree update and source device remove in the same context of the chunk mutex. The chunk mutex is used to protect the allocable device list, the above method can avoid the new chunk allocation, and after we remove the source device, all the new chunks will be allocated on the new device. So it can fix the first bug. For the second bug, we need make sure all flighting bios are finished and no new bios are produced during we are removing the source device. To fix this problem, we introduced a global @bio_counter, we not only inc/dec @bio_counter outsize of map_blocks, but also inc it before submitting bio and dec @bio_counter when ending bios. Since Raid56 is a little different and device replace dosen't support raid56 yet, it is not addressed in the patch and I add comments to make sure we will fix it in the future. Reported-by: Qu Wenruo Signed-off-by: Wang Shilong Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/volumes.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b68afe32..07629e9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5263,6 +5263,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, static void btrfs_end_bio(struct bio *bio, int err) { struct btrfs_bio *bbio = bio->bi_private; + struct btrfs_device *dev = bbio->stripes[0].dev; int is_orig_bio = 0; if (err) { @@ -5270,7 +5271,6 @@ static void btrfs_end_bio(struct bio *bio, int err) if (err == -EIO || err == -EREMOTEIO) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; - struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; @@ -5292,6 +5292,8 @@ static void btrfs_end_bio(struct bio *bio, int err) if (bio == bbio->orig_bio) is_orig_bio = 1; + btrfs_bio_counter_dec(bbio->fs_info); + if (atomic_dec_and_test(&bbio->stripes_pending)) { if (!is_orig_bio) { bio_put(bio); @@ -5440,6 +5442,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, } #endif bio->bi_bdev = dev->bdev; + + btrfs_bio_counter_inc_noblocked(root->fs_info); + if (async) btrfs_schedule_bio(root, dev, rw, bio); else @@ -5508,28 +5513,38 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, length = bio->bi_size; map_length = length; + btrfs_bio_counter_inc_blocked(root->fs_info); ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, mirror_num, &raid_map); - if (ret) /* -ENOMEM */ + if (ret) { + btrfs_bio_counter_dec(root->fs_info); return ret; + } total_devs = bbio->num_stripes; bbio->orig_bio = first_bio; bbio->private = first_bio->bi_private; bbio->end_io = first_bio->bi_end_io; + bbio->fs_info = root->fs_info; atomic_set(&bbio->stripes_pending, bbio->num_stripes); if (raid_map) { /* In this case, map_length has been set to the length of a single stripe; not the whole write */ if (rw & WRITE) { - return raid56_parity_write(root, bio, bbio, - raid_map, map_length); + ret = raid56_parity_write(root, bio, bbio, + raid_map, map_length); } else { - return raid56_parity_recover(root, bio, bbio, - raid_map, map_length, - mirror_num); + ret = raid56_parity_recover(root, bio, bbio, + raid_map, map_length, + mirror_num); } + /* + * FIXME, replace dosen't support raid56 yet, please fix + * it in the future. + */ + btrfs_bio_counter_dec(root->fs_info); + return ret; } if (map_length < length) { @@ -5571,6 +5586,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, async_submit); dev_nr++; } + btrfs_bio_counter_dec(root->fs_info); return 0; } -- cgit v1.1 From f5961d41d7575faa6e2905daa08650aa388ba9d0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:02 +0800 Subject: btrfs: Cleanup the unused struct async_sched. The struct async_sched is not used by any codes and can be removed. Signed-off-by: Qu Wenruo Reviewed-by: Josef Bacik Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/volumes.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 07629e9..82a63b1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5323,13 +5323,6 @@ static void btrfs_end_bio(struct bio *bio, int err) } } -struct async_sched { - struct bio *bio; - int rw; - struct btrfs_fs_info *info; - struct btrfs_work work; -}; - /* * see run_scheduled_bios for a description of why bios are collected for * async submit. -- cgit v1.1 From a8c93d4ef6f6727764a61a2ee1c1878a755637c5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:08 +0800 Subject: btrfs: Replace fs_info->submit_workers with btrfs_workqueue. Much like the fs_info->workers, replace the fs_info->submit_workers use the same btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/volumes.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 82a63b1..0066cff 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -415,7 +415,8 @@ loop_lock: device->running_pending = 1; spin_unlock(&device->io_lock); - btrfs_requeue_work(&device->work); + btrfs_queue_work(fs_info->submit_workers, + &device->work); goto done; } /* unplug every 64 requests just for good measure */ @@ -439,7 +440,7 @@ done: blk_finish_plug(&plug); } -static void pending_bios_fn(struct btrfs_work *work) +static void pending_bios_fn(struct btrfs_work_struct *work) { struct btrfs_device *device; @@ -5379,8 +5380,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, spin_unlock(&device->io_lock); if (should_queue) - btrfs_queue_worker(&root->fs_info->submit_workers, - &device->work); + btrfs_queue_work(root->fs_info->submit_workers, + &device->work); } static int bio_size_ok(struct block_device *bdev, struct bio *bio, @@ -5668,7 +5669,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, else generate_random_uuid(dev->uuid); - dev->work.func = pending_bios_fn; + btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); return dev; } -- cgit v1.1 From d458b0540ebd728b4d6ef47cc5ef0dbfd4dd361a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:19 +0800 Subject: btrfs: Cleanup the "_struct" suffix in btrfs_workequeue Since the "_struct" suffix is mainly used for distinguish the differnt btrfs_work between the original and the newly created one, there is no need using the suffix since all btrfs_workers are changed into btrfs_workqueue. Also this patch fixed some codes whose code style is changed due to the too long "_struct" suffix. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0066cff..b4660c4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -440,7 +440,7 @@ done: blk_finish_plug(&plug); } -static void pending_bios_fn(struct btrfs_work_struct *work) +static void pending_bios_fn(struct btrfs_work *work) { struct btrfs_device *device; -- cgit v1.1