From 12cf93728dfba237b46001a95479829c7179cdc9 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 19 Feb 2014 19:24:17 +0800 Subject: Btrfs: device_replace: fix deadlock for nocow case commit cb7ab02156e4 cause a following deadlock found by xfstests,btrfs/011: Thread1 is commiting transaction which is blocked at btrfs_scrub_pause(). Thread2 is calling btrfs_file_aio_write() which has held inode's @i_mutex and commit transaction(blocked because Thread1 is committing transaction). Thread3 is copy_nocow_page worker which will also try to hold inode @i_mutex, so thread3 will wait Thread1 finished. Thread4 is waiting pending workers finished which will wait Thread3 finished. So the problem is like this: Thread1--->Thread4--->Thread3--->Thread2---->Thread1 Deadlock happens! we fix it by letting Thread1 go firstly, which means we won't block transaction commit while we are waiting pending workers finished. Reported-by: Qu Wenruo Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik --- fs/btrfs/scrub.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 51c342b..f2f8803e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2686,10 +2686,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); - atomic_set(&sctx->wr_ctx.flush_all_writes, 0); + atomic_inc(&fs_info->scrubs_paused); + wake_up(&fs_info->scrub_pause_wait); + + /* + * must be called before we decrease @scrub_paused. + * make sure we don't block transaction commit while + * we are waiting pending workers finished. + */ wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0); - scrub_blocked_if_needed(fs_info); + atomic_set(&sctx->wr_ctx.flush_all_writes, 0); + + mutex_lock(&fs_info->scrub_lock); + __scrub_blocked_if_needed(fs_info); + atomic_dec(&fs_info->scrubs_paused); + mutex_unlock(&fs_info->scrub_lock); + wake_up(&fs_info->scrub_pause_wait); btrfs_put_block_group(cache); if (ret) -- cgit v1.1 From 32a447896c7b4c5cf5502a3ca7f5ef57d498fb03 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 19 Feb 2014 19:24:19 +0800 Subject: Btrfs: wake up @scrub_pause_wait as much as we can check if @scrubs_running=@scrubs_paused condition inside wait_event() is not an atomic operation which means we may inc/dec @scrub_running/ paused at any time. Let's wake up @scrub_pause_wait as much as we can to let commit transaction blocked less. An example below: Thread1 Thread2 |->scrub_blocked_if_needed() |->scrub_pending_trans_workers_inc |->increase @scrub_paused |->increase @scrub_running |->wake up scrub_pause_wait list |->scrub blocked |->increase @scrub_paused Thread3 is commiting transaction which is blocked at btrfs_scrub_pause(). So after Thread2 increase @scrub_paused, we meet the condition @scrub_paused=@scrub_running, but transaction will be still blocked until another calling to wake up @scrub_pause_wait. Signed-off-by: Wang Shilong Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/scrub.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f2f8803e..682ec3fc 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -315,6 +315,16 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) atomic_inc(&fs_info->scrubs_running); atomic_inc(&fs_info->scrubs_paused); mutex_unlock(&fs_info->scrub_lock); + + /* + * check if @scrubs_running=@scrubs_paused condition + * inside wait_event() is not an atomic operation. + * which means we may inc/dec @scrub_running/paused + * at any time. Let's wake up @scrub_pause_wait as + * much as we can to let commit transaction blocked less. + */ + wake_up(&fs_info->scrub_pause_wait); + atomic_inc(&sctx->workers_pending); } -- cgit v1.1 From 0339ef2f42bcfbb2d4021ad6f38fe20580082c85 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:17 +0800 Subject: btrfs: Replace fs_info->scrub_* workqueue with btrfs_workqueue. Replace the fs_info->scrub_* with the newly created btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/scrub.c | 93 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 43 deletions(-) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 682ec3fc..5a240f5 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -96,7 +96,8 @@ struct scrub_bio { #endif int page_count; int next_free; - struct btrfs_work work; + struct btrfs_work_struct + work; }; struct scrub_block { @@ -154,7 +155,8 @@ struct scrub_fixup_nodatasum { struct btrfs_device *dev; u64 logical; struct btrfs_root *root; - struct btrfs_work work; + struct btrfs_work_struct + work; int mirror_num; }; @@ -172,7 +174,8 @@ struct scrub_copy_nocow_ctx { int mirror_num; u64 physical_for_dev_replace; struct list_head inodes; - struct btrfs_work work; + struct btrfs_work_struct + work; }; struct scrub_warning { @@ -231,7 +234,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, u64 gen, int mirror_num, u8 *csum, int force, u64 physical_for_dev_replace); static void scrub_bio_end_io(struct bio *bio, int err); -static void scrub_bio_end_io_worker(struct btrfs_work *work); +static void scrub_bio_end_io_worker(struct btrfs_work_struct *work); static void scrub_block_complete(struct scrub_block *sblock); static void scrub_remap_extent(struct btrfs_fs_info *fs_info, u64 extent_logical, u64 extent_len, @@ -248,14 +251,14 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, struct scrub_page *spage); static void scrub_wr_submit(struct scrub_ctx *sctx); static void scrub_wr_bio_end_io(struct bio *bio, int err); -static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); +static void scrub_wr_bio_end_io_worker(struct btrfs_work_struct *work); static int write_page_nocow(struct scrub_ctx *sctx, u64 physical_for_dev_replace, struct page *page); static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, struct scrub_copy_nocow_ctx *ctx); static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, int mirror_num, u64 physical_for_dev_replace); -static void copy_nocow_pages_worker(struct btrfs_work *work); +static void copy_nocow_pages_worker(struct btrfs_work_struct *work); static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); @@ -428,7 +431,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) sbio->index = i; sbio->sctx = sctx; sbio->page_count = 0; - sbio->work.func = scrub_bio_end_io_worker; + btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, + NULL, NULL); if (i != SCRUB_BIOS_PER_SCTX - 1) sctx->bios[i]->next_free = i + 1; @@ -733,7 +737,7 @@ out: return -EIO; } -static void scrub_fixup_nodatasum(struct btrfs_work *work) +static void scrub_fixup_nodatasum(struct btrfs_work_struct *work) { int ret; struct scrub_fixup_nodatasum *fixup; @@ -997,9 +1001,10 @@ nodatasum_case: fixup_nodatasum->root = fs_info->extent_root; fixup_nodatasum->mirror_num = failed_mirror_index + 1; scrub_pending_trans_workers_inc(sctx); - fixup_nodatasum->work.func = scrub_fixup_nodatasum; - btrfs_queue_worker(&fs_info->scrub_workers, - &fixup_nodatasum->work); + btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, + NULL, NULL); + btrfs_queue_work(fs_info->scrub_workers, + &fixup_nodatasum->work); goto out; } @@ -1613,11 +1618,11 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) sbio->err = err; sbio->bio = bio; - sbio->work.func = scrub_wr_bio_end_io_worker; - btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); + btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); + btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); } -static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) +static void scrub_wr_bio_end_io_worker(struct btrfs_work_struct *work) { struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); struct scrub_ctx *sctx = sbio->sctx; @@ -2082,10 +2087,10 @@ static void scrub_bio_end_io(struct bio *bio, int err) sbio->err = err; sbio->bio = bio; - btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); + btrfs_queue_work(fs_info->scrub_workers, &sbio->work); } -static void scrub_bio_end_io_worker(struct btrfs_work *work) +static void scrub_bio_end_io_worker(struct btrfs_work_struct *work) { struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); struct scrub_ctx *sctx = sbio->sctx; @@ -2780,33 +2785,35 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, int is_dev_replace) { int ret = 0; + int flags = WQ_FREEZABLE | WQ_UNBOUND; + int max_active = fs_info->thread_pool_size; if (fs_info->scrub_workers_refcnt == 0) { if (is_dev_replace) - btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, - &fs_info->generic_worker); + fs_info->scrub_workers = + btrfs_alloc_workqueue("btrfs-scrub", flags, + 1, 4); else - btrfs_init_workers(&fs_info->scrub_workers, "scrub", - fs_info->thread_pool_size, - &fs_info->generic_worker); - fs_info->scrub_workers.idle_thresh = 4; - ret = btrfs_start_workers(&fs_info->scrub_workers); - if (ret) + fs_info->scrub_workers = + btrfs_alloc_workqueue("btrfs-scrub", flags, + max_active, 4); + if (!fs_info->scrub_workers) { + ret = -ENOMEM; goto out; - btrfs_init_workers(&fs_info->scrub_wr_completion_workers, - "scrubwrc", - fs_info->thread_pool_size, - &fs_info->generic_worker); - fs_info->scrub_wr_completion_workers.idle_thresh = 2; - ret = btrfs_start_workers( - &fs_info->scrub_wr_completion_workers); - if (ret) + } + fs_info->scrub_wr_completion_workers = + btrfs_alloc_workqueue("btrfs-scrubwrc", flags, + max_active, 2); + if (!fs_info->scrub_wr_completion_workers) { + ret = -ENOMEM; goto out; - btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, - &fs_info->generic_worker); - ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); - if (ret) + } + fs_info->scrub_nocow_workers = + btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); + if (!fs_info->scrub_nocow_workers) { + ret = -ENOMEM; goto out; + } } ++fs_info->scrub_workers_refcnt; out: @@ -2816,9 +2823,9 @@ out: static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) { if (--fs_info->scrub_workers_refcnt == 0) { - btrfs_stop_workers(&fs_info->scrub_workers); - btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); - btrfs_stop_workers(&fs_info->scrub_nocow_workers); + btrfs_destroy_workqueue(fs_info->scrub_workers); + btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); + btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); } WARN_ON(fs_info->scrub_workers_refcnt < 0); } @@ -3129,10 +3136,10 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, nocow_ctx->len = len; nocow_ctx->mirror_num = mirror_num; nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; - nocow_ctx->work.func = copy_nocow_pages_worker; + btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); INIT_LIST_HEAD(&nocow_ctx->inodes); - btrfs_queue_worker(&fs_info->scrub_nocow_workers, - &nocow_ctx->work); + btrfs_queue_work(fs_info->scrub_nocow_workers, + &nocow_ctx->work); return 0; } @@ -3154,7 +3161,7 @@ static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx) #define COPY_COMPLETE 1 -static void copy_nocow_pages_worker(struct btrfs_work *work) +static void copy_nocow_pages_worker(struct btrfs_work_struct *work) { struct scrub_copy_nocow_ctx *nocow_ctx = container_of(work, struct scrub_copy_nocow_ctx, work); -- cgit v1.1 From d458b0540ebd728b4d6ef47cc5ef0dbfd4dd361a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:19 +0800 Subject: btrfs: Cleanup the "_struct" suffix in btrfs_workequeue Since the "_struct" suffix is mainly used for distinguish the differnt btrfs_work between the original and the newly created one, there is no need using the suffix since all btrfs_workers are changed into btrfs_workqueue. Also this patch fixed some codes whose code style is changed due to the too long "_struct" suffix. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/scrub.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 5a240f5..db21a13 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -96,8 +96,7 @@ struct scrub_bio { #endif int page_count; int next_free; - struct btrfs_work_struct - work; + struct btrfs_work work; }; struct scrub_block { @@ -155,8 +154,7 @@ struct scrub_fixup_nodatasum { struct btrfs_device *dev; u64 logical; struct btrfs_root *root; - struct btrfs_work_struct - work; + struct btrfs_work work; int mirror_num; }; @@ -174,8 +172,7 @@ struct scrub_copy_nocow_ctx { int mirror_num; u64 physical_for_dev_replace; struct list_head inodes; - struct btrfs_work_struct - work; + struct btrfs_work work; }; struct scrub_warning { @@ -234,7 +231,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, u64 gen, int mirror_num, u8 *csum, int force, u64 physical_for_dev_replace); static void scrub_bio_end_io(struct bio *bio, int err); -static void scrub_bio_end_io_worker(struct btrfs_work_struct *work); +static void scrub_bio_end_io_worker(struct btrfs_work *work); static void scrub_block_complete(struct scrub_block *sblock); static void scrub_remap_extent(struct btrfs_fs_info *fs_info, u64 extent_logical, u64 extent_len, @@ -251,14 +248,14 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, struct scrub_page *spage); static void scrub_wr_submit(struct scrub_ctx *sctx); static void scrub_wr_bio_end_io(struct bio *bio, int err); -static void scrub_wr_bio_end_io_worker(struct btrfs_work_struct *work); +static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); static int write_page_nocow(struct scrub_ctx *sctx, u64 physical_for_dev_replace, struct page *page); static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, struct scrub_copy_nocow_ctx *ctx); static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, int mirror_num, u64 physical_for_dev_replace); -static void copy_nocow_pages_worker(struct btrfs_work_struct *work); +static void copy_nocow_pages_worker(struct btrfs_work *work); static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); @@ -737,7 +734,7 @@ out: return -EIO; } -static void scrub_fixup_nodatasum(struct btrfs_work_struct *work) +static void scrub_fixup_nodatasum(struct btrfs_work *work) { int ret; struct scrub_fixup_nodatasum *fixup; @@ -1622,7 +1619,7 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); } -static void scrub_wr_bio_end_io_worker(struct btrfs_work_struct *work) +static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) { struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); struct scrub_ctx *sctx = sbio->sctx; @@ -2090,7 +2087,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) btrfs_queue_work(fs_info->scrub_workers, &sbio->work); } -static void scrub_bio_end_io_worker(struct btrfs_work_struct *work) +static void scrub_bio_end_io_worker(struct btrfs_work *work) { struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); struct scrub_ctx *sctx = sbio->sctx; @@ -3161,7 +3158,7 @@ static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx) #define COPY_COMPLETE 1 -static void copy_nocow_pages_worker(struct btrfs_work_struct *work) +static void copy_nocow_pages_worker(struct btrfs_work *work) { struct scrub_copy_nocow_ctx *nocow_ctx = container_of(work, struct scrub_copy_nocow_ctx, work); -- cgit v1.1