From d7c151717a1efe289aec29fb9f94485f64262c0b Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 26 Feb 2015 10:49:20 +0800 Subject: btrfs: Fix NO_SPACE bug caused by delayed-iput Steps to reproduce: while true; do dd if=/dev/zero of=/btrfs_dir/file count=[fs_size * 75%] rm /btrfs_dir/file sync done Eventually dd fails because btrfs returns NO_SPACE. Reason: Normally, btrfs_commit_transaction() calls btrfs_run_delayed_iputs() at its end to free fs space for the next write, but sometimes that work is not done in time: the btrfs-cleaner thread has already taken the delayed iputs off the list beforehand, but only performs the iput() after the next write. This is the log: [ 2569.050776] comm=btrfs-cleaner func=btrfs_evict_inode() begin [ 2569.084280] comm=sync func=btrfs_commit_transaction() call btrfs_run_delayed_iputs() [ 2569.085418] comm=sync func=btrfs_commit_transaction() done btrfs_run_delayed_iputs() [ 2569.087554] comm=sync func=btrfs_commit_transaction() end [ 2569.191081] comm=dd begin [ 2569.790112] comm=dd func=__btrfs_buffered_write() ret=-28 [ 2569.847479] comm=btrfs-cleaner func=add_pinned_bytes() 0 + 32677888 = 32677888 [ 2569.849530] comm=btrfs-cleaner func=add_pinned_bytes() 32677888 + 23834624 = 56512512 ... [ 2569.903893] comm=btrfs-cleaner func=add_pinned_bytes() 943976448 + 21762048 = 965738496 [ 2569.908270] comm=btrfs-cleaner func=btrfs_evict_inode() end Fix: Make btrfs_commit_transaction() wait, at its end, for btrfs-cleaner's currently running delayed iputs to finish. Test: Using a script similar to the one above (but more complex): before the patch, 7 failures in 100 * 20 loops; after the patch, 0 failures in 100 * 20 loops. 
Signed-off-by: Zhao Lei Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 3 ++- fs/btrfs/extent-tree.c | 6 ++++++ fs/btrfs/inode.c | 4 ++++ 4 files changed, 13 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 10b6a75..d48b22f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1538,6 +1538,7 @@ struct btrfs_fs_info { spinlock_t delayed_iput_lock; struct list_head delayed_iputs; + struct rw_semaphore delayed_iput_sem; /* this protects tree_mod_seq_list */ spinlock_t tree_mod_seq_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62cd3b6..2ef9a4b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2487,11 +2487,12 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); - mutex_init(&fs_info->unused_bg_unpin_mutex); rwlock_init(&fs_info->tree_mod_log_lock); + mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); seqlock_init(&fs_info->profiles_lock); + init_rwsem(&fs_info->delayed_iput_sem); init_completion(&fs_info->kobj_unregister); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be4a79a..46cb1d41 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3950,6 +3950,12 @@ commit_trans: ret = btrfs_commit_transaction(trans, root); if (ret) return ret; + /* + * make sure that all running delayed iput are + * done + */ + down_write(&root->fs_info->delayed_iput_sem); + up_write(&root->fs_info->delayed_iput_sem); goto again; } else { btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 141df0c..6ef97c1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3111,6 +3111,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) if (empty) return; + 
down_read(&fs_info->delayed_iput_sem); + spin_lock(&fs_info->delayed_iput_lock); list_splice_init(&fs_info->delayed_iputs, &list); spin_unlock(&fs_info->delayed_iput_lock); @@ -3121,6 +3123,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) iput(delayed->inode); kfree(delayed); } + + up_read(&root->fs_info->delayed_iput_sem); } /* -- cgit v1.1