From c46effa601f869f3d20a7386a745d9c002838eb8 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 14 Oct 2013 12:59:45 +0800 Subject: Btrfs: introduce a head ref rbtree The way we process delayed refs is 1) get a bunch of head refs, 2) pick up one head ref, 3) go one node back for any delayed ref updates. The head ref is also linked in the same rbtree as the delayed ref is, so in stage 1) we have to walk the nodes one by one, including not only head refs but also delayed refs. When we have a great number of delayed refs pending to process, this costs a lot of time. Here we introduce a head-ref-specific rbtree that contains only head refs, so the problem goes away. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8072cfa..435ef13 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3842,6 +3842,9 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, ref->in_tree = 0; rb_erase(&ref->rb_node, &delayed_refs->root); + if (head) + rb_erase(&head->href_node, &delayed_refs->href_root); + delayed_refs->num_entries--; spin_unlock(&delayed_refs->lock); if (head) { -- cgit v1.1 From 5ac1d209f11271fbfad0fa31ba56ec64c142d9ea Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 1 Nov 2013 13:06:58 -0400 Subject: btrfs: publish per-super attributes in sysfs This patch adds per-super attributes to sysfs. It doesn't publish any attributes yet, but does the proper lifetime handling as well as the basic infrastructure to add new attributes. 
Signed-off-by: Jeff Mahoney Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 435ef13..81f3433 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -48,6 +48,7 @@ #include "rcu-string.h" #include "dev-replace.h" #include "raid56.h" +#include "sysfs.h" #ifdef CONFIG_X86 #include @@ -2743,6 +2744,12 @@ retry_root_backup: btrfs_close_extra_devices(fs_info, fs_devices, 1); + ret = btrfs_sysfs_add_one(fs_info); + if (ret) { + pr_err("btrfs: failed to init sysfs interface: %d\n", ret); + goto fail_block_groups; + } + ret = btrfs_init_space_info(fs_info); if (ret) { printk(KERN_ERR "Failed to initial space info: %d\n", ret); @@ -3584,6 +3591,8 @@ int close_ctree(struct btrfs_root *root) percpu_counter_sum(&fs_info->delalloc_bytes)); } + btrfs_sysfs_remove_one(fs_info); + del_fs_roots(fs_info); btrfs_free_block_groups(fs_info); -- cgit v1.1 From 71db2a7751b6c4befad06e66a26d7f2b8dd3c1b9 Mon Sep 17 00:00:00 2001 From: Valentina Giusti Date: Mon, 4 Nov 2013 22:34:23 +0100 Subject: btrfs: remove unused variables from disk-io.c Remove unused variables: * tree from csum_dirty_buffer, * tree from btree_readpage_end_io_hook, * tree from btree_writepages, * bytenr from btrfs_create_tree, * fs_info from end_workqueue_fn. 
Signed-off-by: Valentina Giusti Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81f3433..d846673 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -465,13 +465,10 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { - struct extent_io_tree *tree; u64 start = page_offset(page); u64 found_start; struct extent_buffer *eb; - tree = &BTRFS_I(page->mapping->host)->io_tree; - eb = (struct extent_buffer *)page->private; if (page != eb->pages[0]) return 0; @@ -570,7 +567,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) { - struct extent_io_tree *tree; u64 found_start; int found_level; struct extent_buffer *eb; @@ -581,7 +577,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, if (!page->private) goto out; - tree = &BTRFS_I(page->mapping->host)->io_tree; eb = (struct extent_buffer *)page->private; /* the pending IO might have been the only thing that kept this buffer @@ -968,11 +963,9 @@ static int btree_migratepage(struct address_space *mapping, static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_io_tree *tree; struct btrfs_fs_info *fs_info; int ret; - tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { if (wbc->for_kupdate) @@ -1274,7 +1267,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root; struct btrfs_key key; int ret = 0; - u64 bytenr; uuid_le uuid; root = btrfs_alloc_root(fs_info); @@ -1296,7 +1288,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, goto fail; } - bytenr = leaf->start; memset_extent_buffer(leaf, 0, 0, sizeof(struct 
btrfs_header)); btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); @@ -1685,12 +1676,10 @@ static void end_workqueue_fn(struct btrfs_work *work) { struct bio *bio; struct end_io_wq *end_io_wq; - struct btrfs_fs_info *fs_info; int error; end_io_wq = container_of(work, struct end_io_wq, work); bio = end_io_wq->bio; - fs_info = end_io_wq->info; error = end_io_wq->error; bio->bi_private = end_io_wq->private; -- cgit v1.1 From 3f870c28990015a1fd6c67807efcdb02a75b35e1 Mon Sep 17 00:00:00 2001 From: Kelley Nielsen Date: Mon, 4 Nov 2013 19:37:39 -0800 Subject: btrfs: expand btrfs_find_item() to include find_orphan_item functionality This is the third step in bootstrapping the btrfs_find_item interface. The function find_orphan_item(), in orphan.c, is similar to the two functions already replaced by the new interface. It uses two parameters, which are already present in the interface, and is nearly identical to the function brought in in the previous patch. Replace the two calls to find_orphan_item() with calls to btrfs_find_item(), with the defined objectid and type that was used internally by find_orphan_item(), a null path, and a null key. Add a test for a null path to btrfs_find_item, and if it passes, allocate and free the path. Finally, remove find_orphan_item(). 
Signed-off-by: Kelley Nielsen Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d846673..4ecee0a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1608,7 +1608,8 @@ again: if (ret) goto fail; - ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); + ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID, + location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL); if (ret < 0) goto fail; if (ret == 0) -- cgit v1.1 From f28491e0a6c46d99cbbef0f8ef7e314afa2359c8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 16 Dec 2013 13:24:27 -0500 Subject: Btrfs: move the extent buffer radix tree into the fs_info I need to create a fake tree to test qgroups and I don't want to have to setup a fake btree_inode. The fact is we only use the radix tree for the fs_info, so everybody else who allocates an extent_io_tree is just wasting the space anyway. This patch moves the radix tree and its lock into btrfs_fs_info so there is less stuff I have to fake to do qgroup sanity tests. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ecee0a..4a1871c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1089,21 +1089,13 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr); - return eb; + return find_extent_buffer(root->fs_info, bytenr); } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_buffer *eb; - - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize); - return eb; + return alloc_extent_buffer(root->fs_info, bytenr, blocksize); } @@ -2145,6 +2137,7 @@ int open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); + INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->delayed_iputs); @@ -2158,6 +2151,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->free_chunk_lock); spin_lock_init(&fs_info->tree_mod_seq_lock); spin_lock_init(&fs_info->super_lock); + spin_lock_init(&fs_info->buffer_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->reloc_mutex); seqlock_init(&fs_info->profiles_lock); -- cgit v1.1 From efe120a067c8674a8ae21b194f0e68f098b61ee2 Mon Sep 17 00:00:00 2001 From: Frank Holton Date: Fri, 20 Dec 2013 11:37:06 -0500 Subject: Btrfs: convert printk to btrfs_ and fix BTRFS prefix 
Convert all applicable cases of printk and pr_* to the btrfs_* macros. Fix all uses of the BTRFS prefix. Signed-off-by: Frank Holton Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 106 +++++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 52 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a1871c..0400a26 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -300,11 +300,11 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, memcpy(&found, result, csum_size); read_extent_buffer(buf, &val, 0, csum_size); - printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " - "failed on %llu wanted %X found %X " - "level %d\n", - root->fs_info->sb->s_id, buf->start, - val, found, btrfs_header_level(buf)); + printk_ratelimited(KERN_INFO + "BTRFS: %s checksum verify failed on %llu wanted %X found %X " + "level %d\n", + root->fs_info->sb->s_id, buf->start, + val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) kfree(result); return 1; @@ -383,13 +383,14 @@ static int btrfs_check_super_csum(char *raw_disk_sb) ret = 1; if (ret && btrfs_super_generation(disk_sb) < 10) { - printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n"); + printk(KERN_WARNING + "BTRFS: super block crcs don't match, older mkfs detected\n"); ret = 0; } } if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { - printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n", + printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n", csum_type); ret = 1; } @@ -498,8 +499,8 @@ static int check_tree_block_fsid(struct btrfs_root *root, } #define CORRUPT(reason, eb, root, slot) \ - printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ - "root=%llu, slot=%d\n", reason, \ + btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \ + "root=%llu, slot=%d", reason, \ btrfs_header_bytenr(eb), root->objectid, slot) 
static noinline int check_leaf(struct btrfs_root *root, @@ -596,21 +597,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - printk_ratelimited(KERN_INFO "btrfs bad tree block start " + printk_ratelimited(KERN_INFO "BTRFS: bad tree block start " "%llu %llu\n", found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { - printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", + printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n", eb->start); ret = -EIO; goto err; } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { - btrfs_info(root->fs_info, "bad tree block level %d\n", + btrfs_info(root->fs_info, "bad tree block level %d", (int)btrfs_header_level(eb)); ret = -EIO; goto err; @@ -1004,8 +1005,9 @@ static void btree_invalidatepage(struct page *page, unsigned int offset, extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - printk(KERN_WARNING "btrfs warning page private not zero " - "on page %llu\n", (unsigned long long)page_offset(page)); + btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info, + "page private not zero on page %llu", + (unsigned long long)page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -2322,7 +2324,7 @@ int open_ctree(struct super_block *sb, * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). 
*/ if (btrfs_check_super_csum(bh->b_data)) { - printk(KERN_ERR "btrfs: superblock checksum mismatch\n"); + printk(KERN_ERR "BTRFS: superblock checksum mismatch\n"); err = -EINVAL; goto fail_alloc; } @@ -2341,7 +2343,7 @@ int open_ctree(struct super_block *sb, ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); if (ret) { - printk(KERN_ERR "btrfs: superblock contains fatal errors\n"); + printk(KERN_ERR "BTRFS: superblock contains fatal errors\n"); err = -EINVAL; goto fail_alloc; } @@ -2406,7 +2408,7 @@ int open_ctree(struct super_block *sb, features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) - printk(KERN_ERR "btrfs: has skinny extents\n"); + printk(KERN_ERR "BTRFS: has skinny extents\n"); /* * flag our filesystem as having big metadata blocks if @@ -2414,7 +2416,7 @@ int open_ctree(struct super_block *sb, */ if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) - printk(KERN_INFO "btrfs flagging fs with big metadata feature\n"); + printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; } @@ -2431,7 +2433,7 @@ int open_ctree(struct super_block *sb, */ if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && (sectorsize != leafsize)) { - printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes " + printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " "are not allowed for mixed block groups on %s\n", sb->s_id); goto fail_alloc; @@ -2568,12 +2570,12 @@ int open_ctree(struct super_block *sb, sb->s_blocksize_bits = blksize_bits(sectorsize); if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { - printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); + printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } if (sectorsize != PAGE_SIZE) { - printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) " + printk(KERN_WARNING "BTRFS: Incompatible 
sector size(%lu) " "found on %s\n", (unsigned long)sectorsize, sb->s_id); goto fail_sb_buffer; } @@ -2582,7 +2584,7 @@ int open_ctree(struct super_block *sb, ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { - printk(KERN_WARNING "btrfs: failed to read the system " + printk(KERN_WARNING "BTRFS: failed to read the system " "array on %s\n", sb->s_id); goto fail_sb_buffer; } @@ -2599,7 +2601,7 @@ int open_ctree(struct super_block *sb, blocksize, generation); if (!chunk_root->node || !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { - printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", + printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", sb->s_id); goto fail_tree_roots; } @@ -2611,7 +2613,7 @@ int open_ctree(struct super_block *sb, ret = btrfs_read_chunk_tree(chunk_root); if (ret) { - printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", + printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n", sb->s_id); goto fail_tree_roots; } @@ -2623,7 +2625,7 @@ int open_ctree(struct super_block *sb, btrfs_close_extra_devices(fs_info, fs_devices, 0); if (!fs_devices->latest_bdev) { - printk(KERN_CRIT "btrfs: failed to read devices on %s\n", + printk(KERN_CRIT "BTRFS: failed to read devices on %s\n", sb->s_id); goto fail_tree_roots; } @@ -2638,7 +2640,7 @@ retry_root_backup: blocksize, generation); if (!tree_root->node || !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { - printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", + printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", sb->s_id); goto recovery_tree_root; @@ -2709,20 +2711,20 @@ retry_root_backup: ret = btrfs_recover_balance(fs_info); if (ret) { - printk(KERN_WARNING "btrfs: failed to recover balance\n"); + printk(KERN_WARNING "BTRFS: failed to recover balance\n"); goto fail_block_groups; } ret = btrfs_init_dev_stats(fs_info); if (ret) { - printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", + 
printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n", ret); goto fail_block_groups; } ret = btrfs_init_dev_replace(fs_info); if (ret) { - pr_err("btrfs: failed to init dev_replace: %d\n", ret); + pr_err("BTRFS: failed to init dev_replace: %d\n", ret); goto fail_block_groups; } @@ -2730,19 +2732,19 @@ retry_root_backup: ret = btrfs_sysfs_add_one(fs_info); if (ret) { - pr_err("btrfs: failed to init sysfs interface: %d\n", ret); + pr_err("BTRFS: failed to init sysfs interface: %d\n", ret); goto fail_block_groups; } ret = btrfs_init_space_info(fs_info); if (ret) { - printk(KERN_ERR "Failed to initial space info: %d\n", ret); + printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret); goto fail_block_groups; } ret = btrfs_read_block_groups(extent_root); if (ret) { - printk(KERN_ERR "Failed to read block groups: %d\n", ret); + printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); goto fail_block_groups; } fs_info->num_tolerated_disk_barrier_failures = @@ -2750,8 +2752,8 @@ retry_root_backup: if (fs_info->fs_devices->missing_devices > fs_info->num_tolerated_disk_barrier_failures && !(sb->s_flags & MS_RDONLY)) { - printk(KERN_WARNING - "Btrfs: too many missing devices, writeable mount is not allowed\n"); + printk(KERN_WARNING "BTRFS: " + "too many missing devices, writeable mount is not allowed\n"); goto fail_block_groups; } @@ -2769,7 +2771,7 @@ retry_root_backup: if (!btrfs_test_opt(tree_root, SSD) && !btrfs_test_opt(tree_root, NOSSD) && !fs_info->fs_devices->rotating) { - printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD " + printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD " "mode\n"); btrfs_set_opt(fs_info->mount_opt, SSD); } @@ -2782,7 +2784,7 @@ retry_root_backup: 1 : 0, fs_info->check_integrity_print_mask); if (ret) - printk(KERN_WARNING "btrfs: failed to initialize" + printk(KERN_WARNING "BTRFS: failed to initialize" " integrity check module %s\n", sb->s_id); } #endif @@ -2795,7 +2797,7 @@ retry_root_backup: u64 bytenr = 
btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { - printk(KERN_WARNING "Btrfs log replay required " + printk(KERN_WARNING "BTRFS: log replay required " "on RO media\n"); err = -EIO; goto fail_qgroup; @@ -2818,7 +2820,7 @@ retry_root_backup: generation + 1); if (!log_tree_root->node || !extent_buffer_uptodate(log_tree_root->node)) { - printk(KERN_ERR "btrfs: failed to read log tree\n"); + printk(KERN_ERR "BTRFS: failed to read log tree\n"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); goto fail_trans_kthread; @@ -2852,7 +2854,7 @@ retry_root_backup: ret = btrfs_recover_relocation(tree_root); if (ret < 0) { printk(KERN_WARNING - "btrfs: failed to recover relocation\n"); + "BTRFS: failed to recover relocation\n"); err = -EINVAL; goto fail_qgroup; } @@ -2882,14 +2884,14 @@ retry_root_backup: ret = btrfs_resume_balance_async(fs_info); if (ret) { - printk(KERN_WARNING "btrfs: failed to resume balance\n"); + printk(KERN_WARNING "BTRFS: failed to resume balance\n"); close_ctree(tree_root); return ret; } ret = btrfs_resume_dev_replace_async(fs_info); if (ret) { - pr_warn("btrfs: failed to resume dev_replace\n"); + pr_warn("BTRFS: failed to resume dev_replace\n"); close_ctree(tree_root); return ret; } @@ -2897,20 +2899,20 @@ retry_root_backup: btrfs_qgroup_rescan_resume(fs_info); if (create_uuid_tree) { - pr_info("btrfs: creating UUID tree\n"); + pr_info("BTRFS: creating UUID tree\n"); ret = btrfs_create_uuid_tree(fs_info); if (ret) { - pr_warn("btrfs: failed to create the UUID tree %d\n", + pr_warn("BTRFS: failed to create the UUID tree %d\n", ret); close_ctree(tree_root); return ret; } } else if (check_uuid_tree || btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { - pr_info("btrfs: checking UUID tree\n"); + pr_info("BTRFS: checking UUID tree\n"); ret = btrfs_check_uuid_tree(fs_info); if (ret) { - pr_warn("btrfs: failed to check the UUID tree %d\n", + pr_warn("BTRFS: failed to check the UUID tree %d\n", ret); close_ctree(tree_root); return 
ret; @@ -2991,7 +2993,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) struct btrfs_device *device = (struct btrfs_device *) bh->b_private; - printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " + printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to " "I/O error on %s\n", rcu_str_deref(device->name)); /* note, we dont' set_buffer_write_io_error because we have @@ -3110,7 +3112,7 @@ static int write_dev_supers(struct btrfs_device *device, bh = __getblk(device->bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); if (!bh) { - printk(KERN_ERR "btrfs: couldn't get super " + printk(KERN_ERR "BTRFS: couldn't get super " "buffer head for bytenr %Lu\n", bytenr); errors++; continue; @@ -3177,7 +3179,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - printk_in_rcu("btrfs: disabling barriers on dev %s\n", + printk_in_rcu("BTRFS: disabling barriers on dev %s\n", rcu_str_deref(device->name)); device->nobarriers = 1; } else if (!bio_flagged(bio, BIO_UPTODATE)) { @@ -3398,7 +3400,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) total_errors++; } if (total_errors > max_errors) { - printk(KERN_ERR "btrfs: %d errors while writing supers\n", + btrfs_err(root->fs_info, "%d errors while writing supers", total_errors); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); @@ -3554,7 +3556,7 @@ int close_ctree(struct btrfs_root *root) if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); if (ret) - printk(KERN_ERR "btrfs: commit super ret %d\n", ret); + btrfs_err(root->fs_info, "commit super ret %d", ret); } if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) @@ -3571,7 +3573,7 @@ int close_ctree(struct btrfs_root *root) btrfs_free_qgroup_config(root->fs_info); if (percpu_counter_sum(&fs_info->delalloc_bytes)) { - printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n", + 
btrfs_info(root->fs_info, "at unmount delalloc count %lld", percpu_counter_sum(&fs_info->delalloc_bytes)); } @@ -3798,7 +3800,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, spin_lock(&delayed_refs->lock); if (delayed_refs->num_entries == 0) { spin_unlock(&delayed_refs->lock); - printk(KERN_INFO "delayed_refs has NO entry\n"); + btrfs_info(root->fs_info, "delayed_refs has NO entry"); return ret; } -- cgit v1.1 From e8117c26b24098496b6011aabe84e43e0189a506 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 3 Jan 2014 18:22:57 +0800 Subject: Btrfs: only fua the first superblock when writting supers According to the comments, we only intend to FUA the first superblock on each device; fix the code to match. Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0400a26..9850a51 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3133,7 +3133,10 @@ static int write_dev_supers(struct btrfs_device *device, * we fua the first super. The others we allow * to go down lazy. */ - ret = btrfsic_submit_bh(WRITE_FUA, bh); + if (i == 0) + ret = btrfsic_submit_bh(WRITE_FUA, bh); + else + ret = btrfsic_submit_bh(WRITE_SYNC, bh); if (ret) errors++; } -- cgit v1.1 From d7df2c796d7eedd72a334dc89c65e1fec8171431 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 23 Jan 2014 09:21:38 -0500 Subject: Btrfs: attach delayed ref updates to delayed ref heads Currently we have two rb-trees, one for delayed ref heads and one for all of the delayed refs, including the delayed ref heads. When we process the delayed refs we have to hold onto the delayed ref lock for all of the selecting and merging and such, which results in quite a bit of lock contention. This was solved by having a waitqueue and only one flusher at a time, however this hurts if we get a lot of delayed refs queued up. 
So instead just have an rb tree for the delayed ref heads, and then attach the delayed ref updates to an rb tree that is per delayed ref head. Then we only need to take the delayed ref lock when adding new delayed refs and when selecting a delayed ref head to process, all the rest of the time we deal with a per delayed ref head lock which will be much less contentious. The locking rules for this get a little more complicated since we have to lock up to 3 things to properly process delayed refs, but I will address that problem later. For now this passes all of xfstests and my overnight stress tests. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 79 ++++++++++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9850a51..ed23127 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3801,58 +3801,55 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, delayed_refs = &trans->delayed_refs; spin_lock(&delayed_refs->lock); - if (delayed_refs->num_entries == 0) { + if (atomic_read(&delayed_refs->num_entries) == 0) { spin_unlock(&delayed_refs->lock); btrfs_info(root->fs_info, "delayed_refs has NO entry"); return ret; } - while ((node = rb_first(&delayed_refs->root)) != NULL) { - struct btrfs_delayed_ref_head *head = NULL; + while ((node = rb_first(&delayed_refs->href_root)) != NULL) { + struct btrfs_delayed_ref_head *head; bool pin_bytes = false; - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - atomic_set(&ref->refs, 1); - if (btrfs_delayed_ref_is_head(ref)) { + head = rb_entry(node, struct btrfs_delayed_ref_head, + href_node); + if (!mutex_trylock(&head->mutex)) { + atomic_inc(&head->node.refs); + spin_unlock(&delayed_refs->lock); - head = btrfs_delayed_node_to_head(ref); - if (!mutex_trylock(&head->mutex)) { - atomic_inc(&ref->refs); - 
spin_unlock(&delayed_refs->lock); - - /* Need to wait for the delayed ref to run */ - mutex_lock(&head->mutex); - mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(ref); - - spin_lock(&delayed_refs->lock); - continue; - } - - if (head->must_insert_reserved) - pin_bytes = true; - btrfs_free_delayed_extent_op(head->extent_op); - delayed_refs->num_heads--; - if (list_empty(&head->cluster)) - delayed_refs->num_heads_ready--; - list_del_init(&head->cluster); - } - - ref->in_tree = 0; - rb_erase(&ref->rb_node, &delayed_refs->root); - if (head) - rb_erase(&head->href_node, &delayed_refs->href_root); - - delayed_refs->num_entries--; - spin_unlock(&delayed_refs->lock); - if (head) { - if (pin_bytes) - btrfs_pin_extent(root, ref->bytenr, - ref->num_bytes, 1); + mutex_lock(&head->mutex); mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(&head->node); + spin_lock(&delayed_refs->lock); + continue; + } + spin_lock(&head->lock); + while ((node = rb_first(&head->ref_root)) != NULL) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, + rb_node); + ref->in_tree = 0; + rb_erase(&ref->rb_node, &head->ref_root); + atomic_dec(&delayed_refs->num_entries); + btrfs_put_delayed_ref(ref); + cond_resched_lock(&head->lock); } - btrfs_put_delayed_ref(ref); + if (head->must_insert_reserved) + pin_bytes = true; + btrfs_free_delayed_extent_op(head->extent_op); + delayed_refs->num_heads--; + if (head->processing == 0) + delayed_refs->num_heads_ready--; + atomic_dec(&delayed_refs->num_entries); + head->node.in_tree = 0; + rb_erase(&head->href_node, &delayed_refs->href_root); + spin_unlock(&head->lock); + spin_unlock(&delayed_refs->lock); + mutex_unlock(&head->mutex); + if (pin_bytes) + btrfs_pin_extent(root, head->node.bytenr, + head->node.num_bytes, 1); + btrfs_put_delayed_ref(&head->node); cond_resched(); spin_lock(&delayed_refs->lock); } -- cgit v1.1 From 0a2b2a844af616addc87cac3cc18dcaba2a9d0fb Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 23 Jan 2014 10:54:11 -0500 Subject: 
Btrfs: throttle delayed refs better On one of our gluster clusters we noticed some pretty big lag spikes. This turned out to be because our transaction commit was taking like 3 minutes to complete. This is because we have like 30 gigs of metadata, so our global reserve would end up being the max which is like 512 MB. So our throttling code would allow a ridiculous amount of delayed refs to build up and then they'd all get run at transaction commit time, and for a cold mounted file system that could take up to 3 minutes to run. So fix the throttling to be based on both the size of the global reserve and how long it takes us to run delayed refs. This patch tracks the time it takes to run delayed refs and then only allows 1 second's worth of outstanding delayed refs at a time. This way it will auto-tune itself from cold cache up to when everything is in memory and it no longer has to go to disk. This makes our transaction commits take much less time to run. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ed23127..f0e7bbe 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb, fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; - + fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64); /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); spin_lock_init(&fs_info->reada_lock); -- cgit v1.1 From 3818aea275423236db38a2d2d0a4951bc6da2e01 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 13 Jan 2014 13:36:06 +0800 Subject: btrfs: Add noinode_cache mount option Add noinode_cache mount option for btrfs. 
Since the inode map cache involves all the btrfs_find_free_ino/return_ino things, if we simply toggle the mount_opt, an inode number obtained from the inode map cache will not be returned to the inode map cache. To keep finding and returning inodes consistent with each other, a new bit in mount_opt, CHANGE_INODE_CACHE, is introduced. CHANGE_INODE_CACHE is set/cleared in remounting, and the original INODE_MAP_CACHE is set/cleared according to CHANGE_INODE_CACHE after a successful transaction. Since find/return inode is all done between btrfs_start_transaction and btrfs_commit_transaction, this keeps the behavior consistent. Also, the noinode_cache mount option will not stop the caching_kthread. Cc: David Sterba Signed-off-by: Miao Xie Signed-off-by: Qu Wenruo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f0e7bbe..4f142c9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2776,6 +2776,10 @@ retry_root_backup: btrfs_set_opt(fs_info->mount_opt, SSD); } + /* Set the real inode map cache flag */ + if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE)) + btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE); + #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { ret = btrfsic_mount(tree_root, fs_devices, -- cgit v1.1 From 1a4319cc3c495d5b6b8e41f4d4c73b950d54c2be Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 13 Jan 2014 19:53:53 +0800 Subject: Btrfs: fix extent state leak on transaction abortion When a transaction is aborted, we fail to commit the transaction and instead do cleanup work. After that, when we umount btrfs, we free each fs root's log tree, but that happens after we unpin extents, so the extents pinned by freeing the log trees remain in memory and lead to the leak. 
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4f142c9..47c2bc2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2065,6 +2065,12 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) for (i = 0; i < ret; i++) btrfs_drop_and_free_fs_root(fs_info, gang[i]); } + + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + btrfs_free_log_root_tree(NULL, fs_info); + btrfs_destroy_pinned_extent(fs_info->tree_root, + fs_info->pinned_extents); + } } int open_ctree(struct super_block *sb, @@ -3455,10 +3461,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) btrfs_free_log(NULL, root); - btrfs_free_log_root_tree(NULL, fs_info); - } __btrfs_remove_free_space_cache(root->free_ino_pinned); __btrfs_remove_free_space_cache(root->free_ino_ctl); @@ -3569,8 +3573,6 @@ int close_ctree(struct btrfs_root *root) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) btrfs_error_commit_super(root); - btrfs_put_block_group_cache(fs_info); - kthread_stop(fs_info->transaction_kthread); kthread_stop(fs_info->cleaner_kthread); @@ -3588,6 +3590,8 @@ int close_ctree(struct btrfs_root *root) del_fs_roots(fs_info); + btrfs_put_block_group_cache(fs_info); + btrfs_free_block_groups(fs_info); btrfs_stop_all_workers(fs_info); -- cgit v1.1 From 2365dd3ca02bbb6d3412404482e1d85752549953 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 22 Jan 2014 11:15:51 +0800 Subject: btrfs: undo sysfs when open_ctree() fails reproducer: mkfs.btrfs -f /dev/sdb &&\ mount /dev/sdb /btrfs &&\ btrfs dev add -f /dev/sdc /btrfs &&\ umount /btrfs &&\ wipefs -a /dev/sdc &&\ 
mount -o degraded /dev/sdb /btrfs //above mount fails so try with RO mount -o degraded,ro /dev/sdb /btrfs ------ sysfs: cannot create duplicate filename '/fs/btrfs/3f48c79e-5ed0-4e87-b189-86e749e503f4' :: dump_stack+0x49/0x5e warn_slowpath_common+0x87/0xb0 warn_slowpath_fmt+0x41/0x50 strlcat+0x69/0x80 sysfs_warn_dup+0x87/0xa0 sysfs_add_one+0x40/0x50 create_dir+0x76/0xc0 sysfs_create_dir_ns+0x7a/0xc0 kobject_add_internal+0xad/0x220 kobject_add_varg+0x38/0x60 kobject_init_and_add+0x53/0x70 mutex_lock+0x11/0x40 __free_pages+0x25/0x30 free_pages+0x41/0x50 selinux_sb_copy_data+0x14e/0x1e0 mount_fs+0x3e/0x1a0 vfs_kern_mount+0x71/0x120 do_mount+0x3f7/0x980 SyS_mount+0x8b/0xe0 system_call_fastpath+0x16/0x1b ------ further 'modprobe -r btrfs' fails as well Signed-off-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 47c2bc2..7619147 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2745,13 +2745,13 @@ retry_root_backup: ret = btrfs_init_space_info(fs_info); if (ret) { printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret); - goto fail_block_groups; + goto fail_sysfs; } ret = btrfs_read_block_groups(extent_root); if (ret) { printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); - goto fail_block_groups; + goto fail_sysfs; } fs_info->num_tolerated_disk_barrier_failures = btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); @@ -2760,13 +2760,13 @@ retry_root_backup: !(sb->s_flags & MS_RDONLY)) { printk(KERN_WARNING "BTRFS: " "too many missing devices, writeable mount is not allowed\n"); - goto fail_block_groups; + goto fail_sysfs; } fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) - goto fail_block_groups; + goto fail_sysfs; fs_info->transaction_kthread = 
kthread_run(transaction_kthread, tree_root, @@ -2948,6 +2948,9 @@ fail_cleaner: */ filemap_write_and_wait(fs_info->btree_inode->i_mapping); +fail_sysfs: + btrfs_sysfs_remove_one(fs_info); + fail_block_groups: btrfs_put_block_group_cache(fs_info); btrfs_free_block_groups(fs_info); -- cgit v1.1