From d313d7a31a752c88f7288692bd98e66d0789779b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 20 Apr 2009 15:50:09 -0400 Subject: Btrfs: add a priority queue to the async thread helpers Btrfs is using WRITE_SYNC_PLUG to send down synchronous IOs with a higher priority. But, the checksumming helper threads prevent it from being fully effective. There are two problems. First, a big queue of pending checksumming will delay the synchronous IO behind other lower priority writes. Second, the checksumming uses an ordered async work queue. The ordering makes sure that IOs are sent to the block layer in the same order they are sent to the checksumming threads. Usually this gives us less seeky IO. But, when we start mixing IO priorities, the lower priority IO can delay the higher priority IO. This patch solves both problems by adding a high priority list to the async helper threads, and a new btrfs_set_work_high_prio(), which is used to make put a new async work item onto the higher priority list. The ordering is still done on high priority IO, but all of the high priority bios are ordered separately from the low priority bios. This ordering is purely an IO optimization, it is not involved in data or metadata integrity. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9c9fb46..e21c006 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1131,7 +1131,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (will_write) { btrfs_fdatawrite_range(inode->i_mapping, pos, pos + write_bytes - 1, - WB_SYNC_NONE); + WB_SYNC_ALL); } else { balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); -- cgit v1.1 From 546888da82082555a56528730a83f0afd12f33bf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 21 Apr 2009 11:53:38 -0400 Subject: Btrfs: fix btrfs fallocate oops and deadlock Btrfs fallocate was incorrectly starting a transaction with a lock held on the extent_io tree for the file, which could deadlock. Strictly speaking it was using join_transaction which would be safe, but it is better to move the transaction outside of the lock. When preallocated extents are overwritten, btrfs_mark_buffer_dirty was being called on an unlocked buffer. This was triggering an assertion and oops because the lock is supposed to be held. The bug was calling btrfs_mark_buffer_dirty on a leaf after btrfs_del_item had been run. btrfs_del_item takes care of dirtying things, so the solution is a to skip the btrfs_mark_buffer_dirty call in this case. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e21c006..482f8db 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -830,7 +830,7 @@ again: ret = btrfs_del_items(trans, root, path, del_slot, del_nr); BUG_ON(ret); - goto done; + goto release; } else if (split == start) { if (locked_end < extent_end) { ret = try_lock_extent(&BTRFS_I(inode)->io_tree, @@ -926,6 +926,8 @@ again: } done: btrfs_mark_buffer_dirty(leaf); + +release: btrfs_release_path(root, path); if (split_end && split == start) { split = end; -- cgit v1.1 From e980b50cda1610f1c17978d9b7fd311a9dd93877 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 24 Apr 2009 14:39:24 -0400 Subject: Btrfs: fix fallocate deadlock on inode extent lock The btrfs fallocate call takes an extent lock on the entire range being fallocated, and then runs through insert_reserved_extent on each extent as they are allocated. The problem with this is that btrfs_drop_extents may decide to try and take the same extent lock fallocate was already holding. The solution used here is to push down knowledge of the range that is already locked going into btrfs_drop_extents. It turns out that at least one other caller had the same bug. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 482f8db..da3ed96 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -363,15 +363,16 @@ out: */ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 inline_limit, u64 *hint_byte) + u64 start, u64 end, u64 locked_end, + u64 inline_limit, u64 *hint_byte) { u64 extent_end = 0; - u64 locked_end = end; u64 search_start = start; u64 leaf_start; u64 ram_bytes = 0; u64 orig_parent = 0; u64 disk_bytenr = 0; + u64 orig_locked_end = locked_end; u8 compression; u8 encryption; u16 other_encoding = 0; @@ -684,9 +685,9 @@ next_slot: } out: btrfs_free_path(path); - if (locked_end > end) { - unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, - GFP_NOFS); + if (locked_end > orig_locked_end) { + unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, + locked_end - 1, GFP_NOFS); } btrfs_check_file(root, inode); return ret; -- cgit v1.1 From b7967db75a38df4891b22efe1b0969b9357eb946 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Apr 2009 07:29:04 -0400 Subject: Btrfs: remove #if 0 code Btrfs had some old code sitting around under #if 0, this drops it. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 78 --------------------------------------------------------- 1 file changed, 78 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index da3ed96..1d51dc3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, return 0; } -int btrfs_check_file(struct btrfs_root *root, struct inode *inode) -{ - return 0; -#if 0 - struct btrfs_path *path; - struct btrfs_key found_key; - struct extent_buffer *leaf; - struct btrfs_file_extent_item *extent; - u64 last_offset = 0; - int nritems; - int slot; - int found_type; - int ret; - int err = 0; - u64 extent_end = 0; - - path = btrfs_alloc_path(); - ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, - last_offset, 0); - while (1) { - nritems = btrfs_header_nritems(path->nodes[0]); - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret) - goto out; - nritems = btrfs_header_nritems(path->nodes[0]); - } - slot = path->slots[0]; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != inode->i_ino) - break; - if (found_key.type != BTRFS_EXTENT_DATA_KEY) - goto out; - - if (found_key.offset < last_offset) { - WARN_ON(1); - btrfs_print_leaf(root, leaf); - printk(KERN_ERR "inode %lu found offset %llu " - "expected %llu\n", inode->i_ino, - (unsigned long long)found_key.offset, - (unsigned long long)last_offset); - err = 1; - goto out; - } - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(leaf, extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = found_key.offset + - btrfs_file_extent_num_bytes(leaf, extent); - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - struct btrfs_item *item; - item = btrfs_item_nr(leaf, slot); - extent_end = found_key.offset + - btrfs_file_extent_inline_len(leaf, extent); - extent_end = (extent_end + root->sectorsize - 1) & - ~((u64)root->sectorsize - 1); - } - last_offset = extent_end; - path->slots[0]++; - } - if (0 && last_offset < inode->i_size) { - WARN_ON(1); - btrfs_print_leaf(root, leaf); - printk(KERN_ERR "inode %lu found offset %llu size %llu\n", - inode->i_ino, (unsigned long long)last_offset, - (unsigned long long)inode->i_size); - err = 1; - - } -out: - btrfs_free_path(path); - return err; -#endif -} - /* * this is very complex, but the basic idea is to drop all extents * in the range start - end. hint_block is filled in with a block number @@ -689,7 +612,6 @@ out: unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, locked_end - 1, GFP_NOFS); } - btrfs_check_file(root, inode); return ret; } -- cgit v1.1