diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-07-17 12:54:05 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:04 -0400 |
commit | dbe674a99c8af088faa4c95eddaeb271a3140ab6 (patch) | |
tree | c6b5a08e93d0b09df873fd5c304156622d03368a /fs/btrfs | |
parent | 247e743cbe6e655768c3679f84821e03c1577902 (diff) | |
download | op-kernel-dev-dbe674a99c8af088faa4c95eddaeb271a3140ab6.zip op-kernel-dev-dbe674a99c8af088faa4c95eddaeb271a3140ab6.tar.gz |
Btrfs: Update on disk i_size only after pending ordered extents are done
This changes the ordered data code to update i_size after the extent
is on disk. An on-disk i_size is maintained in the in-memory btrfs inode
structures, and it is updated as extents finish.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 9 | ||||
-rw-r--r-- | fs/btrfs/file.c | 2 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 28 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 89 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 2 |
5 files changed, 119 insertions, 11 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 8d03687..81c0444 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -40,11 +40,20 @@ struct btrfs_inode { */ u64 last_trans; u64 delalloc_bytes; + u64 disk_i_size; u32 flags; }; + static inline struct btrfs_inode *BTRFS_I(struct inode *inode) { return container_of(inode, struct btrfs_inode, vfs_inode); } +static inline void btrfs_i_size_write(struct inode *inode, u64 size) +{ + inode->i_size = size; + BTRFS_I(inode)->disk_i_size = size; +} + + #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2092863..3e4e5c2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -338,7 +338,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_update_inode(trans, root, inode); } failed: - err = btrfs_end_transaction(trans, root); + err = btrfs_end_transaction_throttle(trans, root); out_unlock: unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 47a008c..baf4601 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -542,6 +542,7 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); + btrfs_ordered_update_i_size(inode, ordered_extent); btrfs_remove_ordered_extent(inode, ordered_extent); /* once for us */ btrfs_put_ordered_extent(ordered_extent); @@ -792,7 +793,7 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); inode->i_uid = btrfs_inode_uid(leaf, inode_item); inode->i_gid = btrfs_inode_gid(leaf, inode_item); - inode->i_size = btrfs_inode_size(leaf, inode_item); + btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); tspec = btrfs_inode_atime(inode_item); inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); @@ -860,7 +861,7 @@ static void fill_inode_item(struct extent_buffer *leaf, { 
btrfs_set_inode_uid(leaf, item, inode->i_uid); btrfs_set_inode_gid(leaf, item, inode->i_gid); - btrfs_set_inode_size(leaf, item, inode->i_size); + btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); btrfs_set_inode_mode(leaf, item, inode->i_mode); btrfs_set_inode_nlink(leaf, item, inode->i_nlink); @@ -982,7 +983,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, err: btrfs_free_path(path); if (!ret) { - dir->i_size -= name_len * 2; + btrfs_i_size_write(dir, dir->i_size - name_len * 2); dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) @@ -1044,7 +1045,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); if (!err) { - inode->i_size = 0; + btrfs_i_size_write(inode, 0); } nr = trans->blocks_used; @@ -1089,7 +1090,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; - btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; @@ -1427,7 +1427,7 @@ void btrfs_delete_inode(struct inode *inode) goto no_delete; } - inode->i_size = 0; + btrfs_i_size_write(inode, 0); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1561,6 +1561,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; BTRFS_I(inode)->delalloc_bytes = 0; + BTRFS_I(inode)->disk_i_size = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -1869,6 +1870,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); 
BTRFS_I(inode)->delalloc_bytes = 0; + BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->root = root; if (mode & S_IFDIR) @@ -1964,7 +1966,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino); } parent_inode = dentry->d_parent->d_inode; - parent_inode->i_size += dentry->d_name.len * 2; + btrfs_i_size_write(parent_inode, parent_inode->i_size + + dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dentry->d_parent->d_inode); @@ -2092,6 +2095,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; + BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2199,7 +2203,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_fop = &btrfs_dir_file_operations; btrfs_set_trans_block_group(trans, inode); - inode->i_size = 0; + btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; @@ -2756,6 +2760,7 @@ static void btrfs_truncate(struct inode *inode) int ret; struct btrfs_trans_handle *trans; unsigned long nr; + u64 mask = root->sectorsize - 1; if (!S_ISREG(inode->i_mode)) return; @@ -2766,6 +2771,8 @@ static void btrfs_truncate(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); + btrfs_i_size_write(inode, inode->i_size); /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode, @@ -2821,7 +2828,7 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid, new_dirid); inode->i_nlink = 1; - inode->i_size = 0; + btrfs_i_size_write(inode, 0); return 
btrfs_update_inode(trans, new_root, inode); } @@ -3069,6 +3076,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; + BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -3103,7 +3111,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - inode->i_size = name_len - 1; + btrfs_i_size_write(inode, name_len - 1); err = btrfs_update_inode(trans, root, inode); if (err) drop_inode = 1; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6513270..d86a953 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -406,3 +406,92 @@ out: mutex_unlock(&tree->mutex); return entry; } + +int btrfs_ordered_update_i_size(struct inode *inode, + struct btrfs_ordered_extent *ordered) +{ + struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u64 disk_i_size; + u64 new_i_size; + u64 i_size_test; + struct rb_node *node; + struct btrfs_ordered_extent *test; + + mutex_lock(&tree->mutex); + disk_i_size = BTRFS_I(inode)->disk_i_size; + + /* + * if the disk i_size is already at the inode->i_size, or + * this ordered extent is inside the disk i_size, we're done + */ + if (disk_i_size >= inode->i_size || + ordered->file_offset + ordered->len <= disk_i_size) { + goto out; + } + + /* + * we can't update the disk_isize if there are delalloc bytes + * between disk_i_size and this ordered extent + */ + if (test_range_bit(io_tree, disk_i_size, + ordered->file_offset + ordered->len - 1, + EXTENT_DELALLOC, 0)) { + goto out; + } + /* + * walk backward from this ordered extent to disk_i_size. 
+ * if we find an ordered extent then we can't update disk i_size + * yet + */ + while(1) { + node = rb_prev(&ordered->rb_node); + if (!node) + break; + test = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (test->file_offset + test->len <= disk_i_size) + break; + if (test->file_offset >= inode->i_size) + break; + if (test->file_offset >= disk_i_size) + goto out; + } + new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); + + /* + * at this point, we know we can safely update i_size to at least + * the offset from this ordered extent. But, we need to + * walk forward and see if ios from higher up in the file have + * finished. + */ + node = rb_next(&ordered->rb_node); + i_size_test = 0; + if (node) { + /* + * do we have an area where IO might have finished + * between our ordered extent and the next one. + */ + test = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (test->file_offset > entry_end(ordered)) { + i_size_test = test->file_offset - 1; + } + } else { + i_size_test = i_size_read(inode); + } + + /* + * i_size_test is the end of a region after this ordered + * extent where there are no ordered extents. As long as there + * are no delalloc bytes in this area, it is safe to update + * disk_i_size to the end of the region. 
+ */ + if (i_size_test > entry_end(ordered) && + !test_range_bit(io_tree, entry_end(ordered), i_size_test, + EXTENT_DELALLOC, 0)) { + new_i_size = min_t(u64, i_size_test, i_size_read(inode)); + } + BTRFS_I(inode)->disk_i_size = new_i_size; +out: + mutex_unlock(&tree->mutex); + return 0; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 33292c5..40e9126 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -89,4 +89,6 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_add_ordered_pending(struct inode *inode, struct btrfs_ordered_extent *ordered, u64 start, u64 len); +int btrfs_ordered_update_i_size(struct inode *inode, + struct btrfs_ordered_extent *ordered); #endif |