diff options
author | Josef Bacik <jbacik@fusionio.com> | 2012-11-09 10:53:21 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@fusionio.com> | 2012-12-16 20:46:27 -0500 |
commit | 6c760c072403f446ff829ec9e89568943a3c2ef2 (patch) | |
tree | c751a0aeb81809401df1fd87a36376034ef53a87 /fs | |
parent | 5124e00ec5b0be56155a11aec416fcc5125339f1 (diff) | |
download | op-kernel-dev-6c760c072403f446ff829ec9e89568943a3c2ef2.zip op-kernel-dev-6c760c072403f446ff829ec9e89568943a3c2ef2.tar.gz |
Btrfs: do not call file_update_time in aio_write
Calling file_update_time() starts a transaction and dirties the inode every time, which
is super expensive if you have a write-heavy workload. We will be updating
the inode when the IO completes, and we reserve the space for the inode
update when we reserve space for the write, so there is no chance of loss of
information or ENOSPC issues. Thanks,
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/file.c | 35 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 42 |
2 files changed, 48 insertions, 29 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c56088e..20452c1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1464,6 +1464,24 @@ out: return written ? written : err; } +static void update_time_for_write(struct inode *inode) +{ + struct timespec now; + + if (IS_NOCMTIME(inode)) + return; + + now = current_fs_time(inode->i_sb); + if (!timespec_equal(&inode->i_mtime, &now)) + inode->i_mtime = now; + + if (!timespec_equal(&inode->i_ctime, &now)) + inode->i_ctime = now; + + if (IS_I_VERSION(inode)) + inode_inc_iversion(inode); +} + static ssize_t btrfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) @@ -1519,11 +1537,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - err = file_update_time(file); - if (err) { - mutex_unlock(&inode->i_mutex); - goto out; - } + /* + * We reserve space for updating the inode when we reserve space for the + * extent we are going to write, so we will enospc out there. We don't + * need to start yet another transaction to update the inode as we will + * update the inode when we finish writing whatever data we write. + */ + update_time_for_write(inode); start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { @@ -1563,8 +1583,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, * this will either be one more than the running transaction * or the generation used for the next transaction if there isn't * one running right now. + * + * We also have to set last_sub_trans to the current log transid, + * otherwise subsequent syncs to a file that's been synced in this + * transaction will appear to have already occured. 
*/ BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; + BTRFS_I(inode)->last_sub_trans = root->log_transid; if (num_written > 0 || num_written == -EIOCBQUEUED) { err = generic_write_sync(file, pos, num_written); if (err < 0 && num_written > 0) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 355a297..1673dbd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1922,22 +1922,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ - ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); - if (!ret) { - if (nolock) - trans = btrfs_join_transaction_nolock(root); - else - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto out; - } - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - ret = btrfs_update_inode_fallback(trans, root, inode); - if (ret) /* -ENOMEM or corruption */ - btrfs_abort_transaction(trans, root, ret); + btrfs_ordered_update_i_size(inode, 0, ordered_extent); + if (nolock) + trans = btrfs_join_transaction_nolock(root); + else + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; } + trans->block_rsv = &root->fs_info->delalloc_block_rsv; + ret = btrfs_update_inode_fallback(trans, root, inode); + if (ret) /* -ENOMEM or corruption */ + btrfs_abort_transaction(trans, root, ret); goto out; } @@ -1986,15 +1984,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); - ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); - if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { - ret = btrfs_update_inode_fallback(trans, root, inode); - if (ret) { /* -ENOMEM or corruption */ - btrfs_abort_transaction(trans, root, ret); - goto 
out_unlock; - } - } else { - btrfs_set_inode_last_trans(trans, inode); + btrfs_ordered_update_i_size(inode, 0, ordered_extent); + ret = btrfs_update_inode_fallback(trans, root, inode); + if (ret) { /* -ENOMEM or corruption */ + btrfs_abort_transaction(trans, root, ret); + goto out_unlock; } ret = 0; out_unlock: |