diff options
author | David S. Miller <davem@davemloft.net> | 2009-02-24 03:50:29 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-02-24 03:50:29 -0800 |
commit | e70049b9e74267dd47e1ffa62302073487afcb48 (patch) | |
tree | 2cd000c0751ef31c9044b020d63f278cdf4f332d /fs/jbd2/transaction.c | |
parent | d18921a0e319ab512f8186b1b1142c7b8634c779 (diff) | |
parent | f7e603ad8f78cd3b59e33fa72707da0cbabdf699 (diff) | |
download | op-kernel-dev-e70049b9e74267dd47e1ffa62302073487afcb48.zip op-kernel-dev-e70049b9e74267dd47e1ffa62302073487afcb48.tar.gz |
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- | fs/jbd2/transaction.c | 42 |
1 files changed, 31 insertions, 11 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 46b4e34..28ce21d 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2129,26 +2129,46 @@ done: } /* - * This function must be called when inode is journaled in ordered mode - * before truncation happens. It starts writeout of truncated part in - * case it is in the committing transaction so that we stand to ordered - * mode consistency guarantees. + * File truncate and transaction commit interact with each other in a + * non-trivial way. If a transaction writing data block A is + * committing, we cannot discard the data by truncate until we have + * written them. Otherwise if we crashed after the transaction with + * write has committed but before the transaction with truncate has + * committed, we could see stale data in block A. This function is a + * helper to solve this problem. It starts writeout of the truncated + * part in case it is in the committing transaction. + * + * Filesystem code must call this function when inode is journaled in + * ordered mode before truncation happens and after the inode has been + * placed on orphan list with the new inode size. The second condition + * avoids the race that someone writes new data and we start + * committing the transaction after this function has been called but + * before a transaction for truncate is started (and furthermore it + * allows us to optimize the case where the addition to orphan list + * happens in the same transaction as write --- we don't have to write + * any data in such case). */ -int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, +int jbd2_journal_begin_ordered_truncate(journal_t *journal, + struct jbd2_inode *jinode, loff_t new_size) { - journal_t *journal; - transaction_t *commit_trans; + transaction_t *inode_trans, *commit_trans; int ret = 0; - if (!inode->i_transaction && !inode->i_next_transaction) + /* This is a quick check to avoid locking if not necessary */ + if (!jinode->i_transaction) goto out; - journal = inode->i_transaction->t_journal; + /* Locks are here just to force reading of recent values, it is + * enough that the transaction was not committing before we started + * a transaction adding the inode to orphan list */ spin_lock(&journal->j_state_lock); commit_trans = journal->j_committing_transaction; spin_unlock(&journal->j_state_lock); - if (inode->i_transaction == commit_trans) { - ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, + spin_lock(&journal->j_list_lock); + inode_trans = jinode->i_transaction; + spin_unlock(&journal->j_list_lock); + if (inode_trans == commit_trans) { + ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, new_size, LLONG_MAX); if (ret) jbd2_journal_abort(journal, ret); |