diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-08 06:36:39 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-08 06:36:39 +0900 |
commit | 6432f2128414edbea5fd4f6c4fa4c28d0e1c6151 (patch) | |
tree | d3c63c5f2f043ce52d98d8dfd3c9c0a7bc76ed95 /fs/jbd2 | |
parent | 1b033447bf847ba49c3816c564c9191c97456b36 (diff) | |
parent | c278531d39f3158bfee93dc67da0b77e09776de2 (diff) | |
download | op-kernel-dev-6432f2128414edbea5fd4f6c4fa4c28d0e1c6151.zip op-kernel-dev-6432f2128414edbea5fd4f6c4fa4c28d0e1c6151.tar.gz |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"The big new feature added this time is supporting online resizing
using the meta_bg feature. This allows us to resize file systems
which are greater than 16TB. In addition, the speed of online
resizing has been improved in general.
We also fix a number of races, some of which could lead to deadlocks,
in ext4's Asynchronous I/O and online defrag support, thanks to good
work by Dmitry Monakhov.
There are also a large number of more minor bug fixes and cleanups
from a number of other ext4 contributors, quite of few of which have
submitted fixes for the first time."
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (69 commits)
ext4: fix ext4_flush_completed_IO wait semantics
ext4: fix mtime update in nodelalloc mode
ext4: fix ext_remove_space for punch_hole case
ext4: punch_hole should wait for DIO writers
ext4: serialize truncate with owerwrite DIO workers
ext4: endless truncate due to nonlocked dio readers
ext4: serialize unlocked dio reads with truncate
ext4: serialize dio nonlocked reads with defrag workers
ext4: completed_io locking cleanup
ext4: fix unwritten counter leakage
ext4: give i_aiodio_unwritten a more appropriate name
ext4: ext4_inode_info diet
ext4: convert to use leXX_add_cpu()
ext4: ext4_bread usage audit
fs: reserve fallocate flag codepoint
ext4: remove redundant offset check in mext_check_arguments()
ext4: don't clear orphan list on ro mount with errors
jbd2: fix assertion failure in commit code due to lacking transaction credits
ext4: release donor reference when EXT4_IOC_MOVE_EXT ioctl fails
ext4: enable FITRIM ioctl on bigalloc file system
...
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/commit.c | 40 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 5 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 7 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 65 |
4 files changed, 84 insertions, 33 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index af5280f..3091d42 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1014,17 +1014,35 @@ restart_loop: * there's no point in keeping a checkpoint record for * it. */ - /* A buffer which has been freed while still being - * journaled by a previous transaction may end up still - * being dirty here, but we want to avoid writing back - * that buffer in the future after the "add to orphan" - * operation been committed, That's not only a performance - * gain, it also stops aliasing problems if the buffer is - * left behind for writeback and gets reallocated for another - * use in a different page. */ - if (buffer_freed(bh) && !jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + /* + * A buffer which has been freed while still being journaled by + * a previous transaction. + */ + if (buffer_freed(bh)) { + /* + * If the running transaction is the one containing + * "add to orphan" operation (b_next_transaction != + * NULL), we have to wait for that transaction to + * commit before we can really get rid of the buffer. + * So just clear b_modified to not confuse transaction + * credit accounting and refile the buffer to + * BJ_Forget of the running transaction. If the just + * committed transaction contains "add to orphan" + * operation, we can completely invalidate the buffer + * now. We are rather through in that since the + * buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial + * page. + */ + jh->b_modified = 0; + if (!jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e149b99..484b8d1 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1354,6 +1354,11 @@ static void jbd2_mark_journal_empty(journal_t *journal) BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); read_lock(&journal->j_state_lock); + /* Is it already empty? */ + if (sb->s_start == 0) { + read_unlock(&journal->j_state_lock); + return; + } jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", journal->j_tail_sequence); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 0131e43..626846b 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -289,8 +289,11 @@ int jbd2_journal_recover(journal_t *journal) if (!err) err = err2; /* Make sure all replayed data is on permanent storage */ - if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + if (journal->j_flags & JBD2_BARRIER) { + err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + if (!err) + err = err2; + } return err; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index fb1ab953..a74ba46 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * We're outside-transaction here. Either or both of j_running_transaction * and j_committing_transaction may be NULL. */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, + int partial_page) { transaction_t *transaction; struct journal_head *jh; int may_free = 1; - int ret; BUFFER_TRACE(bh, "entry"); +retry: /* * It is safe to proceed here without the j_list_lock because the * buffers cannot be stolen by try_to_free_buffers as long as we are @@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * clear the buffer dirty bit at latest at the moment when the * transaction marking the buffer as freed in the filesystem * structures is committed because from that moment on the - * buffer can be reallocated and used by a different page. + * block can be reallocated and used by a different page. * Since the block hasn't been freed yet but the inode has * already been added to orphan list, it is safe for us to add * the buffer to BJ_Forget list of the newest transaction. + * + * Also we have to clear buffer_mapped flag of a truncated buffer + * because the buffer_head may be attached to the page straddling + * i_size (can happen only when blocksize < pagesize) and thus the + * buffer_head can be reused when the file is extended again. So we end + * up keeping around invalidated buffers attached to transactions' + * BJ_Forget list just to stop checkpointing code from cleaning up + * the transaction this buffer was modified in. */ transaction = jh->b_transaction; if (transaction == NULL) { @@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * committed, the buffer won't be needed any * longer. */ JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_running_transaction); - jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - write_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* There is no currently-running transaction. So the * orphan record which we wrote for this file must have @@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * the committing transaction, if it exists. */ if (journal->j_committing_transaction) { JBUFFER_TRACE(jh, "give to committing trans"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_committing_transaction); - jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - write_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* The orphan record's transaction has * committed. We can cleanse this buffer */ @@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) JBUFFER_TRACE(jh, "on committing transaction"); /* * The buffer is committing, we simply cannot touch - * it. So we just set j_next_transaction to the - * running transaction (if there is one) and mark - * buffer as freed so that commit code knows it should - * clear dirty bits when it is done with the buffer. + * it. If the page is straddling i_size we have to wait + * for commit and try again. + */ + if (partial_page) { + tid_t tid = journal->j_committing_transaction->t_tid; + + jbd2_journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + write_unlock(&journal->j_state_lock); + jbd2_log_wait_commit(journal, tid); + goto retry; + } + /* + * OK, buffer won't be reachable after truncate. We just set + * j_next_transaction to the running transaction (if there is + * one) and mark buffer as freed so that commit code knows it + * should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) @@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } zap_buffer: + /* + * This is tricky. Although the buffer is truncated, it may be reused + * if blocksize < pagesize and it is attached to the page straddling + * EOF. Since the buffer might have been added to BJ_Forget list of the + * running transaction, journal_get_write_access() won't clear + * b_modified and credit accounting gets confused. So clear b_modified + * here. + */ + jh->b_modified = 0; jbd2_journal_put_journal_head(jh); zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); @@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal, if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh); + may_free &= journal_unmap_buffer(journal, bh, + offset > 0); unlock_buffer(bh); } curr_off = next_off; |