From 229309caebe4508d650bb6d8f7d51f2b116f5bbd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 8 May 2011 19:09:53 -0400 Subject: jbd2: Fix forever sleeping process in do_get_write_access() In do_get_write_access() we wait on BH_Unshadow bit for buffer to get from shadow state. The waking code in journal_commit_transaction() has a bug because it does not issue a memory barrier after the buffer is moved from the shadow state and before wake_up_bit() is called. Thus a waitqueue check can happen before the buffer is actually moved from the shadow state and waiting process may never be woken. Fix the problem by issuing proper barrier. Reported-by: Tao Ma Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6e28000..78c2992 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -760,8 +760,13 @@ wait_for_iobuf: required. */ JBUFFER_TRACE(jh, "file as BJ_Forget"); jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); - /* Wake up any transactions which were waiting for this - IO to complete */ + /* + * Wake up any transactions which were waiting for this IO to + * complete. The barrier must be here so that changes by + * jbd2_journal_file_buffer() take effect before wake_up_bit() + * does the waitqueue check. + */ + smp_mb(); wake_up_bit(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "brelse shadowed buffer"); __brelse(bh); -- cgit v1.1 From 81be12c8179c1c397d3f179cdd9b3f7146cf47f1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 May 2011 11:52:40 -0400 Subject: jbd2: fix sending of data flush on journal commit In data=ordered mode, it's theoretically possible (however rare) that an inode is filed to transaction's t_inode_list and a flusher thread writes all the data and inode is reclaimed before the transaction starts to commit. In such a case, we could erroneously omit sending a flush to file system device when it is different from the journal device (because data can still be in disk cache only). Fix the problem by setting a flag in a transaction when some inode is added to it and then send disk flush in the commit code when the flag is set. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 78c2992..2d5095e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); - commit_transaction->t_flushed_data_blocks = 1; clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); @@ -683,7 +682,7 @@ start_journal_io: * then we must flush the file system device before we issue * the commit record */ - if (commit_transaction->t_flushed_data_blocks && + if (commit_transaction->t_need_data_flush && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); -- cgit v1.1 From bbd2be36910728f485ac78ea36e0f4f5a38e691e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 May 2011 11:59:18 -0400 Subject: jbd2: Add function jbd2_trans_will_send_data_barrier() Provide a function which returns whether a transaction with given tid will send a flush to the filesystem device. The function will be used by ext4 to detect whether fsync needs to send a separate flush or not. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 2d5095e..5b506e5 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -677,6 +677,10 @@ start_journal_io: err = 0; } + write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT); + commit_transaction->t_state = T_COMMIT_DFLUSH; + write_unlock(&journal->j_state_lock); /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue @@ -804,6 +808,10 @@ wait_for_iobuf: jbd2_journal_abort(journal, err); jbd_debug(3, "JBD: commit phase 5\n"); + write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); + commit_transaction->t_state = T_COMMIT_JFLUSH; + write_unlock(&journal->j_state_lock); if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { @@ -959,7 +967,7 @@ restart_loop: jbd_debug(3, "JBD: commit phase 7\n"); - J_ASSERT(commit_transaction->t_state == T_COMMIT); + J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); commit_transaction->t_start = jiffies; stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging, -- cgit v1.1