summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Brian King <brking@linux.vnet.ibm.com> 2010-10-27 21:25:12 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2010-10-27 21:25:12 -0400
commit: 39e3ac2599a5f9aba499b5f8af809108e70a6163 (patch)
tree: 16c9d790029c36f217b1689b42869c8739e8ac5e
parent: 58590b06d79f7ce5ab64ff3b6d537180fa50dc84 (diff)
download: op-kernel-dev-39e3ac2599a5f9aba499b5f8af809108e70a6163.zip
download: op-kernel-dev-39e3ac2599a5f9aba499b5f8af809108e70a6163.tar.gz
jbd2: Fix I/O hang in jbd2_journal_release_jbd_inode
This fixes a hang seen in jbd2_journal_release_jbd_inode() on a lot of Power 6 systems running ext4. Once a system is in the hung state, all I/O to the affected disk is blocked indefinitely. Looking at the task list, I can see we are stuck in jbd2_journal_release_jbd_inode() waiting on a wake-up.

I added some debug code to detect this scenario and dump additional data if we were stuck in jbd2_journal_release_jbd_inode() for longer than 30 minutes. When it hit, I was able to see that i_flags was 0, i.e. JI_COMMIT_RUNNING had already been cleared while the waiter was still asleep, suggesting we missed the wake-up.

This patch changes i_flags to be an unsigned long (the type the kernel bit operations take), uses the atomic bit operators set_bit(), clear_bit() and test_bit() to access it, and adds a memory barrier, smp_mb__after_clear_bit(), between clearing the bit and calling wake_up_bit(), so that the cleared flag is visible before the wake-up looks for waiters.

Prior to applying this patch, we were regularly hitting this hang on numerous systems in our test environment. After applying the patch, the hangs no longer occur.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- fs/jbd2/commit.c 12
-rw-r--r-- fs/jbd2/journal.c 4
-rw-r--r-- include/linux/jbd2.h 2
3 files changed, 12 insertions, 6 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7c068c1..6494c81 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -26,7 +26,9 @@
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/bitops.h>
#include <trace/events/jbd2.h>
+#include <asm/system.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
@@ -236,7 +238,7 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
mapping = jinode->i_vfs_inode->i_mapping;
- jinode->i_flags |= JI_COMMIT_RUNNING;
+ set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
/*
* submit the inode data buffers. We use writepage
@@ -251,7 +253,8 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
commit_transaction->t_flushed_data_blocks = 1;
- jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
spin_unlock(&journal->j_list_lock);
@@ -272,7 +275,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
- jinode->i_flags |= JI_COMMIT_RUNNING;
+ set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) {
@@ -288,7 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
ret = err;
}
spin_lock(&journal->j_list_lock);
- jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0e8014e..75e1b5a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -42,12 +42,14 @@
#include <linux/log2.h>
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
+#include <linux/bitops.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
#include <asm/uaccess.h>
#include <asm/page.h>
+#include <asm/system.h>
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
@@ -2206,7 +2208,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
restart:
spin_lock(&journal->j_list_lock);
/* Is commit writing out inode - we have to wait */
- if (jinode->i_flags & JI_COMMIT_RUNNING) {
+ if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0b52924..2ae86aa 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -395,7 +395,7 @@ struct jbd2_inode {
struct inode *i_vfs_inode;
/* Flags of inode [j_list_lock] */
- unsigned int i_flags;
+ unsigned long i_flags;
};
struct jbd2_revoke_table_s;
OpenPOWER on IntegriCloud