From 25389bb207987b5774182f763b9fb65ff08761c8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 23 Nov 2012 14:03:04 +0100 Subject: jbd: Fix lock ordering bug in journal_unmap_buffer() Commit 09e05d48 introduced a wait for transaction commit into journal_unmap_buffer() in the case we are truncating a buffer undergoing commit in the page stradding i_size on a filesystem with blocksize < pagesize. Sadly we forgot to drop buffer lock before waiting for transaction commit and thus deadlock is possible when kjournald wants to lock the buffer. Fix the problem by dropping the buffer lock before waiting for transaction commit. Since we are still holding page lock (and that is OK), buffer cannot disappear under us. CC: stable@vger.kernel.org # Wherever commit 09e05d48 was taken Signed-off-by: Jan Kara --- fs/jbd/transaction.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 78b7f84..7f5120b 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1961,7 +1961,9 @@ retry: spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); spin_unlock(&journal->j_state_lock); + unlock_buffer(bh); log_wait_commit(journal, tid); + lock_buffer(bh); goto retry; } /* -- cgit v1.1 From 05f564849d49499ced97913a0914b5950577d07d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 26 Nov 2012 16:29:42 -0800 Subject: proc: check vma->vm_file before dereferencing Commit 7b540d0646ce ("proc_map_files_readdir(): don't bother with grabbing files") switched proc_map_files_readdir() to use @f_mode directly instead of grabbing @file reference, but same time the test for @vm_file presence was lost leading to nil dereference. The patch brings the test back. The all proc_map_files feature is CONFIG_CHECKPOINT_RESTORE wrapped (which is set to 'n' by default) so the bug doesn't affect regular kernels. The regression is 3.7-rc1 only as far as I can tell. [gorcunov@openvz.org: provided changelog] Signed-off-by: Stanislav Kinsbursky Acked-by: Cyrill Gorcunov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 3c231ad..9e28356 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1877,8 +1877,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, if (!vma) goto out_no_vma; - result = proc_map_files_instantiate(dir, dentry, task, - (void *)(unsigned long)vma->vm_file->f_mode); + if (vma->vm_file) + result = proc_map_files_instantiate(dir, dentry, task, + (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: up_read(&mm->mmap_sem); -- cgit v1.1 From 4eff96dd5283a102e0c1cac95247090be74a38ed Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Nov 2012 16:29:51 -0800 Subject: writeback: put unused inodes to LRU after writeback completion Commit 169ebd90131b ("writeback: Avoid iput() from flusher thread") removed iget-iput pair from inode writeback. As a side effect, inodes that are dirty during iput_final() call won't be ever added to inode LRU (iput_final() doesn't add dirty inodes to LRU and later when the inode is cleaned there's noone to add the inode there). Thus inodes are effectively unreclaimable until someone looks them up again. The practical effect of this bug is limited by the fact that inodes are pinned by a dentry for long enough that the inode gets cleaned. But still the bug can have nasty consequences leading up to OOM conditions under certain circumstances. Following can easily reproduce the problem: for (( i = 0; i < 1000; i++ )); do mkdir $i for (( j = 0; j < 1000; j++ )); do touch $i/$j echo 2 > /proc/sys/vm/drop_caches done done then one needs to run 'sync; ls -lR' to make inodes reclaimable again. We fix the issue by inserting unused clean inodes into the LRU after writeback finishes in inode_sync_complete(). Signed-off-by: Jan Kara Reported-by: OGAWA Hirofumi Cc: Al Viro Cc: OGAWA Hirofumi Cc: Wu Fengguang Cc: Dave Chinner Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 2 ++ fs/inode.c | 16 ++++++++++++++-- fs/internal.h | 1 + 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 51ea267..3e3422f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -228,6 +228,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) static void inode_sync_complete(struct inode *inode) { inode->i_state &= ~I_SYNC; + /* If inode is clean an unused, put it into LRU now... */ + inode_add_lru(inode); /* Waiters must see I_SYNC cleared before being woken up */ smp_mb(); wake_up_bit(&inode->i_state, __I_SYNC); diff --git a/fs/inode.c b/fs/inode.c index b03c719..64999f1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -408,6 +408,19 @@ static void inode_lru_list_add(struct inode *inode) spin_unlock(&inode->i_sb->s_inode_lru_lock); } +/* + * Add inode to LRU if needed (inode is unused and clean). + * + * Needs inode->i_lock held. + */ +void inode_add_lru(struct inode *inode) +{ + if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) && + !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) + inode_lru_list_add(inode); +} + + static void inode_lru_list_del(struct inode *inode) { spin_lock(&inode->i_sb->s_inode_lru_lock); @@ -1390,8 +1403,7 @@ static void iput_final(struct inode *inode) if (!drop && (sb->s_flags & MS_ACTIVE)) { inode->i_state |= I_REFERENCED; - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - inode_lru_list_add(inode); + inode_add_lru(inode); spin_unlock(&inode->i_lock); return; } diff --git a/fs/internal.h b/fs/internal.h index 916b7cb..2f6af7f 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -110,6 +110,7 @@ extern int open_check_o_direct(struct file *f); * inode.c */ extern spinlock_t inode_sb_list_lock; +extern void inode_add_lru(struct inode *inode); /* * fs-writeback.c -- cgit v1.1