From cbedaac63481dea52327127a9f1c60f092bd6b07 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Thu, 12 Mar 2015 08:19:11 -0400
Subject: inode: add hlist_fake to avoid the inode hash lock in evict

Some filesystems don't use the VFS inode hash and fake the fact they
are hashed so that all the writeback code works correctly. However,
this means the evict() path still tries to remove the inode from the
hash, meaning that the inode_hash_lock() needs to be taken
unnecessarily. Hence under certain workloads the inode_hash_lock can
be contended even if the inode is never actually hashed.

To avoid this add hlist_fake to test if the inode isn't actually
hashed to avoid taking the hash lock on inodes that have never been
hashed.  Based on Dave Chinner's

inode: add IOP_NOTHASHED to avoid inode hash lock in evict

basd on Al's suggestions.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Dave Chinner <dchinner@redhat.com>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 84b783f..4a40fa8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2608,7 +2608,7 @@ static inline void insert_inode_hash(struct inode *inode)
 extern void __remove_inode_hash(struct inode *);
 static inline void remove_inode_hash(struct inode *inode)
 {
-	if (!inode_unhashed(inode))
+	if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
 		__remove_inode_hash(inode);
 }
 
-- 
cgit v1.1


From 74278da9f70d84d715601fe794567a6d2bfdf078 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 4 Mar 2015 12:37:22 -0500
Subject: inode: convert inode_sb_list_lock to per-sb

The process of reducing contention on per-superblock inode lists
starts with moving the locking to match the per-superblock inode
list. This takes the global lock out of the picture and reduces the
contention problems to within a single filesystem. This doesn't get
rid of contention as the locks still have global CPU scope, but it
does isolate operations on different superblocks form each other.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Dave Chinner <dchinner@redhat.com>
---
 include/linux/fs.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a40fa8..09bbd38 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1309,7 +1309,6 @@ struct super_block {
 #endif
 	const struct xattr_handler **s_xattr;
 
-	struct list_head	s_inodes;	/* all inodes */
 	struct hlist_bl_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
 	struct block_device	*s_bdev;
@@ -1380,6 +1379,10 @@ struct super_block {
 	 * Indicates how deep in a filesystem stack this SB is
 	 */
 	int s_stack_depth;
+
+	/* s_inode_list_lock protects s_inodes */
+	spinlock_t		s_inode_list_lock ____cacheline_aligned_in_smp;
+	struct list_head	s_inodes;	/* all inodes */
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
-- 
cgit v1.1


From e97fedb9ef9868ff24d588be781906cf7c1b59ae Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 4 Mar 2015 13:40:00 -0500
Subject: sync: serialise per-superblock sync operations

When competing sync(2) calls walk the same filesystem, they need to
walk the list of inodes on the superblock to find all the inodes
that we need to wait for IO completion on. However, when multiple
wait_sb_inodes() calls do this at the same time, they contend on the
the inode_sb_list_lock and the contention causes system wide
slowdowns. In effect, concurrent sync(2) calls can take longer and
burn more CPU than if they were serialised.

Stop the worst of the contention by adding a per-sb mutex to wrap
around wait_sb_inodes() so that we only execute one sync(2) IO
completion walk per superblock superblock at a time and hence avoid
contention being triggered by concurrent sync(2) calls.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Dave Chinner <dchinner@redhat.com>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 09bbd38..82dfc55 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1375,6 +1375,8 @@ struct super_block {
 	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
 	struct rcu_head		rcu;
 
+	struct mutex		s_sync_lock;	/* sync serialisation lock */
+
 	/*
 	 * Indicates how deep in a filesystem stack this SB is
 	 */
-- 
cgit v1.1


From c7f5408493aeb01532927b2276316797a03ed6ee Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 4 Mar 2015 14:07:22 -0500
Subject: inode: rename i_wb_list to i_io_list

There's a small consistency problem between the inode and writeback
naming. Writeback calls the "for IO" inode queues b_io and
b_more_io, but the inode calls these the "writeback list" or
i_wb_list. This makes it hard to an new "under writeback" list to
the inode, or call it an "under IO" list on the bdi because either
way we'll have writeback on IO and IO on writeback and it'll just be
confusing. I'm getting confused just writing this!

So, rename the inode "for IO" list variable to i_io_list so we can
add a new "writeback list" in a subsequent patch.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Dave Chinner <dchinner@redhat.com>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 82dfc55..34cfa60 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -636,7 +636,7 @@ struct inode {
 	unsigned long		dirtied_time_when;
 
 	struct hlist_node	i_hash;
-	struct list_head	i_wb_list;	/* backing dev IO list */
+	struct list_head	i_io_list;	/* backing dev IO list */
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */
 
-- 
cgit v1.1