From cbedaac63481dea52327127a9f1c60f092bd6b07 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 12 Mar 2015 08:19:11 -0400 Subject: inode: add hlist_fake to avoid the inode hash lock in evict Some filesystems don't use the VFS inode hash and fake the fact they are hashed so that all the writeback code works correctly. However, this means the evict() path still tries to remove the inode from the hash, meaning that the inode_hash_lock() needs to be taken unnecessarily. Hence under certain workloads the inode_hash_lock can be contended even if the inode is never actually hashed. To avoid this add hlist_fake to test if the inode isn't actually hashed to avoid taking the hash lock on inodes that have never been hashed. Based on Dave Chinner's inode: add IOP_NOTHASHED to avoid inode hash lock in evict basd on Al's suggestions. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Tested-by: Dave Chinner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 84b783f..4a40fa8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2608,7 +2608,7 @@ static inline void insert_inode_hash(struct inode *inode) extern void __remove_inode_hash(struct inode *); static inline void remove_inode_hash(struct inode *inode) { - if (!inode_unhashed(inode)) + if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash)) __remove_inode_hash(inode); } -- cgit v1.1 From 74278da9f70d84d715601fe794567a6d2bfdf078 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Mar 2015 12:37:22 -0500 Subject: inode: convert inode_sb_list_lock to per-sb The process of reducing contention on per-superblock inode lists starts with moving the locking to match the per-superblock inode list. This takes the global lock out of the picture and reduces the contention problems to within a single filesystem. This doesn't get rid of contention as the locks still have global CPU scope, but it does isolate operations on different superblocks form each other. Signed-off-by: Dave Chinner Signed-off-by: Josef Bacik Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Tested-by: Dave Chinner --- include/linux/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a40fa8..09bbd38 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1309,7 +1309,6 @@ struct super_block { #endif const struct xattr_handler **s_xattr; - struct list_head s_inodes; /* all inodes */ struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; @@ -1380,6 +1379,10 @@ struct super_block { * Indicates how deep in a filesystem stack this SB is */ int s_stack_depth; + + /* s_inode_list_lock protects s_inodes */ + spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; + struct list_head s_inodes; /* all inodes */ }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.1 From e97fedb9ef9868ff24d588be781906cf7c1b59ae Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Mar 2015 13:40:00 -0500 Subject: sync: serialise per-superblock sync operations When competing sync(2) calls walk the same filesystem, they need to walk the list of inodes on the superblock to find all the inodes that we need to wait for IO completion on. However, when multiple wait_sb_inodes() calls do this at the same time, they contend on the the inode_sb_list_lock and the contention causes system wide slowdowns. In effect, concurrent sync(2) calls can take longer and burn more CPU than if they were serialised. Stop the worst of the contention by adding a per-sb mutex to wrap around wait_sb_inodes() so that we only execute one sync(2) IO completion walk per superblock superblock at a time and hence avoid contention being triggered by concurrent sync(2) calls. Signed-off-by: Dave Chinner Signed-off-by: Josef Bacik Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Tested-by: Dave Chinner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 09bbd38..82dfc55 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1375,6 +1375,8 @@ struct super_block { struct list_lru s_inode_lru ____cacheline_aligned_in_smp; struct rcu_head rcu; + struct mutex s_sync_lock; /* sync serialisation lock */ + /* * Indicates how deep in a filesystem stack this SB is */ -- cgit v1.1 From c7f5408493aeb01532927b2276316797a03ed6ee Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 4 Mar 2015 14:07:22 -0500 Subject: inode: rename i_wb_list to i_io_list There's a small consistency problem between the inode and writeback naming. Writeback calls the "for IO" inode queues b_io and b_more_io, but the inode calls these the "writeback list" or i_wb_list. This makes it hard to an new "under writeback" list to the inode, or call it an "under IO" list on the bdi because either way we'll have writeback on IO and IO on writeback and it'll just be confusing. I'm getting confused just writing this! So, rename the inode "for IO" list variable to i_io_list so we can add a new "writeback list" in a subsequent patch. Signed-off-by: Dave Chinner Signed-off-by: Josef Bacik Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Tested-by: Dave Chinner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 82dfc55..34cfa60 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -636,7 +636,7 @@ struct inode { unsigned long dirtied_time_when; struct hlist_node i_hash; - struct list_head i_wb_list; /* backing dev IO list */ + struct list_head i_io_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ -- cgit v1.1