From 5adcee1d8d32d7f305f6f5aaefdbf8f35adca177 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:20 +1100 Subject: cgroup fs: avoid switching ->d_op on live dentry Switching d_op on a live dentry is racy in general, so avoid it. In this case it is a negative dentry, which is safer, but there are still concurrent ops which may be called on d_op in that case (eg. d_revalidate). So in general a filesystem may not do this. Fix cgroupfs so as not to do this. Signed-off-by: Nick Piggin --- kernel/cgroup.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 66a416b..163c890 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -763,6 +763,8 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); * -> cgroup_mkdir. */ +static struct dentry *cgroup_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd); static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); @@ -2180,7 +2182,7 @@ static const struct file_operations cgroup_file_operations = { }; static const struct inode_operations cgroup_dir_inode_operations = { - .lookup = simple_lookup, + .lookup = cgroup_lookup, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .rename = cgroup_rename, @@ -2196,13 +2198,29 @@ static inline struct cftype *__file_cft(struct file *file) return __d_cft(file->f_dentry); } -static int cgroup_create_file(struct dentry *dentry, mode_t mode, - struct super_block *sb) +static int cgroup_delete_dentry(struct dentry *dentry) +{ + return 1; +} + +static struct dentry *cgroup_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) { - static const struct dentry_operations cgroup_dops = { + static const struct dentry_operations cgroup_dentry_operations = { + .d_delete = cgroup_delete_dentry, .d_iput = cgroup_diput, }; + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + dentry->d_op = &cgroup_dentry_operations; + d_add(dentry, NULL); + return NULL; +} + +static int cgroup_create_file(struct dentry *dentry, mode_t mode, + struct super_block *sb) +{ struct inode *inode; if (!dentry) @@ -2228,7 +2246,6 @@ static int cgroup_create_file(struct dentry *dentry, mode_t mode, inode->i_size = 0; inode->i_fop = &cgroup_file_operations; } - dentry->d_op = &cgroup_dops; d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ return 0; -- cgit v1.1 From fe15ce446beb3a33583af81ffe6c9d01a75314ed Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:23 +1100 Subject: fs: change d_delete semantics Change d_delete from a dentry deletion notification to a dentry caching advise, more like ->drop_inode. Require it to be constant and idempotent, and not take d_lock. This is how all existing filesystems use the callback anyway. This makes fine grained dentry locking of dput and dentry lru scanning much simpler. Signed-off-by: Nick Piggin --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 163c890..746055b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2198,7 +2198,7 @@ static inline struct cftype *__file_cft(struct file *file) return __d_cft(file->f_dentry); } -static int cgroup_delete_dentry(struct dentry *dentry) +static int cgroup_delete_dentry(const struct dentry *dentry) { return 1; } -- cgit v1.1 From b7ab39f631f505edc2bbdb86620d5493f995c9da Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:32 +1100 Subject: fs: dcache scale dentry refcount Make d_count non-atomic and protect it with d_lock. This allows us to ensure a 0 refcount dentry remains 0 without dcache_lock. It is also fairly natural when we start protecting many other dentry members with d_lock. Signed-off-by: Nick Piggin --- kernel/cgroup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 746055b..eb7af39 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3655,9 +3655,7 @@ again: list_del(&cgrp->sibling); cgroup_unlock_hierarchy(cgrp->root); - spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); - spin_unlock(&d->d_lock); cgroup_d_remove_dir(d); dput(d); -- cgit v1.1 From 2fd6b7f50797f2e993eea59e0a0b8c6399c811dc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:34 +1100 Subject: fs: dcache scale subdirs Protect d_subdirs and d_child with d_lock, except in filesystems that aren't using dcache_lock for these anyway (eg. using i_mutex). Note: if we change the locking rule in future so that ->d_child protection is provided only with ->d_parent->d_lock, it may allow us to reduce some locking. But it would be an exception to an otherwise regular locking scheme, so we'd have to see some good results. Probably not worthwhile. Signed-off-by: Nick Piggin --- kernel/cgroup.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index eb7af39..7b4705b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -877,23 +877,31 @@ static void cgroup_clear_directory(struct dentry *dentry) BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - d = dget_locked(d); + dget_locked_dlock(d); + spin_unlock(&d->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); spin_lock(&dcache_lock); - } + spin_lock(&dentry->d_lock); + } else + spin_unlock(&d->d_lock); node = dentry->d_subdirs.next; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); } @@ -902,10 +910,17 @@ static void cgroup_clear_directory(struct dentry *dentry) */ static void cgroup_d_remove_dir(struct dentry *dentry) { + struct dentry *parent; + cgroup_clear_directory(dentry); spin_lock(&dcache_lock); + parent = dentry->d_parent; + spin_lock(&parent->d_lock); + spin_lock(&dentry->d_lock); list_del_init(&dentry->d_u.d_child); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); remove_dir(dentry); } -- cgit v1.1 From b5c84bf6f6fa3a7dfdcb556023a62953574b60ee Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:38 +1100 Subject: fs: dcache remove dcache_lock dcache_lock no longer protects anything. remove it. Signed-off-by: Nick Piggin --- kernel/cgroup.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7b4705b..1864cb6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -876,7 +876,6 @@ static void cgroup_clear_directory(struct dentry *dentry) struct list_head *node; BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { @@ -891,18 +890,15 @@ static void cgroup_clear_directory(struct dentry *dentry) dget_locked_dlock(d); spin_unlock(&d->d_lock); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); } else spin_unlock(&d->d_lock); node = dentry->d_subdirs.next; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } /* @@ -914,14 +910,12 @@ static void cgroup_d_remove_dir(struct dentry *dentry) cgroup_clear_directory(dentry); - spin_lock(&dcache_lock); parent = dentry->d_parent; spin_lock(&parent->d_lock); spin_lock(&dentry->d_lock); list_del_init(&dentry->d_u.d_child); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); remove_dir(dentry); } -- cgit v1.1 From dc0474be3e27463d4d4a2793f82366eed906f223 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:43 +1100 Subject: fs: dcache rationalise dget variants dget_locked was a shortcut to avoid the lazy lru manipulation when we already held dcache_lock (lru manipulation was relatively cheap at that point). However, how that the lru lock is an innermost one, we never hold it at any caller, so the lock cost can now be avoided. We already have well working lazy dcache LRU, so it should be fine to defer LRU manipulations to scan time. Signed-off-by: Nick Piggin --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1864cb6..9f41470 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -887,7 +887,7 @@ static void cgroup_clear_directory(struct dentry *dentry) /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - dget_locked_dlock(d); + dget_dlock(d); spin_unlock(&d->d_lock); spin_unlock(&dentry->d_lock); d_delete(d); -- cgit v1.1 From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9f41470..51cddc1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2222,7 +2222,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &cgroup_dentry_operations; + d_set_d_op(dentry, &cgroup_dentry_operations); d_add(dentry, NULL); return NULL; } -- cgit v1.1