From 64fd72e0a44bdd62c5ca277cb24d0d02b2d8e9dc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 May 2014 09:48:44 -0400 Subject: lift the "already marked killed" case into shrink_dentry_list() It can happen only when dentry_kill() is called with unlock_on_failure equal to 0 - other callers had dentry pinned until the moment they've got ->d_lock and DCACHE_DENTRY_KILLED is set only after lockref_mark_dead(). IOW, only one of three call sites of dentry_kill() might end up reaching that code. Just move it there. Signed-off-by: Al Viro --- fs/dcache.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 42ae01e..6888dde 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -455,12 +455,6 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure) struct dentry *parent = NULL; bool can_free = true; - if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { - can_free = dentry->d_flags & DCACHE_MAY_FREE; - spin_unlock(&dentry->d_lock); - goto out; - } - inode = dentry->d_inode; if (inode && !spin_trylock(&inode->i_lock)) { relock: @@ -815,6 +809,15 @@ static void shrink_dentry_list(struct list_head *list) continue; } + + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + bool can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + if (can_free) + dentry_free(dentry); + continue; + } + parent = dentry_kill(dentry, 0); /* * If dentry_kill returns NULL, we have nothing more to do. -- cgit v1.1 From e55fd011549eae01a230e3cace6f4d031b6a3453 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 May 2014 13:51:12 -0400 Subject: split dentry_kill() ... into trylocks and everything else. The latter (actual killing) is __dentry_kill(). Signed-off-by: Al Viro --- fs/dcache.c | 62 +++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 26 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 6888dde..1577c14 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -441,36 +441,12 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); -/* - * Finish off a dentry we've decided to kill. - * dentry->d_lock must be held, returns with it unlocked. - * If ref is non-zero, then decrement the refcount too. - * Returns dentry requiring refcount drop, or NULL if we're done. - */ -static struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) - __releases(dentry->d_lock) +static void __dentry_kill(struct dentry *dentry) { - struct inode *inode; struct dentry *parent = NULL; bool can_free = true; - - inode = dentry->d_inode; - if (inode && !spin_trylock(&inode->i_lock)) { -relock: - if (unlock_on_failure) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - } - return dentry; /* try again with same dentry */ - } if (!IS_ROOT(dentry)) parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - if (inode) - spin_unlock(&inode->i_lock); - goto relock; - } /* * The dentry is now unrecoverably dead to the world. @@ -514,10 +490,44 @@ relock: can_free = false; } spin_unlock(&dentry->d_lock); -out: if (likely(can_free)) dentry_free(dentry); +} + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static struct dentry * +dentry_kill(struct dentry *dentry, int unlock_on_failure) + __releases(dentry->d_lock) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto failed; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + goto failed; + } + } + + __dentry_kill(dentry); return parent; + +failed: + if (unlock_on_failure) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + } + return dentry; /* try again with same dentry */ } /* -- cgit v1.1 From ff2fde9929feb2aef45377ce56b8b12df85dda69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 May 2014 13:59:13 -0400 Subject: expand dentry_kill(dentry, 0) in shrink_dentry_list() Result will be massaged to saner shape in the next commits. It is ugly, no questions - the point of that one is to be a provably equivalent transformation (and it might be worth splitting a bit more). Signed-off-by: Al Viro --- fs/dcache.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 1577c14..c23f78a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -801,6 +801,7 @@ static void shrink_dentry_list(struct list_head *list) struct dentry *dentry, *parent; while (!list_empty(list)) { + struct inode *inode; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); /* @@ -828,23 +829,26 @@ static void shrink_dentry_list(struct list_head *list) continue; } - parent = dentry_kill(dentry, 0); - /* - * If dentry_kill returns NULL, we have nothing more to do. - */ - if (!parent) - continue; - - if (unlikely(parent == dentry)) { - /* - * trylocks have failed and d_lock has been held the - * whole time, so it could not have been added to any - * other lists. Just add it back to the shrink list. - */ + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); continue; } + + parent = NULL; + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + d_shrink_add(dentry, list); + spin_unlock(&dentry->d_lock); + continue; + } + } + + __dentry_kill(dentry); /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also -- cgit v1.1 From 046b961b45f93a92e4c70525a12f3d378bced130 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 May 2014 08:54:52 -0400 Subject: shrink_dentry_list(): take parent's ->d_lock earlier The cause of livelocks there is that we are taking ->d_lock on dentry and its parent in the wrong order, forcing us to use trylock on the parent's one. d_walk() takes them in the right order, and unfortunately it's not hard to create a situation when shrink_dentry_list() can't make progress since trylock keeps failing, and shrink_dcache_parent() or check_submounts_and_drop() keeps calling d_walk() disrupting the very shrink_dentry_list() it's waiting for. Solution is straightforward - if that trylock fails, let's unlock the dentry itself and take locks in the right order. We need to stabilize ->d_parent without holding ->d_lock, but that's doable using RCU. And we'd better do that in the very beginning of the loop in shrink_dentry_list(), since the checks on refcount, etc. would need to be redone anyway. That deals with a half of the problem - killing dentries on the shrink list itself. Another one (dropping their parents) is in the next commit. locking parent is interesting - it would be easy to do rcu_read_lock(), lock whatever we think is a parent, lock dentry itself and check if the parent is still the right one. Except that we need to check that *before* locking the dentry, or we are risking taking ->d_lock out of order. Fortunately, once the D1 is locked, we can check if D2->d_parent is equal to D1 without the need to lock D2; D2->d_parent can start or stop pointing to D1 only under D1->d_lock, so taking D1->d_lock is enough. In other words, the right solution is rcu_read_lock/lock what looks like parent right now/check if it's still our parent/rcu_read_unlock/lock the child. Signed-off-by: Al Viro --- fs/dcache.c | 53 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index c23f78a..d54a99b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -530,6 +530,38 @@ failed: return dentry; /* try again with same dentry */ } +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + spin_unlock(&dentry->d_lock); + rcu_read_lock(); +again: + parent = ACCESS_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + /* + * We can't blindly lock dentry until we are sure + * that we won't violate the locking order. + * Any changes of dentry->d_parent must have + * been done with parent->d_lock held, so + * spin_lock() above is enough of a barrier + * for checking if it's still our child. + */ + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + goto again; + } + rcu_read_unlock(); + if (parent != dentry) + spin_lock(&dentry->d_lock); + else + parent = NULL; + return parent; +} + /* * This is dput * @@ -804,6 +836,8 @@ static void shrink_dentry_list(struct list_head *list) struct inode *inode; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); + parent = lock_parent(dentry); + /* * The dispose list is isolated and dentries are not accounted * to the LRU here, so we can simply remove it from the list @@ -817,6 +851,8 @@ static void shrink_dentry_list(struct list_head *list) */ if ((int)dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } @@ -824,6 +860,8 @@ static void shrink_dentry_list(struct list_head *list) if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { bool can_free = dentry->d_flags & DCACHE_MAY_FREE; spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); if (can_free) dentry_free(dentry); continue; @@ -833,22 +871,13 @@ static void shrink_dentry_list(struct list_head *list) if (inode && unlikely(!spin_trylock(&inode->i_lock))) { d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } - parent = NULL; - if (!IS_ROOT(dentry)) { - parent = dentry->d_parent; - if (unlikely(!spin_trylock(&parent->d_lock))) { - if (inode) - spin_unlock(&inode->i_lock); - d_shrink_add(dentry, list); - spin_unlock(&dentry->d_lock); - continue; - } - } - __dentry_kill(dentry); + /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also -- cgit v1.1 From b2b80195d8829921506880f6dccd21cabd163d0d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 May 2014 09:11:45 -0400 Subject: dealing with the rest of shrink_dentry_list() livelock We have the same problem with ->d_lock order in the inner loop, where we are dropping references to ancestors. Same solution, basically - instead of using dentry_kill() we use lock_parent() (introduced in the previous commit) to get that lock in a safe way, recheck ->d_count (in case if lock_parent() has ended up dropping and retaking ->d_lock and somebody managed to grab a reference during that window), trylock the inode->i_lock and use __dentry_kill() to do the rest. Signed-off-by: Al Viro --- fs/dcache.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index d54a99b..eb7c725 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -885,8 +885,26 @@ static void shrink_dentry_list(struct list_head *list) * fragmentation. */ dentry = parent; - while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) - dentry = dentry_kill(dentry, 1); + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { + parent = lock_parent(dentry); + if (dentry->d_lockref.count != 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + break; + } + inode = dentry->d_inode; /* can't be NULL */ + if (unlikely(!spin_trylock(&inode->i_lock))) { + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + cpu_relax(); + continue; + } + __dentry_kill(dentry); + dentry = parent; + } } } -- cgit v1.1 From 8cbf74da435d1bd13dbb790f94c7ff67b2fb6af4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 May 2014 09:18:26 -0400 Subject: dentry_kill() doesn't need the second argument now it's 1 in the only remaining caller. Signed-off-by: Al Viro --- fs/dcache.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index eb7c725..bce851d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -500,8 +500,7 @@ static void __dentry_kill(struct dentry *dentry) * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ -static struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) +static struct dentry *dentry_kill(struct dentry *dentry) __releases(dentry->d_lock) { struct inode *inode = dentry->d_inode; @@ -523,10 +522,8 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure) return parent; failed: - if (unlock_on_failure) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - } + spin_unlock(&dentry->d_lock); + cpu_relax(); return dentry; /* try again with same dentry */ } @@ -615,7 +612,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry, 1); + dentry = dentry_kill(dentry); if (dentry) goto repeat; } -- cgit v1.1 From 9f12600fe425bc28f0ccba034a77783c09c15af4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 31 May 2014 09:13:21 -0700 Subject: dcache: add missing lockdep annotation lock_parent() very much on purpose does nested locking of dentries, and is careful to maintain the right order (lock parent first). But because it didn't annotate the nested locking order, lockdep thought it might be a deadlock on d_lock, and complained. Add the proper annotation for the inner locking of the child dentry to make lockdep happy. Introduced by commit 046b961b45f9 ("shrink_dentry_list(): take parent's ->d_lock earlier"). Reported-and-tested-by: Josh Boyer Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index bce851d..be2bea8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -553,7 +553,7 @@ again: } rcu_read_unlock(); if (parent != dentry) - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); else parent = NULL; return parent; -- cgit v1.1