From a3b3c5627c8301ac850962b04f645dfab81e6a60 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Apr 2015 20:33:53 -0500 Subject: mnt: Use hlist_move_list in namespace_unlock Small cleanup to make the code more readable and maintainable. Signed-off-by: Eric Biederman --- fs/namespace.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 82ef140..e1ee572 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1298,17 +1298,15 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static void namespace_unlock(void) { - struct hlist_head head = unmounted; + struct hlist_head head; - if (likely(hlist_empty(&head))) { - up_write(&namespace_sem); - return; - } + hlist_move_list(&unmounted, &head); - head.first->pprev = &head.first; - INIT_HLIST_HEAD(&unmounted); up_write(&namespace_sem); + if (likely(hlist_empty(&head))) + return; + synchronize_rcu(); group_pin_kill(&head); -- cgit v1.1 From e819f152104c9f7c9fe50e1aecce6f5d4bf06d65 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 24 Dec 2014 07:20:01 -0600 Subject: mnt: Improve the umount_tree flags - Remove the unneeded declaration from pnode.h - Mark umount_tree static as it has no callers outside of namespace.c - Define an enumeration of umount_tree's flags. - Pass umount_tree's flags in by name This removes the magic numbers 0, 1 and 2 making the code a little clearer and makes it possible for there to be lazy unmounts that don't propagate. Which is what __detach_mounts actually wants for example. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 31 ++++++++++++++++--------------- fs/pnode.h | 1 - 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index e1ee572..e06e367 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1317,14 +1317,15 @@ static inline void namespace_lock(void) down_write(&namespace_sem); } +enum umount_tree_flags { + UMOUNT_SYNC = 1, + UMOUNT_PROPAGATE = 2, +}; /* * mount_lock must be held * namespace_sem must be held for write - * how = 0 => just this tree, don't propagate - * how = 1 => propagate; we know that nobody else has reference to any victims - * how = 2 => lazy umount */ -void umount_tree(struct mount *mnt, int how) +static void umount_tree(struct mount *mnt, enum umount_tree_flags how) { HLIST_HEAD(tmp_list); struct mount *p; @@ -1337,7 +1338,7 @@ void umount_tree(struct mount *mnt, int how) hlist_for_each_entry(p, &tmp_list, mnt_hash) list_del_init(&p->mnt_child); - if (how) + if (how & UMOUNT_PROPAGATE) propagate_umount(&tmp_list); while (!hlist_empty(&tmp_list)) { @@ -1347,7 +1348,7 @@ void umount_tree(struct mount *mnt, int how) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; - if (how < 2) + if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); @@ -1445,14 +1446,14 @@ static int do_umount(struct mount *mnt, int flags) if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 2); + umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 1); + umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); retval = 0; } } @@ -1484,7 +1485,7 @@ void __detach_mounts(struct dentry *dentry) lock_mount_hash(); while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); - umount_tree(mnt, 2); + umount_tree(mnt, 
UMOUNT_PROPAGATE); } unlock_mount_hash(); put_mountpoint(mp); @@ -1646,7 +1647,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, out: if (res) { lock_mount_hash(); - umount_tree(res, 0); + umount_tree(res, UMOUNT_SYNC); unlock_mount_hash(); } return q; @@ -1670,7 +1671,7 @@ void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); - umount_tree(real_mount(mnt), 0); + umount_tree(real_mount(mnt), UMOUNT_SYNC); unlock_mount_hash(); namespace_unlock(); } @@ -1853,7 +1854,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, out_cleanup_ids: while (!hlist_empty(&tree_list)) { child = hlist_entry(tree_list.first, struct mount, mnt_hash); - umount_tree(child, 0); + umount_tree(child, UMOUNT_SYNC); } unlock_mount_hash(); cleanup_group_ids(source_mnt, NULL); @@ -2033,7 +2034,7 @@ static int do_loopback(struct path *path, const char *old_name, err = graft_tree(mnt, parent, mp); if (err) { lock_mount_hash(); - umount_tree(mnt, 0); + umount_tree(mnt, UMOUNT_SYNC); unlock_mount_hash(); } out2: @@ -2404,7 +2405,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) while (!list_empty(&graveyard)) { mnt = list_first_entry(&graveyard, struct mount, mnt_expire); touch_mnt_namespace(mnt->mnt_ns); - umount_tree(mnt, 1); + umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); } unlock_mount_hash(); namespace_unlock(); @@ -2475,7 +2476,7 @@ static void shrink_submounts(struct mount *mnt) m = list_first_entry(&graveyard, struct mount, mnt_expire); touch_mnt_namespace(m->mnt_ns); - umount_tree(m, 1); + umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC); } } } diff --git a/fs/pnode.h b/fs/pnode.h index 4a24635..16afc3d 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -47,7 +47,6 @@ int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); -void umount_tree(struct mount *, int); struct mount *copy_tree(struct 
mount *, struct dentry *, int); bool is_path_reachable(struct mount *, struct dentry *, const struct path *root); -- cgit v1.1 From 8318e667f176f7ea34451a1a530634e293f216ac Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 24 Dec 2014 07:35:10 -0600 Subject: mnt: Don't propagate umounts in __detach_mounts Invoking mount propagation from __detach_mounts is inefficient and wrong. It is inefficient because __detach_mounts already walks the list of mounts where something needs to be done, and mount propagation walks some subset of those mounts again. It is actively wrong because if the dentry that is passed to __detach_mounts is not part of the path to a mount, that mount should not be affected. change_mnt_propagation(p,MS_PRIVATE) modifies the mount propagation tree of a master mount so its slaves are connected to another master if possible. Which means even removing a mount from the middle of a mount tree with __detach_mounts will not deprive any mount of propagated mount events. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index e06e367..c68d9fc 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1485,7 +1485,7 @@ void __detach_mounts(struct dentry *dentry) lock_mount_hash(); while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); - umount_tree(mnt, UMOUNT_PROPAGATE); + umount_tree(mnt, 0); } unlock_mount_hash(); put_mountpoint(mp); -- cgit v1.1 From c003b26ff98ca04a180ff34c38c007a3998d62f9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 18 Dec 2014 13:10:48 -0600 Subject: mnt: In umount_tree reuse mnt_list instead of mnt_hash umount_tree builds a list of mounts that need to be unmounted. Utilize mnt_list for this purpose instead of mnt_hash. 
This begins to allow keeping a mount on the mnt_hash after it is unmounted, which is necessary for a properly functioning MNT_LOCKED implementation. The fact that mnt_list is an ordinary list, making list_move available, is a nice bonus. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 20 +++++++++++--------- fs/pnode.c | 6 +++--- fs/pnode.h | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index c68d9fc..54cbef1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1327,23 +1327,25 @@ enum umount_tree_flags { */ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) { - HLIST_HEAD(tmp_list); + LIST_HEAD(tmp_list); struct mount *p; - for (p = mnt; p; p = next_mnt(p, mnt)) { - hlist_del_init_rcu(&p->mnt_hash); - hlist_add_head(&p->mnt_hash, &tmp_list); - } + /* Gather the mounts to umount */ + for (p = mnt; p; p = next_mnt(p, mnt)) + list_move(&p->mnt_list, &tmp_list); - hlist_for_each_entry(p, &tmp_list, mnt_hash) + /* Hide the mounts from lookup_mnt and mnt_mounts */ + list_for_each_entry(p, &tmp_list, mnt_list) { + hlist_del_init_rcu(&p->mnt_hash); list_del_init(&p->mnt_child); + } + /* Add propogated mounts to the tmp_list */ if (how & UMOUNT_PROPAGATE) propagate_umount(&tmp_list); - while (!hlist_empty(&tmp_list)) { - p = hlist_entry(tmp_list.first, struct mount, mnt_hash); - hlist_del_init_rcu(&p->mnt_hash); + while (!list_empty(&tmp_list)) { + p = list_first_entry(&tmp_list, struct mount, mnt_list); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); diff --git a/fs/pnode.c b/fs/pnode.c index 260ac8f..bf012af 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -384,7 +384,7 @@ static void __propagate_umount(struct mount *mnt) if (child && list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); hlist_del_init_rcu(&child->mnt_hash); - hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); + 
list_move_tail(&child->mnt_list, &mnt->mnt_list); } } } @@ -396,11 +396,11 @@ static void __propagate_umount(struct mount *mnt) * * vfsmount lock must be held for write */ -int propagate_umount(struct hlist_head *list) +int propagate_umount(struct list_head *list) { struct mount *mnt; - hlist_for_each_entry(mnt, list, mnt_hash) + list_for_each_entry(mnt, list, mnt_list) __propagate_umount(mnt); return 0; } diff --git a/fs/pnode.h b/fs/pnode.h index 16afc3d..aa6d65d 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -40,7 +40,7 @@ static inline void set_mnt_shared(struct mount *mnt) void change_mnt_propagation(struct mount *, int); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, struct hlist_head *); -int propagate_umount(struct hlist_head *); +int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); -- cgit v1.1 From 590ce4bcbfb4e0462a720a4ad901e84416080bba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Dec 2014 18:30:08 -0600 Subject: mnt: Add MNT_UMOUNT flag In some instances it is necessary to know if the unmounting process has begun on a mount. Add MNT_UMOUNT to make that reliably testable. This fix gets used in fixing locked mounts in MNT_DETACH. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 4 +++- fs/pnode.c | 1 + include/linux/mount.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 54cbef1..d170814 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1331,8 +1331,10 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) struct mount *p; /* Gather the mounts to umount */ - for (p = mnt; p; p = next_mnt(p, mnt)) + for (p = mnt; p; p = next_mnt(p, mnt)) { + p->mnt.mnt_flags |= MNT_UMOUNT; list_move(&p->mnt_list, &tmp_list); + } /* Hide the mounts from lookup_mnt and mnt_mounts */ list_for_each_entry(p, &tmp_list, mnt_list) { diff --git a/fs/pnode.c b/fs/pnode.c index bf012af..ac3aa0d 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -384,6 +384,7 @@ static void __propagate_umount(struct mount *mnt) if (child && list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); hlist_del_init_rcu(&child->mnt_hash); + child->mnt.mnt_flags |= MNT_UMOUNT; list_move_tail(&child->mnt_list, &mnt->mnt_list); } } diff --git a/include/linux/mount.h b/include/linux/mount.h index c2c561d..564beee 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -61,6 +61,7 @@ struct mnt_namespace; #define MNT_DOOMED 0x1000000 #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 +#define MNT_UMOUNT 0x8000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ -- cgit v1.1 From 411a938b5abc9cb126c41cccf5975ae464fe0f3e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Dec 2014 19:12:07 -0600 Subject: mnt: Delay removal from the mount hash. - Modify __lookup_mnt_last to ignore mounts that have MNT_UMOUNT set. - Don't remove mounts from the mount hash table in propagate_umount - Don't remove mounts from the mount hash table in umount_tree before the entire list of mounts to be umounted is selected. 
- Remove mounts from the mount hash table as the last thing that happens in the case where a mount has a parent in umount_tree. Mounts without parents are not hashed (by definition). This paves the way for delaying removal from the mount hash table even farther and fixing the MNT_LOCKED vs MNT_DETACH issue. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 13 ++++++++----- fs/pnode.c | 1 - 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d170814..083e340 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -632,14 +632,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) */ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { - struct mount *p, *res; - res = p = __lookup_mnt(mnt, dentry); + struct mount *p, *res = NULL; + p = __lookup_mnt(mnt, dentry); if (!p) goto out; + if (!(p->mnt.mnt_flags & MNT_UMOUNT)) + res = p; hlist_for_each_entry_continue(p, mnt_hash) { if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) break; - res = p; + if (!(p->mnt.mnt_flags & MNT_UMOUNT)) + res = p; } out: return res; @@ -1336,9 +1339,8 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) list_move(&p->mnt_list, &tmp_list); } - /* Hide the mounts from lookup_mnt and mnt_mounts */ + /* Hide the mounts from mnt_mounts */ list_for_each_entry(p, &tmp_list, mnt_list) { - hlist_del_init_rcu(&p->mnt_hash); list_del_init(&p->mnt_child); } @@ -1365,6 +1367,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) p->mnt_mountpoint = p->mnt.mnt_root; p->mnt_parent = p; p->mnt_mp = NULL; + hlist_del_init_rcu(&p->mnt_hash); } change_mnt_propagation(p, MS_PRIVATE); } diff --git a/fs/pnode.c b/fs/pnode.c index ac3aa0d..c27ae38 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -383,7 +383,6 @@ static void __propagate_umount(struct mount *mnt) */ if (child && list_empty(&child->mnt_mounts)) { 
list_del_init(&child->mnt_child); - hlist_del_init_rcu(&child->mnt_hash); child->mnt.mnt_flags |= MNT_UMOUNT; list_move_tail(&child->mnt_list, &mnt->mnt_list); } -- cgit v1.1 From 5d88457eb5b86b475422dc882f089203faaeedb5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 3 Jan 2015 05:39:35 -0600 Subject: mnt: On an unmount propagate clearing of MNT_LOCKED A prerequisite of calling umount_tree is that the point where the tree is mounted at is valid to unmount. If we are propagating the effect of the unmount clear MNT_LOCKED in every instance where the same filesystem is mounted on the same mountpoint in the mount tree, as we know (by virtue of the fact that umount_tree was called) that it is safe to reveal what is at that mountpoint. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 3 +++ fs/pnode.c | 20 ++++++++++++++++++++ fs/pnode.h | 1 + 3 files changed, 24 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 083e340..2b12b7a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1333,6 +1333,9 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) LIST_HEAD(tmp_list); struct mount *p; + if (how & UMOUNT_PROPAGATE) + propagate_mount_unlock(mnt); + /* Gather the mounts to umount */ for (p = mnt; p; p = next_mnt(p, mnt)) { p->mnt.mnt_flags |= MNT_UMOUNT; diff --git a/fs/pnode.c b/fs/pnode.c index c27ae38..8989029 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -362,6 +362,26 @@ int propagate_mount_busy(struct mount *mnt, int refcnt) } /* + * Clear MNT_LOCKED when it can be shown to be safe. 
+ * + * mount_lock lock must be held for write + */ +void propagate_mount_unlock(struct mount *mnt) +{ + struct mount *parent = mnt->mnt_parent; + struct mount *m, *child; + + BUG_ON(parent == mnt); + + for (m = propagation_next(parent, parent); m; + m = propagation_next(m, parent)) { + child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); + if (child) + child->mnt.mnt_flags &= ~MNT_LOCKED; + } +} + +/* * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. */ diff --git a/fs/pnode.h b/fs/pnode.h index aa6d65d..af47d4b 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -42,6 +42,7 @@ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, struct hlist_head *); int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); +void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); -- cgit v1.1 From 0c56fe31420ca599c90240315f7959bf1b4eb6ce Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 5 Jan 2015 13:38:04 -0600 Subject: mnt: Don't propagate unmounts to locked mounts If the first mount in a shared subtree is locked don't unmount the shared subtree. This is ensured by walking through the mounts, parents before children, and marking a mount as unmountable if it is not locked or it is locked but its parent is marked. This allows recursive mount detach to propagate through a set of mounts when unmounting them would not reveal what is under any locked mount. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/pnode.c | 32 +++++++++++++++++++++++++++++--- fs/pnode.h | 1 + 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/fs/pnode.c b/fs/pnode.c index 8989029..6367e1e 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -382,6 +382,26 @@ void propagate_mount_unlock(struct mount *mnt) } /* + * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted. + */ +static void mark_umount_candidates(struct mount *mnt) +{ + struct mount *parent = mnt->mnt_parent; + struct mount *m; + + BUG_ON(parent == mnt); + + for (m = propagation_next(parent, parent); m; + m = propagation_next(m, parent)) { + struct mount *child = __lookup_mnt_last(&m->mnt, + mnt->mnt_mountpoint); + if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) { + SET_MNT_MARK(child); + } + } +} + +/* * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. */ @@ -398,10 +418,13 @@ static void __propagate_umount(struct mount *mnt) struct mount *child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); /* - * umount the child only if the child has no - * other children + * umount the child only if the child has no children + * and the child is marked safe to unmount. 
*/ - if (child && list_empty(&child->mnt_mounts)) { + if (!child || !IS_MNT_MARKED(child)) + continue; + CLEAR_MNT_MARK(child); + if (list_empty(&child->mnt_mounts)) { list_del_init(&child->mnt_child); child->mnt.mnt_flags |= MNT_UMOUNT; list_move_tail(&child->mnt_list, &mnt->mnt_list); @@ -420,6 +443,9 @@ int propagate_umount(struct list_head *list) { struct mount *mnt; + list_for_each_entry_reverse(mnt, list, mnt_list) + mark_umount_candidates(mnt); + list_for_each_entry(mnt, list, mnt_list) __propagate_umount(mnt); return 0; diff --git a/fs/pnode.h b/fs/pnode.h index af47d4b..0fcdbe7 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -19,6 +19,7 @@ #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) +#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 -- cgit v1.1 From cd4a40174b71acd021877341684d8bb1dc8ea4ae Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2015 14:28:26 -0600 Subject: mnt: Fail collect_mounts when applied to unmounted mounts The only users of collect_mounts are in audit_tree.c In audit_trim_trees and audit_add_tree_rule the path passed into collect_mounts is generated from kern_path passed an audit_tree pathname which is guaranteed to be an absolute path. In those cases collect_mounts is obviously intended to work on mounted paths and if a race results in paths that are unmounted when collect_mounts it is reasonable to fail early. The paths passed into audit_tag_tree don't have the absolute path check. But are used to play with fsnotify and otherwise interact with the audit_trees, so again operating only on mounted paths appears reasonable. Avoid having to worry about what happens when we try and audit unmounted filesystems by restricting collect_mounts to mounts that appear in the mount tree. Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 2b12b7a..acc5583 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1669,8 +1669,11 @@ struct vfsmount *collect_mounts(struct path *path) { struct mount *tree; namespace_lock(); - tree = copy_tree(real_mount(path->mnt), path->dentry, - CL_COPY_ALL | CL_PRIVATE); + if (!check_mnt(real_mount(path->mnt))) + tree = ERR_PTR(-EINVAL); + else + tree = copy_tree(real_mount(path->mnt), path->dentry, + CL_COPY_ALL | CL_PRIVATE); namespace_unlock(); if (IS_ERR(tree)) return ERR_CAST(tree); -- cgit v1.1 From 7bdb11de8ee4f4ae195e2fa19efd304e0b36c63b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 29 Dec 2014 13:03:41 -0600 Subject: mnt: Factor out unhash_mnt from detach_mnt and umount_tree Create a function unhash_mnt that contains the common code between detach_mnt and umount_tree, and use unhash_mnt in place of the common code. This adds an unnecessary list_del_init(mnt->mnt_child) into umount_tree, but given that mnt_child is already empty this extra line is a no-op. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index acc5583..e669a3b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -798,10 +798,8 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) /* * vfsmount lock must be held for write */ -static void detach_mnt(struct mount *mnt, struct path *old_path) +static void unhash_mnt(struct mount *mnt) { - old_path->dentry = mnt->mnt_mountpoint; - old_path->mnt = &mnt->mnt_parent->mnt; mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); @@ -814,6 +812,16 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) /* * vfsmount lock must be held for write */ +static void detach_mnt(struct mount *mnt, struct path *old_path) +{ + old_path->dentry = mnt->mnt_mountpoint; + old_path->mnt = &mnt->mnt_parent->mnt; + unhash_mnt(mnt); +} + +/* + * vfsmount lock must be held for write + */ void mnt_set_mountpoint(struct mount *mnt, struct mountpoint *mp, struct mount *child_mnt) @@ -1362,15 +1370,10 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); if (mnt_has_parent(p)) { - hlist_del_init(&p->mnt_mp_list); - put_mountpoint(p->mnt_mp); mnt_add_count(p->mnt_parent, -1); /* old mountpoint will be dropped when we can do that */ p->mnt_ex_mountpoint = p->mnt_mountpoint; - p->mnt_mountpoint = p->mnt.mnt_root; - p->mnt_parent = p; - p->mnt_mp = NULL; - hlist_del_init_rcu(&p->mnt_hash); + unhash_mnt(p); } change_mnt_propagation(p, MS_PRIVATE); } -- cgit v1.1 From 6a46c5735c29175da55b2fa9d53775182422cdd7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 15 Jan 2015 22:58:33 -0600 Subject: mnt: Factor umount_mnt from umount_tree For future use factor out a function umount_mnt from umount_tree. 
This function unhashes a mount and remembers where the mount was mounted so that eventually when the code makes it to a sleeping context the mountpoint can be dput. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index e669a3b..010d5be 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -822,6 +822,16 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) /* * vfsmount lock must be held for write */ +static void umount_mnt(struct mount *mnt) +{ + /* old mountpoint will be dropped when we can do that */ + mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint; + unhash_mnt(mnt); +} + +/* + * vfsmount lock must be held for write + */ void mnt_set_mountpoint(struct mount *mnt, struct mountpoint *mp, struct mount *child_mnt) @@ -1371,9 +1381,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); if (mnt_has_parent(p)) { mnt_add_count(p->mnt_parent, -1); - /* old mountpoint will be dropped when we can do that */ - p->mnt_ex_mountpoint = p->mnt_mountpoint; - unhash_mnt(p); + umount_mnt(p); } change_mnt_propagation(p, MS_PRIVATE); } -- cgit v1.1 From 820f9f147dcce2602eefd9b575bbbd9ea14f0953 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Apr 2015 16:35:48 -0500 Subject: fs_pin: Allow for the possibility that m_list or s_list go unused. This is needed to support lazily umounting locked mounts. Because the entire unmounted subtree needs to stay together until there are no users with references to any part of the subtree. To support this guarantee that the fs_pin m_list and s_list nodes are initialized by initializing them in init_fs_pin allowing for the possibility that pin_insert_group does not touch them. 
Further, use hlist_del_init in pin_remove so that there is a hlist_unhashed test before we attempt to update the previous list item. Signed-off-by: "Eric W. Biederman" --- fs/fs_pin.c | 4 ++-- include/linux/fs_pin.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/fs_pin.c b/fs/fs_pin.c index b06c987..611b540 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -9,8 +9,8 @@ static DEFINE_SPINLOCK(pin_lock); void pin_remove(struct fs_pin *pin) { spin_lock(&pin_lock); - hlist_del(&pin->m_list); - hlist_del(&pin->s_list); + hlist_del_init(&pin->m_list); + hlist_del_init(&pin->s_list); spin_unlock(&pin_lock); spin_lock_irq(&pin->wait.lock); pin->done = 1; diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 9dc4e03..3886b3b 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -13,6 +13,8 @@ struct vfsmount; static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) { init_waitqueue_head(&p->wait); + INIT_HLIST_NODE(&p->s_list); + INIT_HLIST_NODE(&p->m_list); p->kill = kill; } -- cgit v1.1 From ce07d891a0891d3c0d0c2d73d577490486b809e1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 Dec 2014 21:37:03 -0600 Subject: mnt: Honor MNT_LOCKED when detaching mounts Modify umount(MNT_DETACH) to keep mounts in the hash table that are locked to their parent mounts, when the parent is lazily unmounted. In mntput_no_expire detach the children from the hash table, depending on mnt_pin_kill in cleanup_mnt to decrement the mnt_count of the children. In __detach_mounts if there are any mounts that have been unmounted but still are on the list of mounts of a mountpoint, remove their children from the mount hash table and add those children to the unmounted list so they won't linger potentially indefinitely waiting for their final mntput, now that the mounts serve no purpose. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 29 ++++++++++++++++++++++++++--- fs/pnode.h | 2 ++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 010d5be..1894d18 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1099,6 +1099,13 @@ static void mntput_no_expire(struct mount *mnt) rcu_read_unlock(); list_del(&mnt->mnt_instance); + + if (unlikely(!list_empty(&mnt->mnt_mounts))) { + struct mount *p, *tmp; + list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { + umount_mnt(p); + } + } unlock_mount_hash(); if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { @@ -1370,6 +1377,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) propagate_umount(&tmp_list); while (!list_empty(&tmp_list)) { + bool disconnect; p = list_first_entry(&tmp_list, struct mount, mnt_list); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); @@ -1378,10 +1386,18 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); + disconnect = !IS_MNT_LOCKED_AND_LAZY(p); + + pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, + disconnect ? 
&unmounted : NULL); if (mnt_has_parent(p)) { mnt_add_count(p->mnt_parent, -1); - umount_mnt(p); + if (!disconnect) { + /* Don't forget about p */ + list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); + } else { + umount_mnt(p); + } } change_mnt_propagation(p, MS_PRIVATE); } @@ -1506,7 +1522,14 @@ void __detach_mounts(struct dentry *dentry) lock_mount_hash(); while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); - umount_tree(mnt, 0); + if (mnt->mnt.mnt_flags & MNT_UMOUNT) { + struct mount *p, *tmp; + list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { + hlist_add_head(&p->mnt_umount.s_list, &unmounted); + umount_mnt(p); + } + } + else umount_tree(mnt, 0); } unlock_mount_hash(); put_mountpoint(mp); diff --git a/fs/pnode.h b/fs/pnode.h index 0fcdbe7..7114ce6 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -20,6 +20,8 @@ #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) +#define IS_MNT_LOCKED_AND_LAZY(m) \ + (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 -- cgit v1.1 From f53e57975151f54ad8caa1b0ac8a78091cd5700a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 19 Jan 2015 11:48:45 -0600 Subject: mnt: Fix the error check in __detach_mounts lookup_mountpoint can return either NULL or an error value. Update the test in __detach_mounts to test for an error value to avoid pathological cases causing NULL pointer dereferences. The callers of __detach_mounts should prevent it from ever being called on an unlinked dentry, but don't take any chances. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1894d18..e8f7f8c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1516,7 +1516,7 @@ void __detach_mounts(struct dentry *dentry) namespace_lock(); mp = lookup_mountpoint(dentry); - if (!mp) + if (IS_ERR_OR_NULL(mp)) goto out_unlock; lock_mount_hash(); -- cgit v1.1 From e0c9c0afd2fc958ffa34b697972721d81df8a56f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Apr 2015 18:30:06 -0500 Subject: mnt: Update detach_mounts to leave mounts connected Now that it is possible to lazily unmount an entire mount tree and leave the individual mounts connected to each other add a new flag UMOUNT_CONNECTED to umount_tree to force this behavior and use this flag in detach_mounts. This closes a bug where the deletion of a file or directory could trigger an unmount and reveal data under a mount point. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index e8f7f8c..1f4f9da 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1348,6 +1348,7 @@ static inline void namespace_lock(void) enum umount_tree_flags { UMOUNT_SYNC = 1, UMOUNT_PROPAGATE = 2, + UMOUNT_CONNECTED = 4, }; /* * mount_lock must be held @@ -1386,7 +1387,10 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - disconnect = !IS_MNT_LOCKED_AND_LAZY(p); + disconnect = !(((how & UMOUNT_CONNECTED) && + mnt_has_parent(p) && + (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) || + IS_MNT_LOCKED_AND_LAZY(p)); pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, disconnect ? 
&unmounted : NULL); @@ -1529,7 +1533,7 @@ void __detach_mounts(struct dentry *dentry) umount_mnt(p); } } - else umount_tree(mnt, 0); + else umount_tree(mnt, UMOUNT_CONNECTED); } unlock_mount_hash(); put_mountpoint(mp); -- cgit v1.1