diff options
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 404 |
1 files changed, 252 insertions, 152 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 50ca17d..7b1ca9b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -21,7 +21,8 @@ #include <linux/fs_struct.h> /* get_fs_root et.al. */ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ #include <linux/uaccess.h> -#include <linux/proc_fs.h> +#include <linux/proc_ns.h> +#include <linux/magic.h> #include "pnode.h" #include "internal.h" @@ -36,6 +37,7 @@ static int mnt_id_start = 0; static int mnt_group_start = 1; static struct list_head *mount_hashtable __read_mostly; +static struct list_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static struct rw_semaphore namespace_sem; @@ -605,6 +607,51 @@ struct vfsmount *lookup_mnt(struct path *path) } } +static struct mountpoint *new_mountpoint(struct dentry *dentry) +{ + struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); + struct mountpoint *mp; + + list_for_each_entry(mp, chain, m_hash) { + if (mp->m_dentry == dentry) { + /* might be worth a WARN_ON() */ + if (d_unlinked(dentry)) + return ERR_PTR(-ENOENT); + mp->m_count++; + return mp; + } + } + + mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); + if (!mp) + return ERR_PTR(-ENOMEM); + + spin_lock(&dentry->d_lock); + if (d_unlinked(dentry)) { + spin_unlock(&dentry->d_lock); + kfree(mp); + return ERR_PTR(-ENOENT); + } + dentry->d_flags |= DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); + mp->m_dentry = dentry; + mp->m_count = 1; + list_add(&mp->m_hash, chain); + return mp; +} + +static void put_mountpoint(struct mountpoint *mp) +{ + if (!--mp->m_count) { + struct dentry *dentry = mp->m_dentry; + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); + list_del(&mp->m_hash); + kfree(mp); + } +} + static inline int check_mnt(struct mount *mnt) { return mnt->mnt_ns == current->nsproxy->mnt_ns; @@ -633,27 +680,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } /* - * Clear dentry's mounted 
state if it has no remaining mounts. - * vfsmount_lock must be held for write. - */ -static void dentry_reset_mounted(struct dentry *dentry) -{ - unsigned u; - - for (u = 0; u < HASH_SIZE; u++) { - struct mount *p; - - list_for_each_entry(p, &mount_hashtable[u], mnt_hash) { - if (p->mnt_mountpoint == dentry) - return; - } - } - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_MOUNTED; - spin_unlock(&dentry->d_lock); -} - -/* * vfsmount lock must be held for write */ static void detach_mnt(struct mount *mnt, struct path *old_path) @@ -664,32 +690,35 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); - dentry_reset_mounted(old_path->dentry); + put_mountpoint(mnt->mnt_mp); + mnt->mnt_mp = NULL; } /* * vfsmount lock must be held for write */ -void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry, +void mnt_set_mountpoint(struct mount *mnt, + struct mountpoint *mp, struct mount *child_mnt) { + mp->m_count++; mnt_add_count(mnt, 1); /* essentially, that's mntget */ - child_mnt->mnt_mountpoint = dget(dentry); + child_mnt->mnt_mountpoint = dget(mp->m_dentry); child_mnt->mnt_parent = mnt; - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_MOUNTED; - spin_unlock(&dentry->d_lock); + child_mnt->mnt_mp = mp; } /* * vfsmount lock must be held for write */ -static void attach_mnt(struct mount *mnt, struct path *path) +static void attach_mnt(struct mount *mnt, + struct mount *parent, + struct mountpoint *mp) { - mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt); + mnt_set_mountpoint(parent, mp, mnt); list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(path->mnt, path->dentry)); - list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); + hash(&parent->mnt, mp->m_dentry)); + list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } /* @@ -798,6 +827,10 @@ static struct mount *clone_mnt(struct mount *old, struct 
dentry *root, } mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + /* Don't allow unprivileged users to change mount flags */ + if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + atomic_inc(&sb->s_active); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); @@ -1091,11 +1124,23 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -void release_mounts(struct list_head *head) +static LIST_HEAD(unmounted); /* protected by namespace_sem */ + +static void namespace_unlock(void) { struct mount *mnt; - while (!list_empty(head)) { - mnt = list_first_entry(head, struct mount, mnt_hash); + LIST_HEAD(head); + + if (likely(list_empty(&unmounted))) { + up_write(&namespace_sem); + return; + } + + list_splice_init(&unmounted, &head); + up_write(&namespace_sem); + + while (!list_empty(&head)) { + mnt = list_first_entry(&head, struct mount, mnt_hash); list_del_init(&mnt->mnt_hash); if (mnt_has_parent(mnt)) { struct dentry *dentry; @@ -1115,11 +1160,16 @@ void release_mounts(struct list_head *head) } } +static inline void namespace_lock(void) +{ + down_write(&namespace_sem); +} + /* * vfsmount lock must be held for write * namespace_sem must be held for write */ -void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) +void umount_tree(struct mount *mnt, int propagate) { LIST_HEAD(tmp_list); struct mount *p; @@ -1138,20 +1188,20 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { p->mnt_parent->mnt_ghosts++; - dentry_reset_mounted(p->mnt_mountpoint); + put_mountpoint(p->mnt_mp); + p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); } - list_splice(&tmp_list, kill); + list_splice(&tmp_list, &unmounted); } -static void shrink_submounts(struct mount *mnt, struct list_head *umounts); +static void shrink_submounts(struct mount *mnt); static int do_umount(struct mount *mnt, int flags) { struct 
super_block *sb = mnt->mnt.mnt_sb; int retval; - LIST_HEAD(umount_list); retval = security_sb_umount(&mnt->mnt, flags); if (retval) @@ -1218,22 +1268,21 @@ static int do_umount(struct mount *mnt, int flags) return retval; } - down_write(&namespace_sem); + namespace_lock(); br_write_lock(&vfsmount_lock); event++; if (!(flags & MNT_DETACH)) - shrink_submounts(mnt, &umount_list); + shrink_submounts(mnt); retval = -EBUSY; if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 1, &umount_list); + umount_tree(mnt, 1); retval = 0; } br_write_unlock(&vfsmount_lock); - up_write(&namespace_sem); - release_mounts(&umount_list); + namespace_unlock(); return retval; } @@ -1306,13 +1355,13 @@ static bool mnt_ns_loop(struct path *path) * mount namespace loop? */ struct inode *inode = path->dentry->d_inode; - struct proc_inode *ei; + struct proc_ns *ei; struct mnt_namespace *mnt_ns; if (!proc_ns_inode(inode)) return false; - ei = PROC_I(inode); + ei = get_proc_ns(inode); if (ei->ns_ops != &mntns_operations) return false; @@ -1323,8 +1372,7 @@ static bool mnt_ns_loop(struct path *path) struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, int flag) { - struct mount *res, *p, *q, *r; - struct path path; + struct mount *res, *p, *q, *r, *parent; if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) return ERR_PTR(-EINVAL); @@ -1351,25 +1399,22 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, q = q->mnt_parent; } p = s; - path.mnt = &q->mnt; - path.dentry = p->mnt_mountpoint; + parent = q; q = clone_mnt(p, p->mnt.mnt_root, flag); if (IS_ERR(q)) goto out; br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); - attach_mnt(q, &path); + attach_mnt(q, parent, p->mnt_mp); br_write_unlock(&vfsmount_lock); } } return res; out: if (res) { - LIST_HEAD(umount_list); br_write_lock(&vfsmount_lock); - umount_tree(res, 0, &umount_list); + umount_tree(res, 0); br_write_unlock(&vfsmount_lock); 
- release_mounts(&umount_list); } return q; } @@ -1379,10 +1424,10 @@ out: struct vfsmount *collect_mounts(struct path *path) { struct mount *tree; - down_write(&namespace_sem); + namespace_lock(); tree = copy_tree(real_mount(path->mnt), path->dentry, CL_COPY_ALL | CL_PRIVATE); - up_write(&namespace_sem); + namespace_unlock(); if (IS_ERR(tree)) return NULL; return &tree->mnt; @@ -1390,13 +1435,11 @@ struct vfsmount *collect_mounts(struct path *path) void drop_collected_mounts(struct vfsmount *mnt) { - LIST_HEAD(umount_list); - down_write(&namespace_sem); + namespace_lock(); br_write_lock(&vfsmount_lock); - umount_tree(real_mount(mnt), 0, &umount_list); + umount_tree(real_mount(mnt), 0); br_write_unlock(&vfsmount_lock); - up_write(&namespace_sem); - release_mounts(&umount_list); + namespace_unlock(); } int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, @@ -1505,11 +1548,11 @@ static int invent_group_ids(struct mount *mnt, bool recurse) * in allocations. */ static int attach_recursive_mnt(struct mount *source_mnt, - struct path *path, struct path *parent_path) + struct mount *dest_mnt, + struct mountpoint *dest_mp, + struct path *parent_path) { LIST_HEAD(tree_list); - struct mount *dest_mnt = real_mount(path->mnt); - struct dentry *dest_dentry = path->dentry; struct mount *child, *p; int err; @@ -1518,7 +1561,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, if (err) goto out; } - err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list); + err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); if (err) goto out_cleanup_ids; @@ -1530,10 +1573,10 @@ static int attach_recursive_mnt(struct mount *source_mnt, } if (parent_path) { detach_mnt(source_mnt, parent_path); - attach_mnt(source_mnt, path); + attach_mnt(source_mnt, dest_mnt, dest_mp); touch_mnt_namespace(source_mnt->mnt_ns); } else { - mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); + mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); commit_tree(source_mnt); } 
@@ -1552,46 +1595,53 @@ static int attach_recursive_mnt(struct mount *source_mnt, return err; } -static int lock_mount(struct path *path) +static struct mountpoint *lock_mount(struct path *path) { struct vfsmount *mnt; + struct dentry *dentry = path->dentry; retry: - mutex_lock(&path->dentry->d_inode->i_mutex); - if (unlikely(cant_mount(path->dentry))) { - mutex_unlock(&path->dentry->d_inode->i_mutex); - return -ENOENT; + mutex_lock(&dentry->d_inode->i_mutex); + if (unlikely(cant_mount(dentry))) { + mutex_unlock(&dentry->d_inode->i_mutex); + return ERR_PTR(-ENOENT); } - down_write(&namespace_sem); + namespace_lock(); mnt = lookup_mnt(path); - if (likely(!mnt)) - return 0; - up_write(&namespace_sem); + if (likely(!mnt)) { + struct mountpoint *mp = new_mountpoint(dentry); + if (IS_ERR(mp)) { + namespace_unlock(); + mutex_unlock(&dentry->d_inode->i_mutex); + return mp; + } + return mp; + } + namespace_unlock(); mutex_unlock(&path->dentry->d_inode->i_mutex); path_put(path); path->mnt = mnt; - path->dentry = dget(mnt->mnt_root); + dentry = path->dentry = dget(mnt->mnt_root); goto retry; } -static void unlock_mount(struct path *path) +static void unlock_mount(struct mountpoint *where) { - up_write(&namespace_sem); - mutex_unlock(&path->dentry->d_inode->i_mutex); + struct dentry *dentry = where->m_dentry; + put_mountpoint(where); + namespace_unlock(); + mutex_unlock(&dentry->d_inode->i_mutex); } -static int graft_tree(struct mount *mnt, struct path *path) +static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) { if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) return -EINVAL; - if (S_ISDIR(path->dentry->d_inode->i_mode) != + if (S_ISDIR(mp->m_dentry->d_inode->i_mode) != S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) return -ENOTDIR; - if (d_unlinked(path->dentry)) - return -ENOENT; - - return attach_recursive_mnt(mnt, path, NULL); + return attach_recursive_mnt(mnt, p, mp, NULL); } /* @@ -1629,7 +1679,7 @@ static int do_change_type(struct path *path, int 
flag) if (!type) return -EINVAL; - down_write(&namespace_sem); + namespace_lock(); if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -1642,7 +1692,7 @@ static int do_change_type(struct path *path, int flag) br_write_unlock(&vfsmount_lock); out_unlock: - up_write(&namespace_sem); + namespace_unlock(); return err; } @@ -1652,9 +1702,9 @@ static int do_change_type(struct path *path, int flag) static int do_loopback(struct path *path, const char *old_name, int recurse) { - LIST_HEAD(umount_list); struct path old_path; - struct mount *mnt = NULL, *old; + struct mount *mnt = NULL, *old, *parent; + struct mountpoint *mp; int err; if (!old_name || !*old_name) return -EINVAL; @@ -1666,17 +1716,19 @@ static int do_loopback(struct path *path, const char *old_name, if (mnt_ns_loop(&old_path)) goto out; - err = lock_mount(path); - if (err) + mp = lock_mount(path); + err = PTR_ERR(mp); + if (IS_ERR(mp)) goto out; old = real_mount(old_path.mnt); + parent = real_mount(path->mnt); err = -EINVAL; if (IS_MNT_UNBINDABLE(old)) goto out2; - if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) + if (!check_mnt(parent) || !check_mnt(old)) goto out2; if (recurse) @@ -1686,18 +1738,17 @@ static int do_loopback(struct path *path, const char *old_name, if (IS_ERR(mnt)) { err = PTR_ERR(mnt); - goto out; + goto out2; } - err = graft_tree(mnt, path); + err = graft_tree(mnt, parent, mp); if (err) { br_write_lock(&vfsmount_lock); - umount_tree(mnt, 0, &umount_list); + umount_tree(mnt, 0); br_write_unlock(&vfsmount_lock); } out2: - unlock_mount(path); - release_mounts(&umount_list); + unlock_mount(mp); out: path_put(&old_path); return err; @@ -1713,6 +1764,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) if (readonly_request == __mnt_is_readonly(mnt)) return 0; + if (mnt->mnt_flags & MNT_LOCK_READONLY) + return -EPERM; + if (readonly_request) error = mnt_make_readonly(real_mount(mnt)); else @@ -1779,6 +1833,7 @@ static int do_move_mount(struct 
path *path, const char *old_name) struct path old_path, parent_path; struct mount *p; struct mount *old; + struct mountpoint *mp; int err; if (!old_name || !*old_name) return -EINVAL; @@ -1786,8 +1841,9 @@ static int do_move_mount(struct path *path, const char *old_name) if (err) return err; - err = lock_mount(path); - if (err < 0) + mp = lock_mount(path); + err = PTR_ERR(mp); + if (IS_ERR(mp)) goto out; old = real_mount(old_path.mnt); @@ -1797,9 +1853,6 @@ static int do_move_mount(struct path *path, const char *old_name) if (!check_mnt(p) || !check_mnt(old)) goto out1; - if (d_unlinked(path->dentry)) - goto out1; - err = -EINVAL; if (old_path.dentry != old_path.mnt->mnt_root) goto out1; @@ -1826,7 +1879,7 @@ static int do_move_mount(struct path *path, const char *old_name) if (p == old) goto out1; - err = attach_recursive_mnt(old, path, &parent_path); + err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); if (err) goto out1; @@ -1834,7 +1887,7 @@ static int do_move_mount(struct path *path, const char *old_name) * automatically */ list_del_init(&old->mnt_expire); out1: - unlock_mount(path); + unlock_mount(mp); out: if (!err) path_put(&parent_path); @@ -1870,21 +1923,24 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) */ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) { + struct mountpoint *mp; + struct mount *parent; int err; mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); - err = lock_mount(path); - if (err) - return err; + mp = lock_mount(path); + if (IS_ERR(mp)) + return PTR_ERR(mp); + parent = real_mount(path->mnt); err = -EINVAL; - if (unlikely(!check_mnt(real_mount(path->mnt)))) { + if (unlikely(!check_mnt(parent))) { /* that's acceptable only for automounts done in private ns */ if (!(mnt_flags & MNT_SHRINKABLE)) goto unlock; /* ... 
and for those we'd better have mountpoint still alive */ - if (!real_mount(path->mnt)->mnt_ns) + if (!parent->mnt_ns) goto unlock; } @@ -1899,10 +1955,10 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) goto unlock; newmnt->mnt.mnt_flags = mnt_flags; - err = graft_tree(newmnt, path); + err = graft_tree(newmnt, parent, mp); unlock: - unlock_mount(path); + unlock_mount(mp); return err; } @@ -1975,11 +2031,11 @@ int finish_automount(struct vfsmount *m, struct path *path) fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { - down_write(&namespace_sem); + namespace_lock(); br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); br_write_unlock(&vfsmount_lock); - up_write(&namespace_sem); + namespace_unlock(); } mntput(m); mntput(m); @@ -1993,13 +2049,13 @@ fail: */ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { - down_write(&namespace_sem); + namespace_lock(); br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); br_write_unlock(&vfsmount_lock); - up_write(&namespace_sem); + namespace_unlock(); } EXPORT_SYMBOL(mnt_set_expiry); @@ -2012,12 +2068,11 @@ void mark_mounts_for_expiry(struct list_head *mounts) { struct mount *mnt, *next; LIST_HEAD(graveyard); - LIST_HEAD(umounts); if (list_empty(mounts)) return; - down_write(&namespace_sem); + namespace_lock(); br_write_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the @@ -2035,12 +2090,10 @@ void mark_mounts_for_expiry(struct list_head *mounts) while (!list_empty(&graveyard)) { mnt = list_first_entry(&graveyard, struct mount, mnt_expire); touch_mnt_namespace(mnt->mnt_ns); - umount_tree(mnt, 1, &umounts); + umount_tree(mnt, 1); } br_write_unlock(&vfsmount_lock); - up_write(&namespace_sem); - - release_mounts(&umounts); + namespace_unlock(); } EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); @@ -2097,7 +2150,7 @@ resume: * * vfsmount_lock must be 
held for write */ -static void shrink_submounts(struct mount *mnt, struct list_head *umounts) +static void shrink_submounts(struct mount *mnt) { LIST_HEAD(graveyard); struct mount *m; @@ -2108,7 +2161,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts) m = list_first_entry(&graveyard, struct mount, mnt_expire); touch_mnt_namespace(m->mnt_ns); - umount_tree(m, 1, umounts); + umount_tree(m, 1); } } } @@ -2231,12 +2284,11 @@ long do_mount(const char *dev_name, const char *dir_name, retval = security_sb_mount(dev_name, &path, type_page, flags, data_page); + if (!retval && !may_mount()) + retval = -EPERM; if (retval) goto dput_out; - if (!may_mount()) - return -EPERM; - /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) mnt_flags |= MNT_RELATIME; @@ -2335,14 +2387,14 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (IS_ERR(new_ns)) return new_ns; - down_write(&namespace_sem); + namespace_lock(); /* First pass: copy the tree topology */ copy_flags = CL_COPY_ALL | CL_EXPIRE; if (user_ns != mnt_ns->user_ns) - copy_flags |= CL_SHARED_TO_SLAVE; + copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { - up_write(&namespace_sem); + namespace_unlock(); free_mnt_ns(new_ns); return ERR_CAST(new); } @@ -2373,7 +2425,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, p = next_mnt(p, old); q = next_mnt(q, new); } - up_write(&namespace_sem); + namespace_unlock(); if (rootmnt) mntput(rootmnt); @@ -2411,7 +2463,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) struct mount *mnt = real_mount(m); mnt->mnt_ns = new_ns; new_ns->root = mnt; - list_add(&new_ns->list, &mnt->mnt_list); + list_add(&mnt->mnt_list, &new_ns->list); } else { mntput(m); } @@ -2543,7 +2595,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, const char __user *, put_old) { struct path new, old, parent_path, root_parent, 
root; - struct mount *new_mnt, *root_mnt; + struct mount *new_mnt, *root_mnt, *old_mnt; + struct mountpoint *old_mp, *root_mp; int error; if (!may_mount()) @@ -2562,14 +2615,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; get_fs_root(current->fs, &root); - error = lock_mount(&old); - if (error) + old_mp = lock_mount(&old); + error = PTR_ERR(old_mp); + if (IS_ERR(old_mp)) goto out3; error = -EINVAL; new_mnt = real_mount(new.mnt); root_mnt = real_mount(root.mnt); - if (IS_MNT_SHARED(real_mount(old.mnt)) || + old_mnt = real_mount(old.mnt); + if (IS_MNT_SHARED(old_mnt) || IS_MNT_SHARED(new_mnt->mnt_parent) || IS_MNT_SHARED(root_mnt->mnt_parent)) goto out4; @@ -2578,37 +2633,37 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, error = -ENOENT; if (d_unlinked(new.dentry)) goto out4; - if (d_unlinked(old.dentry)) - goto out4; error = -EBUSY; - if (new.mnt == root.mnt || - old.mnt == root.mnt) + if (new_mnt == root_mnt || old_mnt == root_mnt) goto out4; /* loop, on the same file system */ error = -EINVAL; if (root.mnt->mnt_root != root.dentry) goto out4; /* not a mountpoint */ if (!mnt_has_parent(root_mnt)) goto out4; /* not attached */ + root_mp = root_mnt->mnt_mp; if (new.mnt->mnt_root != new.dentry) goto out4; /* not a mountpoint */ if (!mnt_has_parent(new_mnt)) goto out4; /* not attached */ /* make sure we can reach put_old from new_root */ - if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) + if (!is_path_reachable(old_mnt, old.dentry, &new)) goto out4; + root_mp->m_count++; /* pin it so it won't go away */ br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); /* mount old root on put_old */ - attach_mnt(root_mnt, &old); + attach_mnt(root_mnt, old_mnt, old_mp); /* mount new_root on / */ - attach_mnt(new_mnt, &root_parent); + attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); touch_mnt_namespace(current->nsproxy->mnt_ns); br_write_unlock(&vfsmount_lock); 
chroot_fs_refs(&root, &new); + put_mountpoint(root_mp); error = 0; out4: - unlock_mount(&old); + unlock_mount(old_mp); if (!error) { path_put(&root_parent); path_put(&parent_path); @@ -2663,14 +2718,17 @@ void __init mnt_init(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); + mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); - if (!mount_hashtable) + if (!mount_hashtable || !mountpoint_hashtable) panic("Failed to allocate mount hash table\n"); printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); + for (u = 0; u < HASH_SIZE; u++) + INIT_LIST_HEAD(&mountpoint_hashtable[u]); br_lock_init(&vfsmount_lock); @@ -2687,16 +2745,13 @@ void __init mnt_init(void) void put_mnt_ns(struct mnt_namespace *ns) { - LIST_HEAD(umount_list); - if (!atomic_dec_and_test(&ns->count)) return; - down_write(&namespace_sem); + namespace_lock(); br_write_lock(&vfsmount_lock); - umount_tree(ns->root, 0, &umount_list); + umount_tree(ns->root, 0); br_write_unlock(&vfsmount_lock); - up_write(&namespace_sem); - release_mounts(&umount_list); + namespace_unlock(); free_mnt_ns(ns); } @@ -2732,6 +2787,51 @@ bool our_mnt(struct vfsmount *mnt) return check_mnt(real_mount(mnt)); } +bool current_chrooted(void) +{ + /* Does the current process have a non-standard root */ + struct path ns_root; + struct path fs_root; + bool chrooted; + + /* Find the namespace root */ + ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt; + ns_root.dentry = ns_root.mnt->mnt_root; + path_get(&ns_root); + while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) + ; + + get_fs_root(current->fs, &fs_root); + + chrooted = !path_equal(&fs_root, &ns_root); + + path_put(&fs_root); + path_put(&ns_root); + + return chrooted; +} + +void update_mnt_policy(struct user_namespace *userns) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + struct mount *mnt; +
+ down_read(&namespace_sem); + list_for_each_entry(mnt, &ns->list, mnt_list) { + switch (mnt->mnt.mnt_sb->s_magic) { + case SYSFS_MAGIC: + userns->may_mount_sysfs = true; + break; + case PROC_SUPER_MAGIC: + userns->may_mount_proc = true; + break; + } + if (userns->may_mount_sysfs && userns->may_mount_proc) + break; + } + up_read(&namespace_sem); +} + static void *mntns_get(struct task_struct *task) { struct mnt_namespace *ns = NULL; |