diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2012-12-28 18:58:39 -0800 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2013-01-26 22:11:41 -0800 |
commit | c61a2810a2161986353705b44d9503e6bb079f4f (patch) | |
tree | 1f949f359d0344c2a47c8d0c597815aceb8091c0 | |
parent | 923c7538236564c46ee80c253a416705321f13e3 (diff) | |
download | op-kernel-dev-c61a2810a2161986353705b44d9503e6bb079f4f.zip op-kernel-dev-c61a2810a2161986353705b44d9503e6bb079f4f.tar.gz |
userns: Avoid recursion in put_user_ns
When freeing a deeply nested user namespace free_user_ns calls
put_user_ns on it's parent which may in turn call free_user_ns again.
When -fno-optimize-sibling-calls is passed to gcc one stack frame per
user namespace is left on the stack, potentially overflowing the
kernel stack. CONFIG_FRAME_POINTER forces -fno-optimize-sibling-calls
so we can't count on gcc to optimize this code.
Remove struct kref and use a plain atomic_t. Making the code more
flexible and easier to comprehend. Make the loop in free_user_ns
explict to guarantee that the stack does not overflow with
CONFIG_FRAME_POINTER enabled.
I have tested this fix with a simple program that uses unshare to
create a deeply nested user namespace structure and then calls exit.
With 1000 nesteuser namespaces before this change running my test
program causes the kernel to die a horrible death. With 10,000,000
nested user namespaces after this change my test program runs to
completion and causes no harm.
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Pointed-out-by: Vasily Kulikov <segoon@openwall.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r-- | include/linux/user_namespace.h | 10 | ||||
-rw-r--r-- | kernel/user.c | 4 | ||||
-rw-r--r-- | kernel/user_namespace.c | 17 |
3 files changed, 15 insertions, 16 deletions
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b9bd2e6..4ce0093 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -21,7 +21,7 @@ struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; struct uid_gid_map projid_map; - struct kref kref; + atomic_t count; struct user_namespace *parent; kuid_t owner; kgid_t group; @@ -35,18 +35,18 @@ extern struct user_namespace init_user_ns; static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) - kref_get(&ns->kref); + atomic_inc(&ns->count); return ns; } extern int create_user_ns(struct cred *new); extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); -extern void free_user_ns(struct kref *kref); +extern void free_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { - if (ns) - kref_put(&ns->kref, free_user_ns); + if (ns && atomic_dec_and_test(&ns->count)) + free_user_ns(ns); } struct seq_operations; diff --git a/kernel/user.c b/kernel/user.c index 33acb5e..57ebfd4 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -47,9 +47,7 @@ struct user_namespace init_user_ns = { .count = 4294967295U, }, }, - .kref = { - .refcount = ATOMIC_INIT(3), - }, + .count = ATOMIC_INIT(3), .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 2b042c4..24f8ec3 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -78,7 +78,7 @@ int create_user_ns(struct cred *new) return ret; } - kref_init(&ns->kref); + atomic_set(&ns->count, 1); /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; ns->owner = owner; @@ -104,15 +104,16 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) return create_user_ns(cred); } -void free_user_ns(struct kref *kref) +void free_user_ns(struct user_namespace *ns) { - struct user_namespace *parent, *ns = - container_of(kref, struct user_namespace, kref); + struct user_namespace *parent; - parent = ns->parent; - proc_free_inum(ns->proc_inum); - kmem_cache_free(user_ns_cachep, ns); - put_user_ns(parent); + do { + parent = ns->parent; + proc_free_inum(ns->proc_inum); + kmem_cache_free(user_ns_cachep, ns); + ns = parent; + } while (atomic_dec_and_test(&parent->count)); } EXPORT_SYMBOL(free_user_ns); |