From b57922b6c76c3ee401bb32fd3f298409dd6e6a53 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 3 Jul 2013 15:08:29 -0700 Subject: fork: reorder permissions when violating number of processes limits When a task is attempting to violate the RLIMIT_NPROC limit we have a check to see if the task is sufficiently priviledged. The check first looks at CAP_SYS_ADMIN, then CAP_SYS_RESOURCE, then if the task is uid=0. A result is that tasks which are allowed by the uid=0 check are first checked against the security subsystem. This results in the security subsystem auditting a denial for sys_admin and sys_resource and then the task passing the uid=0 check. This patch rearranges the code to first check uid=0, since if we pass that we shouldn't hit the security system at all. We then check sys_resource, since it is the smallest capability which will solve the problem. Lastly we check the fallback everything cap_sysadmin. We don't want to give this capability many places since it is so powerful. This will eliminate many of the false positive/needless denial messages we get when a root task tries to violate the nproc limit. (note that kthreads count against root, so on a sufficiently large machine we can actually get past the default limits before any userspace tasks are launched.) Signed-off-by: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 987b28a..09dbda3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1199,8 +1199,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { - if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->real_cred->user != INIT_USER) + if (p->real_cred->user != INIT_USER && + !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; -- cgit v1.1 From 80628ca06c5d42929de6bc22c0a41589a834d151 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:30 -0700 Subject: kernel/fork.c:copy_process(): unify CLONE_THREAD-or-thread_group_leader code Cleanup and preparation for the next changes. Move the "if (clone_flags & CLONE_THREAD)" code down under "if (likely(p->pid))" and turn it into into the "else" branch. This makes the process/thread initialization more symmetrical and removes one check. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Pavel Emelyanov Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 09dbda3..417cb86 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1446,14 +1446,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_free_pid; } - if (clone_flags & CLONE_THREAD) { - current->signal->nr_threads++; - atomic_inc(¤t->signal->live); - atomic_inc(¤t->signal->sigcnt); - p->group_leader = current->group_leader; - list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); - } - if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -1470,6 +1462,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __this_cpu_inc(process_counts); + } else { + current->signal->nr_threads++; + atomic_inc(¤t->signal->live); + atomic_inc(¤t->signal->sigcnt); + p->group_leader = current->group_leader; + list_add_tail_rcu(&p->thread_group, + &p->group_leader->thread_group); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; -- cgit v1.1 From 8190773985141f063e1d6dc10200527c655abfb5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:31 -0700 Subject: kernel/fork.c:copy_process(): don't add the uninitialized child to thread/task/pid lists copy_process() adds the new child to thread_group/init_task.tasks list and then does attach_pid(child, PIDTYPE_PID). This means that the lockless next_thread() or next_task() can see this thread with the wrong pid. Say, "ls /proc/pid/task" can list the same inode twice. We could move attach_pid(child, PIDTYPE_PID) up, but in this case find_task_by_vpid() can find the new thread before it was fully initialized. And this is already true for PIDTYPE_PGID/PIDTYPE_SID, With this patch copy_process() initializes child->pids[*].pid first, then calls attach_pid() to insert the task into the pid->tasks list. attach_pid() no longer need the "struct pid*" argument, it is always called after pid_link->pid was already set. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Pavel Emelyanov Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 417cb86..7d6962f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1121,6 +1121,12 @@ static void posix_cpu_timers_init(struct task_struct *tsk) INIT_LIST_HEAD(&tsk->cpu_timers[2]); } +static inline void +init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) +{ + task->pids[type].pid = pid; +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -1449,7 +1455,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); + init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { + init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); + init_task_pid(p, PIDTYPE_SID, task_session(current)); + if (is_child_reaper(pid)) { ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; @@ -1457,10 +1467,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); - attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); - attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); + attach_pid(p, PIDTYPE_PGID); + attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); } else { current->signal->nr_threads++; @@ -1470,7 +1480,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } - attach_pid(p, PIDTYPE_PID, pid); + attach_pid(p, PIDTYPE_PID); nr_threads++; } -- cgit v1.1 From 18c830df771f2ba8b4699fea9af1492275ae627b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:32 -0700 Subject: kernel/fork.c:copy_process(): consolidate the lockless CLONE_THREAD checks copy_process() does a lot of "chaotic" initializations and checks CLONE_THREAD twice before it takes tasklist. In particular it sets "p->group_leader = p" and then changes it again under tasklist if !thread_group_leader(p). This looks a bit confusing, lets create a single "if (CLONE_THREAD)" block which initializes ->exit_signal, ->group_leader, and ->tgid. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Pavel Emelyanov Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 7d6962f..6e6a1c1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1360,11 +1360,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_io; } - p->pid = pid_nr(pid); - p->tgid = p->pid; - if (clone_flags & CLONE_THREAD) - p->tgid = current->tgid; - p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? @@ -1400,12 +1395,19 @@ static struct task_struct *copy_process(unsigned long clone_flags, clear_all_latency_tracing(p); /* ok, now we should be set up.. */ - if (clone_flags & CLONE_THREAD) + p->pid = pid_nr(pid); + if (clone_flags & CLONE_THREAD) { p->exit_signal = -1; - else if (clone_flags & CLONE_PARENT) - p->exit_signal = current->group_leader->exit_signal; - else - p->exit_signal = (clone_flags & CSIGNAL); + p->group_leader = current->group_leader; + p->tgid = current->tgid; + } else { + if (clone_flags & CLONE_PARENT) + p->exit_signal = current->group_leader->exit_signal; + else + p->exit_signal = (clone_flags & CSIGNAL); + p->group_leader = p; + p->tgid = p->pid; + } p->pdeath_signal = 0; p->exit_state = 0; @@ -1414,15 +1416,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; - /* - * Ok, make it visible to the rest of the system. - * We dont wake it up yet. - */ - p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; - /* Need tasklist lock for parent etc handling! */ + /* + * Make it visible to the rest of the system, but dont wake it up yet. + * Need tasklist lock for parent etc handling! + */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ @@ -1476,7 +1476,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); - p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } -- cgit v1.1