summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2014-12-15 12:01:42 +0000
committerkib <kib@FreeBSD.org>2014-12-15 12:01:42 +0000
commitc014fd46ecbcbeeee9989bc8fb1fdab425c10888 (patch)
treeb2f31aec2d05802cb12e66560e0af74d73c7e473
parent3c08ea0611f83860ef6a4604ebd6e42116dda80c (diff)
downloadFreeBSD-src-c014fd46ecbcbeeee9989bc8fb1fdab425c10888.zip
FreeBSD-src-c014fd46ecbcbeeee9989bc8fb1fdab425c10888.tar.gz
Add a facility for non-init process to declare itself the reaper of
the orphaned descendants. Base of the API is modelled after the same feature from the DragonFlyBSD. Requested by: bapt Reviewed by: jilles (previous version) Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 3 weeks
-rw-r--r--lib/libc/sys/procctl.2215
-rw-r--r--sys/compat/freebsd32/freebsd32.h6
-rw-r--r--sys/compat/freebsd32/freebsd32_misc.c51
-rw-r--r--sys/conf/files1
-rw-r--r--sys/kern/init_main.c7
-rw-r--r--sys/kern/kern_exit.c39
-rw-r--r--sys/kern/kern_fork.c24
-rw-r--r--sys/kern/kern_procctl.c460
-rw-r--r--sys/kern/sys_process.c191
-rw-r--r--sys/sys/proc.h10
-rw-r--r--sys/sys/procctl.h57
11 files changed, 850 insertions, 211 deletions
diff --git a/lib/libc/sys/procctl.2 b/lib/libc/sys/procctl.2
index 6ad0590..a5d3d89 100644
--- a/lib/libc/sys/procctl.2
+++ b/lib/libc/sys/procctl.2
@@ -2,6 +2,10 @@
.\" Written by: John H. Baldwin <jhb@FreeBSD.org>
.\" All rights reserved.
.\"
+.\" Copyright (c) 2014 The FreeBSD Foundation
+.\" Portions of this documentation were written by Konstantin Belousov
+.\" under sponsorship from the FreeBSD Foundation.
+.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
@@ -25,7 +29,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd September 19, 2013
+.Dd December 15, 2014
.Dt PROCCTL 2
.Os
.Sh NAME
@@ -67,7 +71,7 @@ The control request to perform is specified by the
.Fa cmd
argument.
The following commands are supported:
-.Bl -tag -width "Dv PROC_SPROTECT"
+.Bl -tag -width "Dv PROC_REAP_GETPIDS"
.It Dv PROC_SPROTECT
Set process protection state.
This is used to mark a process as protected from being killed if the system
@@ -95,6 +99,174 @@ When used with
mark all future child processes of each selected process as protected.
Future child processes will also mark all of their future child processes.
.El
+.It Dv PROC_REAP_ACQUIRE
+Acquires the reaper status for the current process.
+The status means that children orphaned by the reaper's descendants
+that were forked after the acquisition of the status are reparented to the
+reaper.
+After the system initialization,
+.Xr init 8
+is the default reaper.
+.Pp
+.It Dv PROC_REAP_RELEASE
+Releases the reaper state for the current process.
+The reaper of the current process becomes the new reaper of the
+current process descendants.
+.It Dv PROC_REAP_STATUS
+Provides the information about the reaper of the specified process,
+or the process itself, in case it is a reaper.
+The
+.Fa data
+argument must point to the
+.Vt "struct procctl_reaper_status" ,
+which is filled by the syscall on successful return.
+.Bd -literal
+struct procctl_reaper_status {
+ u_int rs_flags;
+ u_int rs_children;
+ u_int rs_descendants;
+ pid_t rs_reaper;
+ pid_t rs_pid;
+};
+.Ed
+The
+.Fa rs_flags
+may have the following flags returned:
+.Bl -tag -width "Dv REAPER_STATUS_REALINIT"
+.It Dv REAPER_STATUS_OWNED
+The specified process has acquired the reaper status and has not
+released it.
+When the flag is returned, the
+.Fa id
+pid identifies the reaper, otherwise the
+.Fa rs_reaper
+field of the structure is the pid of the reaper for the passed process id.
+.It Dv REAPER_STATUS_REALINIT
+The specified process is the root of the reaper tree, i.e.
+.Xr init 8 .
+.El
+The
+.Fa rs_children
+returns the number of the children of the reaper.
+The
+.Fa rs_descendants
+returns the total number of descendants of the reaper,
+not counting descendants of the reapers in the subtree.
+The
+.Fa rs_reaper
+returns the reaper pid.
+The
+.Fa rs_pid
+returns pid of some reaper child if there is any descendant.
+.It Dv PROC_REAP_GETPIDS
+Queries the list of descendants of the reaper of the specified process.
+The request takes the pointer to
+.Vt "struct procctl_reaper_pids"
+as
+.Fa data .
+.Bd -literal
+struct procctl_reaper_pids {
+ u_int rp_count;
+ struct procctl_reaper_pidinfo *rp_pids;
+};
+.Ed
+On call, the
+.Fa rp_pids
+must point to the array of
+.Vt procctl_reaper_pidinfo
+structures, to be filled on return,
+and the
+.Fa rp_count
+must specify the size of the array;
+no more than
+.Fa rp_count
+elements are filled by the kernel.
+.Pp
+The
+.Vt "struct procctl_reaper_pidinfo"
+structure provides some information about one of the reaper's descendants.
+Note that for a descendant which is not a direct child, the information
+is subject to the usual race with process exit and pid reuse.
+.Bd -literal
+struct procctl_reaper_pidinfo {
+ pid_t pi_pid;
+ pid_t pi_subtree;
+ u_int pi_flags;
+};
+.Ed
+The
+.Fa pi_pid
+is the process id of the descendant.
+The
+.Fa pi_subtree
+provides the pid of the child of the reaper, which is (grand-)parent
+of the process.
+The
+.Fa pi_flags
+returns the following flags, further describing the descendant:
+.Bl -tag -width "Dv REAPER_PIDINFO_VALID"
+.It Dv REAPER_PIDINFO_VALID
+Set for the
+.Vt procctl_reaper_pidinfo
+structure, which was filled by kernel.
+Zero-filling the
+.Fa rp_pids
+array and testing the flag allows the caller to detect the end
+of returned array.
+.It Dv REAPER_PIDINFO_CHILD
+The
+.Fa pi_pid
+is the direct child of the reaper.
+.El
+.It Dv PROC_REAP_KILL
+Request to deliver a signal to some subset of descendants of the reaper.
+The
+.Fa data
+must point to
+.Vt procctl_reaper_kill
+structure, which is used both for parameters and status return.
+.Bd -literal
+struct procctl_reaper_kill {
+ int rk_sig;
+ u_int rk_flags;
+ pid_t rk_subtree;
+ u_int rk_killed;
+ pid_t rk_fpid;
+};
+.Ed
+The
+.Fa rk_sig
+specifies the signal to be delivered.
+Zero is not a valid signal number, unlike
+.Xr kill 2 .
+The
+.Fa rk_flags
+further directs the operation.
+It is or-ed from the following flags:
+.Bl -tag -width "Dv REAPER_KILL_CHILDREN"
+.It Dv REAPER_KILL_CHILDREN
+Deliver the specified signal only to direct children of the reaper.
+.It Dv REAPER_KILL_SUBTREE
+Deliver the specified signal only to descendants which were forked by
+the direct child with pid specified in
+.Fa rk_subtree .
+.El
+If no
+.Dv REAPER_KILL_CHILDREN
+and
+.Dv REAPER_KILL_SUBTREE
+flags are specified, all current descendants of the reaper are signalled.
+.Pp
+If signal was delivered to any process, the return value from the request
+is zero.
+In this case,
+.Fa rk_killed
+field is filled with the count of processes signalled.
+The
+.Fa rk_fpid
+field is set to the pid of the first process for which signal
+delivery failed, e.g. due to the permission problems.
+If no such process exists, the
+.Fa rk_fpid
+is set to -1.
.El
.Sh RETURN VALUES
If an error occurs, a value of -1 is returned and
@@ -132,11 +304,48 @@ An invalid operation or flag was passed in
for a
.Dv PROC_SPROTECT
command.
+.It Bq Er EPERM
+The
+.Fa idtype
+argument is not equal to
+.Dv P_PID ,
+or
+.Fa id
+is not equal to the pid of the calling process, for
+.Dv PROC_REAP_ACQUIRE
+or
+.Dv PROC_REAP_RELEASE
+requests.
+.It Bq Er EINVAL
+Invalid or undefined flags were passed to
+.Dv PROC_REAP_KILL
+request.
+.It Bq Er EINVAL
+Invalid or zero signal number was requested for
+.Dv PROC_REAP_KILL
+request.
+.It Bq Er EINVAL
+The
+.Dv PROC_REAP_RELEASE
+request was issued by the
+.Xr init 8
+process.
+.It Bq Er EBUSY
+The
+.Dv PROC_REAP_ACQUIRE
+request was issued by a process which has already acquired the reaper status
+and has not released it.
.El
.Sh SEE ALSO
-.Xr ptrace 2
+.Xr kill 2 ,
+.Xr ptrace 2 ,
+.Xr wait 2 ,
+.Xr init 8
.Sh HISTORY
The
.Fn procctl
function appeared in
.Fx 10.0 .
+The reaper facility is based on similar features of Linux and
+DragonFlyBSD, and first appeared in
+.Fx 10.2 .
diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h
index af10055..ed3df7a 100644
--- a/sys/compat/freebsd32/freebsd32.h
+++ b/sys/compat/freebsd32/freebsd32.h
@@ -390,4 +390,10 @@ struct kld32_file_stat {
char pathname[MAXPATHLEN];
};
+struct procctl_reaper_pids32 {	/* 32-bit layout of struct procctl_reaper_pids */
+ u_int rp_count;	/* copied field-for-field into the native structure */
+ u_int rp_pad0[15];
+ uint32_t rp_pids;	/* user pointer held as 32 bits; widened with PTRIN_CP() */
+};
+
#endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index 24c5738..1457f57 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -2957,20 +2957,63 @@ int
freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
{
void *data;
- int error, flags;
+ union {
+ struct procctl_reaper_status rs;
+ struct procctl_reaper_pids rp;
+ struct procctl_reaper_kill rk;
+ } x;
+ union {
+ struct procctl_reaper_pids32 rp;
+ } x32;
+ int error, error1, flags;
switch (uap->com) {
case PROC_SPROTECT:
error = copyin(PTRIN(uap->data), &flags, sizeof(flags));
- if (error)
+ if (error != 0)
return (error);
data = &flags;
break;
+ case PROC_REAP_ACQUIRE:
+ case PROC_REAP_RELEASE:
+ if (uap->data != NULL)
+ return (EINVAL);
+ data = NULL;
+ break;
+ case PROC_REAP_STATUS:
+ data = &x.rs;
+ break;
+ case PROC_REAP_GETPIDS:
+ error = copyin(uap->data, &x32.rp, sizeof(x32.rp));
+ if (error != 0)
+ return (error);
+ CP(x32.rp, x.rp, rp_count);
+ PTRIN_CP(x32.rp, x.rp, rp_pids);
+ data = &x.rp;
+ break;
+ case PROC_REAP_KILL:
+ error = copyin(uap->data, &x.rk, sizeof(x.rk));
+ if (error != 0)
+ return (error);
+ data = &x.rk;
+ break;
default:
return (EINVAL);
}
- return (kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id),
- uap->com, data));
+ error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id),
+ uap->com, data);
+ switch (uap->com) {
+ case PROC_REAP_STATUS:
+ if (error == 0)
+ error = copyout(&x.rs, uap->data, sizeof(x.rs));
+ break;
+ case PROC_REAP_KILL:
+ error1 = copyout(&x.rk, uap->data, sizeof(x.rk));
+ if (error == 0)
+ error = error1;
+ break;
+ }
+ return (error);
}
int
diff --git a/sys/conf/files b/sys/conf/files
index 018d77b..939b635 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2987,6 +2987,7 @@ kern/kern_pmc.c standard
kern/kern_poll.c optional device_polling
kern/kern_priv.c standard
kern/kern_proc.c standard
+kern/kern_procctl.c standard
kern/kern_prot.c standard
kern/kern_racct.c standard
kern/kern_rangelock.c standard
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index e903f4c..beb49bc 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -496,7 +496,8 @@ proc0_init(void *dummy __unused)
prison0.pr_cpuset = cpuset_ref(td->td_cpuset);
p->p_peers = 0;
p->p_leader = p;
-
+ p->p_reaper = p;
+ LIST_INIT(&p->p_reaplist);
strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
strncpy(td->td_name, "swapper", sizeof (td->td_name));
@@ -821,8 +822,11 @@ create_init(const void *udata __unused)
KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
/* divorce init's credentials from the kernel's */
newcred = crget();
+ sx_xlock(&proctree_lock);
PROC_LOCK(initproc);
initproc->p_flag |= P_SYSTEM | P_INMEM;
+ initproc->p_treeflag |= P_TREE_REAPER;
+ LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling);
oldcred = initproc->p_ucred;
crcopy(newcred, oldcred);
#ifdef MAC
@@ -833,6 +837,7 @@ create_init(const void *udata __unused)
#endif
initproc->p_ucred = newcred;
PROC_UNLOCK(initproc);
+ sx_xunlock(&proctree_lock);
crfree(oldcred);
cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index a431309..ce1f8f9 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -123,6 +123,31 @@ proc_realparent(struct proc *child)
return (parent);
}
+void
+reaper_abandon_children(struct proc *p, bool exiting)
+{
+	struct proc *p1, *p2, *ptmp;
+	/*
+	 * Strip the reaper role from p.  Every process on p's reap list is
+	 * handed over to p's own reaper; when "exiting" is true, processes
+	 * whose current parent is p are additionally reparented to that
+	 * reaper.  Does nothing if p is not marked P_TREE_REAPER.  The
+	 * caller must hold proctree_lock, and p must not be initproc.
+	 */
+	sx_assert(&proctree_lock, SX_LOCKED);
+	KASSERT(p != initproc, ("reaper_abandon_children for initproc"));
+	if ((p->p_treeflag & P_TREE_REAPER) == 0)
+		return;
+	p1 = p->p_reaper;	/* the reaper that takes over p's list */
+	LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) {
+		LIST_REMOVE(p2, p_reapsibling);	/* _SAFE form: p2 is unlinked mid-walk */
+		p2->p_reaper = p1;
+		p2->p_reapsubtree = p->p_reapsubtree;	/* p2 joins p's subtree under p1 */
+		LIST_INSERT_HEAD(&p1->p_reaplist, p2, p_reapsibling);
+		if (exiting && p2->p_pptr == p) {
+			PROC_LOCK(p2);
+			proc_reparent(p2, p1);
+			PROC_UNLOCK(p2);
+		}
+	}
+	KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty"));
+	p->p_treeflag &= ~P_TREE_REAPER;
+}
+
static void
clear_orphan(struct proc *p)
{
@@ -458,14 +483,14 @@ exit1(struct thread *td, int rv)
sx_xlock(&proctree_lock);
q = LIST_FIRST(&p->p_children);
if (q != NULL) /* only need this if any child is S_ZOMB */
- wakeup(initproc);
+ wakeup(q->p_reaper);
for (; q != NULL; q = nq) {
nq = LIST_NEXT(q, p_sibling);
PROC_LOCK(q);
q->p_sigparent = SIGCHLD;
if (!(q->p_flag & P_TRACED)) {
- proc_reparent(q, initproc);
+ proc_reparent(q, q->p_reaper);
} else {
/*
* Traced processes are killed since their existence
@@ -473,7 +498,7 @@ exit1(struct thread *td, int rv)
*/
t = proc_realparent(q);
if (t == p) {
- proc_reparent(q, initproc);
+ proc_reparent(q, q->p_reaper);
} else {
PROC_LOCK(t);
proc_reparent(q, t);
@@ -562,7 +587,7 @@ exit1(struct thread *td, int rv)
mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
pp = p->p_pptr;
PROC_UNLOCK(pp);
- proc_reparent(p, initproc);
+ proc_reparent(p, p->p_reaper);
p->p_sigparent = SIGCHLD;
PROC_LOCK(p->p_pptr);
@@ -575,8 +600,8 @@ exit1(struct thread *td, int rv)
} else
mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
- if (p->p_pptr == initproc)
- kern_psignal(p->p_pptr, SIGCHLD);
+ if (p->p_pptr == p->p_reaper || p->p_pptr == initproc)
+ childproc_exited(p);
else if (p->p_sigparent != 0) {
if (p->p_sigparent == SIGCHLD)
childproc_exited(p);
@@ -849,6 +874,8 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options)
LIST_REMOVE(p, p_list); /* off zombproc */
sx_xunlock(&allproc_lock);
LIST_REMOVE(p, p_sibling);
+ reaper_abandon_children(p, true);
+ LIST_REMOVE(p, p_reapsibling);
PROC_LOCK(p);
clear_orphan(p);
PROC_UNLOCK(p);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index c529838..f469db6 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -261,11 +261,21 @@ retry:
* Scan the active and zombie procs to check whether this pid
* is in use. Remember the lowest pid that's greater
* than trypid, so we can avoid checking for a while.
+ *
+ * Avoid reuse of the process group id, session id or
+ * the reaper subtree id. Note that for process group
+ * and sessions, the amount of reserved pids is
+ * limited by process limit. For the subtree ids, the
+ * id is kept reserved only while there is a
+ * non-reaped process in the subtree, so amount of
+ * reserved pids is limited by process limit times
+ * two.
*/
p = LIST_FIRST(&allproc);
again:
for (; p != NULL; p = LIST_NEXT(p, p_list)) {
while (p->p_pid == trypid ||
+ p->p_reapsubtree == trypid ||
(p->p_pgrp != NULL &&
(p->p_pgrp->pg_id == trypid ||
(p->p_session != NULL &&
@@ -611,12 +621,20 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2,
* of init. This effectively disassociates the child from the
* parent.
*/
- if (flags & RFNOWAIT)
- pptr = initproc;
- else
+ if ((flags & RFNOWAIT) != 0) {
+ pptr = p1->p_reaper;
+ p2->p_reaper = pptr;
+ } else {
+ p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
+ p1 : p1->p_reaper;
pptr = p1;
+ }
p2->p_pptr = pptr;
LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
+ LIST_INIT(&p2->p_reaplist);
+ LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
+ if (p2->p_reaper == p1)
+ p2->p_reapsubtree = p2->p_pid;
sx_xunlock(&proctree_lock);
/* Inform accounting that we have forked. */
diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c
new file mode 100644
index 0000000..5ee2953
--- /dev/null
+++ b/sys/kern/kern_procctl.c
@@ -0,0 +1,460 @@
+/*-
+ * Copyright (c) 2014 John Baldwin
+ * Copyright (c) 2014 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/capsicum.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/procctl.h>
+#include <sys/sx.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+#include <sys/wait.h>
+
+/*
+ * Set or clear the protected state of a single process.
+ *
+ * The process must be locked by the caller.  System processes and
+ * processes for which p_cansched() fails are left alone.  PPROT_SET in
+ * "flags" marks the process P_PROTECTED (plus P2_INHERIT_PROTECTED when
+ * PPROT_INHERIT is also given); otherwise both marks are removed.
+ *
+ * Returns 1 if the process was updated and 0 if it was skipped.
+ */
+static int
+protect_setchild(struct thread *td, struct proc *p, int flags)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	if ((p->p_flag & P_SYSTEM) != 0)
+		return (0);
+	if (p_cansched(td, p) != 0)
+		return (0);
+	if ((flags & PPROT_SET) == 0) {
+		/* Clearing always drops the inheritance mark as well. */
+		p->p_flag &= ~P_PROTECTED;
+		p->p_flag2 &= ~P2_INHERIT_PROTECTED;
+		return (1);
+	}
+	p->p_flag |= P_PROTECTED;
+	if ((flags & PPROT_INHERIT) != 0)
+		p->p_flag2 |= P2_INHERIT_PROTECTED;
+	return (1);
+}
+
+static int
+protect_setchildren(struct thread *td, struct proc *top, int flags)
+{
+	struct proc *p;
+	int ret;
+	/*
+	 * Apply protect_setchild() to "top" and to every process reachable
+	 * from it through the p_children lists.  The caller passes "top"
+	 * locked (protect_setchild() asserts it); each process is locked
+	 * only while it is being updated, and "top" is locked again before
+	 * returning.  The result is the OR of the per-process results, i.e.
+	 * non-zero if at least one process was changed.
+	 */
+	p = top;
+	ret = 0;
+	sx_assert(&proctree_lock, SX_LOCKED);
+	for (;;) {
+		ret |= protect_setchild(td, p, flags);
+		PROC_UNLOCK(p);
+		/*
+		 * If this process has children, descend to them next,
+		 * otherwise do any siblings, and if done with this level,
+		 * follow back up the tree (but not past top).
+		 */
+		if (!LIST_EMPTY(&p->p_children))
+			p = LIST_FIRST(&p->p_children);
+		else for (;;) {
+			if (p == top) {
+				PROC_LOCK(p);	/* hand "top" back locked, as received */
+				return (ret);
+			}
+			if (LIST_NEXT(p, p_sibling)) {
+				p = LIST_NEXT(p, p_sibling);
+				break;
+			}
+			p = p->p_pptr;	/* level exhausted: climb one step */
+		}
+		PROC_LOCK(p);	/* lock the next process to visit */
+	}
+}
+
+/*
+ * PROC_SPROTECT handler: validate "flags" and apply the protection
+ * change to p, or to p and its descendants when PPROT_DESCEND is set.
+ *
+ * Returns EINVAL for an unknown operation or modifier bit, the
+ * priv_check() error if the caller lacks PRIV_VM_MADV_PROTECT, EPERM if
+ * no process could be updated, and 0 otherwise.
+ */
+static int
+protect_set(struct thread *td, struct proc *p, int flags)
+{
+	int changed, op, rc;
+
+	/* Only the set and clear operations are defined. */
+	op = PPROT_OP(flags);
+	if (op != PPROT_SET && op != PPROT_CLEAR)
+		return (EINVAL);
+
+	/* Reject modifier bits other than DESCEND and INHERIT. */
+	if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
+		return (EINVAL);
+
+	rc = priv_check(td, PRIV_VM_MADV_PROTECT);
+	if (rc != 0)
+		return (rc);
+
+	changed = (flags & PPROT_DESCEND) != 0 ?
+	    protect_setchildren(td, p, flags) :
+	    protect_setchild(td, p, flags);
+	return (changed == 0 ? EPERM : 0);
+}
+
+/*
+ * PROC_REAP_ACQUIRE handler: mark the calling process as a reaper.
+ *
+ * Requires proctree_lock to be held exclusively.  Returns EPERM when p
+ * is not the current process, EBUSY when p is already a reaper, and 0
+ * on success.
+ */
+static int
+reap_acquire(struct thread *td, struct proc *p)
+{
+	int error;
+
+	sx_assert(&proctree_lock, SX_XLOCKED);
+	if (p != curproc) {
+		error = EPERM;
+	} else if ((p->p_treeflag & P_TREE_REAPER) != 0) {
+		error = EBUSY;
+	} else {
+		/*
+		 * Existing children and the trees below them are not
+		 * reattached to us: they stay with p->p_reaper, which
+		 * has already seen them.
+		 */
+		p->p_treeflag |= P_TREE_REAPER;
+		error = 0;
+	}
+	return (error);
+}
+
+/*
+ * PROC_REAP_RELEASE handler: give up the reaper role of the calling
+ * process and pass its reap list on via reaper_abandon_children().
+ *
+ * Requires proctree_lock to be held exclusively.  Returns EPERM when p
+ * is not the current process, EINVAL when p is initproc or is not a
+ * reaper, and 0 on success.
+ */
+static int
+reap_release(struct thread *td, struct proc *p)
+{
+
+	sx_assert(&proctree_lock, SX_XLOCKED);
+	if (p != curproc)
+		return (EPERM);
+	if (p == initproc || (p->p_treeflag & P_TREE_REAPER) == 0)
+		return (EINVAL);
+	reaper_abandon_children(p, false);
+	return (0);
+}
+
+static int
+reap_status(struct thread *td, struct proc *p,
+    struct procctl_reaper_status *rs)
+{
+	struct proc *reap, *p2;
+	/*
+	 * PROC_REAP_STATUS handler: describe the reaper responsible for p
+	 * in *rs.  If p is itself a reaper it is the one reported and
+	 * REAPER_STATUS_OWNED is set; otherwise p->p_reaper is reported.
+	 * Requires proctree_lock; always returns 0.
+	 */
+	sx_assert(&proctree_lock, SX_LOCKED);
+	bzero(rs, sizeof(*rs));	/* also clears the rs_pad0 area */
+	if ((p->p_treeflag & P_TREE_REAPER) == 0) {
+		reap = p->p_reaper;
+	} else {
+		reap = p;
+		rs->rs_flags |= REAPER_STATUS_OWNED;
+	}
+	if (reap == initproc)
+		rs->rs_flags |= REAPER_STATUS_REALINIT;
+	rs->rs_reaper = reap->p_pid;
+	rs->rs_descendants = 0;
+	rs->rs_children = 0;
+	if (!LIST_EMPTY(&reap->p_reaplist)) {
+		KASSERT(!LIST_EMPTY(&reap->p_children), ("no children"));
+		rs->rs_pid = LIST_FIRST(&reap->p_children)->p_pid;	/* pid of one child */
+		LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
+			if (proc_realparent(p2) == reap)
+				rs->rs_children++;	/* direct child of the reaper */
+			rs->rs_descendants++;	/* every reap list entry counts */
+		}
+	} else {
+		rs->rs_pid = -1;	/* nothing to report */
+		KASSERT(LIST_EMPTY(&reap->p_reaplist), ("reap children list"));
+		KASSERT(LIST_EMPTY(&reap->p_children), ("children list"));
+	}
+	return (0);
+}
+
+static int
+reap_getpids(struct thread *td, struct proc *p, struct procctl_reaper_pids *rp)
+{
+	struct proc *reap, *p2;
+	struct procctl_reaper_pidinfo *pi, *pip;
+	u_int i, n;
+	int error;
+	/*
+	 * PROC_REAP_GETPIDS handler: copy out one procctl_reaper_pidinfo
+	 * record per process on the reap list of p's reaper (or of p
+	 * itself if it is a reaper), limited to rp->rp_count records.
+	 * Entered with proctree_lock held and p locked; both are dropped
+	 * along the way and both are held again on return (proctree_lock
+	 * in shared mode).  Returns the copyout() result.
+	 */
+	sx_assert(&proctree_lock, SX_LOCKED);
+	PROC_UNLOCK(p);
+	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
+	n = i = 0;
+	error = 0;
+	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
+		n++;	/* first pass: size the temporary buffer */
+	sx_unlock(&proctree_lock);	/* presumably because M_WAITOK may sleep */
+	if (rp->rp_count < n)
+		n = rp->rp_count;	/* never exceed the caller's array */
+	pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
+	sx_slock(&proctree_lock);
+	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
+		if (i == n)	/* the list was walked unlocked in between */
+			break;
+		pip = &pi[i];
+		bzero(pip, sizeof(*pip));	/* padding is zeroed before copyout */
+		pip->pi_pid = p2->p_pid;
+		pip->pi_subtree = p2->p_reapsubtree;
+		pip->pi_flags = REAPER_PIDINFO_VALID;
+		if (proc_realparent(p2) == reap)
+			pip->pi_flags |= REAPER_PIDINFO_CHILD;
+		i++;
+	}
+	sx_sunlock(&proctree_lock);
+	error = copyout(pi, rp->rp_pids, i * sizeof(*pi));	/* only the i filled records */
+	free(pi, M_TEMP);
+	sx_slock(&proctree_lock);	/* restore the locks the caller expects */
+	PROC_LOCK(p);
+	return (error);
+}
+
+/*
+ * PROC_REAP_KILL handler: send signal rk->rk_sig to descendants of the
+ * reaper of p (or of p itself if it is a reaper).
+ *
+ * rk->rk_flags selects the targets:
+ *  - REAPER_KILL_CHILDREN: walk only the reaper's direct children;
+ *  - REAPER_KILL_SUBTREE: skip processes whose p_reapsubtree differs
+ *    from rk->rk_subtree;
+ *  - neither: every process on the reaper's reap list.
+ *
+ * On return rk->rk_killed holds the number of processes signalled.
+ * rk->rk_fpid holds the pid of the first process that failed the
+ * p_cansignal() check before any signal had been delivered, or -1.
+ *
+ * Returns 0 if at least one signal was delivered, ECAPMODE in
+ * capability mode, EINVAL for a bad signal number or unknown flags,
+ * ESRCH if no process was selected, otherwise the p_cansignal() error.
+ *
+ * Called with proctree_lock held and p locked; p is locked on every
+ * return path, because the caller unlocks it afterwards.
+ */
+static int
+reap_kill(struct thread *td, struct proc *p, struct procctl_reaper_kill *rk)
+{
+	struct proc *reap, *p2;
+	ksiginfo_t ksi;
+	int error, error1;
+
+	sx_assert(&proctree_lock, SX_LOCKED);
+	/*
+	 * Validate the request while p is still locked, so that the error
+	 * returns below hand p back in the state the caller expects.
+	 */
+	if (IN_CAPABILITY_MODE(td))
+		return (ECAPMODE);
+	if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG)
+		return (EINVAL);
+	if ((rk->rk_flags & ~(REAPER_KILL_CHILDREN |
+	    REAPER_KILL_SUBTREE)) != 0)
+		return (EINVAL);
+	PROC_UNLOCK(p);
+	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
+	ksiginfo_init(&ksi);
+	ksi.ksi_signo = rk->rk_sig;
+	ksi.ksi_code = SI_USER;
+	ksi.ksi_pid = td->td_proc->p_pid;
+	ksi.ksi_uid = td->td_ucred->cr_ruid;
+	error = ESRCH;
+	rk->rk_killed = 0;
+	rk->rk_fpid = -1;
+	/* Walk either the direct children or the whole reap list. */
+	for (p2 = (rk->rk_flags & REAPER_KILL_CHILDREN) != 0 ?
+	    LIST_FIRST(&reap->p_children) : LIST_FIRST(&reap->p_reaplist);
+	    p2 != NULL;
+	    p2 = (rk->rk_flags & REAPER_KILL_CHILDREN) != 0 ?
+	    LIST_NEXT(p2, p_sibling) : LIST_NEXT(p2, p_reapsibling)) {
+		if ((rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
+		    p2->p_reapsubtree != rk->rk_subtree)
+			continue;
+		PROC_LOCK(p2);
+		error1 = p_cansignal(td, p2, rk->rk_sig);
+		if (error1 == 0) {
+			pksignal(p2, rk->rk_sig, &ksi);
+			rk->rk_killed++;
+			error = error1;
+		} else if (error == ESRCH) {
+			error = error1;
+			rk->rk_fpid = p2->p_pid;
+		}
+		PROC_UNLOCK(p2);
+		/* Do not end the loop on error, signal everything we can. */
+	}
+	PROC_LOCK(p);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct procctl_args {	/* procctl(2) arguments as received by sys_procctl() */
+	idtype_t idtype;	/* how "id" is interpreted; P_PID and P_PGID are handled */
+	id_t	id;		/* process or process group identifier */
+	int	com;		/* PROC_* command */
+	void	*data;		/* user pointer to the command's argument, or NULL */
+};
+#endif
+/*
+ * procctl(2) system call entry point.
+ *
+ * Copies the command argument in from user space where the command
+ * takes input, runs the request through kern_procctl(), and copies
+ * results back out:
+ *  - PROC_SPROTECT: "data" points to an int of PPROT_* flags (in).
+ *  - PROC_REAP_ACQUIRE, PROC_REAP_RELEASE: "data" must be NULL.
+ *  - PROC_REAP_STATUS: struct procctl_reaper_status (out, success only).
+ *  - PROC_REAP_GETPIDS: struct procctl_reaper_pids (in).
+ *  - PROC_REAP_KILL: struct procctl_reaper_kill (in and out; copied out
+ *    whatever the result, so rk_killed and rk_fpid reach the caller).
+ *
+ * Returns 0 or an errno value; unknown commands yield EINVAL.
+ */
+/* ARGSUSED */
+int
+sys_procctl(struct thread *td, struct procctl_args *uap)
+{
+	void *data;
+	union {
+		struct procctl_reaper_status rs;
+		struct procctl_reaper_pids rp;
+		struct procctl_reaper_kill rk;
+	} x;
+	int error, error1, flags;
+
+	switch (uap->com) {
+	case PROC_SPROTECT:
+		error = copyin(uap->data, &flags, sizeof(flags));
+		if (error != 0)
+			return (error);
+		data = &flags;
+		break;
+	case PROC_REAP_ACQUIRE:
+	case PROC_REAP_RELEASE:
+		if (uap->data != NULL)
+			return (EINVAL);
+		data = NULL;
+		break;
+	case PROC_REAP_STATUS:
+		data = &x.rs;
+		break;
+	case PROC_REAP_GETPIDS:
+		error = copyin(uap->data, &x.rp, sizeof(x.rp));
+		if (error != 0)
+			return (error);
+		data = &x.rp;
+		break;
+	case PROC_REAP_KILL:
+		error = copyin(uap->data, &x.rk, sizeof(x.rk));
+		if (error != 0)
+			return (error);
+		data = &x.rk;
+		break;
+	default:
+		return (EINVAL);
+	}
+	error = kern_procctl(td, uap->idtype, uap->id, uap->com, data);
+	switch (uap->com) {
+	case PROC_REAP_STATUS:
+		/*
+		 * x.rs is filled in only by a successful call.  Stop here:
+		 * falling into the PROC_REAP_KILL copyout would hand the
+		 * uninitialized union to user space on failure.
+		 */
+		if (error == 0)
+			error = copyout(&x.rs, uap->data, sizeof(x.rs));
+		break;
+	case PROC_REAP_KILL:
+		/* Report the counters even when the request failed. */
+		error1 = copyout(&x.rk, uap->data, sizeof(x.rk));
+		if (error == 0)
+			error = error1;
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Dispatch procctl command "com" for one locked process to its handler.
+ * "data" is the kernel copy of the command argument prepared by the
+ * system call layer; its type depends on the command.
+ *
+ * Returns the handler's result, or EINVAL for an unknown command.
+ */
+static int
+kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
+{
+	int error;
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	switch (com) {
+	case PROC_SPROTECT:
+		error = protect_set(td, p, *(int *)data);
+		break;
+	case PROC_REAP_ACQUIRE:
+		error = reap_acquire(td, p);
+		break;
+	case PROC_REAP_RELEASE:
+		error = reap_release(td, p);
+		break;
+	case PROC_REAP_STATUS:
+		error = reap_status(td, p, data);
+		break;
+	case PROC_REAP_GETPIDS:
+		error = reap_getpids(td, p, data);
+		break;
+	case PROC_REAP_KILL:
+		error = reap_kill(td, p, data);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
+int
+kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
+{
+	struct pgrp *pg;
+	struct proc *p;
+	int error, first_error, ok;
+	/*
+	 * Run procctl command "com" on the process (P_PID) or on every
+	 * visible member of the process group (P_PGID) named by "id".
+	 * The reaper commands accept only P_PID.  proctree_lock is held
+	 * across the operation: exclusively for the commands that change
+	 * reaper state, shared for the rest.  Returns 0 or an errno value.
+	 */
+	switch (com) {	/* no default: PROC_SPROTECT takes any idtype */
+	case PROC_REAP_ACQUIRE:
+	case PROC_REAP_RELEASE:
+	case PROC_REAP_STATUS:
+	case PROC_REAP_GETPIDS:
+	case PROC_REAP_KILL:
+		if (idtype != P_PID)
+			return (EINVAL);
+	}
+
+	switch (com) {
+	case PROC_SPROTECT:
+	case PROC_REAP_STATUS:
+	case PROC_REAP_GETPIDS:
+	case PROC_REAP_KILL:
+		sx_slock(&proctree_lock);
+		break;
+	case PROC_REAP_ACQUIRE:
+	case PROC_REAP_RELEASE:
+		sx_xlock(&proctree_lock);	/* these handlers assert SX_XLOCKED */
+		break;
+	default:
+		return (EINVAL);	/* unknown command; nothing locked yet */
+	}
+
+	switch (idtype) {
+	case P_PID:
+		p = pfind(id);	/* p is unlocked below, so it comes back locked */
+		if (p == NULL) {
+			error = ESRCH;
+			break;
+		}
+		error = p_cansee(td, p);
+		if (error == 0)
+			error = kern_procctl_single(td, p, com, data);
+		PROC_UNLOCK(p);
+		break;
+	case P_PGID:
+		/*
+		 * Attempt to apply the operation to all members of the
+		 * group.  Ignore processes in the group that can't be
+		 * seen.  Ignore errors so long as at least one process is
+		 * able to complete the request successfully.
+		 */
+		pg = pgfind(id);
+		if (pg == NULL) {
+			error = ESRCH;
+			break;
+		}
+		PGRP_UNLOCK(pg);
+		ok = 0;
+		first_error = 0;
+		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
+			PROC_LOCK(p);
+			if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) {
+				PROC_UNLOCK(p);
+				continue;
+			}
+			error = kern_procctl_single(td, p, com, data);
+			PROC_UNLOCK(p);
+			if (error == 0)
+				ok = 1;
+			else if (first_error == 0)
+				first_error = error;	/* reported only if nothing succeeds */
+		}
+		if (ok)
+			error = 0;
+		else if (first_error != 0)
+			error = first_error;
+		else
+			/*
+			 * Was not able to see any processes in the
+			 * process group.
+			 */
+			error = ESRCH;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	sx_unlock(&proctree_lock);	/* taken shared or exclusive above */
+	return (error);
+}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 3105d94..7dd3d17 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -43,7 +43,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sysproto.h>
#include <sys/priv.h>
#include <sys/proc.h>
-#include <sys/procctl.h>
#include <sys/vnode.h>
#include <sys/ptrace.h>
#include <sys/rwlock.h>
@@ -1234,193 +1233,3 @@ stopevent(struct proc *p, unsigned int event, unsigned int val)
msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0);
} while (p->p_step);
}
-
-static int
-protect_setchild(struct thread *td, struct proc *p, int flags)
-{
-
- PROC_LOCK_ASSERT(p, MA_OWNED);
- if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
- return (0);
- if (flags & PPROT_SET) {
- p->p_flag |= P_PROTECTED;
- if (flags & PPROT_INHERIT)
- p->p_flag2 |= P2_INHERIT_PROTECTED;
- } else {
- p->p_flag &= ~P_PROTECTED;
- p->p_flag2 &= ~P2_INHERIT_PROTECTED;
- }
- return (1);
-}
-
-static int
-protect_setchildren(struct thread *td, struct proc *top, int flags)
-{
- struct proc *p;
- int ret;
-
- p = top;
- ret = 0;
- sx_assert(&proctree_lock, SX_LOCKED);
- for (;;) {
- ret |= protect_setchild(td, p, flags);
- PROC_UNLOCK(p);
- /*
- * If this process has children, descend to them next,
- * otherwise do any siblings, and if done with this level,
- * follow back up the tree (but not past top).
- */
- if (!LIST_EMPTY(&p->p_children))
- p = LIST_FIRST(&p->p_children);
- else for (;;) {
- if (p == top) {
- PROC_LOCK(p);
- return (ret);
- }
- if (LIST_NEXT(p, p_sibling)) {
- p = LIST_NEXT(p, p_sibling);
- break;
- }
- p = p->p_pptr;
- }
- PROC_LOCK(p);
- }
-}
-
-static int
-protect_set(struct thread *td, struct proc *p, int flags)
-{
- int error, ret;
-
- switch (PPROT_OP(flags)) {
- case PPROT_SET:
- case PPROT_CLEAR:
- break;
- default:
- return (EINVAL);
- }
-
- if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
- return (EINVAL);
-
- error = priv_check(td, PRIV_VM_MADV_PROTECT);
- if (error)
- return (error);
-
- if (flags & PPROT_DESCEND)
- ret = protect_setchildren(td, p, flags);
- else
- ret = protect_setchild(td, p, flags);
- if (ret == 0)
- return (EPERM);
- return (0);
-}
-
-#ifndef _SYS_SYSPROTO_H_
-struct procctl_args {
- idtype_t idtype;
- id_t id;
- int com;
- void *data;
-};
-#endif
-/* ARGSUSED */
-int
-sys_procctl(struct thread *td, struct procctl_args *uap)
-{
- int error, flags;
- void *data;
-
- switch (uap->com) {
- case PROC_SPROTECT:
- error = copyin(uap->data, &flags, sizeof(flags));
- if (error)
- return (error);
- data = &flags;
- break;
- default:
- return (EINVAL);
- }
-
- return (kern_procctl(td, uap->idtype, uap->id, uap->com, data));
-}
-
-static int
-kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
-{
-
- PROC_LOCK_ASSERT(p, MA_OWNED);
- switch (com) {
- case PROC_SPROTECT:
- return (protect_set(td, p, *(int *)data));
- default:
- return (EINVAL);
- }
-}
-
-int
-kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
-{
- struct pgrp *pg;
- struct proc *p;
- int error, first_error, ok;
-
- sx_slock(&proctree_lock);
- switch (idtype) {
- case P_PID:
- p = pfind(id);
- if (p == NULL) {
- error = ESRCH;
- break;
- }
- error = p_cansee(td, p);
- if (error == 0)
- error = kern_procctl_single(td, p, com, data);
- PROC_UNLOCK(p);
- break;
- case P_PGID:
- /*
- * Attempt to apply the operation to all members of the
- * group. Ignore processes in the group that can't be
- * seen. Ignore errors so long as at least one process is
- * able to complete the request successfully.
- */
- pg = pgfind(id);
- if (pg == NULL) {
- error = ESRCH;
- break;
- }
- PGRP_UNLOCK(pg);
- ok = 0;
- first_error = 0;
- LIST_FOREACH(p, &pg->pg_members, p_pglist) {
- PROC_LOCK(p);
- if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) {
- PROC_UNLOCK(p);
- continue;
- }
- error = kern_procctl_single(td, p, com, data);
- PROC_UNLOCK(p);
- if (error == 0)
- ok = 1;
- else if (first_error == 0)
- first_error = error;
- }
- if (ok)
- error = 0;
- else if (first_error != 0)
- error = first_error;
- else
- /*
- * Was not able to see any processes in the
- * process group.
- */
- error = ESRCH;
- break;
- default:
- error = EINVAL;
- break;
- }
- sx_sunlock(&proctree_lock);
- return (error);
-}
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 6590394..d7a45e9 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -513,6 +513,11 @@ struct proc {
struct proc *p_pptr; /* (c + e) Pointer to parent process. */
LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */
LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */
+ struct proc *p_reaper; /* (e) My reaper. */
+ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants
+ (if I am reaper). */
+ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of
+ the same reaper. */
struct mtx p_mtx; /* (n) Lock for this struct. */
struct mtx p_statmtx; /* Lock for the stats */
struct mtx p_itimmtx; /* Lock for the virt/prof timers */
@@ -570,6 +575,9 @@ struct proc {
rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */
signed char p_nice; /* (c) Process "nice" value. */
int p_fibnum; /* in this routing domain XXX MRT */
+ pid_t p_reapsubtree; /* (e) Pid of the direct child of the
+ reaper which spawned
+ our subtree. */
/* End area that is copied on creation. */
#define p_endcopy p_xstat
@@ -671,6 +679,7 @@ struct proc {
#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
#define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan
list */
+#define P_TREE_REAPER 0x00000004 /* Reaper of subtree */
/*
* These were process status values (p_stat), now they are only used in
@@ -920,6 +929,7 @@ void proc_reparent(struct proc *child, struct proc *newparent);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
+void reaper_abandon_children(struct proc *p, bool exiting);
int securelevel_ge(struct ucred *cr, int level);
int securelevel_gt(struct ucred *cr, int level);
void sess_hold(struct session *);
diff --git a/sys/sys/procctl.h b/sys/sys/procctl.h
index ff577c0..d11b2b2 100644
--- a/sys/sys/procctl.h
+++ b/sys/sys/procctl.h
@@ -30,7 +30,17 @@
#ifndef _SYS_PROCCTL_H_
#define _SYS_PROCCTL_H_
+#ifndef _KERNEL
+#include <sys/types.h>
+#include <sys/wait.h>
+#endif
+
#define PROC_SPROTECT 1 /* set protected state */
+#define PROC_REAP_ACQUIRE 2 /* reaping enable */
+#define PROC_REAP_RELEASE 3 /* reaping disable */
+#define PROC_REAP_STATUS 4 /* reaping status */
+#define PROC_REAP_GETPIDS 5 /* get descendants */
+#define PROC_REAP_KILL 6 /* kill descendants */
/* Operations for PROC_SPROTECT (passed in integer arg). */
#define PPROT_OP(x) ((x) & 0xf)
@@ -42,10 +52,51 @@
#define PPROT_DESCEND 0x10
#define PPROT_INHERIT 0x20
-#ifndef _KERNEL
-#include <sys/types.h>
-#include <sys/wait.h>
+/* Result of PROC_REAP_STATUS (returned by value). */
+struct procctl_reaper_status {
+ u_int rs_flags;	/* REAPER_STATUS_* flags */
+ u_int rs_children;	/* number of children of the reaper */
+ u_int rs_descendants;	/* number of processes on the reaper's list */
+ pid_t rs_reaper;	/* pid of the reaper */
+ pid_t rs_pid;	/* pid of one child of the reaper, or -1 */
+ u_int rs_pad0[15];	/* padding; presumably reserved for growth - TODO confirm */
+};
+/* struct procctl_reaper_status rs_flags */
+#define REAPER_STATUS_OWNED 0x00000001
+#define REAPER_STATUS_REALINIT 0x00000002
+
+struct procctl_reaper_pidinfo {	/* one PROC_REAP_GETPIDS result record */
+ pid_t pi_pid;	/* pid of the descendant */
+ pid_t pi_subtree;	/* pid of the reaper's child heading its subtree */
+ u_int pi_flags;	/* REAPER_PIDINFO_* flags */
+ u_int pi_pad0[15];	/* padding; presumably reserved for growth - TODO confirm */
+};
+
+#define REAPER_PIDINFO_VALID 0x00000001
+#define REAPER_PIDINFO_CHILD 0x00000002
+
+struct procctl_reaper_pids {	/* argument of PROC_REAP_GETPIDS */
+ u_int rp_count;	/* in - number of elements in the rp_pids array */
+ u_int rp_pad0[15];	/* padding; presumably reserved for growth - TODO confirm */
+ struct procctl_reaper_pidinfo *rp_pids;	/* in - user array filled by the kernel */
+};
+
+struct procctl_reaper_kill {	/* argument and result of PROC_REAP_KILL */
+ int rk_sig; /* in - signal to send */
+ u_int rk_flags; /* in - REAPER_KILL flags */
+ pid_t rk_subtree; /* in - subtree, if REAPER_KILL_SUBTREE */
+ u_int rk_killed; /* out - count of processes successfully
+ killed */
+ pid_t rk_fpid; /* out - first failed pid for which error
+ is returned */
+ u_int rk_pad0[15];	/* padding; presumably reserved for growth - TODO confirm */
+};
+
+#define REAPER_KILL_CHILDREN 0x00000001
+#define REAPER_KILL_SUBTREE 0x00000002
+
+#ifndef _KERNEL
__BEGIN_DECLS
int procctl(idtype_t, id_t, int, void *);
__END_DECLS
OpenPOWER on IntegriCloud