diff options
author | jhb <jhb@FreeBSD.org> | 2013-09-19 18:53:42 +0000 |
---|---|---|
committer | jhb <jhb@FreeBSD.org> | 2013-09-19 18:53:42 +0000 |
commit | d3ef75b6c79e9e6d642efa9c32a96524d7a5a5b7 (patch) | |
tree | 56c7a97f082d488a8b917c2d2d52956076373329 /sys | |
parent | 8ecfe4666e7be9cea4cf6c3ef929de2e8d286f41 (diff) | |
download | FreeBSD-src-d3ef75b6c79e9e6d642efa9c32a96524d7a5a5b7.zip FreeBSD-src-d3ef75b6c79e9e6d642efa9c32a96524d7a5a5b7.tar.gz |
Extend the support for exempting processes from being killed when swap is
exhausted.
- Add a new protect(1) command that can be used to set or revoke protection
from arbitrary processes. Similar to ktrace it can apply a change to all
existing descendants of a process as well as future descendants.
- Add a new procctl(2) system call that provides a generic interface for
control operations on processes (as opposed to the debugger-specific
operations provided by ptrace(2)). procctl(2) uses a combination of
idtype_t and an id to identify the set of processes on which to operate
similar to wait6().
- Add a PROC_SPROTECT control operation to manage the protection status
of a set of processes. MADV_PROTECT still works for backwards
compatability.
- Add a p_flag2 to struct proc (and a corresponding ki_flag2 to kinfo_proc)
the first bit of which is used to track if P_PROTECT should be inherited
by new child processes.
Reviewed by: kib, jilles (earlier version)
Approved by: re (delphij)
MFC after: 1 month
Diffstat (limited to 'sys')
-rw-r--r-- | sys/compat/freebsd32/freebsd32.h | 1 | ||||
-rw-r--r-- | sys/compat/freebsd32/freebsd32_misc.c | 21 | ||||
-rw-r--r-- | sys/compat/freebsd32/syscalls.master | 9 | ||||
-rw-r--r-- | sys/kern/init_main.c | 1 | ||||
-rw-r--r-- | sys/kern/kern_fork.c | 6 | ||||
-rw-r--r-- | sys/kern/kern_proc.c | 2 | ||||
-rw-r--r-- | sys/kern/sys_process.c | 195 | ||||
-rw-r--r-- | sys/kern/syscalls.master | 2 | ||||
-rw-r--r-- | sys/sys/proc.h | 8 | ||||
-rw-r--r-- | sys/sys/procctl.h | 55 | ||||
-rw-r--r-- | sys/sys/syscallsubr.h | 2 | ||||
-rw-r--r-- | sys/sys/user.h | 3 | ||||
-rw-r--r-- | sys/vm/vm_mmap.c | 17 |
13 files changed, 307 insertions, 15 deletions
diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h index aa08432..e6644f6 100644 --- a/sys/compat/freebsd32/freebsd32.h +++ b/sys/compat/freebsd32/freebsd32.h @@ -342,6 +342,7 @@ struct kinfo_proc32 { char ki_loginclass[LOGINCLASSLEN+1]; char ki_sparestrings[50]; int ki_spareints[KI_NSPARE_INT]; + int ki_flag2; int ki_fibnum; u_int ki_cr_flags; int ki_jid; diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index dbdafeb..deb03f3 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/namei.h> #include <sys/proc.h> +#include <sys/procctl.h> #include <sys/reboot.h> #include <sys/resource.h> #include <sys/resourcevar.h> @@ -3000,3 +3001,23 @@ convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) } return (0); } + +int +freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) +{ + void *data; + int error, flags; + + switch (uap->com) { + case PROC_SPROTECT: + error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); + if (error) + return (error); + data = &flags; + break; + default: + return (EINVAL); + } + return (kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), + uap->com, data)); +} diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index c52256a..90c3e75 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -1056,3 +1056,12 @@ 542 AUE_PIPE NOPROTO { int pipe2(int *fildes, int flags); } 543 AUE_NULL NOSTD { int freebsd32_aio_mlock( \ struct aiocb32 *aiocbp); } +#ifdef PAD64_REQUIRED +544 AUE_NULL STD { int freebsd32_procctl(int idtype, int pad, \ + uint32_t id1, uint32_t id2, int com, \ + void *data); } +#else +544 AUE_NULL STD { int freebsd32_procctl(int idtype, \ + uint32_t id1, uint32_t id2, int com, \ + void *data); } +#endif diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 0828e48..ed343b6 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -474,6 +474,7 @@ proc0_init(void *dummy __unused) p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM; + p->p_flag2 = 0; p->p_state = PRS_NORMAL; knlist_init_mtx(&p->p_klist, &p->p_mtx); STAILQ_INIT(&p->p_ktr); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 9cd1da9..de7f09f 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -489,6 +489,7 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, * Increase reference counts on shared objects. */ p2->p_flag = P_INMEM; + p2->p_flag2 = 0; p2->p_swtick = ticks; if (p1->p_flag & P_PROFIL) startprofclock(p2); @@ -512,6 +513,11 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, p2->p_fd = fd; p2->p_fdtol = fdtol; + if (p1->p_flag2 & P2_INHERIT_PROTECTED) { + p2->p_flag |= P_PROTECTED; + p2->p_flag2 |= P2_INHERIT_PROTECTED; + } + /* * p_limit is copy-on-write. Bump its refcount. */ diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 3fa7a7f..9968e76 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -802,6 +802,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) kp->ki_fd = p->p_fd; kp->ki_vmspace = p->p_vmspace; kp->ki_flag = p->p_flag; + kp->ki_flag2 = p->p_flag2; cred = p->p_ucred; if (cred) { kp->ki_uid = cred->cr_uid; @@ -1161,6 +1162,7 @@ freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32) bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1); bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1); bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1); + CP(*ki, *ki32, ki_flag2); CP(*ki, *ki32, ki_fibnum); CP(*ki, *ki32, ki_cr_flags); CP(*ki, *ki32, ki_jid); diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 5508dcf..821c779 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -41,7 +41,9 @@ __FBSDID("$FreeBSD$"); #include <sys/syscallsubr.h> #include <sys/sysent.h> #include <sys/sysproto.h> +#include <sys/priv.h> #include <sys/proc.h> +#include <sys/procctl.h> #include <sys/vnode.h> #include <sys/ptrace.h> #include <sys/rwlock.h> @@ -1240,3 +1242,196 @@ stopevent(struct proc *p, unsigned int event, unsigned int val) msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0); } while (p->p_step); } + +static int +protect_setchild(struct thread *td, struct proc *p, int flags) +{ + + PROC_LOCK_ASSERT(p, MA_OWNED); + if (p->p_flag & P_SYSTEM || p_cansee(td, p) != 0) + return (0); + if (flags & PPROT_SET) { + p->p_flag |= P_PROTECTED; + if (flags & PPROT_INHERIT) + p->p_flag2 |= P2_INHERIT_PROTECTED; + } else { + p->p_flag &= ~P_PROTECTED; + p->p_flag2 &= ~P2_INHERIT_PROTECTED; + } + return (1); +} + +static int +protect_setchildren(struct thread *td, struct proc *top, int flags) +{ + struct proc *p; + int ret; + + p = top; + ret = 0; + sx_assert(&proctree_lock, SX_LOCKED); + for (;;) { + ret |= protect_setchild(td, p, flags); + PROC_UNLOCK(p); + /* + * If this process has children, descend to them next, + * otherwise do any siblings, and if done with this level, + * follow back up the tree (but not past top). + */ + if (!LIST_EMPTY(&p->p_children)) + p = LIST_FIRST(&p->p_children); + else for (;;) { + if (p == top) { + PROC_LOCK(p); + return (ret); + } + if (LIST_NEXT(p, p_sibling)) { + p = LIST_NEXT(p, p_sibling); + break; + } + p = p->p_pptr; + } + PROC_LOCK(p); + } +} + +static int +protect_set(struct thread *td, struct proc *p, int flags) +{ + int error, ret; + + switch (PPROT_OP(flags)) { + case PPROT_SET: + case PPROT_CLEAR: + break; + default: + return (EINVAL); + } + + if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0) + return (EINVAL); + + error = priv_check(td, PRIV_VM_MADV_PROTECT); + if (error) + return (error); + + if (flags & PPROT_DESCEND) + ret = protect_setchildren(td, p, flags); + else + ret = protect_setchild(td, p, flags); + if (ret == 0) + return (EPERM); + return (0); +} + +#ifndef _SYS_SYSPROTO_H_ +struct procctl_args { + idtype_t idtype; + id_t id; + int com; + void *data; +}; +#endif +/* ARGSUSED */ +int +sys_procctl(struct thread *td, struct procctl_args *uap) +{ + int error, flags; + void *data; + + switch (uap->com) { + case PROC_SPROTECT: + error = copyin(uap->data, &flags, sizeof(flags)); + if (error) + return (error); + data = &flags; + break; + default: + return (EINVAL); + } + + return (kern_procctl(td, uap->idtype, uap->id, uap->com, data)); +} + +static int +kern_procctl_single(struct thread *td, struct proc *p, int com, void *data) +{ + + PROC_LOCK_ASSERT(p, MA_OWNED); + switch (com) { + case PROC_SPROTECT: + return (protect_set(td, p, *(int *)data)); + default: + return (EINVAL); + } +} + +int +kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data) +{ + struct pgrp *pg; + struct proc *p; + int error, first_error, ok; + + sx_slock(&proctree_lock); + switch (idtype) { + case P_PID: + p = pfind(id); + if (p == NULL) { + error = ESRCH; + break; + } + if (p->p_state == PRS_NEW) + error = ESRCH; + else + error = p_cansee(td, p); + if (error == 0) + error = kern_procctl_single(td, p, com, data); + PROC_UNLOCK(p); + break; + case P_PGID: + /* + * Attempt to apply the operation to all members of the + * group. Ignore processes in the group that can't be + * seen. Ignore errors so long as at least one process is + * able to complete the request successfully. + */ + pg = pgfind(id); + if (pg == NULL) { + error = ESRCH; + break; + } + PGRP_UNLOCK(pg); + ok = 0; + first_error = 0; + LIST_FOREACH(p, &pg->pg_members, p_pglist) { + PROC_LOCK(p); + if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) { + PROC_UNLOCK(p); + continue; + } + error = kern_procctl_single(td, p, com, data); + PROC_UNLOCK(p); + if (error == 0) + ok = 1; + else if (first_error == 0) + first_error = error; + } + if (ok) + error = 0; + else if (first_error != 0) + error = first_error; + else + /* + * Was not able to see any processes in the + * process group. + */ + error = ESRCH; + break; + default: + error = EINVAL; + break; + } + sx_sunlock(&proctree_lock); + return (error); +} diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 7df50ca..e959214 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -978,5 +978,7 @@ int flags); } 542 AUE_PIPE STD { int pipe2(int *fildes, int flags); } 543 AUE_NULL NOSTD { int aio_mlock(struct aiocb *aiocbp); } +544 AUE_NULL STD { int procctl(idtype_t idtype, id_t id, \ + int com, void *data); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 2f03152..5443b61 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -492,11 +492,8 @@ struct proc { struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ - /* - * The following don't make too much sense. - * See the td_ or ke_ versions of the same flags. - */ int p_flag; /* (c) P_* flags. */ + int p_flag2; /* (c) P2_* flags. */ enum { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ @@ -641,6 +638,9 @@ struct proc { #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) #define P_KILLED(p) ((p)->p_flag & P_WKILLED) +/* These flags are kept in p_flag2. */ +#define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */ + /* * These were process status values (p_stat), now they are only used in * legacy conversion code. diff --git a/sys/sys/procctl.h b/sys/sys/procctl.h new file mode 100644 index 0000000..ff577c0 --- /dev/null +++ b/sys/sys/procctl.h @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2013 Advanced Computing Technologies LLC + * Written by: John H. Baldwin <jhb@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_PROCCTL_H_ +#define _SYS_PROCCTL_H_ + +#define PROC_SPROTECT 1 /* set protected state */ + +/* Operations for PROC_SPROTECT (passed in integer arg). */ +#define PPROT_OP(x) ((x) & 0xf) +#define PPROT_SET 1 +#define PPROT_CLEAR 2 + +/* Flags for PROC_SPROTECT (ORed in with operation). */ +#define PPROT_FLAGS(x) ((x) & ~0xf) +#define PPROT_DESCEND 0x10 +#define PPROT_INHERIT 0x20 + +#ifndef _KERNEL +#include <sys/types.h> +#include <sys/wait.h> + +__BEGIN_DECLS +int procctl(idtype_t, id_t, int, void *); +__END_DECLS + +#endif + +#endif /* !_SYS_PROCCTL_H_ */ diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index 17f2b97..ed11b34 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -167,6 +167,8 @@ int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, + void *data); int kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits); diff --git a/sys/sys/user.h b/sys/sys/user.h index 349003d..d2e2b6e 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -84,7 +84,7 @@ * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and * function kvm_proclist in lib/libkvm/kvm_proc.c . */ -#define KI_NSPARE_INT 8 +#define KI_NSPARE_INT 7 #define KI_NSPARE_LONG 12 #define KI_NSPARE_PTR 6 @@ -187,6 +187,7 @@ struct kinfo_proc { */ char ki_sparestrings[50]; /* spare string space */ int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */ + int ki_flag2; /* P2_* flags */ int ki_fibnum; /* Default FIB number */ u_int ki_cr_flags; /* Credential flags */ int ki_jid; /* Process jail ID */ diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 0f2d531..17f8cad 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include <sys/filedesc.h> #include <sys/priv.h> #include <sys/proc.h> +#include <sys/procctl.h> #include <sys/racct.h> #include <sys/resource.h> #include <sys/resourcevar.h> @@ -68,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mount.h> #include <sys/conf.h> #include <sys/stat.h> +#include <sys/syscallsubr.h> #include <sys/sysent.h> #include <sys/vmmeter.h> @@ -739,23 +741,18 @@ sys_madvise(td, uap) { vm_offset_t start, end; vm_map_t map; - struct proc *p; - int error; + int flags; /* * Check for our special case, advising the swap pager we are * "immortal." */ if (uap->behav == MADV_PROTECT) { - error = priv_check(td, PRIV_VM_MADV_PROTECT); - if (error == 0) { - p = td->td_proc; - PROC_LOCK(p); - p->p_flag |= P_PROTECTED; - PROC_UNLOCK(p); - } - return (error); + flags = PPROT_SET; + return (kern_procctl(td, P_PID, td->td_proc->p_pid, + PROC_SPROTECT, &flags)); } + /* * Check for illegal behavior */ |