diff options
-rw-r--r-- | kern_racct.c | 842 | ||||
-rw-r--r-- | sys/amd64/conf/GENERIC | 2 | ||||
-rw-r--r-- | sys/conf/NOTES | 3 | ||||
-rw-r--r-- | sys/conf/files | 1 | ||||
-rw-r--r-- | sys/conf/options | 3 | ||||
-rw-r--r-- | sys/kern/init_main.c | 4 | ||||
-rw-r--r-- | sys/kern/kern_exit.c | 6 | ||||
-rw-r--r-- | sys/kern/kern_fork.c | 17 | ||||
-rw-r--r-- | sys/kern/kern_jail.c | 17 | ||||
-rw-r--r-- | sys/kern/kern_loginclass.c | 20 | ||||
-rw-r--r-- | sys/kern/kern_racct.c | 837 | ||||
-rw-r--r-- | sys/kern/kern_resource.c | 20 | ||||
-rw-r--r-- | sys/sys/jail.h | 7 | ||||
-rw-r--r-- | sys/sys/kernel.h | 2 | ||||
-rw-r--r-- | sys/sys/loginclass.h | 6 | ||||
-rw-r--r-- | sys/sys/proc.h | 3 | ||||
-rw-r--r-- | sys/sys/racct.h | 147 | ||||
-rw-r--r-- | sys/sys/resourcevar.h | 5 |
18 files changed, 1939 insertions, 3 deletions
diff --git a/kern_racct.c b/kern_racct.c new file mode 100644 index 0000000..229977a --- /dev/null +++ b/kern_racct.c @@ -0,0 +1,842 @@ +/*- + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_kdtrace.h" + +#include <sys/param.h> +#include <sys/eventhandler.h> +#include <sys/param.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/lock.h> +#include <sys/loginclass.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/racct.h> +#include <sys/resourcevar.h> +#include <sys/sbuf.h> +#include <sys/sched.h> +#include <sys/sdt.h> +#include <sys/sx.h> +#include <sys/sysent.h> +#include <sys/sysproto.h> +#include <sys/systm.h> +#include <sys/umtx.h> + +#ifdef RCTL +#include <sys/rctl.h> +#endif + +#ifdef RACCT + +FEATURE(racct, "Resource Accounting"); + +static struct mtx racct_lock; +MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); + +static uma_zone_t racct_zone; + +static void racct_sub_racct(struct racct *dest, const struct racct *src); +static void racct_sub_cred_locked(struct ucred *cred, int resource, + uint64_t amount); +static void racct_add_cred_locked(struct ucred *cred, int resource, + uint64_t amount); + +SDT_PROVIDER_DEFINE(racct); +SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int", + "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure, + "struct proc *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *", + "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *", + "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int", + "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure, + "struct proc *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int", + "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *", + "int", "uint64_t"); +SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct 
racct *"); +SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *"); +SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *", + "struct racct *"); +SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure, + "struct racct *", "struct racct *"); +SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *", + "struct racct *"); + +int racct_types[] = { + [RACCT_CPU] = + RACCT_IN_THOUSANDS, + [RACCT_FSIZE] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_DATA] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_STACK] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_CORE] = + RACCT_DENIABLE, + [RACCT_RSS] = + RACCT_RECLAIMABLE, + [RACCT_MEMLOCK] = + RACCT_RECLAIMABLE | RACCT_DENIABLE, + [RACCT_NPROC] = + RACCT_RECLAIMABLE | RACCT_DENIABLE, + [RACCT_NOFILE] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_SBSIZE] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_VMEM] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_NPTS] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_SWAP] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_NTHR] = + RACCT_RECLAIMABLE | RACCT_DENIABLE, + [RACCT_MSGQQUEUED] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_MSGQSIZE] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_NMSGQ] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_NSEM] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_NSEMOP] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_NSHM] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_SHMSIZE] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_WALLCLOCK] = + RACCT_IN_THOUSANDS }; + +static void +racct_add_racct(struct racct *dest, const struct racct *src) +{ + int i; + + mtx_assert(&racct_lock, MA_OWNED); + + /* + * Update resource usage in 
dest. + */ + for (i = 0; i <= RACCT_MAX; i++) { + KASSERT(dest->r_resources[i] >= 0, + ("racct propagation meltdown: dest < 0")); + KASSERT(src->r_resources[i] >= 0, + ("racct propagation meltdown: src < 0")); + dest->r_resources[i] += src->r_resources[i]; + } +} + +static void +racct_sub_racct(struct racct *dest, const struct racct *src) +{ + int i; + + mtx_assert(&racct_lock, MA_OWNED); + + /* + * Update resource usage in dest. + */ + for (i = 0; i <= RACCT_MAX; i++) { + if (!racct_is_sloppy(i) && + !racct_is_dampened(i)) { + KASSERT(dest->r_resources[i] >= 0, + ("racct propagation meltdown: dest < 0")); + KASSERT(src->r_resources[i] >= 0, + ("racct propagation meltdown: src < 0")); + KASSERT(src->r_resources[i] <= dest->r_resources[i], + ("racct propagation meltdown: src > dest")); + } + if (racct_is_reclaimable(i)) { + dest->r_resources[i] -= src->r_resources[i]; + if (dest->r_resources[i] < 0) { + KASSERT(racct_is_sloppy(i) || + racct_is_dampened(i), + ("racct_sub_racct: usage < 0")); + dest->r_resources[i] = 0; + } + } + } +} + +void +racct_create(struct racct **racctp) +{ + + SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0); + + KASSERT(*racctp == NULL, ("racct already allocated")); + + *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); +} + +static void +racct_destroy_locked(struct racct **racctp) +{ + int i; + struct racct *racct; + + SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0); + + mtx_assert(&racct_lock, MA_OWNED); + KASSERT(racctp != NULL, ("NULL racctp")); + KASSERT(*racctp != NULL, ("NULL racct")); + + racct = *racctp; + + for (i = 0; i <= RACCT_MAX; i++) { + if (racct_is_sloppy(i)) + continue; + if (!racct_is_reclaimable(i)) + continue; + if (racct_is_dampened(i)) + continue; + KASSERT(racct->r_resources[i] == 0, + ("destroying non-empty racct: " + "%ju allocated for resource %d\n", + racct->r_resources[i], i)); + } + uma_zfree(racct_zone, racct); + *racctp = NULL; +} + +void +racct_destroy(struct racct **racct) +{ + + 
mtx_lock(&racct_lock);
+	racct_destroy_locked(racct);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Adjust consumption of 'resource' by 'amount' in 'racct'.
+ *
+ * Differently from the other interfaces, 'amount' here is signed: callers
+ * pass a negative value to release resources.  Only this one container is
+ * updated; walking the credential's containers is left to the callers.
+ *
+ * If the result drops below zero it is clamped to zero; that is only
+ * expected for sloppy or dampened resources and is asserted otherwise.
+ * The caller must hold racct_lock.
+ */
+static void
+racct_alloc_resource(struct racct *racct, int resource,
+    int64_t amount)
+{
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racct != NULL, ("NULL racct"));
+
+	racct->r_resources[resource] += amount;
+	if (racct->r_resources[resource] < 0) {
+		KASSERT(racct_is_sloppy(resource) ||
+		    racct_is_dampened(resource),
+		    ("racct_alloc_resource: usage < 0"));
+		racct->r_resources[resource] = 0;
+	}
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for process 'p'.
+ * Return 0 if it's below limits, or errno, if it's not.
+ */
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+#ifdef RCTL
+	int error;
+#endif
+
+	if (p->p_flag & P_SYSTEM)
+		return (0);
+
+	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
+
+	/*
+	 * We need proc lock to dereference p->p_ucred.
+ */ + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(amount >= 0, ("racct_add: invalid amount for resource %d: %ju", + resource, amount)); + + mtx_lock(&racct_lock); +#ifdef RCTL + error = rctl_enforce(p, resource, amount); + if (error && racct_is_deniable(resource)) { + SDT_PROBE(racct, kernel, rusage, add_failure, p, resource, + amount, 0, 0); + mtx_unlock(&racct_lock); + return (error); + } +#endif + racct_alloc_resource(p->p_racct, resource, amount); + racct_add_cred_locked(p->p_ucred, resource, amount); + mtx_unlock(&racct_lock); + + return (0); +} + +static void +racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) +{ + struct prison *pr; + + SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount, + 0, 0); + + KASSERT(amount >= 0, + ("racct_add_cred: invalid amount for resource %d: %ju", + resource, amount)); + + racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount); + for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) + racct_alloc_resource(pr->pr_racct, resource, amount); + racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount); +} + +/* + * Increase allocation of 'resource' by 'amount' for credential 'cred'. + * Doesn't check for limits and never fails. + * + * XXX: Shouldn't this ever return an error? + */ +void +racct_add_cred(struct ucred *cred, int resource, uint64_t amount) +{ + + mtx_lock(&racct_lock); + racct_add_cred_locked(cred, resource, amount); + mtx_unlock(&racct_lock); +} + +/* + * Increase allocation of 'resource' by 'amount' for process 'p'. + * Doesn't check for limits and never fails. + */ +void +racct_add_force(struct proc *p, int resource, uint64_t amount) +{ + + if (p->p_flag & P_SYSTEM) + return; + + SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0); + + /* + * We need proc lock to dereference p->p_ucred. 
+ */ + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(amount >= 0, + ("racct_add_force: invalid amount for resource %d: %ju", + resource, amount)); + + mtx_lock(&racct_lock); + racct_alloc_resource(p->p_racct, resource, amount); + mtx_unlock(&racct_lock); + racct_add_cred(p->p_ucred, resource, amount); +} + +static int +racct_set_locked(struct proc *p, int resource, uint64_t amount) +{ + int64_t diff; +#ifdef RCTL + int error; +#endif + + if (p->p_flag & P_SYSTEM) + return (0); + + SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); + + /* + * We need proc lock to dereference p->p_ucred. + */ + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(amount >= 0, ("racct_set: invalid amount for resource %d: %ju", + resource, amount)); + + diff = amount - p->p_racct->r_resources[resource]; +#ifdef notyet + KASSERT(diff >= 0 || racct_is_reclaimable(resource), + ("racct_set: usage of non-reclaimable resource %d dropping", + resource)); +#endif +#ifdef RCTL + if (diff > 0) { + error = rctl_enforce(p, resource, diff); + if (error && racct_is_deniable(resource)) { + SDT_PROBE(racct, kernel, rusage, set_failure, p, + resource, amount, 0, 0); + return (error); + } + } +#endif + racct_alloc_resource(p->p_racct, resource, diff); + if (diff > 0) + racct_add_cred_locked(p->p_ucred, resource, diff); + else if (diff < 0) + racct_sub_cred_locked(p->p_ucred, resource, -diff); + + return (0); +} + +/* + * Set allocation of 'resource' to 'amount' for process 'p'. + * Return 0 if it's below limits, or errno, if it's not. + * + * Note that decreasing the allocation always returns 0, + * even if it's above the limit. 
+ */
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+	int error;
+
+	/* Take racct_lock around the _locked variant, which does the work. */
+	mtx_lock(&racct_lock);
+	error = racct_set_locked(p, resource, amount);
+	mtx_unlock(&racct_lock);
+	return (error);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p'.
+ * Unlike racct_set(), this does not consult any limits and cannot fail.
+ * Does nothing for P_SYSTEM processes.  The process lock must be held.
+ */
+void
+racct_set_force(struct proc *p, int resource, uint64_t amount)
+{
+	int64_t diff;
+
+	if (p->p_flag & P_SYSTEM)
+		return;
+
+	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+	/*
+	 * We need proc lock to dereference p->p_ucred.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	/*
+	 * NOTE(review): 'amount' is uint64_t, so this condition is always
+	 * true as written.
+	 */
+	KASSERT(amount >= 0,
+	    ("racct_set_force: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	mtx_lock(&racct_lock);
+	/* Signed difference between the requested and the current value. */
+	diff = amount - p->p_racct->r_resources[resource];
+	racct_alloc_resource(p->p_racct, resource, diff);
+	/* Apply the same change to the containers hanging off the credential. */
+	if (diff > 0)
+		racct_add_cred_locked(p->p_ucred, resource, diff);
+	else if (diff < 0)
+		racct_sub_cred_locked(p->p_ucred, resource, -diff);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable.  Amount of already allocated resource does
+ * not matter.
+ *
+ * Without RCTL no limit is known and UINT64_MAX is returned.
+ */
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+	return (rctl_get_limit(p, resource));
+#else
+	return (UINT64_MAX);
+#endif
+}
+
+/*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable.  Amount of already allocated resource does
+ * matter.
+ *
+ * NOTE(review): presumably this is the remaining headroom (limit minus
+ * current usage) -- confirm against rctl_get_available().
+ * Without RCTL, UINT64_MAX is returned.
+ */
+uint64_t
+racct_get_available(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+	return (rctl_get_available(p, resource));
+#else
+	return (UINT64_MAX);
+#endif
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for process 'p'.
+ */
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+
+	/* P_SYSTEM processes are not accounted. */
+	if (p->p_flag & P_SYSTEM)
+		return;
+
+	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
+
+	/*
+	 * We need proc lock to dereference p->p_ucred.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	/*
+	 * NOTE(review): 'amount' is uint64_t, so the first assertion is
+	 * always true as written.
+	 */
+	KASSERT(amount >= 0, ("racct_sub: invalid amount for resource %d: %ju",
+	    resource, amount));
+	KASSERT(racct_is_reclaimable(resource),
+	    ("racct_sub: called for non-reclaimable resource %d", resource));
+
+	mtx_lock(&racct_lock);
+	/* Releasing more than the process holds indicates an accounting bug. */
+	KASSERT(amount <= p->p_racct->r_resources[resource],
+	    ("racct_sub: freeing %ju of resource %d, which is more "
+	    "than allocated %jd for %s (pid %d)", amount, resource,
+	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
+
+	/*
+	 * 'amount' is unsigned, so '-amount' is computed modulo 2^64;
+	 * racct_alloc_resource() is relied upon to apply it as a decrease.
+	 */
+	racct_alloc_resource(p->p_racct, resource, -amount);
+	racct_sub_cred_locked(p->p_ucred, resource, amount);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' in the containers
+ * reachable from 'cred': the real-uid uidinfo, the prison together with
+ * each of its ancestors, and the login class.  The callers in this file
+ * hold racct_lock around the call.
+ */
+static void
+racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+	struct prison *pr;
+
+	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
+	    0, 0);
+
+	/* NOTE(review): always true, 'amount' is unsigned. */
+	KASSERT(amount >= 0,
+	    ("racct_sub_cred: invalid amount for resource %d: %ju",
+	    resource, amount));
+#ifdef notyet
+	KASSERT(racct_is_reclaimable(resource),
+	    ("racct_sub_cred: called for non-reclaimable resource %d",
+	    resource));
+#endif
+
+	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
+	/* Walk from the credential's prison up through pr_parent links. */
+	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
+		racct_alloc_resource(pr->pr_racct, resource, -amount);
+	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
+ */
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+	mtx_lock(&racct_lock);
+	racct_sub_cred_locked(cred, resource, amount);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Inherit resource usage information from the parent process.
+ */ +int +racct_proc_fork(struct proc *parent, struct proc *child) +{ + int i, error = 0; + + /* + * Create racct for the child process. + */ + racct_create(&child->p_racct); + + /* + * No resource accounting for kernel processes. + */ + if (child->p_flag & P_SYSTEM) + return (0); + + PROC_LOCK(parent); + PROC_LOCK(child); + mtx_lock(&racct_lock); + + /* + * Inherit resource usage. + */ + for (i = 0; i <= RACCT_MAX; i++) { + if (parent->p_racct->r_resources[i] == 0 || + !racct_is_inheritable(i)) + continue; + + error = racct_set_locked(child, i, + parent->p_racct->r_resources[i]); + if (error != 0) { + /* + * XXX: The only purpose of these two lines is + * to prevent from tripping checks in racct_destroy(). + */ + for (i = 0; i <= RACCT_MAX; i++) + racct_set_locked(child, i, 0); + goto out; + } + } + +#ifdef RCTL + error = rctl_proc_fork(parent, child); + if (error != 0) { + /* + * XXX: The only purpose of these two lines is to prevent from + * tripping checks in racct_destroy(). + */ + for (i = 0; i <= RACCT_MAX; i++) + racct_set_locked(child, i, 0); + } +#endif + +out: + if (error != 0) + racct_destroy_locked(&child->p_racct); + mtx_unlock(&racct_lock); + PROC_UNLOCK(child); + PROC_UNLOCK(parent); + + return (error); +} + +void +racct_proc_exit(struct proc *p) +{ + uint64_t runtime; + + PROC_LOCK(p); + /* + * We don't need to calculate rux, proc_reap() has already done this. + */ + runtime = cputick2usec(p->p_rux.rux_runtime); +#ifdef notyet + KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); +#else + if (runtime < p->p_prev_runtime) + runtime = p->p_prev_runtime; +#endif + racct_set(p, RACCT_CPU, runtime); + + /* + * XXX: Free this some other way. 
+ */ + racct_set(p, RACCT_FSIZE, 0); + racct_set(p, RACCT_NPTS, 0); + racct_set(p, RACCT_NTHR, 0); + racct_set(p, RACCT_RSS, 0); + PROC_UNLOCK(p); + +#ifdef RCTL + rctl_racct_release(p->p_racct); +#endif + racct_destroy(&p->p_racct); +} + +/* + * Called after credentials change, to move resource utilisation + * between raccts. + */ +void +racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, + struct ucred *newcred) +{ + struct uidinfo *olduip, *newuip; + struct loginclass *oldlc, *newlc; + struct prison *oldpr, *newpr, *pr; + + PROC_LOCK_ASSERT(p, MA_NOTOWNED); + + newuip = newcred->cr_ruidinfo; + olduip = oldcred->cr_ruidinfo; + newlc = newcred->cr_loginclass; + oldlc = oldcred->cr_loginclass; + newpr = newcred->cr_prison; + oldpr = oldcred->cr_prison; + + mtx_lock(&racct_lock); + if (newuip != olduip) { + racct_sub_racct(olduip->ui_racct, p->p_racct); + racct_add_racct(newuip->ui_racct, p->p_racct); + } + if (newlc != oldlc) { + racct_sub_racct(oldlc->lc_racct, p->p_racct); + racct_add_racct(newlc->lc_racct, p->p_racct); + } + if (newpr != oldpr) { + for (pr = oldpr; pr != NULL; pr = pr->pr_parent) + racct_sub_racct(pr->pr_racct, p->p_racct); + for (pr = newpr; pr != NULL; pr = pr->pr_parent) + racct_add_racct(pr->pr_racct, p->p_racct); + } + mtx_unlock(&racct_lock); + +#ifdef RCTL + rctl_proc_ucred_changed(p, newcred); +#endif +} + +static void +racctd(void) +{ + struct thread *td; + struct proc *p; + struct timeval wallclock; + uint64_t runtime; + + for (;;) { + sx_slock(&allproc_lock); + + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_state != PRS_NORMAL) + continue; + if (p->p_flag & P_SYSTEM) + continue; + + microuptime(&wallclock); + timevalsub(&wallclock, &p->p_stats->p_start); + PROC_LOCK(p); + PROC_SLOCK(p); + FOREACH_THREAD_IN_PROC(p, td) { + ruxagg(p, td); + thread_lock(td); + thread_unlock(td); + } + runtime = cputick2usec(p->p_rux.rux_runtime); + PROC_SUNLOCK(p); +#ifdef notyet + KASSERT(runtime >= p->p_prev_runtime, + ("runtime < 
p_prev_runtime"));
+#else
+			if (runtime < p->p_prev_runtime)
+				runtime = p->p_prev_runtime;
+#endif
+			p->p_prev_runtime = runtime;
+			mtx_lock(&racct_lock);
+			racct_set_locked(p, RACCT_CPU, runtime);
+			/*
+			 * Widen tv_sec before scaling to microseconds, so
+			 * that the multiplication is carried out in 64 bits
+			 * even where time_t is narrower than that.
+			 */
+			racct_set_locked(p, RACCT_WALLCLOCK,
+			    (uint64_t)wallclock.tv_sec * 1000000 +
+			    wallclock.tv_usec);
+			mtx_unlock(&racct_lock);
+			PROC_UNLOCK(p);
+		}
+		sx_sunlock(&allproc_lock);
+		pause("-", hz);
+	}
+}
+
+/*
+ * Descriptor handed to kproc_start() so that racctd() is started as the
+ * "racctd" kernel process at SI_SUB_RACCTD.
+ */
+static struct kproc_desc racctd_kp = {
+	"racctd",
+	racctd,
+	NULL
+};
+SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
+
+/*
+ * Create the UMA zone backing 'struct racct' and allocate the racct
+ * of prison0.
+ */
+static void
+racct_init(void)
+{
+
+	racct_zone = uma_zcreate("racct", sizeof(struct racct),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	/*
+	 * XXX: Move this somewhere.
+	 */
+	racct_create(&prison0.pr_racct);
+}
+SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
+
+#else /* !RACCT */
+
+/*
+ * Without RACCT the interfaces below do nothing: functions returning a
+ * status report success, racct_get_limit() reports UINT64_MAX.
+ */
+
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+
+	return (0);
+}
+
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+/* Returns void, so there must be no value in a return statement here. */
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+
+	return (0);
+}
+
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+	return (UINT64_MAX);
+}
+
+void
+racct_create(struct racct **racctp)
+{
+}
+
+void
+racct_destroy(struct racct **racctp)
+{
+}
+
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+
+	return (0);
+}
+
+void
+racct_proc_exit(struct proc *p)
+{
+}
+
+#endif /* !RACCT */
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index eca47a8..a6f8a6f 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -65,6 +65,8 @@ options MAC # TrustedBSD MAC Framework
 #options KDTRACE_HOOKS # Kernel DTrace hooks
 options INCLUDE_CONFIG_FILE #
Include this file in kernel +options RACCT + # Debugging for use in -current options KDB # Enable kernel debugger support. options DDB # Support DDB. diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 851b9b8..d5fb648 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2930,6 +2930,9 @@ options AAC_DEBUG # Debugging levels: # 2 - extremely noisy, emit trace # items in loops, etc. +# Resource Accounting +options RACCT + # Yet more undocumented options for linting. # BKTR_ALLOC_PAGES has no effect except to cause warnings, and # BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the diff --git a/sys/conf/files b/sys/conf/files index bced838..1cf8ff1 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2225,6 +2225,7 @@ kern/kern_poll.c optional device_polling kern/kern_priv.c standard kern/kern_proc.c standard kern/kern_prot.c standard +kern/kern_racct.c standard kern/kern_resource.c standard kern/kern_rmlock.c standard kern/kern_rwlock.c standard diff --git a/sys/conf/options b/sys/conf/options index 81fb881..56dbd34 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -873,6 +873,9 @@ SDP_DEBUG opt_ofed.h IPOIB_DEBUG opt_ofed.h IPOIB_CM opt_ofed.h +# Resource Accounting +RACCT opt_global.h + # At least one of the AR71XX ubiquiti boards has a Redboot configuration # that "lies" about the amount of RAM it has. Until a cleaner method is # defined, this option will suffice in overriding what Redboot says. diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index eef0808..1977b96 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include <sys/syscallsubr.h> #include <sys/sysctl.h> #include <sys/proc.h> +#include <sys/racct.h> #include <sys/resourcevar.h> #include <sys/systm.h> #include <sys/signalvar.h> @@ -526,6 +527,9 @@ proc0_init(void *dummy __unused) p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; + /* Initialize resource accounting structures. 
*/ + racct_create(&p->p_racct); + p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index e95ac8f..01d6b75 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include <sys/wait.h> #include <sys/vmmeter.h> #include <sys/vnode.h> +#include <sys/racct.h> #include <sys/resourcevar.h> #include <sys/sbuf.h> #include <sys/signalvar.h> @@ -741,6 +742,11 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options, (void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0); /* + * Destroy resource accounting information associated with the process. + */ + racct_proc_exit(p); + + /* * Free credentials, arguments, and sigacts. */ crfree(p->p_ucred); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index ebd4e6d..1dcc5bb 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include <sys/priv.h> #include <sys/proc.h> #include <sys/pioctl.h> +#include <sys/racct.h> #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/syscall.h> @@ -783,6 +784,21 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp) knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx); STAILQ_INIT(&newproc->p_ktr); + /* + * XXX: This is ugly; when we copy resource usage, we need to bump + * per-cred resource counters. + */ + newproc->p_ucred = p1->p_ucred; + + /* + * Initialize resource accounting for the child process. + */ + error = racct_proc_fork(p1, newproc); + if (error != 0) { + error = EAGAIN; + goto fail1; + } + /* We have to lock the process tree while we look for a pid. 
*/ sx_slock(&proctree_lock); @@ -827,6 +843,7 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp) error = EAGAIN; fail: + racct_proc_exit(newproc); sx_sunlock(&proctree_lock); if (ppsratecheck(&lastfail, &curfail, 1)) printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n", diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 08343dd..6f72feb 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <sys/jail.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/racct.h> #include <sys/sx.h> #include <sys/sysent.h> #include <sys/namei.h> @@ -1195,6 +1196,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) root = mypr->pr_root; vref(root); } + racct_create(&pr->pr_racct); strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); pr->pr_flags |= PR_HOST; #if defined(INET) || defined(INET6) @@ -2295,6 +2297,9 @@ do_jail_attach(struct thread *td, struct prison *pr) newcred->cr_prison = pr; p->p_ucred = newcred; PROC_UNLOCK(p); +#ifdef RACCT + racct_proc_ucred_changed(p, oldcred, newcred); +#endif crfree(oldcred); prison_deref(ppr, PD_DEREF | PD_DEUREF); return (0); @@ -2527,6 +2532,7 @@ prison_deref(struct prison *pr, int flags) if (pr->pr_cpuset != NULL) cpuset_rel(pr->pr_cpuset); osd_jail_exit(pr); + racct_destroy(&pr->pr_racct); free(pr, M_PRISON); /* Removing a prison frees a reference on its parent. 
*/ @@ -4263,6 +4269,17 @@ SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); +void +prison_racct_foreach(void (*callback)(struct racct *racct, + void *arg2, void *arg3), void *arg2, void *arg3) +{ + struct prison *pr; + + sx_slock(&allprison_lock); + TAILQ_FOREACH(pr, &allprison, pr_list) + (callback)(pr->pr_racct, arg2, arg3); + sx_sunlock(&allprison_lock); +} #ifdef DDB diff --git a/sys/kern/kern_loginclass.c b/sys/kern/kern_loginclass.c index cf644d5..d980246 100644 --- a/sys/kern/kern_loginclass.c +++ b/sys/kern/kern_loginclass.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include <sys/priv.h> #include <sys/proc.h> #include <sys/queue.h> +#include <sys/racct.h> #include <sys/refcount.h> #include <sys/sysproto.h> #include <sys/systm.h> @@ -90,6 +91,7 @@ loginclass_free(struct loginclass *lc) mtx_lock(&loginclasses_lock); if (refcount_release(&lc->lc_refcount)) { + racct_destroy(&lc->lc_racct); LIST_REMOVE(lc, lc_next); mtx_unlock(&loginclasses_lock); free(lc, M_LOGINCLASS); @@ -115,6 +117,7 @@ loginclass_find(const char *name) return (NULL); newlc = malloc(sizeof(*newlc), M_LOGINCLASS, M_ZERO | M_WAITOK); + racct_create(&newlc->lc_racct); mtx_lock(&loginclasses_lock); LIST_FOREACH(lc, &loginclasses, lc_next) { @@ -124,6 +127,7 @@ loginclass_find(const char *name) /* Found loginclass with a matching name? 
*/ loginclass_hold(lc); mtx_unlock(&loginclasses_lock); + racct_destroy(&newlc->lc_racct); free(newlc, M_LOGINCLASS); return (lc); } @@ -205,13 +209,27 @@ setloginclass(struct thread *td, struct setloginclass_args *uap) newcred->cr_loginclass = newlc; p->p_ucred = newcred; PROC_UNLOCK(p); - +#ifdef RACCT + racct_proc_ucred_changed(p, oldcred, newcred); +#endif loginclass_free(oldcred->cr_loginclass); crfree(oldcred); return (0); } +void +loginclass_racct_foreach(void (*callback)(struct racct *racct, + void *arg2, void *arg3), void *arg2, void *arg3) +{ + struct loginclass *lc; + + mtx_lock(&loginclasses_lock); + LIST_FOREACH(lc, &loginclasses, lc_next) + (callback)(lc->lc_racct, arg2, arg3); + mtx_unlock(&loginclasses_lock); +} + static void lc_init(void) { diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c new file mode 100644 index 0000000..28bc7b2 --- /dev/null +++ b/sys/kern/kern_racct.c @@ -0,0 +1,837 @@ +/*- + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_kdtrace.h" + +#include <sys/param.h> +#include <sys/eventhandler.h> +#include <sys/param.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/lock.h> +#include <sys/loginclass.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/racct.h> +#include <sys/resourcevar.h> +#include <sys/sbuf.h> +#include <sys/sched.h> +#include <sys/sdt.h> +#include <sys/sx.h> +#include <sys/sysent.h> +#include <sys/sysproto.h> +#include <sys/systm.h> +#include <sys/umtx.h> + +#ifdef RCTL +#include <sys/rctl.h> +#endif + +#ifdef RACCT + +FEATURE(racct, "Resource Accounting"); + +static struct mtx racct_lock; +MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); + +static uma_zone_t racct_zone; + +static void racct_sub_racct(struct racct *dest, const struct racct *src); +static void racct_sub_cred_locked(struct ucred *cred, int resource, + uint64_t amount); +static void racct_add_cred_locked(struct ucred *cred, int resource, + uint64_t amount); + +SDT_PROVIDER_DEFINE(racct); +SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int", + "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure, + "struct proc *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *", + "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, 
kernel, rusage, add_force, add-force, "struct proc *", + "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int", + "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure, + "struct proc *", "int", "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int", + "uint64_t"); +SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *", + "int", "uint64_t"); +SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *"); +SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *"); +SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *", + "struct racct *"); +SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure, + "struct racct *", "struct racct *"); +SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *", + "struct racct *"); + +int racct_types[] = { + [RACCT_CPU] = + RACCT_IN_THOUSANDS, + [RACCT_FSIZE] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_DATA] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_STACK] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_CORE] = + RACCT_DENIABLE, + [RACCT_RSS] = + RACCT_RECLAIMABLE, + [RACCT_MEMLOCK] = + RACCT_RECLAIMABLE | RACCT_DENIABLE, + [RACCT_NPROC] = + RACCT_RECLAIMABLE | RACCT_DENIABLE, + [RACCT_NOFILE] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_SBSIZE] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_VMEM] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_NPTS] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_SWAP] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_NTHR] = + RACCT_RECLAIMABLE | RACCT_DENIABLE, + [RACCT_MSGQQUEUED] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_MSGQSIZE] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_NMSGQ] = + RACCT_RECLAIMABLE 
| RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_NSEM] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_NSEMOP] = + RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, + [RACCT_NSHM] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_SHMSIZE] = + RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, + [RACCT_WALLCLOCK] = + RACCT_IN_THOUSANDS }; + +static void +racct_add_racct(struct racct *dest, const struct racct *src) +{ + int i; + + mtx_assert(&racct_lock, MA_OWNED); + + /* + * Update resource usage in dest. + */ + for (i = 0; i <= RACCT_MAX; i++) { + KASSERT(dest->r_resources[i] >= 0, + ("racct propagation meltdown: dest < 0")); + KASSERT(src->r_resources[i] >= 0, + ("racct propagation meltdown: src < 0")); + dest->r_resources[i] += src->r_resources[i]; + } +} + +static void +racct_sub_racct(struct racct *dest, const struct racct *src) +{ + int i; + + mtx_assert(&racct_lock, MA_OWNED); + + /* + * Update resource usage in dest. + */ + for (i = 0; i <= RACCT_MAX; i++) { + if (!racct_is_sloppy(i)) { + KASSERT(dest->r_resources[i] >= 0, + ("racct propagation meltdown: dest < 0")); + KASSERT(src->r_resources[i] >= 0, + ("racct propagation meltdown: src < 0")); + KASSERT(src->r_resources[i] <= dest->r_resources[i], + ("racct propagation meltdown: src > dest")); + } + if (racct_is_reclaimable(i)) { + dest->r_resources[i] -= src->r_resources[i]; + if (dest->r_resources[i] < 0) { + KASSERT(racct_is_sloppy(i), + ("racct_sub_racct: usage < 0")); + dest->r_resources[i] = 0; + } + } + } +} + +void +racct_create(struct racct **racctp) +{ + + SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0); + + KASSERT(*racctp == NULL, ("racct already allocated")); + + *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); +} + +static void +racct_destroy_locked(struct racct **racctp) +{ + int i; + struct racct *racct; + + SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0); + + mtx_assert(&racct_lock, MA_OWNED); + KASSERT(racctp != NULL, ("NULL 
racctp")); + KASSERT(*racctp != NULL, ("NULL racct")); + + racct = *racctp; + + for (i = 0; i <= RACCT_MAX; i++) { + if (racct_is_sloppy(i)) + continue; + if (!racct_is_reclaimable(i)) + continue; + KASSERT(racct->r_resources[i] == 0, + ("destroying non-empty racct: " + "%ju allocated for resource %d\n", + racct->r_resources[i], i)); + } + uma_zfree(racct_zone, racct); + *racctp = NULL; +} + +void +racct_destroy(struct racct **racct) +{ + + mtx_lock(&racct_lock); + racct_destroy_locked(racct); + mtx_unlock(&racct_lock); +} + +/* + * Increase consumption of 'resource' by 'amount' for 'racct' + * and all its parents. Differently from other cases, 'amount' here + * may be less than zero. + */ +static void +racct_alloc_resource(struct racct *racct, int resource, + uint64_t amount) +{ + + mtx_assert(&racct_lock, MA_OWNED); + KASSERT(racct != NULL, ("NULL racct")); + + racct->r_resources[resource] += amount; + if (racct->r_resources[resource] < 0) { + KASSERT(racct_is_sloppy(resource), + ("racct_alloc_resource: usage < 0")); + racct->r_resources[resource] = 0; + } +} + +/* + * Increase allocation of 'resource' by 'amount' for process 'p'. + * Return 0 if it's below limits, or errno, if it's not. + */ +int +racct_add(struct proc *p, int resource, uint64_t amount) +{ +#ifdef RCTL + int error; +#endif + + if (p->p_flag & P_SYSTEM) + return (0); + + SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0); + + /* + * We need proc lock to dereference p->p_ucred. 
+ */ + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(amount >= 0, ("racct_add: invalid amount for resource %d: %ju", + resource, amount)); + + mtx_lock(&racct_lock); +#ifdef RCTL + error = rctl_enforce(p, resource, amount); + if (error && racct_is_deniable(resource)) { + SDT_PROBE(racct, kernel, rusage, add_failure, p, resource, + amount, 0, 0); + mtx_unlock(&racct_lock); + return (error); + } +#endif + racct_alloc_resource(p->p_racct, resource, amount); + racct_add_cred_locked(p->p_ucred, resource, amount); + mtx_unlock(&racct_lock); + + return (0); +} + +static void +racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) +{ + struct prison *pr; + + SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount, + 0, 0); + + KASSERT(amount >= 0, + ("racct_add_cred: invalid amount for resource %d: %ju", + resource, amount)); + + racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount); + for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) + racct_alloc_resource(pr->pr_racct, resource, amount); + racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount); +} + +/* + * Increase allocation of 'resource' by 'amount' for credential 'cred'. + * Doesn't check for limits and never fails. + * + * XXX: Shouldn't this ever return an error? + */ +void +racct_add_cred(struct ucred *cred, int resource, uint64_t amount) +{ + + mtx_lock(&racct_lock); + racct_add_cred_locked(cred, resource, amount); + mtx_unlock(&racct_lock); +} + +/* + * Increase allocation of 'resource' by 'amount' for process 'p'. + * Doesn't check for limits and never fails. + */ +void +racct_add_force(struct proc *p, int resource, uint64_t amount) +{ + + if (p->p_flag & P_SYSTEM) + return; + + SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0); + + /* + * We need proc lock to dereference p->p_ucred. 
+ */ + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(amount >= 0, + ("racct_add_force: invalid amount for resource %d: %ju", + resource, amount)); + + mtx_lock(&racct_lock); + racct_alloc_resource(p->p_racct, resource, amount); + mtx_unlock(&racct_lock); + racct_add_cred(p->p_ucred, resource, amount); +} + +static int +racct_set_locked(struct proc *p, int resource, uint64_t amount) +{ + int64_t diff; +#ifdef RCTL + int error; +#endif + + if (p->p_flag & P_SYSTEM) + return (0); + + SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); + + /* + * We need proc lock to dereference p->p_ucred. + */ + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(amount >= 0, ("racct_set: invalid amount for resource %d: %ju", + resource, amount)); + + diff = amount - p->p_racct->r_resources[resource]; +#ifdef notyet + KASSERT(diff >= 0 || racct_is_reclaimable(resource), + ("racct_set: usage of non-reclaimable resource %d dropping", + resource)); +#endif +#ifdef RCTL + if (diff > 0) { + error = rctl_enforce(p, resource, diff); + if (error && racct_is_deniable(resource)) { + SDT_PROBE(racct, kernel, rusage, set_failure, p, + resource, amount, 0, 0); + return (error); + } + } +#endif + racct_alloc_resource(p->p_racct, resource, diff); + if (diff > 0) + racct_add_cred_locked(p->p_ucred, resource, diff); + else if (diff < 0) + racct_sub_cred_locked(p->p_ucred, resource, -diff); + + return (0); +} + +/* + * Set allocation of 'resource' to 'amount' for process 'p'. + * Return 0 if it's below limits, or errno, if it's not. + * + * Note that decreasing the allocation always returns 0, + * even if it's above the limit. 
+ */ +int +racct_set(struct proc *p, int resource, uint64_t amount) +{ + int error; + + mtx_lock(&racct_lock); + error = racct_set_locked(p, resource, amount); + mtx_unlock(&racct_lock); + return (error); +} + +void +racct_set_force(struct proc *p, int resource, uint64_t amount) +{ + int64_t diff; + + if (p->p_flag & P_SYSTEM) + return; + + SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); + + /* + * We need proc lock to dereference p->p_ucred. + */ + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(amount >= 0, + ("racct_set_force: invalid amount for resource %d: %ju", + resource, amount)); + + mtx_lock(&racct_lock); + diff = amount - p->p_racct->r_resources[resource]; + racct_alloc_resource(p->p_racct, resource, diff); + if (diff > 0) + racct_add_cred_locked(p->p_ucred, resource, diff); + else if (diff < 0) + racct_sub_cred_locked(p->p_ucred, resource, -diff); + mtx_unlock(&racct_lock); +} + +/* + * Returns amount of 'resource' the process 'p' can keep allocated. + * Allocating more than that would be denied, unless the resource + * is marked undeniable. Amount of already allocated resource does + * not matter. + */ +uint64_t +racct_get_limit(struct proc *p, int resource) +{ + +#ifdef RCTL + return (rctl_get_limit(p, resource)); +#else + return (UINT64_MAX); +#endif +} + +/* + * Returns amount of 'resource' the process 'p' can keep allocated. + * Allocating more than that would be denied, unless the resource + * is marked undeniable. Amount of already allocated resource does + * matter. + */ +uint64_t +racct_get_available(struct proc *p, int resource) +{ + +#ifdef RCTL + return (rctl_get_available(p, resource)); +#else + return (UINT64_MAX); +#endif +} + +/* + * Decrease allocation of 'resource' by 'amount' for process 'p'. 
+ */ +void +racct_sub(struct proc *p, int resource, uint64_t amount) +{ + + if (p->p_flag & P_SYSTEM) + return; + + SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0); + + /* + * We need proc lock to dereference p->p_ucred. + */ + PROC_LOCK_ASSERT(p, MA_OWNED); + KASSERT(amount >= 0, ("racct_sub: invalid amount for resource %d: %ju", + resource, amount)); + KASSERT(racct_is_reclaimable(resource), + ("racct_sub: called for non-reclaimable resource %d", resource)); + + mtx_lock(&racct_lock); + KASSERT(amount <= p->p_racct->r_resources[resource], + ("racct_sub: freeing %ju of resource %d, which is more " + "than allocated %jd for %s (pid %d)", amount, resource, + (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); + + racct_alloc_resource(p->p_racct, resource, -amount); + racct_sub_cred_locked(p->p_ucred, resource, amount); + mtx_unlock(&racct_lock); +} + +static void +racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) +{ + struct prison *pr; + + SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount, + 0, 0); + + KASSERT(amount >= 0, + ("racct_sub_cred: invalid amount for resource %d: %ju", + resource, amount)); +#ifdef notyet + KASSERT(racct_is_reclaimable(resource), + ("racct_sub_cred: called for non-reclaimable resource %d", + resource)); +#endif + + racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); + for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) + racct_alloc_resource(pr->pr_racct, resource, -amount); + racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount); +} + +/* + * Decrease allocation of 'resource' by 'amount' for credential 'cred'. + */ +void +racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) +{ + + mtx_lock(&racct_lock); + racct_sub_cred_locked(cred, resource, amount); + mtx_unlock(&racct_lock); +} + +/* + * Inherit resource usage information from the parent process. 
+ */ +int +racct_proc_fork(struct proc *parent, struct proc *child) +{ + int i, error = 0; + + /* + * Create racct for the child process. + */ + racct_create(&child->p_racct); + + /* + * No resource accounting for kernel processes. + */ + if (child->p_flag & P_SYSTEM) + return (0); + + PROC_LOCK(parent); + PROC_LOCK(child); + mtx_lock(&racct_lock); + + /* + * Inherit resource usage. + */ + for (i = 0; i <= RACCT_MAX; i++) { + if (parent->p_racct->r_resources[i] == 0 || + !racct_is_inheritable(i)) + continue; + + error = racct_set_locked(child, i, + parent->p_racct->r_resources[i]); + if (error != 0) { + /* + * XXX: The only purpose of these two lines is + * to prevent from tripping checks in racct_destroy(). + */ + for (i = 0; i <= RACCT_MAX; i++) + racct_set_locked(child, i, 0); + goto out; + } + } + +#ifdef RCTL + error = rctl_proc_fork(parent, child); + if (error != 0) { + /* + * XXX: The only purpose of these two lines is to prevent from + * tripping checks in racct_destroy(). + */ + for (i = 0; i <= RACCT_MAX; i++) + racct_set_locked(child, i, 0); + } +#endif + +out: + if (error != 0) + racct_destroy_locked(&child->p_racct); + mtx_unlock(&racct_lock); + PROC_UNLOCK(child); + PROC_UNLOCK(parent); + + return (error); +} + +void +racct_proc_exit(struct proc *p) +{ + uint64_t runtime; + + PROC_LOCK(p); + /* + * We don't need to calculate rux, proc_reap() has already done this. + */ + runtime = cputick2usec(p->p_rux.rux_runtime); +#ifdef notyet + KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); +#else + if (runtime < p->p_prev_runtime) + runtime = p->p_prev_runtime; +#endif + racct_set(p, RACCT_CPU, runtime); + + /* + * XXX: Free this some other way. 
+ */ + racct_set(p, RACCT_FSIZE, 0); + racct_set(p, RACCT_NPTS, 0); + racct_set(p, RACCT_NTHR, 0); + racct_set(p, RACCT_RSS, 0); + PROC_UNLOCK(p); + +#ifdef RCTL + rctl_racct_release(p->p_racct); +#endif + racct_destroy(&p->p_racct); +} + +/* + * Called after credentials change, to move resource utilisation + * between raccts. + */ +void +racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, + struct ucred *newcred) +{ + struct uidinfo *olduip, *newuip; + struct loginclass *oldlc, *newlc; + struct prison *oldpr, *newpr, *pr; + + PROC_LOCK_ASSERT(p, MA_NOTOWNED); + + newuip = newcred->cr_ruidinfo; + olduip = oldcred->cr_ruidinfo; + newlc = newcred->cr_loginclass; + oldlc = oldcred->cr_loginclass; + newpr = newcred->cr_prison; + oldpr = oldcred->cr_prison; + + mtx_lock(&racct_lock); + if (newuip != olduip) { + racct_sub_racct(olduip->ui_racct, p->p_racct); + racct_add_racct(newuip->ui_racct, p->p_racct); + } + if (newlc != oldlc) { + racct_sub_racct(oldlc->lc_racct, p->p_racct); + racct_add_racct(newlc->lc_racct, p->p_racct); + } + if (newpr != oldpr) { + for (pr = oldpr; pr != NULL; pr = pr->pr_parent) + racct_sub_racct(pr->pr_racct, p->p_racct); + for (pr = newpr; pr != NULL; pr = pr->pr_parent) + racct_add_racct(pr->pr_racct, p->p_racct); + } + mtx_unlock(&racct_lock); + +#ifdef RCTL + rctl_proc_ucred_changed(p, newcred); +#endif +} + +static void +racctd(void) +{ + struct thread *td; + struct proc *p; + struct timeval wallclock; + uint64_t runtime; + + for (;;) { + sx_slock(&allproc_lock); + + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_state != PRS_NORMAL) + continue; + if (p->p_flag & P_SYSTEM) + continue; + + microuptime(&wallclock); + timevalsub(&wallclock, &p->p_stats->p_start); + PROC_LOCK(p); + PROC_SLOCK(p); + FOREACH_THREAD_IN_PROC(p, td) { + ruxagg(p, td); + thread_lock(td); + thread_unlock(td); + } + runtime = cputick2usec(p->p_rux.rux_runtime); + PROC_SUNLOCK(p); +#ifdef notyet + KASSERT(runtime >= p->p_prev_runtime, + ("runtime < 
p_prev_runtime")); +#else + if (runtime < p->p_prev_runtime) + runtime = p->p_prev_runtime; +#endif + p->p_prev_runtime = runtime; + mtx_lock(&racct_lock); + racct_set_locked(p, RACCT_CPU, runtime); + racct_set_locked(p, RACCT_WALLCLOCK, + wallclock.tv_sec * 1000000 + wallclock.tv_usec); + mtx_unlock(&racct_lock); + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + pause("-", hz); + } +} + +static struct kproc_desc racctd_kp = { + "racctd", + racctd, + NULL +}; +SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp); + +static void +racct_init(void) +{ + + racct_zone = uma_zcreate("racct", sizeof(struct racct), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + /* + * XXX: Move this somewhere. + */ + racct_create(&prison0.pr_racct); +} +SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); + +#else /* !RACCT */ + +int +racct_add(struct proc *p, int resource, uint64_t amount) +{ + + return (0); +} + +void +racct_add_cred(struct ucred *cred, int resource, uint64_t amount) +{ +} + +void +racct_add_force(struct proc *p, int resource, uint64_t amount) +{ + + return; +} + +int +racct_set(struct proc *p, int resource, uint64_t amount) +{ + + return (0); +} + +void +racct_sub(struct proc *p, int resource, uint64_t amount) +{ +} + +void +racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) +{ +} + +uint64_t +racct_get_limit(struct proc *p, int resource) +{ + + return (UINT64_MAX); +} + +void +racct_create(struct racct **racctp) +{ +} + +void +racct_destroy(struct racct **racctp) +{ +} + +int +racct_proc_fork(struct proc *parent, struct proc *child) +{ + + return (0); +} + +void +racct_proc_exit(struct proc *p) +{ +} + +#endif /* !RACCT */ diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 66b6e2d..fa7437d 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <sys/priv.h> #include <sys/proc.h> #include <sys/refcount.h> +#include 
<sys/racct.h> #include <sys/resourcevar.h> #include <sys/rwlock.h> #include <sys/sched.h> @@ -1201,6 +1202,7 @@ uifind(uid) if (uip == NULL) { rw_runlock(&uihashtbl_lock); uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO); + racct_create(&uip->ui_racct); rw_wlock(&uihashtbl_lock); /* * There's a chance someone created our uidinfo while we @@ -1209,6 +1211,7 @@ uifind(uid) */ if ((old_uip = uilookup(uid)) != NULL) { /* Someone else beat us to it. */ + racct_destroy(&uip->ui_racct); free(uip, M_UIDINFO); uip = old_uip; } else { @@ -1264,6 +1267,7 @@ uifree(uip) /* Prepare for suboptimal case. */ rw_wlock(&uihashtbl_lock); if (refcount_release(&uip->ui_ref)) { + racct_destroy(&uip->ui_racct); LIST_REMOVE(uip, ui_hash); rw_wunlock(&uihashtbl_lock); if (uip->ui_sbsize != 0) @@ -1286,6 +1290,22 @@ uifree(uip) rw_wunlock(&uihashtbl_lock); } +void +ui_racct_foreach(void (*callback)(struct racct *racct, + void *arg2, void *arg3), void *arg2, void *arg3) +{ + struct uidinfo *uip; + struct uihashhead *uih; + + rw_rlock(&uihashtbl_lock); + for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) { + LIST_FOREACH(uip, uih, ui_hash) { + (callback)(uip->ui_racct, arg2, arg3); + } + } + rw_runlock(&uihashtbl_lock); +} + /* * Change the count associated with number of processes * a given user is using. When 'max' is 0, don't enforce a limit diff --git a/sys/sys/jail.h b/sys/sys/jail.h index 85c629a..b83ac1b 100644 --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -135,6 +135,8 @@ MALLOC_DECLARE(M_PRISON); #define HOSTUUIDLEN 64 +struct racct; + /* * This structure describes a prison. It is pointed to by all struct * ucreds's of the inmates. 
pr_ref keeps track of them and is used to @@ -166,7 +168,8 @@ struct prison { int pr_ip6s; /* (p) number of v6 IPs */ struct in_addr *pr_ip4; /* (p) v4 IPs of jail */ struct in6_addr *pr_ip6; /* (p) v6 IPs of jail */ - void *pr_sparep[4]; + struct racct *pr_racct; /* (c) resource accounting */ + void *pr_sparep[3]; int pr_childcount; /* (a) number of child jails */ int pr_childmax; /* (p) maximum child jails */ unsigned pr_allow; /* (p) PR_ALLOW_* flags */ @@ -380,6 +383,8 @@ int prison_if(struct ucred *cred, struct sockaddr *sa); char *prison_name(struct prison *, struct prison *); int prison_priv_check(struct ucred *cred, int priv); int sysctl_jail_param(SYSCTL_HANDLER_ARGS); +void prison_racct_foreach(void (*callback)(struct racct *racct, + void *arg2, void *arg3), void *arg2, void *arg3); #endif /* _KERNEL */ #endif /* !_SYS_JAIL_H_ */ diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index 1a9cb5c..2916646 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -109,6 +109,7 @@ enum sysinit_sub_id { SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ + SI_SUB_RACCT = 0x2110000, /* resource accounting */ SI_SUB_RANDOM = 0x2120000, /* random number generator */ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ @@ -169,6 +170,7 @@ enum sysinit_sub_id { SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/ SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/ SI_SUB_SMP = 0xf000000, /* start the APs*/ + SI_SUB_RACCTD = 0xf100000, /* start racctd*/ SI_SUB_RUN_SCHEDULER = 0xfffffff /* scheduler*/ }; diff --git a/sys/sys/loginclass.h b/sys/sys/loginclass.h index 36ecf80..08f3409 100644 --- a/sys/sys/loginclass.h +++ b/sys/sys/loginclass.h @@ -32,6 +32,8 @@ #ifndef _SYS_LOGINCLASS_H_ #define _SYS_LOGINCLASS_H_ +struct racct; + /* * Exactly one of these structures exists per login class. 
*/ @@ -39,11 +41,13 @@ struct loginclass { LIST_ENTRY(loginclass) lc_next; char lc_name[MAXLOGNAME]; u_int lc_refcount; + struct racct *lc_racct; }; void loginclass_hold(struct loginclass *lc); void loginclass_free(struct loginclass *lc); struct loginclass *loginclass_find(const char *name); +void loginclass_racct_foreach(void (*callback)(struct racct *racct, + void *arg2, void *arg3), void *arg2, void *arg3); #endif /* !_SYS_LOGINCLASS_H_ */ - diff --git a/sys/sys/proc.h b/sys/sys/proc.h index c9eedff..e04d699 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -157,6 +157,7 @@ struct pargs { * either lock is sufficient for read access, but both locks must be held * for write access. */ +struct racct; struct kaudit_record; struct td_sched; struct nlminfo; @@ -566,6 +567,8 @@ struct proc { struct cv p_pwait; /* (*) wait cv for exit/exec. */ struct cv p_dbgwait; /* (*) wait cv for debugger attach after fork. */ + uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ + struct racct *p_racct; /* (b) Resource accounting. */ }; #define p_session p_pgrp->pg_session diff --git a/sys/sys/racct.h b/sys/sys/racct.h new file mode 100644 index 0000000..cbd96a9 --- /dev/null +++ b/sys/sys/racct.h @@ -0,0 +1,147 @@ +/*- + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Resource accounting. + */ + +#ifndef _RACCT_H_ +#define _RACCT_H_ + +#include <sys/cdefs.h> +#include <sys/queue.h> +#include <sys/types.h> + +struct proc; +struct rctl_rule_link; +struct ucred; + +/* + * Resources. + */ +#define RACCT_UNDEFINED -1 +#define RACCT_CPU 0 +#define RACCT_FSIZE 1 +#define RACCT_DATA 2 +#define RACCT_STACK 3 +#define RACCT_CORE 4 +#define RACCT_RSS 5 +#define RACCT_MEMLOCK 6 +#define RACCT_NPROC 7 +#define RACCT_NOFILE 8 +#define RACCT_SBSIZE 9 +#define RACCT_VMEM 10 +#define RACCT_NPTS 11 +#define RACCT_SWAP 12 +#define RACCT_NTHR 13 +#define RACCT_MSGQQUEUED 14 +#define RACCT_MSGQSIZE 15 +#define RACCT_NMSGQ 16 +#define RACCT_NSEM 17 +#define RACCT_NSEMOP 18 +#define RACCT_NSHM 19 +#define RACCT_SHMSIZE 20 +#define RACCT_WALLCLOCK 21 +#define RACCT_MAX RACCT_WALLCLOCK + +/* + * Resource properties. + */ +#define RACCT_IN_THOUSANDS 0x01 +#define RACCT_RECLAIMABLE 0x02 +#define RACCT_INHERITABLE 0x04 +#define RACCT_DENIABLE 0x08 +#define RACCT_SLOPPY 0x10 + +extern int racct_types[]; + +/* + * Amount stored in r_resources[] is a thousand times bigger than what's + * visible to the userland. 
It gets fixed up when retrieving resource + * usage or adding rules. + */ +#define racct_is_in_thousands(X) (racct_types[X] & RACCT_IN_THOUSANDS) + +/* + * Resource usage can drop, as opposed to only growing. + */ +#define racct_is_reclaimable(X) (racct_types[X] & RACCT_RECLAIMABLE) + +/* + * Children inherit resource usage. + */ +#define racct_is_inheritable(X) (racct_types[X] & RACCT_INHERITABLE) + +/* + * racct_{add,set}(9) can actually return an error and not update resource + * usage counters. Note that even when a resource is not deniable, allocating + * it might cause signals to be sent by RCTL code. + */ +#define racct_is_deniable(X) (racct_types[X] & RACCT_DENIABLE) + +/* + * Per-process resource usage information makes no sense, but per-credential + * one does. Resources of this kind are usually allocated for a process, but + * freed using credentials. + */ +#define racct_is_sloppy(X) (racct_types[X] & RACCT_SLOPPY) + +/* + * The 'racct' structure defines resource consumption for a particular + * subject, such as a process or a jail. + * + * This structure must be filled with zeroes initially. 
+ */ +struct racct { + int64_t r_resources[RACCT_MAX + 1]; + LIST_HEAD(, rctl_rule_link) r_rule_links; +}; + +int racct_add(struct proc *p, int resource, uint64_t amount); +void racct_add_cred(struct ucred *cred, int resource, uint64_t amount); +void racct_add_force(struct proc *p, int resource, uint64_t amount); +int racct_set(struct proc *p, int resource, uint64_t amount); +void racct_set_force(struct proc *p, int resource, uint64_t amount); +void racct_sub(struct proc *p, int resource, uint64_t amount); +void racct_sub_cred(struct ucred *cred, int resource, uint64_t amount); +uint64_t racct_get_limit(struct proc *p, int resource); +uint64_t racct_get_available(struct proc *p, int resource); + +void racct_create(struct racct **racctp); +void racct_destroy(struct racct **racctp); + +int racct_proc_fork(struct proc *parent, struct proc *child); +void racct_proc_exit(struct proc *p); + +void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, + struct ucred *newcred); + +#endif /* !_RACCT_H_ */ diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index 67af9b6..f17d95f 100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -79,6 +79,8 @@ struct plimit { int pl_refcnt; /* number of references */ }; +struct racct; + /*- * Per uid resource consumption. This structure is used to track * the total resource consumption (process count, socket buffer size, @@ -99,6 +101,7 @@ struct uidinfo { long ui_ptscnt; /* (b) number of pseudo-terminals */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ + struct racct *ui_racct; /* (a) resource accounting */ }; #define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx)) @@ -140,6 +143,8 @@ struct uidinfo void uifree(struct uidinfo *uip); void uihashinit(void); void uihold(struct uidinfo *uip); +void ui_racct_foreach(void (*callback)(struct racct *racct, + void *arg2, void *arg3), void *arg2, void *arg3); #endif /* _KERNEL */ #endif /* !_SYS_RESOURCEVAR_H_ */ |