summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- kern_racct.c 842
-rw-r--r-- sys/amd64/conf/GENERIC 2
-rw-r--r-- sys/conf/NOTES 3
-rw-r--r-- sys/conf/files 1
-rw-r--r-- sys/conf/options 3
-rw-r--r-- sys/kern/init_main.c 4
-rw-r--r-- sys/kern/kern_exit.c 6
-rw-r--r-- sys/kern/kern_fork.c 17
-rw-r--r-- sys/kern/kern_jail.c 17
-rw-r--r-- sys/kern/kern_loginclass.c 20
-rw-r--r-- sys/kern/kern_racct.c 837
-rw-r--r-- sys/kern/kern_resource.c 20
-rw-r--r-- sys/sys/jail.h 7
-rw-r--r-- sys/sys/kernel.h 2
-rw-r--r-- sys/sys/loginclass.h 6
-rw-r--r-- sys/sys/proc.h 3
-rw-r--r-- sys/sys/racct.h 147
-rw-r--r-- sys/sys/resourcevar.h 5
18 files changed, 1939 insertions, 3 deletions
diff --git a/kern_racct.c b/kern_racct.c
new file mode 100644
index 0000000..229977a
--- /dev/null
+++ b/kern_racct.c
@@ -0,0 +1,842 @@
+/*-
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/loginclass.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/racct.h>
+#include <sys/resourcevar.h>
+#include <sys/sbuf.h>
+#include <sys/sched.h>
+#include <sys/sdt.h>
+#include <sys/sx.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/umtx.h>
+
+#ifdef RCTL
+#include <sys/rctl.h>
+#endif
+
+#ifdef RACCT
+
+FEATURE(racct, "Resource Accounting");
+
+/*
+ * Mutex held around every update of racct counters in this file; the
+ * *_locked() helpers and racct_add_racct()/racct_sub_racct() expect the
+ * caller to own it.
+ */
+static struct mtx racct_lock;
+MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
+
+/*
+ * UMA zone backing struct racct allocations; created in racct_init().
+ */
+static uma_zone_t racct_zone;
+
+/*
+ * Forward declarations for helpers that are used before their definition.
+ */
+static void racct_sub_racct(struct racct *dest, const struct racct *src);
+static void racct_sub_cred_locked(struct ucred *cred, int resource,
+ uint64_t amount);
+static void racct_add_cred_locked(struct ucred *cred, int resource,
+ uint64_t amount);
+
+/*
+ * Statically defined tracing probes for the "racct" provider. The
+ * "rusage" probes are fired by the racct_add*(), racct_set*() and
+ * racct_sub*() entry points below; the "racct" create/destroy probes are
+ * fired by racct_create() and racct_destroy_locked().
+ * NOTE(review): the join, join_failure and leave probes are defined here
+ * but are not fired anywhere in the visible part of this file.
+ */
+SDT_PROVIDER_DEFINE(racct);
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
+SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
+ "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
+ "struct racct *", "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
+ "struct racct *");
+
+/*
+ * Property flags for each resource, indexed by the RACCT_* resource
+ * identifier. Resources without an explicit entry get no flags.
+ * NOTE(review): the racct_is_*() predicates used throughout this file are
+ * defined elsewhere and presumably consult this table -- confirm in
+ * sys/racct.h.
+ */
+int racct_types[] = {
+ [RACCT_CPU] =
+ RACCT_IN_THOUSANDS,
+ [RACCT_FSIZE] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_DATA] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_STACK] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_CORE] =
+ RACCT_DENIABLE,
+ [RACCT_RSS] =
+ RACCT_RECLAIMABLE,
+ [RACCT_MEMLOCK] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_NPROC] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_NOFILE] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_SBSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_VMEM] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_NPTS] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_SWAP] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NTHR] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_MSGQQUEUED] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_MSGQSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NMSGQ] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NSEM] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NSEMOP] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_NSHM] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_SHMSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_WALLCLOCK] =
+ RACCT_IN_THOUSANDS };
+
+/*
+ * Fold every resource counter of 'src' into the matching counter of
+ * 'dest'.  The caller must hold racct_lock.
+ */
+static void
+racct_add_racct(struct racct *dest, const struct racct *src)
+{
+	int res;
+
+	mtx_assert(&racct_lock, MA_OWNED);
+
+	res = 0;
+	while (res <= RACCT_MAX) {
+		/* Neither side may already be negative. */
+		KASSERT(dest->r_resources[res] >= 0,
+		    ("racct propagation meltdown: dest < 0"));
+		KASSERT(src->r_resources[res] >= 0,
+		    ("racct propagation meltdown: src < 0"));
+		dest->r_resources[res] += src->r_resources[res];
+		res++;
+	}
+}
+
+/*
+ * Remove the usage recorded in 'src' from 'dest'.  Only reclaimable
+ * resources are subtracted; a counter that would drop below zero is
+ * clamped to zero, which is tolerated only for sloppy or dampened
+ * resources.  The caller must hold racct_lock.
+ */
+static void
+racct_sub_racct(struct racct *dest, const struct racct *src)
+{
+	int lenient, res;
+
+	mtx_assert(&racct_lock, MA_OWNED);
+
+	for (res = 0; res <= RACCT_MAX; res++) {
+		lenient = racct_is_sloppy(res) || racct_is_dampened(res);
+
+		/* Strictly accounted resources must stay consistent. */
+		if (!lenient) {
+			KASSERT(dest->r_resources[res] >= 0,
+			    ("racct propagation meltdown: dest < 0"));
+			KASSERT(src->r_resources[res] >= 0,
+			    ("racct propagation meltdown: src < 0"));
+			KASSERT(src->r_resources[res] <= dest->r_resources[res],
+			    ("racct propagation meltdown: src > dest"));
+		}
+
+		if (!racct_is_reclaimable(res))
+			continue;
+
+		dest->r_resources[res] -= src->r_resources[res];
+		if (dest->r_resources[res] >= 0)
+			continue;
+
+		KASSERT(lenient, ("racct_sub_racct: usage < 0"));
+		dest->r_resources[res] = 0;
+	}
+}
+
+/*
+ * Allocate a zero-filled racct from racct_zone and store it in '*racctp',
+ * which must be NULL on entry.  The allocation uses M_WAITOK.
+ *
+ * NOTE(review): the create probe is declared with a "struct racct *"
+ * argument but receives 'racctp' (a struct racct **), and it fires before
+ * the allocation -- confirm this is intended.
+ */
+void
+racct_create(struct racct **racctp)
+{
+	struct racct *fresh;
+
+	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
+
+	KASSERT(*racctp == NULL, ("racct already allocated"));
+
+	fresh = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
+	*racctp = fresh;
+}
+
+/*
+ * Return '*racctp' to racct_zone and reset the pointer to NULL.  With
+ * assertions enabled, every reclaimable resource that is neither sloppy
+ * nor dampened must have been released already.  The caller must hold
+ * racct_lock.
+ */
+static void
+racct_destroy_locked(struct racct **racctp)
+{
+	struct racct *rp;
+	int res;
+
+	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racctp != NULL, ("NULL racctp"));
+	KASSERT(*racctp != NULL, ("NULL racct"));
+
+	rp = *racctp;
+
+	for (res = 0; res <= RACCT_MAX; res++) {
+		/* Only strictly accounted, reclaimable resources are checked. */
+		if (racct_is_sloppy(res) || !racct_is_reclaimable(res) ||
+		    racct_is_dampened(res))
+			continue;
+		KASSERT(rp->r_resources[res] == 0,
+		    ("destroying non-empty racct: "
+		    "%ju allocated for resource %d\n",
+		    rp->r_resources[res], res));
+	}
+
+	*racctp = NULL;
+	uma_zfree(racct_zone, rp);
+}
+
+/*
+ * Destroy the racct pointed to by '*racct' and set the pointer to NULL.
+ * Takes racct_lock around racct_destroy_locked(), so the caller must not
+ * already hold it.
+ */
+void
+racct_destroy(struct racct **racct)
+{
+
+ mtx_lock(&racct_lock);
+ racct_destroy_locked(racct);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Adjust consumption of 'resource' in 'racct' by 'amount'.  Differently
+ * from other cases, 'amount' here may be less than zero, which is why it
+ * is a signed quantity: callers pass either a negated amount or a signed
+ * difference.
+ *
+ * Only the given racct is touched; updating its parent containers is the
+ * caller's job.  A counter that would drop below zero is clamped to zero,
+ * which is tolerated only for sloppy or dampened resources.  The caller
+ * must hold racct_lock.
+ */
+static void
+racct_alloc_resource(struct racct *racct, int resource,
+    int64_t amount)
+{
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racct != NULL, ("NULL racct"));
+
+	racct->r_resources[resource] += amount;
+	if (racct->r_resources[resource] < 0) {
+		KASSERT(racct_is_sloppy(resource) ||
+		    racct_is_dampened(resource),
+		    ("racct_alloc_resource: usage < 0"));
+		racct->r_resources[resource] = 0;
+	}
+}
+
+/*
+ * Charge 'amount' of 'resource' to process 'p' and to the containers
+ * reachable through its credential.  Returns 0 on success.  When RCTL is
+ * compiled in and rctl_enforce() reports an error for a deniable
+ * resource, nothing is charged and that error is returned.
+ *
+ * System processes (P_SYSTEM) are not accounted.  The caller must hold
+ * the proc lock.
+ */
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+#ifdef RCTL
+	int error;
+#endif
+
+	if ((p->p_flag & P_SYSTEM) != 0)
+		return (0);
+
+	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
+
+	/*
+	 * p->p_ucred is dereferenced below, which requires the proc lock.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(amount >= 0, ("racct_add: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	mtx_lock(&racct_lock);
+#ifdef RCTL
+	error = rctl_enforce(p, resource, amount);
+	if (error != 0 && racct_is_deniable(resource)) {
+		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
+		    amount, 0, 0);
+		mtx_unlock(&racct_lock);
+		return (error);
+	}
+#endif
+	/* Process first, then user, prison chain and login class. */
+	racct_alloc_resource(p->p_racct, resource, amount);
+	racct_add_cred_locked(p->p_ucred, resource, amount);
+	mtx_unlock(&racct_lock);
+
+	return (0);
+}
+
+/*
+ * Charge 'amount' of 'resource' to every container reachable from 'cred':
+ * the real-uid uidinfo, the prison and each of its ancestors, and the
+ * login class.  Callers in this file invoke it with racct_lock held.
+ */
+static void
+racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+	struct prison *jail;
+
+	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
+	    0, 0);
+
+	KASSERT(amount >= 0,
+	    ("racct_add_cred: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
+
+	/* Walk from the credential's prison up to the outermost one. */
+	jail = cred->cr_prison;
+	while (jail != NULL) {
+		racct_alloc_resource(jail->pr_racct, resource, amount);
+		jail = jail->pr_parent;
+	}
+
+	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for credential 'cred'.
+ * Doesn't check for limits and never fails.
+ *
+ * Takes racct_lock around racct_add_cred_locked(), so the caller must not
+ * already hold it.
+ *
+ * XXX: Shouldn't this ever return an error?
+ */
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+ mtx_lock(&racct_lock);
+ racct_add_cred_locked(cred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for process 'p'.
+ * Doesn't check for limits and never fails.
+ *
+ * System processes (P_SYSTEM) are not accounted.  The caller must hold
+ * the proc lock.
+ */
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+
+	if (p->p_flag & P_SYSTEM)
+		return;
+
+	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
+
+	/*
+	 * We need proc lock to dereference p->p_ucred.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(amount >= 0,
+	    ("racct_add_force: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	/*
+	 * Update the process and the containers behind its credential under
+	 * a single hold of racct_lock, as racct_add() does.  Dropping the
+	 * lock in between would let other threads observe the process charged
+	 * but its user, prisons and login class not yet charged.
+	 */
+	mtx_lock(&racct_lock);
+	racct_alloc_resource(p->p_racct, resource, amount);
+	racct_add_cred_locked(p->p_ucred, resource, amount);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Set the usage of 'resource' for process 'p' to 'amount' and apply the
+ * difference to the containers behind its credential.  Returns 0 on
+ * success.  When RCTL is compiled in, an increase is first checked with
+ * rctl_enforce(); an error for a deniable resource is returned without
+ * changing anything.
+ *
+ * System processes (P_SYSTEM) are not accounted.  The caller must hold
+ * the proc lock; callers in this file also hold racct_lock.
+ */
+static int
+racct_set_locked(struct proc *p, int resource, uint64_t amount)
+{
+	struct racct *own;
+	int64_t delta;
+#ifdef RCTL
+	int error;
+#endif
+
+	if ((p->p_flag & P_SYSTEM) != 0)
+		return (0);
+
+	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+	/*
+	 * p->p_ucred is dereferenced below, which requires the proc lock.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(amount >= 0, ("racct_set: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	own = p->p_racct;
+	delta = amount - own->r_resources[resource];
+#ifdef notyet
+	KASSERT(delta >= 0 || racct_is_reclaimable(resource),
+	    ("racct_set: usage of non-reclaimable resource %d dropping",
+	    resource));
+#endif
+#ifdef RCTL
+	/* Limits are consulted only when usage grows. */
+	if (delta > 0) {
+		error = rctl_enforce(p, resource, delta);
+		if (error != 0 && racct_is_deniable(resource)) {
+			SDT_PROBE(racct, kernel, rusage, set_failure, p,
+			    resource, amount, 0, 0);
+			return (error);
+		}
+	}
+#endif
+	racct_alloc_resource(own, resource, delta);
+	if (delta > 0)
+		racct_add_cred_locked(p->p_ucred, resource, delta);
+	else if (delta < 0)
+		racct_sub_cred_locked(p->p_ucred, resource, -delta);
+
+	return (0);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p'.
+ * Return 0 if it's below limits, or errno, if it's not.
+ *
+ * Note that decreasing the allocation always returns 0,
+ * even if it's above the limit.
+ *
+ * Takes racct_lock around racct_set_locked(), which in turn asserts that
+ * the caller holds the proc lock.
+ */
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+ int error;
+
+ mtx_lock(&racct_lock);
+ error = racct_set_locked(p, resource, amount);
+ mtx_unlock(&racct_lock);
+ return (error);
+}
+
+/*
+ * Set the usage of 'resource' for process 'p' to 'amount' without
+ * consulting any limits, and apply the difference to the containers
+ * behind its credential.  Never fails.
+ *
+ * System processes (P_SYSTEM) are not accounted.  The caller must hold
+ * the proc lock.
+ */
+void
+racct_set_force(struct proc *p, int resource, uint64_t amount)
+{
+	int64_t delta;
+
+	if ((p->p_flag & P_SYSTEM) != 0)
+		return;
+
+	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+	/*
+	 * p->p_ucred is dereferenced below, which requires the proc lock.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(amount >= 0,
+	    ("racct_set_force: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	mtx_lock(&racct_lock);
+	delta = amount - p->p_racct->r_resources[resource];
+	racct_alloc_resource(p->p_racct, resource, delta);
+	if (delta < 0)
+		racct_sub_cred_locked(p->p_ucred, resource, -delta);
+	else if (delta > 0)
+		racct_add_cred_locked(p->p_ucred, resource, delta);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable. Amount of already allocated resource does
+ * not matter.
+ *
+ * The value comes from rctl_get_limit() when RCTL is compiled in;
+ * otherwise UINT64_MAX is returned.
+ */
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+ return (rctl_get_limit(p, resource));
+#else
+ return (UINT64_MAX);
+#endif
+}
+
+/*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable. Amount of already allocated resource does
+ * matter.
+ *
+ * The value comes from rctl_get_available() when RCTL is compiled in;
+ * otherwise UINT64_MAX is returned.
+ */
+uint64_t
+racct_get_available(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+ return (rctl_get_available(p, resource));
+#else
+ return (UINT64_MAX);
+#endif
+}
+
+/*
+ * Release 'amount' of 'resource' previously charged to process 'p', and
+ * release it from the containers behind its credential as well.
+ *
+ * Only reclaimable resources may be released, and not more than the
+ * process currently has charged.  System processes (P_SYSTEM) are not
+ * accounted.  The caller must hold the proc lock.
+ */
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+
+	if ((p->p_flag & P_SYSTEM) != 0)
+		return;
+
+	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
+
+	/*
+	 * p->p_ucred is dereferenced below, which requires the proc lock.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(amount >= 0, ("racct_sub: invalid amount for resource %d: %ju",
+	    resource, amount));
+	KASSERT(racct_is_reclaimable(resource),
+	    ("racct_sub: called for non-reclaimable resource %d", resource));
+
+	mtx_lock(&racct_lock);
+	/* The process counter is only stable once racct_lock is held. */
+	KASSERT(amount <= p->p_racct->r_resources[resource],
+	    ("racct_sub: freeing %ju of resource %d, which is more "
+	    "than allocated %jd for %s (pid %d)", amount, resource,
+	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
+
+	racct_alloc_resource(p->p_racct, resource, -amount);
+	racct_sub_cred_locked(p->p_ucred, resource, amount);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Release 'amount' of 'resource' from every container reachable from
+ * 'cred': the real-uid uidinfo, the prison and each of its ancestors, and
+ * the login class.  Callers in this file invoke it with racct_lock held.
+ */
+static void
+racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+	struct prison *jail;
+
+	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
+	    0, 0);
+
+	KASSERT(amount >= 0,
+	    ("racct_sub_cred: invalid amount for resource %d: %ju",
+	    resource, amount));
+#ifdef notyet
+	KASSERT(racct_is_reclaimable(resource),
+	    ("racct_sub_cred: called for non-reclaimable resource %d",
+	    resource));
+#endif
+
+	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
+
+	/* Walk from the credential's prison up to the outermost one. */
+	jail = cred->cr_prison;
+	while (jail != NULL) {
+		racct_alloc_resource(jail->pr_racct, resource, -amount);
+		jail = jail->pr_parent;
+	}
+
+	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
+ *
+ * Takes racct_lock around racct_sub_cred_locked(), so the caller must not
+ * already hold it.
+ */
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+ mtx_lock(&racct_lock);
+ racct_sub_cred_locked(cred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Set up resource accounting for a newly forked 'child': allocate its
+ * racct and copy the parent's usage of every inheritable resource.
+ *
+ * Returns 0 on success.  On failure the error from racct_set_locked()
+ * (or from rctl_proc_fork() when RCTL is compiled in) is returned and the
+ * child's racct has already been destroyed.  System processes get an
+ * empty racct and no inherited usage.
+ */
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+	int error, res;
+
+	/*
+	 * Every process, kernel ones included, gets a racct.
+	 */
+	racct_create(&child->p_racct);
+
+	/*
+	 * No resource accounting for kernel processes.
+	 */
+	if ((child->p_flag & P_SYSTEM) != 0)
+		return (0);
+
+	error = 0;
+
+	PROC_LOCK(parent);
+	PROC_LOCK(child);
+	mtx_lock(&racct_lock);
+
+	/*
+	 * Inherit resource usage; stop at the first resource that is denied.
+	 */
+	for (res = 0; res <= RACCT_MAX; res++) {
+		if (parent->p_racct->r_resources[res] != 0 &&
+		    racct_is_inheritable(res)) {
+			error = racct_set_locked(child, res,
+			    parent->p_racct->r_resources[res]);
+			if (error != 0)
+				break;
+		}
+	}
+
+#ifdef RCTL
+	if (error == 0)
+		error = rctl_proc_fork(parent, child);
+#endif
+
+	if (error != 0) {
+		/*
+		 * XXX: The only purpose of zeroing the usage is to prevent
+		 *	from tripping checks in racct_destroy().
+		 */
+		for (res = 0; res <= RACCT_MAX; res++)
+			racct_set_locked(child, res, 0);
+		racct_destroy_locked(&child->p_racct);
+	}
+
+	mtx_unlock(&racct_lock);
+	PROC_UNLOCK(child);
+	PROC_UNLOCK(parent);
+
+	return (error);
+}
+
+/*
+ * Tear down resource accounting for process 'p': record its final CPU
+ * time, drop the usage that is not released by other means, and destroy
+ * its racct.  Takes and releases the proc lock itself.
+ */
+void
+racct_proc_exit(struct proc *p)
+{
+	/* XXX: Free these some other way. */
+	static const int released[] = {
+		RACCT_FSIZE,
+		RACCT_NPTS,
+		RACCT_NTHR,
+		RACCT_RSS
+	};
+	uint64_t usec;
+	unsigned int idx;
+
+	PROC_LOCK(p);
+	/*
+	 * rux does not need recalculating here; proc_reap() has already
+	 * done that.
+	 */
+	usec = cputick2usec(p->p_rux.rux_runtime);
+#ifdef notyet
+	KASSERT(usec >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
+#else
+	/* Never report less CPU time than was recorded before. */
+	if (usec < p->p_prev_runtime)
+		usec = p->p_prev_runtime;
+#endif
+	racct_set(p, RACCT_CPU, usec);
+
+	for (idx = 0; idx < sizeof(released) / sizeof(released[0]); idx++)
+		racct_set(p, released[idx], 0);
+	PROC_UNLOCK(p);
+
+#ifdef RCTL
+	rctl_racct_release(p->p_racct);
+#endif
+	racct_destroy(&p->p_racct);
+}
+
+/*
+ * Called after the credential of process 'p' changed from 'oldcred' to
+ * 'newcred'.  For each kind of container that differs between the two
+ * (uidinfo, login class, prison chain), the process' usage is removed
+ * from the old container and added to the new one.  When RCTL is compiled
+ * in, rctl is notified afterwards.
+ *
+ * The caller must not hold the proc lock.
+ */
+void
+racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
+    struct ucred *newcred)
+{
+	struct uidinfo *uip_old, *uip_new;
+	struct loginclass *lc_old, *lc_new;
+	struct prison *pr_old, *pr_new, *jail;
+
+	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+
+	uip_old = oldcred->cr_ruidinfo;
+	uip_new = newcred->cr_ruidinfo;
+	lc_old = oldcred->cr_loginclass;
+	lc_new = newcred->cr_loginclass;
+	pr_old = oldcred->cr_prison;
+	pr_new = newcred->cr_prison;
+
+	mtx_lock(&racct_lock);
+
+	if (uip_old != uip_new) {
+		racct_sub_racct(uip_old->ui_racct, p->p_racct);
+		racct_add_racct(uip_new->ui_racct, p->p_racct);
+	}
+
+	if (lc_old != lc_new) {
+		racct_sub_racct(lc_old->lc_racct, p->p_racct);
+		racct_add_racct(lc_new->lc_racct, p->p_racct);
+	}
+
+	if (pr_old != pr_new) {
+		/* Leave the whole old prison chain, then join the new one. */
+		jail = pr_old;
+		while (jail != NULL) {
+			racct_sub_racct(jail->pr_racct, p->p_racct);
+			jail = jail->pr_parent;
+		}
+		jail = pr_new;
+		while (jail != NULL) {
+			racct_add_racct(jail->pr_racct, p->p_racct);
+			jail = jail->pr_parent;
+		}
+	}
+
+	mtx_unlock(&racct_lock);
+
+#ifdef RCTL
+	rctl_proc_ucred_changed(p, newcred);
+#endif
+}
+
+/*
+ * Body of the "racctd" kernel process (started through racctd_kp).
+ * Loops forever: on each pass it walks all processes under a shared
+ * allproc_lock and refreshes the RACCT_CPU and RACCT_WALLCLOCK usage of
+ * every normal, non-system process, then sleeps for hz ticks.
+ */
+static void
+racctd(void)
+{
+ struct thread *td;
+ struct proc *p;
+ struct timeval wallclock;
+ uint64_t runtime;
+
+ for (;;) {
+ sx_slock(&allproc_lock);
+
+ FOREACH_PROC_IN_SYSTEM(p) {
+ /* Skip processes that are not in the normal state, and kernel ones. */
+ if (p->p_state != PRS_NORMAL)
+ continue;
+ if (p->p_flag & P_SYSTEM)
+ continue;
+
+ /*
+ * Wall-clock usage is the uptime elapsed since the process started.
+ * NOTE(review): p->p_stats is read here before PROC_LOCK is taken --
+ * confirm that this is safe.
+ */
+ microuptime(&wallclock);
+ timevalsub(&wallclock, &p->p_stats->p_start);
+ PROC_LOCK(p);
+ PROC_SLOCK(p);
+ /*
+ * Aggregate per-thread usage into p->p_rux.
+ * NOTE(review): the purpose of the thread_lock()/thread_unlock() pair
+ * right after ruxagg() is not evident from this file.
+ */
+ FOREACH_THREAD_IN_PROC(p, td) {
+ ruxagg(p, td);
+ thread_lock(td);
+ thread_unlock(td);
+ }
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+ PROC_SUNLOCK(p);
+#ifdef notyet
+ KASSERT(runtime >= p->p_prev_runtime,
+ ("runtime < p_prev_runtime"));
+#else
+ /* Never let the recorded CPU time go backwards. */
+ if (runtime < p->p_prev_runtime)
+ runtime = p->p_prev_runtime;
+#endif
+ p->p_prev_runtime = runtime;
+ /* Both counters are updated under one hold of racct_lock; return values are ignored. */
+ mtx_lock(&racct_lock);
+ racct_set_locked(p, RACCT_CPU, runtime);
+ racct_set_locked(p, RACCT_WALLCLOCK,
+ wallclock.tv_sec * 1000000 + wallclock.tv_usec);
+ mtx_unlock(&racct_lock);
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+ pause("-", hz);
+ }
+}
+
+/*
+ * Descriptor handed to kproc_start() by the SYSINIT below, so that a
+ * kernel process named "racctd" running racctd() is created at
+ * SI_SUB_RACCTD. The third initializer is NULL.
+ */
+static struct kproc_desc racctd_kp = {
+ "racctd",
+ racctd,
+ NULL
+};
+SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
+
+/*
+ * Subsystem initialization, run through SYSINIT at SI_SUB_RACCT: creates
+ * the UMA zone that backs struct racct and allocates the racct for
+ * prison0.
+ */
+static void
+racct_init(void)
+{
+
+ racct_zone = uma_zcreate("racct", sizeof(struct racct),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ /*
+ * XXX: Move this somewhere.
+ */
+ racct_create(&prison0.pr_racct);
+}
+SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
+
+#else /* !RACCT */
+
+/*
+ * Stub for kernels built without RACCT: accounts nothing and always
+ * returns 0.
+ */
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+
+ return (0);
+}
+
+/*
+ * Stub for kernels built without RACCT: does nothing.
+ */
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+/*
+ * Stub for kernels built without RACCT: does nothing.  The function
+ * returns void, so it must not return a value.
+ */
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+/*
+ * Stub for kernels built without RACCT: records nothing and always
+ * returns 0.
+ */
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+
+ return (0);
+}
+
+/*
+ * Stub for kernels built without RACCT: does nothing.
+ */
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+/*
+ * Stub for kernels built without RACCT: does nothing.
+ */
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+/*
+ * Stub for kernels built without RACCT: always returns UINT64_MAX.
+ */
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+ return (UINT64_MAX);
+}
+
+/*
+ * Stub for kernels built without RACCT: allocates nothing and leaves
+ * '*racctp' untouched.
+ */
+void
+racct_create(struct racct **racctp)
+{
+}
+
+/*
+ * Stub for kernels built without RACCT: frees nothing and leaves
+ * '*racctp' untouched.
+ */
+void
+racct_destroy(struct racct **racctp)
+{
+}
+
+/*
+ * Stub for kernels built without RACCT: sets up nothing for the child and
+ * always returns 0.
+ */
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+
+ return (0);
+}
+
+/*
+ * Stub for kernels built without RACCT: does nothing.
+ */
+void
+racct_proc_exit(struct proc *p)
+{
+}
+
+#endif /* !RACCT */
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index eca47a8..a6f8a6f 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -65,6 +65,8 @@ options MAC # TrustedBSD MAC Framework
#options KDTRACE_HOOKS # Kernel DTrace hooks
options INCLUDE_CONFIG_FILE # Include this file in kernel
+options RACCT
+
# Debugging for use in -current
options KDB # Enable kernel debugger support.
options DDB # Support DDB.
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 851b9b8..d5fb648 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -2930,6 +2930,9 @@ options AAC_DEBUG # Debugging levels:
# 2 - extremely noisy, emit trace
# items in loops, etc.
+# Resource Accounting
+options RACCT
+
# Yet more undocumented options for linting.
# BKTR_ALLOC_PAGES has no effect except to cause warnings, and
# BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the
diff --git a/sys/conf/files b/sys/conf/files
index bced838..1cf8ff1 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2225,6 +2225,7 @@ kern/kern_poll.c optional device_polling
kern/kern_priv.c standard
kern/kern_proc.c standard
kern/kern_prot.c standard
+kern/kern_racct.c standard
kern/kern_resource.c standard
kern/kern_rmlock.c standard
kern/kern_rwlock.c standard
diff --git a/sys/conf/options b/sys/conf/options
index 81fb881..56dbd34 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -873,6 +873,9 @@ SDP_DEBUG opt_ofed.h
IPOIB_DEBUG opt_ofed.h
IPOIB_CM opt_ofed.h
+# Resource Accounting
+RACCT opt_global.h
+
# At least one of the AR71XX ubiquiti boards has a Redboot configuration
# that "lies" about the amount of RAM it has. Until a cleaner method is
# defined, this option will suffice in overriding what Redboot says.
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index eef0808..1977b96 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
@@ -526,6 +527,9 @@ proc0_init(void *dummy __unused)
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
p->p_cpulimit = RLIM_INFINITY;
+ /* Initialize resource accounting structures. */
+ racct_create(&p->p_racct);
+
p->p_stats = pstats_alloc();
/* Allocate a prototype map so we have something to fork. */
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index e95ac8f..01d6b75 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/wait.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/signalvar.h>
@@ -741,6 +742,11 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options,
(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
/*
+ * Destroy resource accounting information associated with the process.
+ */
+ racct_proc_exit(p);
+
+ /*
* Free credentials, arguments, and sigacts.
*/
crfree(p->p_ucred);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index ebd4e6d..1dcc5bb 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall.h>
@@ -783,6 +784,21 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp)
knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx);
STAILQ_INIT(&newproc->p_ktr);
+ /*
+ * XXX: This is ugly; when we copy resource usage, we need to bump
+ * per-cred resource counters.
+ */
+ newproc->p_ucred = p1->p_ucred;
+
+ /*
+ * Initialize resource accounting for the child process.
+ */
+ error = racct_proc_fork(p1, newproc);
+ if (error != 0) {
+ error = EAGAIN;
+ goto fail1;
+ }
+
/* We have to lock the process tree while we look for a pid. */
sx_slock(&proctree_lock);
@@ -827,6 +843,7 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp)
error = EAGAIN;
fail:
+ racct_proc_exit(newproc);
sx_sunlock(&proctree_lock);
if (ppsratecheck(&lastfail, &curfail, 1))
printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 08343dd..6f72feb 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/racct.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/namei.h>
@@ -1195,6 +1196,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
root = mypr->pr_root;
vref(root);
}
+ racct_create(&pr->pr_racct);
strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
pr->pr_flags |= PR_HOST;
#if defined(INET) || defined(INET6)
@@ -2295,6 +2297,9 @@ do_jail_attach(struct thread *td, struct prison *pr)
newcred->cr_prison = pr;
p->p_ucred = newcred;
PROC_UNLOCK(p);
+#ifdef RACCT
+ racct_proc_ucred_changed(p, oldcred, newcred);
+#endif
crfree(oldcred);
prison_deref(ppr, PD_DEREF | PD_DEUREF);
return (0);
@@ -2527,6 +2532,7 @@ prison_deref(struct prison *pr, int flags)
if (pr->pr_cpuset != NULL)
cpuset_rel(pr->pr_cpuset);
osd_jail_exit(pr);
+ racct_destroy(&pr->pr_racct);
free(pr, M_PRISON);
/* Removing a prison frees a reference on its parent. */
@@ -4263,6 +4269,17 @@ SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
"B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");
+/*
+ * Call 'callback' once for the racct of every prison on the allprison
+ * list, passing 'arg2' and 'arg3' through unchanged.  allprison_lock is
+ * held shared for the duration of the walk.
+ */
+void
+prison_racct_foreach(void (*callback)(struct racct *racct,
+    void *arg2, void *arg3), void *arg2, void *arg3)
+{
+	struct prison *jail;
+
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(jail, &allprison, pr_list) {
+		callback(jail->pr_racct, arg2, arg3);
+	}
+	sx_sunlock(&allprison_lock);
+}
#ifdef DDB
diff --git a/sys/kern/kern_loginclass.c b/sys/kern/kern_loginclass.c
index cf644d5..d980246 100644
--- a/sys/kern/kern_loginclass.c
+++ b/sys/kern/kern_loginclass.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/racct.h>
#include <sys/refcount.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
@@ -90,6 +91,7 @@ loginclass_free(struct loginclass *lc)
mtx_lock(&loginclasses_lock);
if (refcount_release(&lc->lc_refcount)) {
+ racct_destroy(&lc->lc_racct);
LIST_REMOVE(lc, lc_next);
mtx_unlock(&loginclasses_lock);
free(lc, M_LOGINCLASS);
@@ -115,6 +117,7 @@ loginclass_find(const char *name)
return (NULL);
newlc = malloc(sizeof(*newlc), M_LOGINCLASS, M_ZERO | M_WAITOK);
+ racct_create(&newlc->lc_racct);
mtx_lock(&loginclasses_lock);
LIST_FOREACH(lc, &loginclasses, lc_next) {
@@ -124,6 +127,7 @@ loginclass_find(const char *name)
/* Found loginclass with a matching name? */
loginclass_hold(lc);
mtx_unlock(&loginclasses_lock);
+ racct_destroy(&newlc->lc_racct);
free(newlc, M_LOGINCLASS);
return (lc);
}
@@ -205,13 +209,27 @@ setloginclass(struct thread *td, struct setloginclass_args *uap)
newcred->cr_loginclass = newlc;
p->p_ucred = newcred;
PROC_UNLOCK(p);
-
+#ifdef RACCT
+ racct_proc_ucred_changed(p, oldcred, newcred);
+#endif
loginclass_free(oldcred->cr_loginclass);
crfree(oldcred);
return (0);
}
+/*
+ * Call 'callback' once for the racct of every login class on the
+ * loginclasses list, passing 'arg2' and 'arg3' through unchanged.
+ * loginclasses_lock is held for the duration of the walk.
+ */
+void
+loginclass_racct_foreach(void (*callback)(struct racct *racct,
+    void *arg2, void *arg3), void *arg2, void *arg3)
+{
+	struct loginclass *cls;
+
+	mtx_lock(&loginclasses_lock);
+	LIST_FOREACH(cls, &loginclasses, lc_next) {
+		callback(cls->lc_racct, arg2, arg3);
+	}
+	mtx_unlock(&loginclasses_lock);
+}
+
static void
lc_init(void)
{
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
new file mode 100644
index 0000000..28bc7b2
--- /dev/null
+++ b/sys/kern/kern_racct.c
@@ -0,0 +1,837 @@
+/*-
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/loginclass.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/racct.h>
+#include <sys/resourcevar.h>
+#include <sys/sbuf.h>
+#include <sys/sched.h>
+#include <sys/sdt.h>
+#include <sys/sx.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/umtx.h>
+
+#ifdef RCTL
+#include <sys/rctl.h>
+#endif
+
+#ifdef RACCT
+
+FEATURE(racct, "Resource Accounting");
+
+static struct mtx racct_lock;
+MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
+
+static uma_zone_t racct_zone;
+
+static void racct_sub_racct(struct racct *dest, const struct racct *src);
+static void racct_sub_cred_locked(struct ucred *cred, int resource,
+ uint64_t amount);
+static void racct_add_cred_locked(struct ucred *cred, int resource,
+ uint64_t amount);
+
+SDT_PROVIDER_DEFINE(racct);
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
+SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
+ "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
+ "struct racct *", "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
+ "struct racct *");
+
+/*
+ * Property flags for each resource, indexed by RACCT_* resource number.
+ * Each entry is a bitwise OR of the RACCT_IN_THOUSANDS, RACCT_RECLAIMABLE,
+ * RACCT_INHERITABLE, RACCT_DENIABLE and RACCT_SLOPPY flags.  The table is
+ * read through the racct_is_*() macros.
+ */
+int racct_types[] = {
+ [RACCT_CPU] =
+ RACCT_IN_THOUSANDS,
+ [RACCT_FSIZE] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_DATA] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_STACK] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_CORE] =
+ RACCT_DENIABLE,
+ [RACCT_RSS] =
+ RACCT_RECLAIMABLE,
+ [RACCT_MEMLOCK] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_NPROC] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_NOFILE] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_SBSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_VMEM] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_NPTS] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_SWAP] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NTHR] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_MSGQQUEUED] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_MSGQSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NMSGQ] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NSEM] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NSEMOP] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_NSHM] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_SHMSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_WALLCLOCK] =
+ RACCT_IN_THOUSANDS };
+
+/*
+ * Add every resource counter of 'src' to the matching counter of 'dest'.
+ * The caller must hold racct_lock.
+ */
+static void
+racct_add_racct(struct racct *dest, const struct racct *src)
+{
+	int res;
+
+	mtx_assert(&racct_lock, MA_OWNED);
+
+	/* Walk all resource slots, RACCT_MAX included. */
+	res = 0;
+	while (res <= RACCT_MAX) {
+		KASSERT(dest->r_resources[res] >= 0,
+		    ("racct propagation meltdown: dest < 0"));
+		KASSERT(src->r_resources[res] >= 0,
+		    ("racct propagation meltdown: src < 0"));
+		dest->r_resources[res] += src->r_resources[res];
+		res++;
+	}
+}
+
+/*
+ * Subtract the reclaimable resource counters of 'src' from the matching
+ * counters of 'dest'.  Non-reclaimable counters are left untouched.
+ * The caller must hold racct_lock.
+ */
+static void
+racct_sub_racct(struct racct *dest, const struct racct *src)
+{
+	int res;
+
+	mtx_assert(&racct_lock, MA_OWNED);
+
+	for (res = 0; res <= RACCT_MAX; res++) {
+		/* Sloppy resources are exempt from the sanity checks. */
+		if (!racct_is_sloppy(res)) {
+			KASSERT(dest->r_resources[res] >= 0,
+			    ("racct propagation meltdown: dest < 0"));
+			KASSERT(src->r_resources[res] >= 0,
+			    ("racct propagation meltdown: src < 0"));
+			KASSERT(src->r_resources[res] <= dest->r_resources[res],
+			    ("racct propagation meltdown: src > dest"));
+		}
+
+		/* Only reclaimable resources are ever decreased. */
+		if (!racct_is_reclaimable(res))
+			continue;
+
+		dest->r_resources[res] -= src->r_resources[res];
+		if (dest->r_resources[res] >= 0)
+			continue;
+
+		/* A negative result is clamped to zero. */
+		KASSERT(racct_is_sloppy(res),
+		    ("racct_sub_racct: usage < 0"));
+		dest->r_resources[res] = 0;
+	}
+}
+
+/*
+ * Allocate a zero-filled racct from racct_zone and store it in '*racctp'.
+ * '*racctp' must be NULL on entry.  The allocation uses M_WAITOK.
+ */
+void
+racct_create(struct racct **racctp)
+{
+
+ SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
+
+ KASSERT(*racctp == NULL, ("racct already allocated"));
+
+ *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
+}
+
+/*
+ * Free the racct pointed to by '*racctp' back to racct_zone and reset
+ * '*racctp' to NULL.  The caller must hold racct_lock.
+ *
+ * With INVARIANTS, every resource that is reclaimable and not sloppy must
+ * already have dropped to zero.
+ */
+static void
+racct_destroy_locked(struct racct **racctp)
+{
+	int i;
+	struct racct *racct;
+
+	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racctp != NULL, ("NULL racctp"));
+	KASSERT(*racctp != NULL, ("NULL racct"));
+
+	racct = *racctp;
+
+	for (i = 0; i <= RACCT_MAX; i++) {
+		if (racct_is_sloppy(i))
+			continue;
+		if (!racct_is_reclaimable(i))
+			continue;
+		/*
+		 * r_resources[] is int64_t, so print it as a signed
+		 * intmax_t rather than with %ju.
+		 */
+		KASSERT(racct->r_resources[i] == 0,
+		    ("destroying non-empty racct: "
+		    "%jd allocated for resource %d\n",
+		    (intmax_t)racct->r_resources[i], i));
+	}
+	uma_zfree(racct_zone, racct);
+	*racctp = NULL;
+}
+
+/*
+ * Destroy the racct pointed to by '*racct': takes racct_lock around
+ * racct_destroy_locked().
+ */
+void
+racct_destroy(struct racct **racct)
+{
+
+ mtx_lock(&racct_lock);
+ racct_destroy_locked(racct);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Adjust consumption of 'resource' by 'amount' for 'racct' only; callers
+ * apply the same change to any other raccts themselves.  Unlike the other
+ * routines, 'amount' here is signed and may be less than zero.
+ *
+ * A result below zero is clamped to zero; that is only expected for sloppy
+ * resources and is asserted otherwise.  The caller must hold racct_lock.
+ */
+static void
+racct_alloc_resource(struct racct *racct, int resource,
+    int64_t amount)
+{
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racct != NULL, ("NULL racct"));
+
+	racct->r_resources[resource] += amount;
+	if (racct->r_resources[resource] < 0) {
+		KASSERT(racct_is_sloppy(resource),
+		    ("racct_alloc_resource: usage < 0"));
+		racct->r_resources[resource] = 0;
+	}
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for process 'p'.
+ * Return 0 if it's below limits, or errno, if it's not.
+ *
+ * An error is returned only when RCTL is compiled in and the resource is
+ * deniable; in that case no counter is updated.  Does nothing and returns 0
+ * for P_SYSTEM processes.  The caller must hold the proc lock.
+ */
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+#ifdef RCTL
+ int error;
+#endif
+
+ if (p->p_flag & P_SYSTEM)
+ return (0);
+
+ SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0, ("racct_add: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ mtx_lock(&racct_lock);
+#ifdef RCTL
+ error = rctl_enforce(p, resource, amount);
+ /* An rctl_enforce() error on a non-deniable resource is ignored. */
+ if (error && racct_is_deniable(resource)) {
+ SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
+ amount, 0, 0);
+ mtx_unlock(&racct_lock);
+ return (error);
+ }
+#endif
+ /* Charge the process first, then the raccts reached via its credential. */
+ racct_alloc_resource(p->p_racct, resource, amount);
+ racct_add_cred_locked(p->p_ucred, resource, amount);
+ mtx_unlock(&racct_lock);
+
+ return (0);
+}
+
+/*
+ * Add 'amount' of 'resource' to every racct reachable from 'cred': the
+ * uidinfo of its real uid, its prison and each parent prison, and its
+ * login class.  racct_alloc_resource() asserts that racct_lock is held.
+ */
+static void
+racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+ struct prison *pr;
+
+ SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
+ 0, 0);
+
+ KASSERT(amount >= 0,
+ ("racct_add_cred: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
+ /* Walk up the prison hierarchy until the pr_parent chain ends. */
+ for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
+ racct_alloc_resource(pr->pr_racct, resource, amount);
+ racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for credential 'cred'.
+ * Doesn't check for limits and never fails.
+ *
+ * XXX: Shouldn't this ever return an error?
+ */
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+ /* Locked wrapper around racct_add_cred_locked(). */
+ mtx_lock(&racct_lock);
+ racct_add_cred_locked(cred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for process 'p'.
+ * Doesn't check for limits and never fails.
+ *
+ * Does nothing for P_SYSTEM processes.  The caller must hold the proc lock.
+ */
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+
+	if (p->p_flag & P_SYSTEM)
+		return;
+
+	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
+
+	/*
+	 * We need proc lock to dereference p->p_ucred.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(amount >= 0,
+	    ("racct_add_force: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	/*
+	 * Update the per-process and per-credential counters under a single
+	 * hold of racct_lock, as racct_add() does, so that no other thread
+	 * can observe the process charged but its credential not yet.
+	 */
+	mtx_lock(&racct_lock);
+	racct_alloc_resource(p->p_racct, resource, amount);
+	racct_add_cred_locked(p->p_ucred, resource, amount);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p' and apply the
+ * difference to the raccts reached through p->p_ucred.
+ *
+ * Returns 0 on success.  With RCTL, an increase of a deniable resource that
+ * rctl_enforce() rejects returns that error and changes nothing.  Does
+ * nothing and returns 0 for P_SYSTEM processes.  The caller must hold the
+ * proc lock; racct_alloc_resource() asserts that racct_lock is held.
+ */
+static int
+racct_set_locked(struct proc *p, int resource, uint64_t amount)
+{
+ int64_t diff;
+#ifdef RCTL
+ int error;
+#endif
+
+ if (p->p_flag & P_SYSTEM)
+ return (0);
+
+ SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0, ("racct_set: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ /* Signed change relative to the current per-process usage. */
+ diff = amount - p->p_racct->r_resources[resource];
+#ifdef notyet
+ KASSERT(diff >= 0 || racct_is_reclaimable(resource),
+ ("racct_set: usage of non-reclaimable resource %d dropping",
+ resource));
+#endif
+#ifdef RCTL
+ /* Limits are consulted only when usage grows. */
+ if (diff > 0) {
+ error = rctl_enforce(p, resource, diff);
+ if (error && racct_is_deniable(resource)) {
+ SDT_PROBE(racct, kernel, rusage, set_failure, p,
+ resource, amount, 0, 0);
+ return (error);
+ }
+ }
+#endif
+ racct_alloc_resource(p->p_racct, resource, diff);
+ /* The credential helpers take a magnitude, so pick one by sign. */
+ if (diff > 0)
+ racct_add_cred_locked(p->p_ucred, resource, diff);
+ else if (diff < 0)
+ racct_sub_cred_locked(p->p_ucred, resource, -diff);
+
+ return (0);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p'.
+ * Return 0 if it's below limits, or errno, if it's not.
+ *
+ * Note that decreasing the allocation always returns 0,
+ * even if it's above the limit.
+ */
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+ int error;
+
+ /* Locked wrapper around racct_set_locked(). */
+ mtx_lock(&racct_lock);
+ error = racct_set_locked(p, resource, amount);
+ mtx_unlock(&racct_lock);
+ return (error);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p' without
+ * consulting any limits; never fails.  Does nothing for P_SYSTEM
+ * processes.  The caller must hold the proc lock.
+ */
+void
+racct_set_force(struct proc *p, int resource, uint64_t amount)
+{
+ int64_t diff;
+
+ if (p->p_flag & P_SYSTEM)
+ return;
+
+ SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0,
+ ("racct_set_force: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ mtx_lock(&racct_lock);
+ /* Signed change relative to the current per-process usage. */
+ diff = amount - p->p_racct->r_resources[resource];
+ racct_alloc_resource(p->p_racct, resource, diff);
+ if (diff > 0)
+ racct_add_cred_locked(p->p_ucred, resource, diff);
+ else if (diff < 0)
+ racct_sub_cred_locked(p->p_ucred, resource, -diff);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable. Amount of already allocated resource does
+ * not matter.
+ *
+ * The value comes from rctl_get_limit(); without RCTL this returns
+ * UINT64_MAX.
+ */
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+ return (rctl_get_limit(p, resource));
+#else
+ return (UINT64_MAX);
+#endif
+}
+
+/*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable. Amount of already allocated resource does
+ * matter.
+ *
+ * The value comes from rctl_get_available(); without RCTL this returns
+ * UINT64_MAX.
+ */
+uint64_t
+racct_get_available(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+ return (rctl_get_available(p, resource));
+#else
+ return (UINT64_MAX);
+#endif
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for process 'p'.
+ *
+ * 'resource' must be reclaimable and 'amount' must not exceed the process'
+ * current usage (both asserted).  Does nothing for P_SYSTEM processes.
+ * The caller must hold the proc lock.
+ */
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+
+ if (p->p_flag & P_SYSTEM)
+ return;
+
+ SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0, ("racct_sub: invalid amount for resource %d: %ju",
+ resource, amount));
+ KASSERT(racct_is_reclaimable(resource),
+ ("racct_sub: called for non-reclaimable resource %d", resource));
+
+ mtx_lock(&racct_lock);
+ KASSERT(amount <= p->p_racct->r_resources[resource],
+ ("racct_sub: freeing %ju of resource %d, which is more "
+ "than allocated %jd for %s (pid %d)", amount, resource,
+ (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
+
+ /* Release from the process, then from the raccts reached via its credential. */
+ racct_alloc_resource(p->p_racct, resource, -amount);
+ racct_sub_cred_locked(p->p_ucred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Subtract 'amount' of 'resource' from every racct reachable from 'cred':
+ * the uidinfo of its real uid, its prison and each parent prison, and its
+ * login class.  racct_alloc_resource() asserts that racct_lock is held.
+ */
+static void
+racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+ struct prison *pr;
+
+ SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
+ 0, 0);
+
+ KASSERT(amount >= 0,
+ ("racct_sub_cred: invalid amount for resource %d: %ju",
+ resource, amount));
+#ifdef notyet
+ KASSERT(racct_is_reclaimable(resource),
+ ("racct_sub_cred: called for non-reclaimable resource %d",
+ resource));
+#endif
+
+ racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
+ /* Walk up the prison hierarchy until the pr_parent chain ends. */
+ for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
+ racct_alloc_resource(pr->pr_racct, resource, -amount);
+ racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
+ */
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+ /* Locked wrapper around racct_sub_cred_locked(). */
+ mtx_lock(&racct_lock);
+ racct_sub_cred_locked(cred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Inherit resource usage information from the parent process.
+ *
+ * Creates the child's racct and copies each non-zero inheritable resource
+ * from 'parent' using racct_set_locked(), so limits are enforced.
+ * Returns 0 on success.  On failure returns the error from
+ * racct_set_locked() or rctl_proc_fork(); the child's usage is first reset
+ * to zero and its racct destroyed.
+ */
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+ int i, error = 0;
+
+ /*
+ * Create racct for the child process.
+ */
+ racct_create(&child->p_racct);
+
+ /*
+ * No resource accounting for kernel processes.
+ */
+ if (child->p_flag & P_SYSTEM)
+ return (0);
+
+ /* Both proc locks are taken before racct_lock. */
+ PROC_LOCK(parent);
+ PROC_LOCK(child);
+ mtx_lock(&racct_lock);
+
+ /*
+ * Inherit resource usage.
+ */
+ for (i = 0; i <= RACCT_MAX; i++) {
+ if (parent->p_racct->r_resources[i] == 0 ||
+ !racct_is_inheritable(i))
+ continue;
+
+ error = racct_set_locked(child, i,
+ parent->p_racct->r_resources[i]);
+ if (error != 0) {
+ /*
+ * XXX: The only purpose of these two lines is
+ * to prevent from tripping checks in racct_destroy().
+ */
+ /* Reusing 'i' is safe: the outer loop is left via goto. */
+ for (i = 0; i <= RACCT_MAX; i++)
+ racct_set_locked(child, i, 0);
+ goto out;
+ }
+ }
+
+#ifdef RCTL
+ error = rctl_proc_fork(parent, child);
+ if (error != 0) {
+ /*
+ * XXX: The only purpose of these two lines is to prevent from
+ * tripping checks in racct_destroy().
+ */
+ for (i = 0; i <= RACCT_MAX; i++)
+ racct_set_locked(child, i, 0);
+ }
+#endif
+
+out:
+ /* On any failure the child is left with p_racct == NULL. */
+ if (error != 0)
+ racct_destroy_locked(&child->p_racct);
+ mtx_unlock(&racct_lock);
+ PROC_UNLOCK(child);
+ PROC_UNLOCK(parent);
+
+ return (error);
+}
+
+/*
+ * Final accounting for process 'p': record its CPU time, zero the
+ * RACCT_FSIZE, RACCT_NPTS, RACCT_NTHR and RACCT_RSS usage, and then
+ * destroy its racct.
+ */
+void
+racct_proc_exit(struct proc *p)
+{
+ uint64_t runtime;
+
+ PROC_LOCK(p);
+ /*
+ * We don't need to calculate rux, proc_reap() has already done this.
+ */
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+#ifdef notyet
+ KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
+#else
+ /* Never report less CPU time than p_prev_runtime. */
+ if (runtime < p->p_prev_runtime)
+ runtime = p->p_prev_runtime;
+#endif
+ racct_set(p, RACCT_CPU, runtime);
+
+ /*
+ * XXX: Free this some other way.
+ */
+ racct_set(p, RACCT_FSIZE, 0);
+ racct_set(p, RACCT_NPTS, 0);
+ racct_set(p, RACCT_NTHR, 0);
+ racct_set(p, RACCT_RSS, 0);
+ PROC_UNLOCK(p);
+
+#ifdef RCTL
+ rctl_racct_release(p->p_racct);
+#endif
+ racct_destroy(&p->p_racct);
+}
+
+/*
+ * Called after credentials change, to move resource utilisation
+ * between raccts.
+ *
+ * For each of uidinfo, login class and prison that differs between
+ * 'oldcred' and 'newcred', the usage of 'p' is subtracted from the old
+ * racct and added to the new one.  Must be called without the proc lock.
+ */
+void
+racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
+ struct ucred *newcred)
+{
+ struct uidinfo *olduip, *newuip;
+ struct loginclass *oldlc, *newlc;
+ struct prison *oldpr, *newpr, *pr;
+
+ PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+
+ newuip = newcred->cr_ruidinfo;
+ olduip = oldcred->cr_ruidinfo;
+ newlc = newcred->cr_loginclass;
+ oldlc = oldcred->cr_loginclass;
+ newpr = newcred->cr_prison;
+ oldpr = oldcred->cr_prison;
+
+ mtx_lock(&racct_lock);
+ if (newuip != olduip) {
+ racct_sub_racct(olduip->ui_racct, p->p_racct);
+ racct_add_racct(newuip->ui_racct, p->p_racct);
+ }
+ if (newlc != oldlc) {
+ racct_sub_racct(oldlc->lc_racct, p->p_racct);
+ racct_add_racct(newlc->lc_racct, p->p_racct);
+ }
+ if (newpr != oldpr) {
+ /* Each prison chain is walked up to its root. */
+ for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
+ racct_sub_racct(pr->pr_racct, p->p_racct);
+ for (pr = newpr; pr != NULL; pr = pr->pr_parent)
+ racct_add_racct(pr->pr_racct, p->p_racct);
+ }
+ mtx_unlock(&racct_lock);
+
+#ifdef RCTL
+ rctl_proc_ucred_changed(p, newcred);
+#endif
+}
+
+/*
+ * Body of the "racctd" kernel process.  Loops forever: walks all processes
+ * under allproc_lock, updates RACCT_CPU and RACCT_WALLCLOCK for every
+ * PRS_NORMAL, non-P_SYSTEM process, then pauses for 'hz' ticks.
+ */
+static void
+racctd(void)
+{
+ struct thread *td;
+ struct proc *p;
+ struct timeval wallclock;
+ uint64_t runtime;
+
+ for (;;) {
+ sx_slock(&allproc_lock);
+
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state != PRS_NORMAL)
+ continue;
+ if (p->p_flag & P_SYSTEM)
+ continue;
+
+ /* Wall-clock usage is the uptime elapsed since p_start. */
+ microuptime(&wallclock);
+ timevalsub(&wallclock, &p->p_stats->p_start);
+ PROC_LOCK(p);
+ PROC_SLOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ ruxagg(p, td);
+ /* NOTE(review): empty lock/unlock pair; purpose not evident here. */
+ thread_lock(td);
+ thread_unlock(td);
+ }
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+ PROC_SUNLOCK(p);
+#ifdef notyet
+ KASSERT(runtime >= p->p_prev_runtime,
+ ("runtime < p_prev_runtime"));
+#else
+ /* Never report less CPU time than on the previous pass. */
+ if (runtime < p->p_prev_runtime)
+ runtime = p->p_prev_runtime;
+#endif
+ p->p_prev_runtime = runtime;
+ mtx_lock(&racct_lock);
+ /* Return values are ignored here. */
+ racct_set_locked(p, RACCT_CPU, runtime);
+ racct_set_locked(p, RACCT_WALLCLOCK,
+ wallclock.tv_sec * 1000000 + wallclock.tv_usec);
+ mtx_unlock(&racct_lock);
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+ pause("-", hz);
+ }
+}
+
+/*
+ * Descriptor for the "racctd" kernel process; passed to kproc_start() by
+ * the SYSINIT below at SI_SUB_RACCTD.
+ */
+static struct kproc_desc racctd_kp = {
+ "racctd",
+ racctd,
+ NULL
+};
+SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
+
+/*
+ * Create the UMA zone that raccts are allocated from and allocate the
+ * racct of prison0.  Run by the SYSINIT below at SI_SUB_RACCT.
+ */
+static void
+racct_init(void)
+{
+
+ racct_zone = uma_zcreate("racct", sizeof(struct racct),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ /*
+ * XXX: Move this somewhere.
+ */
+ racct_create(&prison0.pr_racct);
+}
+SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
+
+#else /* !RACCT */
+
+/*
+ * Stubs used when the kernel is built without RACCT.  None of them does
+ * any accounting.
+ */
+
+/* Stub: always reports success. */
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+
+ return (0);
+}
+
+/* Stub: does nothing. */
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+/* Stub: does nothing. */
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+
+ return;
+}
+
+/* Stub: always reports success. */
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+
+ return (0);
+}
+
+/* Stub: does nothing. */
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+/* Stub: does nothing. */
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+/* Stub: always returns UINT64_MAX. */
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+ return (UINT64_MAX);
+}
+
+/* Stub: allocates nothing and leaves '*racctp' untouched. */
+void
+racct_create(struct racct **racctp)
+{
+}
+
+/* Stub: does nothing. */
+void
+racct_destroy(struct racct **racctp)
+{
+}
+
+/* Stub: always reports success. */
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+
+ return (0);
+}
+
+/* Stub: does nothing. */
+void
+racct_proc_exit(struct proc *p)
+{
+}
+
+#endif /* !RACCT */
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 66b6e2d..fa7437d 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
@@ -1201,6 +1202,7 @@ uifind(uid)
if (uip == NULL) {
rw_runlock(&uihashtbl_lock);
uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
+ racct_create(&uip->ui_racct);
rw_wlock(&uihashtbl_lock);
/*
* There's a chance someone created our uidinfo while we
@@ -1209,6 +1211,7 @@ uifind(uid)
*/
if ((old_uip = uilookup(uid)) != NULL) {
/* Someone else beat us to it. */
+ racct_destroy(&uip->ui_racct);
free(uip, M_UIDINFO);
uip = old_uip;
} else {
@@ -1264,6 +1267,7 @@ uifree(uip)
/* Prepare for suboptimal case. */
rw_wlock(&uihashtbl_lock);
if (refcount_release(&uip->ui_ref)) {
+ racct_destroy(&uip->ui_racct);
LIST_REMOVE(uip, ui_hash);
rw_wunlock(&uihashtbl_lock);
if (uip->ui_sbsize != 0)
@@ -1286,6 +1290,22 @@ uifree(uip)
rw_wunlock(&uihashtbl_lock);
}
+/*
+ * Invoke 'callback' on the racct of every uidinfo currently in the uid
+ * hash table, passing 'arg2' and 'arg3' through unchanged.  Buckets are
+ * visited from index 'uihash' down to index 0 with uihashtbl_lock
+ * read-locked.
+ */
+void
+ui_racct_foreach(void (*callback)(struct racct *racct,
+    void *arg2, void *arg3), void *arg2, void *arg3)
+{
+	struct uihashhead *bucket;
+	struct uidinfo *entry;
+
+	rw_rlock(&uihashtbl_lock);
+	bucket = &uihashtbl[uihash];
+	while (bucket >= uihashtbl) {
+		LIST_FOREACH(entry, bucket, ui_hash)
+			callback(entry->ui_racct, arg2, arg3);
+		bucket--;
+	}
+	rw_runlock(&uihashtbl_lock);
+}
+
/*
* Change the count associated with number of processes
* a given user is using. When 'max' is 0, don't enforce a limit
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index 85c629a..b83ac1b 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -135,6 +135,8 @@ MALLOC_DECLARE(M_PRISON);
#define HOSTUUIDLEN 64
+struct racct;
+
/*
* This structure describes a prison. It is pointed to by all struct
* ucreds's of the inmates. pr_ref keeps track of them and is used to
@@ -166,7 +168,8 @@ struct prison {
int pr_ip6s; /* (p) number of v6 IPs */
struct in_addr *pr_ip4; /* (p) v4 IPs of jail */
struct in6_addr *pr_ip6; /* (p) v6 IPs of jail */
- void *pr_sparep[4];
+ struct racct *pr_racct; /* (c) resource accounting */
+ void *pr_sparep[3];
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
@@ -380,6 +383,8 @@ int prison_if(struct ucred *cred, struct sockaddr *sa);
char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
int sysctl_jail_param(SYSCTL_HANDLER_ARGS);
+void prison_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* _KERNEL */
#endif /* !_SYS_JAIL_H_ */
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index 1a9cb5c..2916646 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -109,6 +109,7 @@ enum sysinit_sub_id {
SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */
SI_SUB_KLD = 0x2000000, /* KLD and module setup */
SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/
+ SI_SUB_RACCT = 0x2110000, /* resource accounting */
SI_SUB_RANDOM = 0x2120000, /* random number generator */
SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */
SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */
@@ -169,6 +170,7 @@ enum sysinit_sub_id {
SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/
SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/
SI_SUB_SMP = 0xf000000, /* start the APs*/
+ SI_SUB_RACCTD = 0xf100000, /* start racctd*/
SI_SUB_RUN_SCHEDULER = 0xfffffff /* scheduler*/
};
diff --git a/sys/sys/loginclass.h b/sys/sys/loginclass.h
index 36ecf80..08f3409 100644
--- a/sys/sys/loginclass.h
+++ b/sys/sys/loginclass.h
@@ -32,6 +32,8 @@
#ifndef _SYS_LOGINCLASS_H_
#define _SYS_LOGINCLASS_H_
+struct racct;
+
/*
* Exactly one of these structures exists per login class.
*/
@@ -39,11 +41,13 @@ struct loginclass {
LIST_ENTRY(loginclass) lc_next;
char lc_name[MAXLOGNAME];
u_int lc_refcount;
+ struct racct *lc_racct;
};
void loginclass_hold(struct loginclass *lc);
void loginclass_free(struct loginclass *lc);
struct loginclass *loginclass_find(const char *name);
+void loginclass_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* !_SYS_LOGINCLASS_H_ */
-
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index c9eedff..e04d699 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -157,6 +157,7 @@ struct pargs {
* either lock is sufficient for read access, but both locks must be held
* for write access.
*/
+struct racct;
struct kaudit_record;
struct td_sched;
struct nlminfo;
@@ -566,6 +567,8 @@ struct proc {
struct cv p_pwait; /* (*) wait cv for exit/exec. */
struct cv p_dbgwait; /* (*) wait cv for debugger attach
after fork. */
+ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */
+ struct racct *p_racct; /* (b) Resource accounting. */
};
#define p_session p_pgrp->pg_session
diff --git a/sys/sys/racct.h b/sys/sys/racct.h
new file mode 100644
index 0000000..cbd96a9
--- /dev/null
+++ b/sys/sys/racct.h
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Resource accounting.
+ */
+
+#ifndef _RACCT_H_
+#define _RACCT_H_
+
+#include <sys/cdefs.h>
+#include <sys/queue.h>
+#include <sys/types.h>
+
+struct proc;
+struct rctl_rule_link;
+struct ucred;
+
+/*
+ * Resources.
+ */
+#define RACCT_UNDEFINED -1
+#define RACCT_CPU 0
+#define RACCT_FSIZE 1
+#define RACCT_DATA 2
+#define RACCT_STACK 3
+#define RACCT_CORE 4
+#define RACCT_RSS 5
+#define RACCT_MEMLOCK 6
+#define RACCT_NPROC 7
+#define RACCT_NOFILE 8
+#define RACCT_SBSIZE 9
+#define RACCT_VMEM 10
+#define RACCT_NPTS 11
+#define RACCT_SWAP 12
+#define RACCT_NTHR 13
+#define RACCT_MSGQQUEUED 14
+#define RACCT_MSGQSIZE 15
+#define RACCT_NMSGQ 16
+#define RACCT_NSEM 17
+#define RACCT_NSEMOP 18
+#define RACCT_NSHM 19
+#define RACCT_SHMSIZE 20
+#define RACCT_WALLCLOCK 21
+#define RACCT_MAX RACCT_WALLCLOCK
+
+/*
+ * Resource properties.
+ */
+#define RACCT_IN_THOUSANDS 0x01
+#define RACCT_RECLAIMABLE 0x02
+#define RACCT_INHERITABLE 0x04
+#define RACCT_DENIABLE 0x08
+#define RACCT_SLOPPY 0x10
+
+extern int racct_types[];
+
+/*
+ * Amount stored in r_resources[] is a thousand times bigger than what's
+ * visible to the userland. It gets fixed up when retrieving resource
+ * usage or adding rules.
+ */
+#define racct_is_in_thousands(X) (racct_types[X] & RACCT_IN_THOUSANDS)
+
+/*
+ * Resource usage can drop, as opposed to only grow.
+ */
+#define racct_is_reclaimable(X) (racct_types[X] & RACCT_RECLAIMABLE)
+
+/*
+ * Children inherit resource usage.
+ */
+#define racct_is_inheritable(X) (racct_types[X] & RACCT_INHERITABLE)
+
+/*
+ * racct_{add,set}(9) can actually return an error and not update resource
+ * usage counters. Note that even when resource is not deniable, allocating
+ * resource might cause signals to be sent by RCTL code.
+ */
+#define racct_is_deniable(X) (racct_types[X] & RACCT_DENIABLE)
+
+/*
+ * Per-process resource usage information makes no sense, but per-credential
+ * one does. This kind of resources are usually allocated for process, but
+ * freed using credentials.
+ */
+#define racct_is_sloppy(X) (racct_types[X] & RACCT_SLOPPY)
+
+/*
+ * The 'racct' structure defines resource consumption for a particular
+ * subject, such as process or jail.
+ *
+ * This structure must be filled with zeroes initially.
+ */
+struct racct {
+ /* Current usage of each resource, indexed by RACCT_* number. */
+ int64_t r_resources[RACCT_MAX + 1];
+ /* Head of a list of struct rctl_rule_link entries. */
+ LIST_HEAD(, rctl_rule_link) r_rule_links;
+};
+
+int racct_add(struct proc *p, int resource, uint64_t amount);
+void racct_add_cred(struct ucred *cred, int resource, uint64_t amount);
+void racct_add_force(struct proc *p, int resource, uint64_t amount);
+int racct_set(struct proc *p, int resource, uint64_t amount);
+void racct_set_force(struct proc *p, int resource, uint64_t amount);
+void racct_sub(struct proc *p, int resource, uint64_t amount);
+void racct_sub_cred(struct ucred *cred, int resource, uint64_t amount);
+uint64_t racct_get_limit(struct proc *p, int resource);
+uint64_t racct_get_available(struct proc *p, int resource);
+
+void racct_create(struct racct **racctp);
+void racct_destroy(struct racct **racctp);
+
+int racct_proc_fork(struct proc *parent, struct proc *child);
+void racct_proc_exit(struct proc *p);
+
+void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
+ struct ucred *newcred);
+
+#endif /* !_RACCT_H_ */
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index 67af9b6..f17d95f 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -79,6 +79,8 @@ struct plimit {
int pl_refcnt; /* number of references */
};
+struct racct;
+
/*-
* Per uid resource consumption. This structure is used to track
* the total resource consumption (process count, socket buffer size,
@@ -99,6 +101,7 @@ struct uidinfo {
long ui_ptscnt; /* (b) number of pseudo-terminals */
uid_t ui_uid; /* (a) uid */
u_int ui_ref; /* (b) reference count */
+ struct racct *ui_racct; /* (a) resource accounting */
};
#define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx))
@@ -140,6 +143,8 @@ struct uidinfo
void uifree(struct uidinfo *uip);
void uihashinit(void);
void uihold(struct uidinfo *uip);
+void ui_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* _KERNEL */
#endif /* !_SYS_RESOURCEVAR_H_ */
OpenPOWER on IntegriCloud