summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authortrasz <trasz@FreeBSD.org>2011-03-29 17:47:25 +0000
committertrasz <trasz@FreeBSD.org>2011-03-29 17:47:25 +0000
commitb8d3e8755df2ce0e93cf3b2ab68e0c4275c5565f (patch)
tree304753d7f5287660a242996801bd5664ee945d3a
parent056d03857b74836195225ca4b0216f0d197477b7 (diff)
downloadFreeBSD-src-b8d3e8755df2ce0e93cf3b2ab68e0c4275c5565f.zip
FreeBSD-src-b8d3e8755df2ce0e93cf3b2ab68e0c4275c5565f.tar.gz
Add racct. It's an API to keep per-process, per-jail, per-uid
and per-loginclass resource accounting information, to be used by the new resource limits code. It's connected to the build, but the code that actually calls the new functions will come later. Sponsored by: The FreeBSD Foundation Reviewed by: kib (earlier version)
-rw-r--r--kern_racct.c842
-rw-r--r--sys/amd64/conf/GENERIC2
-rw-r--r--sys/conf/NOTES3
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/options3
-rw-r--r--sys/kern/init_main.c4
-rw-r--r--sys/kern/kern_exit.c6
-rw-r--r--sys/kern/kern_fork.c17
-rw-r--r--sys/kern/kern_jail.c17
-rw-r--r--sys/kern/kern_loginclass.c20
-rw-r--r--sys/kern/kern_racct.c837
-rw-r--r--sys/kern/kern_resource.c20
-rw-r--r--sys/sys/jail.h7
-rw-r--r--sys/sys/kernel.h2
-rw-r--r--sys/sys/loginclass.h6
-rw-r--r--sys/sys/proc.h3
-rw-r--r--sys/sys/racct.h147
-rw-r--r--sys/sys/resourcevar.h5
18 files changed, 1939 insertions, 3 deletions
diff --git a/kern_racct.c b/kern_racct.c
new file mode 100644
index 0000000..229977a
--- /dev/null
+++ b/kern_racct.c
@@ -0,0 +1,842 @@
+/*-
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/loginclass.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/racct.h>
+#include <sys/resourcevar.h>
+#include <sys/sbuf.h>
+#include <sys/sched.h>
+#include <sys/sdt.h>
+#include <sys/sx.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/umtx.h>
+
+#ifdef RCTL
+#include <sys/rctl.h>
+#endif
+
+#ifdef RACCT
+
+FEATURE(racct, "Resource Accounting");
+
+/*
+ * Mutex that the helpers in this file assert (mtx_assert) or acquire
+ * before modifying r_resources[] counters.
+ */
+static struct mtx racct_lock;
+MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
+
+/* UMA zone that struct racct instances are allocated from and freed to. */
+static uma_zone_t racct_zone;
+
+/* Prototypes for static helpers defined further down in this file. */
+static void racct_sub_racct(struct racct *dest, const struct racct *src);
+static void racct_sub_cred_locked(struct ucred *cred, int resource,
+ uint64_t amount);
+static void racct_add_cred_locked(struct ucred *cred, int resource,
+ uint64_t amount);
+
+/*
+ * DTrace SDT provider and probe definitions.  The rusage probes and the
+ * racct create/destroy probes are fired through SDT_PROBE() by the
+ * functions below.  The join, join-failure and leave probes are only
+ * defined here; no SDT_PROBE() call for them is visible in this file.
+ */
+SDT_PROVIDER_DEFINE(racct);
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
+SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
+ "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
+ "struct racct *", "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
+ "struct racct *");
+
+/*
+ * Property flags for each resource, indexed by RACCT_* resource id.
+ * NOTE(review): the racct_is_*() predicates used throughout this file
+ * are defined elsewhere and presumably consult this table -- confirm
+ * against sys/racct.h.
+ */
+int racct_types[] = {
+ [RACCT_CPU] =
+ RACCT_IN_THOUSANDS,
+ [RACCT_FSIZE] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_DATA] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_STACK] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_CORE] =
+ RACCT_DENIABLE,
+ [RACCT_RSS] =
+ RACCT_RECLAIMABLE,
+ [RACCT_MEMLOCK] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_NPROC] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_NOFILE] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_SBSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_VMEM] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_NPTS] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_SWAP] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NTHR] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE,
+ [RACCT_MSGQQUEUED] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_MSGQSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NMSGQ] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NSEM] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_NSEMOP] =
+ RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+ [RACCT_NSHM] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_SHMSIZE] =
+ RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+ [RACCT_WALLCLOCK] =
+ RACCT_IN_THOUSANDS };
+
+/*
+ * Add every counter of 'src' onto the matching counter of 'dest'.
+ * The caller must hold racct_lock.
+ */
+static void
+racct_add_racct(struct racct *dest, const struct racct *src)
+{
+	int res;
+
+	mtx_assert(&racct_lock, MA_OWNED);
+
+	res = 0;
+	while (res <= RACCT_MAX) {
+		/* Neither side may be negative going into the merge. */
+		KASSERT(dest->r_resources[res] >= 0,
+		    ("racct propagation meltdown: dest < 0"));
+		KASSERT(src->r_resources[res] >= 0,
+		    ("racct propagation meltdown: src < 0"));
+		dest->r_resources[res] += src->r_resources[res];
+		res++;
+	}
+}
+
+/*
+ * Subtract the counters of 'src' from 'dest' for every reclaimable
+ * resource; counters of non-reclaimable resources are left untouched.
+ * A result that would go negative is clamped to zero, which is only
+ * tolerated (KASSERT) for sloppy or dampened resources.
+ * The caller must hold racct_lock.
+ */
+static void
+racct_sub_racct(struct racct *dest, const struct racct *src)
+{
+	int res;
+
+	mtx_assert(&racct_lock, MA_OWNED);
+
+	for (res = 0; res <= RACCT_MAX; res++) {
+		/* Strict sanity checks apply to exactly-tracked resources. */
+		if (!(racct_is_sloppy(res) || racct_is_dampened(res))) {
+			KASSERT(dest->r_resources[res] >= 0,
+			    ("racct propagation meltdown: dest < 0"));
+			KASSERT(src->r_resources[res] >= 0,
+			    ("racct propagation meltdown: src < 0"));
+			KASSERT(src->r_resources[res] <= dest->r_resources[res],
+			    ("racct propagation meltdown: src > dest"));
+		}
+
+		if (!racct_is_reclaimable(res))
+			continue;
+
+		dest->r_resources[res] -= src->r_resources[res];
+		if (dest->r_resources[res] >= 0)
+			continue;
+
+		KASSERT(racct_is_sloppy(res) ||
+		    racct_is_dampened(res),
+		    ("racct_sub_racct: usage < 0"));
+		dest->r_resources[res] = 0;
+	}
+}
+
+/*
+ * Allocate a zero-filled struct racct from racct_zone and store it in
+ * '*racctp'.  '*racctp' must be NULL on entry (checked by KASSERT).
+ * The allocation uses M_WAITOK.
+ */
+void
+racct_create(struct racct **racctp)
+{
+
+ SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
+
+ KASSERT(*racctp == NULL, ("racct already allocated"));
+
+ *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
+}
+
+/*
+ * Free the racct pointed to by '*racctp' and reset '*racctp' to NULL.
+ * The caller must hold racct_lock.  Every resource that is reclaimable
+ * and neither sloppy nor dampened is expected to be fully released by
+ * now (checked by KASSERT).
+ */
+static void
+racct_destroy_locked(struct racct **racctp)
+{
+	struct racct *victim;
+	int res;
+
+	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racctp != NULL, ("NULL racctp"));
+	KASSERT(*racctp != NULL, ("NULL racct"));
+
+	victim = *racctp;
+
+	for (res = 0; res <= RACCT_MAX; res++) {
+		/* Skip resources whose counters may legitimately be non-zero. */
+		if (racct_is_sloppy(res) || !racct_is_reclaimable(res) ||
+		    racct_is_dampened(res))
+			continue;
+		KASSERT(victim->r_resources[res] == 0,
+		    ("destroying non-empty racct: "
+		    "%ju allocated for resource %d\n",
+		    victim->r_resources[res], res));
+	}
+	uma_zfree(racct_zone, victim);
+	*racctp = NULL;
+}
+
+/*
+ * Locking wrapper: acquires racct_lock around racct_destroy_locked().
+ */
+void
+racct_destroy(struct racct **racct)
+{
+
+ mtx_lock(&racct_lock);
+ racct_destroy_locked(racct);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Adjust consumption of 'resource' in 'racct' by 'amount'.  Only the
+ * given racct is modified; callers that need to charge containing
+ * raccts (uid, jails, loginclass) invoke this once per racct.
+ *
+ * Differently from other cases, 'amount' here may be less than zero,
+ * which is why it is a signed quantity.  If the counter would drop below
+ * zero it is clamped to zero; that is only tolerated (KASSERT) for
+ * sloppy or dampened resources.
+ *
+ * The caller must hold racct_lock.
+ */
+static void
+racct_alloc_resource(struct racct *racct, int resource,
+    int64_t amount)
+{
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racct != NULL, ("NULL racct"));
+
+	racct->r_resources[resource] += amount;
+	if (racct->r_resources[resource] < 0) {
+		KASSERT(racct_is_sloppy(resource) ||
+		    racct_is_dampened(resource),
+		    ("racct_alloc_resource: usage < 0"));
+		racct->r_resources[resource] = 0;
+	}
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for process 'p'.
+ * Return 0 if it's below limits, or errno, if it's not.
+ *
+ * Processes flagged P_SYSTEM are not accounted and always get 0.  The
+ * caller must hold the process lock for 'p'.  An error is only returned
+ * when the kernel is built with RCTL, rctl_enforce() reports a failure
+ * and the resource is deniable; in that case this function changes no
+ * racct counters.
+ */
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+#ifdef RCTL
+ int error;
+#endif
+
+ if (p->p_flag & P_SYSTEM)
+ return (0);
+
+ SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ /* NOTE(review): 'amount' is unsigned, so this check cannot fail. */
+ KASSERT(amount >= 0, ("racct_add: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ mtx_lock(&racct_lock);
+#ifdef RCTL
+ error = rctl_enforce(p, resource, amount);
+ if (error && racct_is_deniable(resource)) {
+ SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
+ amount, 0, 0);
+ mtx_unlock(&racct_lock);
+ return (error);
+ }
+#endif
+ /* Charge the process itself, then the raccts reached via its cred. */
+ racct_alloc_resource(p->p_racct, resource, amount);
+ racct_add_cred_locked(p->p_ucred, resource, amount);
+ mtx_unlock(&racct_lock);
+
+ return (0);
+}
+
+/*
+ * Charge 'amount' of 'resource' to every racct reachable from 'cred':
+ * the real-uid uidinfo, the credential's prison and each of its
+ * ancestors, and finally the login class.  racct_alloc_resource()
+ * asserts that racct_lock is held.
+ */
+static void
+racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+	struct prison *jail;
+
+	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
+	    0, 0);
+
+	KASSERT(amount >= 0,
+	    ("racct_add_cred: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
+
+	/* Walk from the credential's own prison up through pr_parent. */
+	jail = cred->cr_prison;
+	while (jail != NULL) {
+		racct_alloc_resource(jail->pr_racct, resource, amount);
+		jail = jail->pr_parent;
+	}
+
+	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for credential 'cred'.
+ * Doesn't check for limits and never fails.
+ *
+ * Locking wrapper: acquires racct_lock around racct_add_cred_locked().
+ *
+ * XXX: Shouldn't this ever return an error?
+ */
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+ mtx_lock(&racct_lock);
+ racct_add_cred_locked(cred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for process 'p'.
+ * Doesn't check for limits and never fails.
+ *
+ * The caller must hold the process lock for 'p'.  Processes flagged
+ * P_SYSTEM are not accounted.
+ */
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+
+	if (p->p_flag & P_SYSTEM)
+		return;
+
+	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
+
+	/*
+	 * We need proc lock to dereference p->p_ucred.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(amount >= 0,
+	    ("racct_add_force: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	/*
+	 * Update the per-process and the per-credential counters within a
+	 * single racct_lock critical section, as racct_add() and
+	 * racct_set_force() do, so that no other racct_lock holder can
+	 * observe the process charged while its uid/jail/loginclass
+	 * raccts are not.
+	 */
+	mtx_lock(&racct_lock);
+	racct_alloc_resource(p->p_racct, resource, amount);
+	racct_add_cred_locked(p->p_ucred, resource, amount);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p'.  The
+ * difference from the current per-process value is applied to the
+ * process racct and then added to, or subtracted from, the raccts
+ * reachable from the process credential.
+ *
+ * The callers visible in this file (racct_set(), racct_proc_fork() and
+ * racctd()) hold racct_lock around the call; this function does not
+ * assert it itself, but racct_alloc_resource() does.  The process lock
+ * for 'p' must be held.
+ *
+ * Returns 0, or the error from rctl_enforce() when RCTL is compiled in,
+ * the usage would grow, and the resource is deniable.
+ */
+static int
+racct_set_locked(struct proc *p, int resource, uint64_t amount)
+{
+ int64_t diff;
+#ifdef RCTL
+ int error;
+#endif
+
+ if (p->p_flag & P_SYSTEM)
+ return (0);
+
+ SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0, ("racct_set: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ /* Signed change relative to what is currently recorded for 'p'. */
+ diff = amount - p->p_racct->r_resources[resource];
+#ifdef notyet
+ KASSERT(diff >= 0 || racct_is_reclaimable(resource),
+ ("racct_set: usage of non-reclaimable resource %d dropping",
+ resource));
+#endif
+#ifdef RCTL
+ /* Limits are only consulted when the usage increases. */
+ if (diff > 0) {
+ error = rctl_enforce(p, resource, diff);
+ if (error && racct_is_deniable(resource)) {
+ SDT_PROBE(racct, kernel, rusage, set_failure, p,
+ resource, amount, 0, 0);
+ return (error);
+ }
+ }
+#endif
+ racct_alloc_resource(p->p_racct, resource, diff);
+ if (diff > 0)
+ racct_add_cred_locked(p->p_ucred, resource, diff);
+ else if (diff < 0)
+ racct_sub_cred_locked(p->p_ucred, resource, -diff);
+
+ return (0);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p'.
+ * Return 0 if it's below limits, or errno, if it's not.
+ *
+ * Note that decreasing the allocation always returns 0,
+ * even if it's above the limit.
+ *
+ * Locking wrapper: acquires racct_lock around racct_set_locked().
+ */
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+ int error;
+
+ mtx_lock(&racct_lock);
+ error = racct_set_locked(p, resource, amount);
+ mtx_unlock(&racct_lock);
+ return (error);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p' without
+ * calling rctl_enforce(); never fails.  Processes flagged P_SYSTEM are
+ * not accounted.  The caller must hold the process lock for 'p'.
+ * Fires the same "set" probe as racct_set_locked().
+ */
+void
+racct_set_force(struct proc *p, int resource, uint64_t amount)
+{
+ int64_t diff;
+
+ if (p->p_flag & P_SYSTEM)
+ return;
+
+ SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0,
+ ("racct_set_force: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ mtx_lock(&racct_lock);
+ /* Signed change relative to what is currently recorded for 'p'. */
+ diff = amount - p->p_racct->r_resources[resource];
+ racct_alloc_resource(p->p_racct, resource, diff);
+ if (diff > 0)
+ racct_add_cred_locked(p->p_ucred, resource, diff);
+ else if (diff < 0)
+ racct_sub_cred_locked(p->p_ucred, resource, -diff);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable. Amount of already allocated resource does
+ * not matter.
+ *
+ * The value comes from rctl_get_limit(); when the kernel is built
+ * without RCTL, UINT64_MAX is returned.
+ */
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+ return (rctl_get_limit(p, resource));
+#else
+ return (UINT64_MAX);
+#endif
+}
+
+/*
+ * Returns amount of 'resource' the process 'p' can keep allocated.
+ * Allocating more than that would be denied, unless the resource
+ * is marked undeniable. Amount of already allocated resource does
+ * matter.
+ *
+ * The value comes from rctl_get_available(); when the kernel is built
+ * without RCTL, UINT64_MAX is returned.
+ */
+uint64_t
+racct_get_available(struct proc *p, int resource)
+{
+
+#ifdef RCTL
+ return (rctl_get_available(p, resource));
+#else
+ return (UINT64_MAX);
+#endif
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for process 'p'.
+ *
+ * Processes flagged P_SYSTEM are not accounted.  The caller must hold
+ * the process lock for 'p'.  The resource must be reclaimable and
+ * 'amount' must not exceed what is currently recorded for the process
+ * (both checked by KASSERT).
+ */
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+
+ if (p->p_flag & P_SYSTEM)
+ return;
+
+ SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0, ("racct_sub: invalid amount for resource %d: %ju",
+ resource, amount));
+ KASSERT(racct_is_reclaimable(resource),
+ ("racct_sub: called for non-reclaimable resource %d", resource));
+
+ mtx_lock(&racct_lock);
+ KASSERT(amount <= p->p_racct->r_resources[resource],
+ ("racct_sub: freeing %ju of resource %d, which is more "
+ "than allocated %jd for %s (pid %d)", amount, resource,
+ (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
+
+ /* The negated unsigned 'amount' is how the decrement is expressed. */
+ racct_alloc_resource(p->p_racct, resource, -amount);
+ racct_sub_cred_locked(p->p_ucred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Credit 'amount' of 'resource' back to every racct reachable from
+ * 'cred': the real-uid uidinfo, the credential's prison and each of its
+ * ancestors, and finally the login class.  racct_alloc_resource()
+ * asserts that racct_lock is held.
+ */
+static void
+racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+	struct prison *jail;
+
+	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
+	    0, 0);
+
+	KASSERT(amount >= 0,
+	    ("racct_sub_cred: invalid amount for resource %d: %ju",
+	    resource, amount));
+#ifdef notyet
+	KASSERT(racct_is_reclaimable(resource),
+	    ("racct_sub_cred: called for non-reclaimable resource %d",
+	    resource));
+#endif
+
+	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
+
+	/* Walk from the credential's own prison up through pr_parent. */
+	jail = cred->cr_prison;
+	while (jail != NULL) {
+		racct_alloc_resource(jail->pr_racct, resource, -amount);
+		jail = jail->pr_parent;
+	}
+
+	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
+ *
+ * Locking wrapper: acquires racct_lock around racct_sub_cred_locked().
+ */
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+ mtx_lock(&racct_lock);
+ racct_sub_cred_locked(cred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Inherit resource usage information from the parent process.
+ *
+ * Allocates the child's racct and, unless the child is flagged
+ * P_SYSTEM, copies every non-zero inheritable counter from the parent
+ * via racct_set_locked().  Returns 0 on success.  On failure the child's
+ * counters are reset to zero, its racct is destroyed (child->p_racct
+ * becomes NULL) and the error from racct_set_locked() or
+ * rctl_proc_fork() is returned.
+ *
+ * Locks taken, in order: parent proc lock, child proc lock, racct_lock.
+ */
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+ int i, error = 0;
+
+ /*
+ * Create racct for the child process.
+ */
+ racct_create(&child->p_racct);
+
+ /*
+ * No resource accounting for kernel processes.
+ */
+ if (child->p_flag & P_SYSTEM)
+ return (0);
+
+ PROC_LOCK(parent);
+ PROC_LOCK(child);
+ mtx_lock(&racct_lock);
+
+ /*
+ * Inherit resource usage.
+ */
+ for (i = 0; i <= RACCT_MAX; i++) {
+ if (parent->p_racct->r_resources[i] == 0 ||
+ !racct_is_inheritable(i))
+ continue;
+
+ error = racct_set_locked(child, i,
+ parent->p_racct->r_resources[i]);
+ if (error != 0) {
+ /*
+ * XXX: The only purpose of these two lines is
+ * to prevent from tripping checks in racct_destroy().
+ */
+ /* Reusing 'i' is safe: the outer loop is left via goto. */
+ for (i = 0; i <= RACCT_MAX; i++)
+ racct_set_locked(child, i, 0);
+ goto out;
+ }
+ }
+
+#ifdef RCTL
+ error = rctl_proc_fork(parent, child);
+ if (error != 0) {
+ /*
+ * XXX: The only purpose of these two lines is to prevent from
+ * tripping checks in racct_destroy().
+ */
+ for (i = 0; i <= RACCT_MAX; i++)
+ racct_set_locked(child, i, 0);
+ }
+#endif
+
+out:
+ /* On any failure the freshly created child racct is released again. */
+ if (error != 0)
+ racct_destroy_locked(&child->p_racct);
+ mtx_unlock(&racct_lock);
+ PROC_UNLOCK(child);
+ PROC_UNLOCK(parent);
+
+ return (error);
+}
+
+/*
+ * Final accounting for process 'p': record its CPU time, zero the
+ * RACCT_FSIZE, RACCT_NPTS, RACCT_NTHR and RACCT_RSS counters, then
+ * release the process racct (p->p_racct becomes NULL).
+ */
+void
+racct_proc_exit(struct proc *p)
+{
+ uint64_t runtime;
+
+ PROC_LOCK(p);
+ /*
+ * We don't need to calculate rux, proc_reap() has already done this.
+ */
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+#ifdef notyet
+ KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
+#else
+ /* Never let the recorded CPU time go backwards. */
+ if (runtime < p->p_prev_runtime)
+ runtime = p->p_prev_runtime;
+#endif
+ racct_set(p, RACCT_CPU, runtime);
+
+ /*
+ * XXX: Free this some other way.
+ */
+ racct_set(p, RACCT_FSIZE, 0);
+ racct_set(p, RACCT_NPTS, 0);
+ racct_set(p, RACCT_NTHR, 0);
+ racct_set(p, RACCT_RSS, 0);
+ PROC_UNLOCK(p);
+
+#ifdef RCTL
+ rctl_racct_release(p->p_racct);
+#endif
+ racct_destroy(&p->p_racct);
+}
+
+/*
+ * Called after credentials change, to move resource utilisation
+ * between raccts.
+ *
+ * For each of the uidinfo, login class and prison that differs between
+ * 'oldcred' and 'newcred', the usage recorded in p->p_racct is
+ * subtracted from the old racct(s) and added to the new one(s); for
+ * prisons this covers the whole pr_parent chain on both sides.  Must be
+ * called without the process lock held (asserted).
+ */
+void
+racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
+ struct ucred *newcred)
+{
+ struct uidinfo *olduip, *newuip;
+ struct loginclass *oldlc, *newlc;
+ struct prison *oldpr, *newpr, *pr;
+
+ PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+
+ newuip = newcred->cr_ruidinfo;
+ olduip = oldcred->cr_ruidinfo;
+ newlc = newcred->cr_loginclass;
+ oldlc = oldcred->cr_loginclass;
+ newpr = newcred->cr_prison;
+ oldpr = oldcred->cr_prison;
+
+ mtx_lock(&racct_lock);
+ if (newuip != olduip) {
+ racct_sub_racct(olduip->ui_racct, p->p_racct);
+ racct_add_racct(newuip->ui_racct, p->p_racct);
+ }
+ if (newlc != oldlc) {
+ racct_sub_racct(oldlc->lc_racct, p->p_racct);
+ racct_add_racct(newlc->lc_racct, p->p_racct);
+ }
+ if (newpr != oldpr) {
+ for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
+ racct_sub_racct(pr->pr_racct, p->p_racct);
+ for (pr = newpr; pr != NULL; pr = pr->pr_parent)
+ racct_add_racct(pr->pr_racct, p->p_racct);
+ }
+ mtx_unlock(&racct_lock);
+
+#ifdef RCTL
+ rctl_proc_ucred_changed(p, newcred);
+#endif
+}
+
+/*
+ * Body of the "racctd" kernel process.  Loops forever: walks all
+ * processes under a shared allproc_lock, refreshes the RACCT_CPU and
+ * RACCT_WALLCLOCK counters of every PRS_NORMAL, non-P_SYSTEM process,
+ * then sleeps via pause("-", hz) before the next pass.
+ */
+static void
+racctd(void)
+{
+ struct thread *td;
+ struct proc *p;
+ struct timeval wallclock;
+ uint64_t runtime;
+
+ for (;;) {
+ sx_slock(&allproc_lock);
+
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state != PRS_NORMAL)
+ continue;
+ if (p->p_flag & P_SYSTEM)
+ continue;
+
+ /* Time elapsed since the process start time. */
+ microuptime(&wallclock);
+ timevalsub(&wallclock, &p->p_stats->p_start);
+ PROC_LOCK(p);
+ PROC_SLOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ ruxagg(p, td);
+ /*
+ * NOTE(review): empty lock/unlock pair; its purpose
+ * is not evident from this file -- confirm.
+ */
+ thread_lock(td);
+ thread_unlock(td);
+ }
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+ PROC_SUNLOCK(p);
+#ifdef notyet
+ KASSERT(runtime >= p->p_prev_runtime,
+ ("runtime < p_prev_runtime"));
+#else
+ /* Never let the recorded CPU time go backwards. */
+ if (runtime < p->p_prev_runtime)
+ runtime = p->p_prev_runtime;
+#endif
+ p->p_prev_runtime = runtime;
+ mtx_lock(&racct_lock);
+ racct_set_locked(p, RACCT_CPU, runtime);
+ /* Wall-clock time is recorded in microseconds. */
+ racct_set_locked(p, RACCT_WALLCLOCK,
+ wallclock.tv_sec * 1000000 + wallclock.tv_usec);
+ mtx_unlock(&racct_lock);
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+ pause("-", hz);
+ }
+}
+
+/*
+ * Descriptor handed to kproc_start() at SI_SUB_RACCTD to launch racctd()
+ * as a kernel process named "racctd".
+ */
+static struct kproc_desc racctd_kp = {
+ "racctd",
+ racctd,
+ NULL
+};
+SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
+
+/*
+ * SYSINIT hook run at SI_SUB_RACCT: creates the UMA zone that backs
+ * struct racct allocations and allocates the racct of prison0.
+ */
+static void
+racct_init(void)
+{
+
+ racct_zone = uma_zcreate("racct", sizeof(struct racct),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ /*
+ * XXX: Move this somewhere.
+ */
+ racct_create(&prison0.pr_racct);
+}
+SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
+
+#else /* !RACCT */
+
+/*
+ * Stub used when the kernel is built without RACCT: nothing is
+ * accounted and the request always succeeds.
+ */
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+
+ return (0);
+}
+
+/* Stub used when the kernel is built without RACCT: does nothing. */
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+/*
+ * Stub used when the kernel is built without RACCT: does nothing.
+ * The function is declared void, so it must not return a value.
+ */
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+/*
+ * Stub used when the kernel is built without RACCT: nothing is
+ * accounted and the request always succeeds.
+ */
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+
+ return (0);
+}
+
+/* Stub used when the kernel is built without RACCT: does nothing. */
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+/* Stub used when the kernel is built without RACCT: does nothing. */
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+/* Stub used when the kernel is built without RACCT: always UINT64_MAX. */
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+ return (UINT64_MAX);
+}
+
+/* Stub used when the kernel is built without RACCT: '*racctp' is untouched. */
+void
+racct_create(struct racct **racctp)
+{
+}
+
+/* Stub used when the kernel is built without RACCT: does nothing. */
+void
+racct_destroy(struct racct **racctp)
+{
+}
+
+/* Stub used when the kernel is built without RACCT: always succeeds. */
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+
+ return (0);
+}
+
+/* Stub used when the kernel is built without RACCT: does nothing. */
+void
+racct_proc_exit(struct proc *p)
+{
+}
+
+#endif /* !RACCT */
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index eca47a8..a6f8a6f 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -65,6 +65,8 @@ options MAC # TrustedBSD MAC Framework
#options KDTRACE_HOOKS # Kernel DTrace hooks
options INCLUDE_CONFIG_FILE # Include this file in kernel
+options RACCT
+
# Debugging for use in -current
options KDB # Enable kernel debugger support.
options DDB # Support DDB.
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 851b9b8..d5fb648 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -2930,6 +2930,9 @@ options AAC_DEBUG # Debugging levels:
# 2 - extremely noisy, emit trace
# items in loops, etc.
+# Resource Accounting
+options RACCT
+
# Yet more undocumented options for linting.
# BKTR_ALLOC_PAGES has no effect except to cause warnings, and
# BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the
diff --git a/sys/conf/files b/sys/conf/files
index bced838..1cf8ff1 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2225,6 +2225,7 @@ kern/kern_poll.c optional device_polling
kern/kern_priv.c standard
kern/kern_proc.c standard
kern/kern_prot.c standard
+kern/kern_racct.c standard
kern/kern_resource.c standard
kern/kern_rmlock.c standard
kern/kern_rwlock.c standard
diff --git a/sys/conf/options b/sys/conf/options
index 81fb881..56dbd34 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -873,6 +873,9 @@ SDP_DEBUG opt_ofed.h
IPOIB_DEBUG opt_ofed.h
IPOIB_CM opt_ofed.h
+# Resource Accounting
+RACCT opt_global.h
+
# At least one of the AR71XX ubiquiti boards has a Redboot configuration
# that "lies" about the amount of RAM it has. Until a cleaner method is
# defined, this option will suffice in overriding what Redboot says.
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index eef0808..1977b96 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
@@ -526,6 +527,9 @@ proc0_init(void *dummy __unused)
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
p->p_cpulimit = RLIM_INFINITY;
+ /* Initialize resource accounting structures. */
+ racct_create(&p->p_racct);
+
p->p_stats = pstats_alloc();
/* Allocate a prototype map so we have something to fork. */
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index e95ac8f..01d6b75 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/wait.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/signalvar.h>
@@ -741,6 +742,11 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options,
(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
/*
+ * Destroy resource accounting information associated with the process.
+ */
+ racct_proc_exit(p);
+
+ /*
* Free credentials, arguments, and sigacts.
*/
crfree(p->p_ucred);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index ebd4e6d..1dcc5bb 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall.h>
@@ -783,6 +784,21 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp)
knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx);
STAILQ_INIT(&newproc->p_ktr);
+ /*
+ * XXX: This is ugly; when we copy resource usage, we need to bump
+ * per-cred resource counters.
+ */
+ newproc->p_ucred = p1->p_ucred;
+
+ /*
+ * Initialize resource accounting for the child process.
+ */
+ error = racct_proc_fork(p1, newproc);
+ if (error != 0) {
+ error = EAGAIN;
+ goto fail1;
+ }
+
/* We have to lock the process tree while we look for a pid. */
sx_slock(&proctree_lock);
@@ -827,6 +843,7 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp)
error = EAGAIN;
fail:
+ racct_proc_exit(newproc);
sx_sunlock(&proctree_lock);
if (ppsratecheck(&lastfail, &curfail, 1))
printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 08343dd..6f72feb 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/racct.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/namei.h>
@@ -1195,6 +1196,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
root = mypr->pr_root;
vref(root);
}
+ racct_create(&pr->pr_racct);
strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
pr->pr_flags |= PR_HOST;
#if defined(INET) || defined(INET6)
@@ -2295,6 +2297,9 @@ do_jail_attach(struct thread *td, struct prison *pr)
newcred->cr_prison = pr;
p->p_ucred = newcred;
PROC_UNLOCK(p);
+#ifdef RACCT
+ racct_proc_ucred_changed(p, oldcred, newcred);
+#endif
crfree(oldcred);
prison_deref(ppr, PD_DEREF | PD_DEUREF);
return (0);
@@ -2527,6 +2532,7 @@ prison_deref(struct prison *pr, int flags)
if (pr->pr_cpuset != NULL)
cpuset_rel(pr->pr_cpuset);
osd_jail_exit(pr);
+ racct_destroy(&pr->pr_racct);
free(pr, M_PRISON);
/* Removing a prison frees a reference on its parent. */
@@ -4263,6 +4269,17 @@ SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
"B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");
+/*
+ * Invoke 'callback' once for the pr_racct of every prison on the
+ * allprison list, passing 'arg2' and 'arg3' through unchanged.  The
+ * walk is done with allprison_lock held shared, so the callback runs
+ * under that lock.
+ */
+void
+prison_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3)
+{
+ struct prison *pr;
+
+ sx_slock(&allprison_lock);
+ TAILQ_FOREACH(pr, &allprison, pr_list)
+ (callback)(pr->pr_racct, arg2, arg3);
+ sx_sunlock(&allprison_lock);
+}
#ifdef DDB
diff --git a/sys/kern/kern_loginclass.c b/sys/kern/kern_loginclass.c
index cf644d5..d980246 100644
--- a/sys/kern/kern_loginclass.c
+++ b/sys/kern/kern_loginclass.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/racct.h>
#include <sys/refcount.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
@@ -90,6 +91,7 @@ loginclass_free(struct loginclass *lc)
mtx_lock(&loginclasses_lock);
if (refcount_release(&lc->lc_refcount)) {
+ racct_destroy(&lc->lc_racct);
LIST_REMOVE(lc, lc_next);
mtx_unlock(&loginclasses_lock);
free(lc, M_LOGINCLASS);
@@ -115,6 +117,7 @@ loginclass_find(const char *name)
return (NULL);
newlc = malloc(sizeof(*newlc), M_LOGINCLASS, M_ZERO | M_WAITOK);
+ racct_create(&newlc->lc_racct);
mtx_lock(&loginclasses_lock);
LIST_FOREACH(lc, &loginclasses, lc_next) {
@@ -124,6 +127,7 @@ loginclass_find(const char *name)
/* Found loginclass with a matching name? */
loginclass_hold(lc);
mtx_unlock(&loginclasses_lock);
+ racct_destroy(&newlc->lc_racct);
free(newlc, M_LOGINCLASS);
return (lc);
}
@@ -205,13 +209,27 @@ setloginclass(struct thread *td, struct setloginclass_args *uap)
newcred->cr_loginclass = newlc;
p->p_ucred = newcred;
PROC_UNLOCK(p);
-
+#ifdef RACCT
+ racct_proc_ucred_changed(p, oldcred, newcred);
+#endif
loginclass_free(oldcred->cr_loginclass);
crfree(oldcred);
return (0);
}
+/*
+ * Invoke 'callback' once for the lc_racct of every login class on the
+ * loginclasses list, passing 'arg2' and 'arg3' through unchanged.  The
+ * walk is done with the loginclasses_lock mutex held, so the callback
+ * runs under that mutex.
+ */
+void
+loginclass_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3)
+{
+ struct loginclass *lc;
+
+ mtx_lock(&loginclasses_lock);
+ LIST_FOREACH(lc, &loginclasses, lc_next)
+ (callback)(lc->lc_racct, arg2, arg3);
+ mtx_unlock(&loginclasses_lock);
+}
+
static void
lc_init(void)
{
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
new file mode 100644
index 0000000..28bc7b2
--- /dev/null
+++ b/sys/kern/kern_racct.c
@@ -0,0 +1,837 @@
+/*-
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/loginclass.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/racct.h>
+#include <sys/resourcevar.h>
+#include <sys/sbuf.h>
+#include <sys/sched.h>
+#include <sys/sdt.h>
+#include <sys/sx.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/umtx.h>
+
+#ifdef RCTL
+#include <sys/rctl.h>
+#endif
+
+#ifdef RACCT
+
+FEATURE(racct, "Resource Accounting");
+
+static struct mtx racct_lock;
+MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
+
+static uma_zone_t racct_zone;
+
+static void racct_sub_racct(struct racct *dest, const struct racct *src);
+static void racct_sub_cred_locked(struct ucred *cred, int resource,
+ uint64_t amount);
+static void racct_add_cred_locked(struct ucred *cred, int resource,
+ uint64_t amount);
+
+/*
+ * SDT provider "racct" and its probes.
+ *
+ * The rusage probes (add, add_failure, add_cred, add_force, set,
+ * set_failure, sub, sub_cred) are fired by the accounting functions below
+ * with the subject (process or credential), the resource id and the amount.
+ * The create and destroy probes are fired by racct_create() and
+ * racct_destroy_locked().  The join, join_failure and leave probes are only
+ * defined here; nothing in this file fires them.
+ */
+SDT_PROVIDER_DEFINE(racct);
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
+ "uint64_t");
+SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
+ "int", "uint64_t");
+SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
+SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
+ "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
+ "struct racct *", "struct racct *");
+SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
+ "struct racct *");
+
+/*
+ * Property flags of every resource, indexed by resource id.  Entries are
+ * grouped by flag combination; being designated initializers, their order
+ * in the source does not affect the resulting table.
+ */
+int racct_types[] = {
+	/* Time-like counters. */
+	[RACCT_CPU] =
+	    RACCT_IN_THOUSANDS,
+	[RACCT_WALLCLOCK] =
+	    RACCT_IN_THOUSANDS,
+
+	/* Reclaimable, inherited on fork, allocation may be denied. */
+	[RACCT_FSIZE] =
+	    RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+	[RACCT_DATA] =
+	    RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+	[RACCT_STACK] =
+	    RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+	[RACCT_NOFILE] =
+	    RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+	[RACCT_VMEM] =
+	    RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+	[RACCT_NSEMOP] =
+	    RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
+
+	/* Deniable only. */
+	[RACCT_CORE] =
+	    RACCT_DENIABLE,
+
+	/* Reclaimable only. */
+	[RACCT_RSS] =
+	    RACCT_RECLAIMABLE,
+
+	/* Reclaimable and deniable. */
+	[RACCT_MEMLOCK] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE,
+	[RACCT_NPROC] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE,
+	[RACCT_NTHR] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE,
+
+	/* Reclaimable, deniable and sloppy. */
+	[RACCT_SBSIZE] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+	[RACCT_NPTS] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+	[RACCT_SWAP] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+	[RACCT_MSGQQUEUED] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+	[RACCT_MSGQSIZE] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+	[RACCT_NMSGQ] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+	[RACCT_NSEM] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+	[RACCT_NSHM] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
+	[RACCT_SHMSIZE] =
+	    RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY };
+
+/*
+ * Add every resource counter of 'src' to the matching counter of 'dest'.
+ * The caller must own racct_lock.  Both counters are asserted to be
+ * non-negative before the addition.
+ */
+static void
+racct_add_racct(struct racct *dest, const struct racct *src)
+{
+	int res;
+
+	mtx_assert(&racct_lock, MA_OWNED);
+
+	res = 0;
+	while (res <= RACCT_MAX) {
+		KASSERT(dest->r_resources[res] >= 0,
+		    ("racct propagation meltdown: dest < 0"));
+		KASSERT(src->r_resources[res] >= 0,
+		    ("racct propagation meltdown: src < 0"));
+		dest->r_resources[res] += src->r_resources[res];
+		res++;
+	}
+}
+
+/*
+ * Subtract the counters of 'src' from those of 'dest'.  The caller must own
+ * racct_lock.
+ *
+ * Only reclaimable resources are subtracted; the others are left as they
+ * are.  For non-sloppy resources the counters are asserted to be sane
+ * (non-negative, src not larger than dest).  A result that drops below
+ * zero is tolerated for sloppy resources only and is clamped to zero.
+ */
+static void
+racct_sub_racct(struct racct *dest, const struct racct *src)
+{
+	int res;
+
+	mtx_assert(&racct_lock, MA_OWNED);
+
+	for (res = 0; res <= RACCT_MAX; res++) {
+		if (!racct_is_sloppy(res)) {
+			KASSERT(dest->r_resources[res] >= 0,
+			    ("racct propagation meltdown: dest < 0"));
+			KASSERT(src->r_resources[res] >= 0,
+			    ("racct propagation meltdown: src < 0"));
+			KASSERT(src->r_resources[res] <= dest->r_resources[res],
+			    ("racct propagation meltdown: src > dest"));
+		}
+		if (!racct_is_reclaimable(res))
+			continue;
+
+		dest->r_resources[res] -= src->r_resources[res];
+		if (dest->r_resources[res] >= 0)
+			continue;
+
+		KASSERT(racct_is_sloppy(res),
+		    ("racct_sub_racct: usage < 0"));
+		dest->r_resources[res] = 0;
+	}
+}
+
+/*
+ * Allocate a zero-filled racct from racct_zone and store it in '*racctp'.
+ * '*racctp' must be NULL on entry (asserted).  The allocation is made with
+ * M_WAITOK.
+ *
+ * NOTE(review): the create probe is declared with a "struct racct *"
+ * argument but is handed 'racctp' (a struct racct **) here - confirm this
+ * is intended.
+ */
+void
+racct_create(struct racct **racctp)
+{
+
+ SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
+
+ KASSERT(*racctp == NULL, ("racct already allocated"));
+
+ *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
+}
+
+/*
+ * Return the racct referenced by '*racctp' to racct_zone and set '*racctp'
+ * to NULL.  The caller must own racct_lock.
+ *
+ * Before freeing, every resource that is reclaimable and not sloppy is
+ * asserted to have a zero counter, i.e. the racct must be "empty".
+ */
+static void
+racct_destroy_locked(struct racct **racctp)
+{
+	int i;
+	struct racct *racct;
+
+	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racctp != NULL, ("NULL racctp"));
+	KASSERT(*racctp != NULL, ("NULL racct"));
+
+	racct = *racctp;
+
+	for (i = 0; i <= RACCT_MAX; i++) {
+		/* Only reclaimable, non-sloppy counters must be zero. */
+		if (racct_is_sloppy(i) || !racct_is_reclaimable(i))
+			continue;
+		/*
+		 * r_resources[] is int64_t; cast it explicitly so the
+		 * argument matches the %ju conversion.
+		 */
+		KASSERT(racct->r_resources[i] == 0,
+		    ("destroying non-empty racct: "
+		    "%ju allocated for resource %d\n",
+		    (uintmax_t)racct->r_resources[i], i));
+	}
+	uma_zfree(racct_zone, racct);
+	*racctp = NULL;
+}
+
+/*
+ * Locking wrapper around racct_destroy_locked(): holds racct_lock for the
+ * duration of the call.  On return '*racct' has been freed and set to NULL.
+ */
+void
+racct_destroy(struct racct **racct)
+{
+
+ mtx_lock(&racct_lock);
+ racct_destroy_locked(racct);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Adjust consumption of 'resource' in 'racct' by 'amount'.  Only this one
+ * racct is modified; charging the enclosing containers (uidinfo, prisons,
+ * login class) is the job of the callers.  Differently from other cases,
+ * 'amount' here may be less than zero, which is why it is a signed type.
+ *
+ * A counter that would drop below zero is legal for sloppy resources only
+ * (asserted) and is clamped to zero.  The caller must own racct_lock.
+ */
+static void
+racct_alloc_resource(struct racct *racct, int resource,
+    int64_t amount)
+{
+
+	mtx_assert(&racct_lock, MA_OWNED);
+	KASSERT(racct != NULL, ("NULL racct"));
+
+	racct->r_resources[resource] += amount;
+	if (racct->r_resources[resource] < 0) {
+		KASSERT(racct_is_sloppy(resource),
+		    ("racct_alloc_resource: usage < 0"));
+		racct->r_resources[resource] = 0;
+	}
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for process 'p'.
+ * Return 0 if it's below limits, or errno, if it's not.
+ *
+ * Processes flagged P_SYSTEM are not accounted: 0 is returned for them
+ * before anything else happens.  The caller must hold the proc lock.
+ *
+ * When RCTL is compiled in, rctl_enforce() is consulted first.  Its error
+ * is returned - leaving all counters untouched - only if the resource is
+ * deniable; for other resources the error is ignored.  Otherwise the
+ * amount is added to the process racct and to the raccts reachable from
+ * the process credential, all within one racct_lock hold.
+ */
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+#ifdef RCTL
+ int error;
+#endif
+
+ if (p->p_flag & P_SYSTEM)
+ return (0);
+
+ SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ /* 'amount' is unsigned, so this condition always holds. */
+ KASSERT(amount >= 0, ("racct_add: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ mtx_lock(&racct_lock);
+#ifdef RCTL
+ error = rctl_enforce(p, resource, amount);
+ if (error && racct_is_deniable(resource)) {
+ SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
+ amount, 0, 0);
+ mtx_unlock(&racct_lock);
+ return (error);
+ }
+#endif
+ racct_alloc_resource(p->p_racct, resource, amount);
+ racct_add_cred_locked(p->p_ucred, resource, amount);
+ mtx_unlock(&racct_lock);
+
+ return (0);
+}
+
+/*
+ * Charge 'amount' of 'resource' to every container reachable from 'cred':
+ * first cred->cr_ruidinfo, then cred->cr_prison and each of its pr_parent
+ * ancestors, and finally cred->cr_loginclass.  racct_lock must be owned;
+ * racct_alloc_resource() asserts it.
+ */
+static void
+racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+	struct prison *jail;
+
+	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
+	    0, 0);
+
+	KASSERT(amount >= 0,
+	    ("racct_add_cred: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
+
+	jail = cred->cr_prison;
+	while (jail != NULL) {
+		racct_alloc_resource(jail->pr_racct, resource, amount);
+		jail = jail->pr_parent;
+	}
+
+	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for credential 'cred'.
+ * Doesn't check for limits and never fails.
+ *
+ * This is racct_add_cred_locked() wrapped in a racct_lock hold; no
+ * per-process racct is involved.
+ *
+ * XXX: Shouldn't this ever return an error?
+ */
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+ mtx_lock(&racct_lock);
+ racct_add_cred_locked(cred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for process 'p'.
+ * Doesn't check for limits and never fails.
+ *
+ * Processes flagged P_SYSTEM are not accounted.  The caller must hold the
+ * proc lock.  The process racct and the raccts reachable from the process
+ * credential are updated within a single racct_lock hold, the same way
+ * racct_add() and racct_set_force() do it, so the counters are never
+ * observable half-updated.
+ */
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+
+	if (p->p_flag & P_SYSTEM)
+		return;
+
+	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
+
+	/*
+	 * We need proc lock to dereference p->p_ucred.
+	 */
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(amount >= 0,
+	    ("racct_add_force: invalid amount for resource %d: %ju",
+	    resource, amount));
+
+	mtx_lock(&racct_lock);
+	racct_alloc_resource(p->p_racct, resource, amount);
+	racct_add_cred_locked(p->p_ucred, resource, amount);
+	mtx_unlock(&racct_lock);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p'; racct_lock
+ * must already be owned (racct_alloc_resource() asserts it) and the proc
+ * lock must be held.
+ *
+ * The function computes the difference between 'amount' and the current
+ * per-process counter and applies that difference to the process racct and
+ * to the raccts reachable from its credential.  With RCTL compiled in, an
+ * increase is first checked with rctl_enforce(); its error is returned,
+ * with nothing changed, only when the resource is deniable.  Decreases are
+ * never checked.  Processes flagged P_SYSTEM are ignored and yield 0.
+ */
+static int
+racct_set_locked(struct proc *p, int resource, uint64_t amount)
+{
+ int64_t diff;
+#ifdef RCTL
+ int error;
+#endif
+
+ if (p->p_flag & P_SYSTEM)
+ return (0);
+
+ SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0, ("racct_set: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ /* Signed change relative to what the process is charged for now. */
+ diff = amount - p->p_racct->r_resources[resource];
+#ifdef notyet
+ KASSERT(diff >= 0 || racct_is_reclaimable(resource),
+ ("racct_set: usage of non-reclaimable resource %d dropping",
+ resource));
+#endif
+#ifdef RCTL
+ if (diff > 0) {
+ error = rctl_enforce(p, resource, diff);
+ if (error && racct_is_deniable(resource)) {
+ SDT_PROBE(racct, kernel, rusage, set_failure, p,
+ resource, amount, 0, 0);
+ return (error);
+ }
+ }
+#endif
+ racct_alloc_resource(p->p_racct, resource, diff);
+ /* The credential helpers take a magnitude, hence -diff below. */
+ if (diff > 0)
+ racct_add_cred_locked(p->p_ucred, resource, diff);
+ else if (diff < 0)
+ racct_sub_cred_locked(p->p_ucred, resource, -diff);
+
+ return (0);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p'.
+ * Return 0 if it's below limits, or errno, if it's not.
+ *
+ * Note that decreasing the allocation always returns 0,
+ * even if it's above the limit.
+ *
+ * This is racct_set_locked() wrapped in a racct_lock hold.
+ */
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+ int error;
+
+ mtx_lock(&racct_lock);
+ error = racct_set_locked(p, resource, amount);
+ mtx_unlock(&racct_lock);
+ return (error);
+}
+
+/*
+ * Set allocation of 'resource' to 'amount' for process 'p' without
+ * consulting any limits; never fails.
+ *
+ * Processes flagged P_SYSTEM are ignored.  The caller must hold the proc
+ * lock.  The difference to the current per-process counter is applied to
+ * the process racct and to the raccts reachable from its credential within
+ * one racct_lock hold.  Note that this fires the same "set" probe as
+ * racct_set_locked().
+ */
+void
+racct_set_force(struct proc *p, int resource, uint64_t amount)
+{
+ int64_t diff;
+
+ if (p->p_flag & P_SYSTEM)
+ return;
+
+ SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0,
+ ("racct_set_force: invalid amount for resource %d: %ju",
+ resource, amount));
+
+ mtx_lock(&racct_lock);
+ /* Signed change relative to what the process is charged for now. */
+ diff = amount - p->p_racct->r_resources[resource];
+ racct_alloc_resource(p->p_racct, resource, diff);
+ if (diff > 0)
+ racct_add_cred_locked(p->p_ucred, resource, diff);
+ else if (diff < 0)
+ racct_sub_cred_locked(p->p_ucred, resource, -diff);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Report how much of 'resource' the process 'p' may keep allocated; going
+ * beyond that would be denied unless the resource is marked undeniable.
+ * What is already allocated is not taken into account.
+ *
+ * The answer comes from rctl_get_limit() when RCTL is compiled in;
+ * without RCTL there is no limit and UINT64_MAX is returned.
+ */
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+#ifndef RCTL
+	return (UINT64_MAX);
+#else
+	return (rctl_get_limit(p, resource));
+#endif
+}
+
+/*
+ * Report how much of 'resource' the process 'p' may keep allocated; going
+ * beyond that would be denied unless the resource is marked undeniable.
+ * Here the amount that is already allocated does matter.
+ *
+ * The answer comes from rctl_get_available() when RCTL is compiled in;
+ * without RCTL there is no limit and UINT64_MAX is returned.
+ */
+uint64_t
+racct_get_available(struct proc *p, int resource)
+{
+
+#ifndef RCTL
+	return (UINT64_MAX);
+#else
+	return (rctl_get_available(p, resource));
+#endif
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for process 'p'.
+ *
+ * Processes flagged P_SYSTEM are ignored.  The caller must hold the proc
+ * lock.  The resource must be reclaimable and 'amount' must not exceed
+ * what the process is currently charged for (both asserted).  The process
+ * racct and the raccts reachable from its credential are updated within
+ * one racct_lock hold.
+ */
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+
+ if (p->p_flag & P_SYSTEM)
+ return;
+
+ SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
+
+ /*
+ * We need proc lock to dereference p->p_ucred.
+ */
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(amount >= 0, ("racct_sub: invalid amount for resource %d: %ju",
+ resource, amount));
+ KASSERT(racct_is_reclaimable(resource),
+ ("racct_sub: called for non-reclaimable resource %d", resource));
+
+ mtx_lock(&racct_lock);
+ KASSERT(amount <= p->p_racct->r_resources[resource],
+ ("racct_sub: freeing %ju of resource %d, which is more "
+ "than allocated %jd for %s (pid %d)", amount, resource,
+ (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
+
+ /*
+ * The unsigned negation wraps around; adding it to the signed counter
+ * has the effect of subtracting 'amount'.
+ */
+ racct_alloc_resource(p->p_racct, resource, -amount);
+ racct_sub_cred_locked(p->p_ucred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Take 'amount' of 'resource' off every container reachable from 'cred':
+ * first cred->cr_ruidinfo, then cred->cr_prison and each of its pr_parent
+ * ancestors, and finally cred->cr_loginclass.  racct_lock must be owned;
+ * racct_alloc_resource() asserts it.
+ */
+static void
+racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+	struct prison *jail;
+
+	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
+	    0, 0);
+
+	KASSERT(amount >= 0,
+	    ("racct_sub_cred: invalid amount for resource %d: %ju",
+	    resource, amount));
+#ifdef notyet
+	KASSERT(racct_is_reclaimable(resource),
+	    ("racct_sub_cred: called for non-reclaimable resource %d",
+	    resource));
+#endif
+
+	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
+
+	jail = cred->cr_prison;
+	while (jail != NULL) {
+		racct_alloc_resource(jail->pr_racct, resource, -amount);
+		jail = jail->pr_parent;
+	}
+
+	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
+}
+
+/*
+ * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
+ *
+ * This is racct_sub_cred_locked() wrapped in a racct_lock hold; no
+ * per-process racct is involved.
+ */
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+
+ mtx_lock(&racct_lock);
+ racct_sub_cred_locked(cred, resource, amount);
+ mtx_unlock(&racct_lock);
+}
+
+/*
+ * Inherit resource usage information from the parent process.
+ *
+ * A fresh racct is always created for 'child'.  For a child flagged
+ * P_SYSTEM nothing else is done and 0 is returned.  Otherwise, with both
+ * proc locks and racct_lock held, every inheritable resource the parent
+ * has a non-zero counter for is set on the child via racct_set_locked(),
+ * and, if RCTL is compiled in, rctl_proc_fork() is called.
+ *
+ * Returns 0 on success.  On failure the error from racct_set_locked() or
+ * rctl_proc_fork() is returned, all child counters are reset to zero and
+ * the child's racct is destroyed (child->p_racct becomes NULL).
+ */
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+ int i, error = 0;
+
+ /*
+ * Create racct for the child process.
+ */
+ racct_create(&child->p_racct);
+
+ /*
+ * No resource accounting for kernel processes.
+ */
+ if (child->p_flag & P_SYSTEM)
+ return (0);
+
+ PROC_LOCK(parent);
+ PROC_LOCK(child);
+ mtx_lock(&racct_lock);
+
+ /*
+ * Inherit resource usage.
+ */
+ for (i = 0; i <= RACCT_MAX; i++) {
+ if (parent->p_racct->r_resources[i] == 0 ||
+ !racct_is_inheritable(i))
+ continue;
+
+ error = racct_set_locked(child, i,
+ parent->p_racct->r_resources[i]);
+ if (error != 0) {
+ /*
+ * XXX: The only purpose of these two lines is
+ * to prevent from tripping checks in racct_destroy().
+ */
+ /* Reusing 'i' is safe: the outer loop is left right after. */
+ for (i = 0; i <= RACCT_MAX; i++)
+ racct_set_locked(child, i, 0);
+ goto out;
+ }
+ }
+
+#ifdef RCTL
+ error = rctl_proc_fork(parent, child);
+ if (error != 0) {
+ /*
+ * XXX: The only purpose of these two lines is to prevent from
+ * tripping checks in racct_destroy().
+ */
+ for (i = 0; i <= RACCT_MAX; i++)
+ racct_set_locked(child, i, 0);
+ }
+#endif
+
+out:
+ /* Locks are released in the reverse order of acquisition. */
+ if (error != 0)
+ racct_destroy_locked(&child->p_racct);
+ mtx_unlock(&racct_lock);
+ PROC_UNLOCK(child);
+ PROC_UNLOCK(parent);
+
+ return (error);
+}
+
+/*
+ * Final accounting for an exiting process 'p'.
+ *
+ * With the proc lock held, the CPU time counter is brought up to date from
+ * p->p_rux.rux_runtime (never letting it go below p->p_prev_runtime) and
+ * the FSIZE, NPTS, NTHR and RSS counters are reset to zero.  The return
+ * values of racct_set() are ignored.  Afterwards, if RCTL is compiled in,
+ * rctl_racct_release() is called, and the process racct is destroyed.
+ */
+void
+racct_proc_exit(struct proc *p)
+{
+ uint64_t runtime;
+
+ PROC_LOCK(p);
+ /*
+ * We don't need to calculate rux, proc_reap() has already done this.
+ */
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+#ifdef notyet
+ KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
+#else
+ if (runtime < p->p_prev_runtime)
+ runtime = p->p_prev_runtime;
+#endif
+ racct_set(p, RACCT_CPU, runtime);
+
+ /*
+ * XXX: Free this some other way.
+ */
+ racct_set(p, RACCT_FSIZE, 0);
+ racct_set(p, RACCT_NPTS, 0);
+ racct_set(p, RACCT_NTHR, 0);
+ racct_set(p, RACCT_RSS, 0);
+ PROC_UNLOCK(p);
+
+#ifdef RCTL
+ rctl_racct_release(p->p_racct);
+#endif
+ racct_destroy(&p->p_racct);
+}
+
+/*
+ * Called after credentials change, to move resource utilisation
+ * between raccts.
+ *
+ * Must be called without the proc lock held (asserted).  For each kind of
+ * container that differs between 'oldcred' and 'newcred' - cr_ruidinfo,
+ * cr_loginclass, and cr_prison with its pr_parent chain - the usage
+ * recorded in p->p_racct is subtracted from the old container and added to
+ * the new one under racct_lock.  Note that racct_sub_racct() subtracts
+ * reclaimable resources only, while racct_add_racct() adds all of them.
+ * With RCTL compiled in, rctl_proc_ucred_changed() is called at the end,
+ * after racct_lock has been dropped.
+ */
+void
+racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
+ struct ucred *newcred)
+{
+ struct uidinfo *olduip, *newuip;
+ struct loginclass *oldlc, *newlc;
+ struct prison *oldpr, *newpr, *pr;
+
+ PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+
+ newuip = newcred->cr_ruidinfo;
+ olduip = oldcred->cr_ruidinfo;
+ newlc = newcred->cr_loginclass;
+ oldlc = oldcred->cr_loginclass;
+ newpr = newcred->cr_prison;
+ oldpr = oldcred->cr_prison;
+
+ mtx_lock(&racct_lock);
+ if (newuip != olduip) {
+ racct_sub_racct(olduip->ui_racct, p->p_racct);
+ racct_add_racct(newuip->ui_racct, p->p_racct);
+ }
+ if (newlc != oldlc) {
+ racct_sub_racct(oldlc->lc_racct, p->p_racct);
+ racct_add_racct(newlc->lc_racct, p->p_racct);
+ }
+ if (newpr != oldpr) {
+ /* Both complete ancestor chains are walked. */
+ for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
+ racct_sub_racct(pr->pr_racct, p->p_racct);
+ for (pr = newpr; pr != NULL; pr = pr->pr_parent)
+ racct_add_racct(pr->pr_racct, p->p_racct);
+ }
+ mtx_unlock(&racct_lock);
+
+#ifdef RCTL
+ rctl_proc_ucred_changed(p, newcred);
+#endif
+}
+
+/*
+ * Body of the "racctd" kernel process: an endless loop that refreshes the
+ * RACCT_CPU and RACCT_WALLCLOCK counters of every process and then sleeps
+ * for 'hz' ticks.
+ *
+ * The process list is walked with allproc_lock share-locked.  Processes
+ * that are not in state PRS_NORMAL or that are flagged P_SYSTEM are
+ * skipped.  Both counters are stored in microseconds.  The return values
+ * of racct_set_locked() are ignored.
+ */
+static void
+racctd(void)
+{
+ struct thread *td;
+ struct proc *p;
+ struct timeval wallclock;
+ uint64_t runtime;
+
+ for (;;) {
+ sx_slock(&allproc_lock);
+
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state != PRS_NORMAL)
+ continue;
+ if (p->p_flag & P_SYSTEM)
+ continue;
+
+ /* Wall-clock time: current uptime minus the process start time. */
+ microuptime(&wallclock);
+ timevalsub(&wallclock, &p->p_stats->p_start);
+ PROC_LOCK(p);
+ PROC_SLOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ ruxagg(p, td);
+ /*
+ * NOTE(review): nothing happens between lock and
+ * unlock; the purpose of this pair is not evident
+ * from this file - confirm it is needed.
+ */
+ thread_lock(td);
+ thread_unlock(td);
+ }
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+ PROC_SUNLOCK(p);
+#ifdef notyet
+ KASSERT(runtime >= p->p_prev_runtime,
+ ("runtime < p_prev_runtime"));
+#else
+ /* Never let the reported CPU time go backwards. */
+ if (runtime < p->p_prev_runtime)
+ runtime = p->p_prev_runtime;
+#endif
+ p->p_prev_runtime = runtime;
+ mtx_lock(&racct_lock);
+ racct_set_locked(p, RACCT_CPU, runtime);
+ racct_set_locked(p, RACCT_WALLCLOCK,
+ wallclock.tv_sec * 1000000 + wallclock.tv_usec);
+ mtx_unlock(&racct_lock);
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+ pause("-", hz);
+ }
+}
+
+/*
+ * Descriptor handed to kproc_start() by the SYSINIT below, which runs at
+ * SI_SUB_RACCTD: process name "racctd", entry point racctd().  The third
+ * member is left NULL.
+ */
+static struct kproc_desc racctd_kp = {
+ "racctd",
+ racctd,
+ NULL
+};
+SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
+
+/*
+ * One-time initialisation, run from the SYSINIT below at SI_SUB_RACCT:
+ * creates the UMA zone that all racct structures are allocated from and
+ * allocates the racct of prison0.
+ */
+static void
+racct_init(void)
+{
+
+ racct_zone = uma_zcreate("racct", sizeof(struct racct),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ /*
+ * XXX: Move this somewhere.
+ */
+ racct_create(&prison0.pr_racct);
+}
+SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
+
+#else /* !RACCT */
+
+/*
+ * Stubs used when the kernel is built without RACCT.  Nothing is accounted
+ * and nothing is limited: functions returning an error report success,
+ * functions returning a limit report UINT64_MAX, and the rest do nothing.
+ * There is one stub for every function declared in <sys/racct.h>, so that
+ * callers link regardless of the option.
+ */
+
+int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+
+	return (0);
+}
+
+void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+
+	return (0);
+}
+
+void
+racct_set_force(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+	return (UINT64_MAX);
+}
+
+uint64_t
+racct_get_available(struct proc *p, int resource)
+{
+
+	return (UINT64_MAX);
+}
+
+void
+racct_create(struct racct **racctp)
+{
+}
+
+void
+racct_destroy(struct racct **racctp)
+{
+}
+
+int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+
+	return (0);
+}
+
+void
+racct_proc_exit(struct proc *p)
+{
+}
+
+void
+racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
+    struct ucred *newcred)
+{
+}
+
+#endif /* !RACCT */
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 66b6e2d..fa7437d 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
@@ -1201,6 +1202,7 @@ uifind(uid)
if (uip == NULL) {
rw_runlock(&uihashtbl_lock);
uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
+ racct_create(&uip->ui_racct);
rw_wlock(&uihashtbl_lock);
/*
* There's a chance someone created our uidinfo while we
@@ -1209,6 +1211,7 @@ uifind(uid)
*/
if ((old_uip = uilookup(uid)) != NULL) {
/* Someone else beat us to it. */
+ racct_destroy(&uip->ui_racct);
free(uip, M_UIDINFO);
uip = old_uip;
} else {
@@ -1264,6 +1267,7 @@ uifree(uip)
/* Prepare for suboptimal case. */
rw_wlock(&uihashtbl_lock);
if (refcount_release(&uip->ui_ref)) {
+ racct_destroy(&uip->ui_racct);
LIST_REMOVE(uip, ui_hash);
rw_wunlock(&uihashtbl_lock);
if (uip->ui_sbsize != 0)
@@ -1286,6 +1290,22 @@ uifree(uip)
rw_wunlock(&uihashtbl_lock);
}
+/*
+ * Invoke 'callback' once for every uidinfo in the hash table, handing it
+ * the uidinfo's ui_racct pointer together with the two opaque caller
+ * arguments.  The walk is done with uihashtbl_lock read-locked, so the
+ * callback is entered with that lock held.
+ *
+ * Buckets are visited from index 'uihash' down to 0.  The loop counts an
+ * index instead of decrementing a bucket pointer, so that no pointer to
+ * before the start of uihashtbl is ever formed.
+ */
+void
+ui_racct_foreach(void (*callback)(struct racct *racct,
+    void *arg2, void *arg3), void *arg2, void *arg3)
+{
+	struct uidinfo *uip;
+	struct uihashhead *uih;
+	u_long i;
+
+	rw_rlock(&uihashtbl_lock);
+	for (i = uihash + 1; i > 0; i--) {
+		uih = &uihashtbl[i - 1];
+		LIST_FOREACH(uip, uih, ui_hash) {
+			(callback)(uip->ui_racct, arg2, arg3);
+		}
+	}
+	rw_runlock(&uihashtbl_lock);
+}
+
/*
* Change the count associated with number of processes
* a given user is using. When 'max' is 0, don't enforce a limit
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index 85c629a..b83ac1b 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -135,6 +135,8 @@ MALLOC_DECLARE(M_PRISON);
#define HOSTUUIDLEN 64
+struct racct;
+
/*
* This structure describes a prison. It is pointed to by all struct
* ucreds's of the inmates. pr_ref keeps track of them and is used to
@@ -166,7 +168,8 @@ struct prison {
int pr_ip6s; /* (p) number of v6 IPs */
struct in_addr *pr_ip4; /* (p) v4 IPs of jail */
struct in6_addr *pr_ip6; /* (p) v6 IPs of jail */
- void *pr_sparep[4];
+ struct racct *pr_racct; /* (c) resource accounting */
+ void *pr_sparep[3];
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
@@ -380,6 +383,8 @@ int prison_if(struct ucred *cred, struct sockaddr *sa);
char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
int sysctl_jail_param(SYSCTL_HANDLER_ARGS);
+void prison_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* _KERNEL */
#endif /* !_SYS_JAIL_H_ */
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index 1a9cb5c..2916646 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -109,6 +109,7 @@ enum sysinit_sub_id {
SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */
SI_SUB_KLD = 0x2000000, /* KLD and module setup */
SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/
+ SI_SUB_RACCT = 0x2110000, /* resource accounting */
SI_SUB_RANDOM = 0x2120000, /* random number generator */
SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */
SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */
@@ -169,6 +170,7 @@ enum sysinit_sub_id {
SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/
SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/
SI_SUB_SMP = 0xf000000, /* start the APs*/
+ SI_SUB_RACCTD = 0xf100000, /* start racctd*/
SI_SUB_RUN_SCHEDULER = 0xfffffff /* scheduler*/
};
diff --git a/sys/sys/loginclass.h b/sys/sys/loginclass.h
index 36ecf80..08f3409 100644
--- a/sys/sys/loginclass.h
+++ b/sys/sys/loginclass.h
@@ -32,6 +32,8 @@
#ifndef _SYS_LOGINCLASS_H_
#define _SYS_LOGINCLASS_H_
+struct racct;
+
/*
* Exactly one of these structures exists per login class.
*/
@@ -39,11 +41,13 @@ struct loginclass {
LIST_ENTRY(loginclass) lc_next;
char lc_name[MAXLOGNAME];
u_int lc_refcount;
+ struct racct *lc_racct;
};
void loginclass_hold(struct loginclass *lc);
void loginclass_free(struct loginclass *lc);
struct loginclass *loginclass_find(const char *name);
+void loginclass_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* !_SYS_LOGINCLASS_H_ */
-
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index c9eedff..e04d699 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -157,6 +157,7 @@ struct pargs {
* either lock is sufficient for read access, but both locks must be held
* for write access.
*/
+struct racct;
struct kaudit_record;
struct td_sched;
struct nlminfo;
@@ -566,6 +567,8 @@ struct proc {
struct cv p_pwait; /* (*) wait cv for exit/exec. */
struct cv p_dbgwait; /* (*) wait cv for debugger attach
after fork. */
+ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */
+ struct racct *p_racct; /* (b) Resource accounting. */
};
#define p_session p_pgrp->pg_session
diff --git a/sys/sys/racct.h b/sys/sys/racct.h
new file mode 100644
index 0000000..cbd96a9
--- /dev/null
+++ b/sys/sys/racct.h
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Resource accounting.
+ */
+
+#ifndef _RACCT_H_
+#define _RACCT_H_
+
+#include <sys/cdefs.h>
+#include <sys/queue.h>
+#include <sys/types.h>
+
+struct proc;
+struct rctl_rule_link;
+struct ucred;
+
+/*
+ * Resources.
+ *
+ * The non-negative values are used as indices into racct_types[] and into
+ * the r_resources[] array of struct racct, which is sized RACCT_MAX + 1;
+ * RACCT_MAX must therefore always name the highest-numbered resource.
+ */
+#define RACCT_UNDEFINED -1
+#define RACCT_CPU 0
+#define RACCT_FSIZE 1
+#define RACCT_DATA 2
+#define RACCT_STACK 3
+#define RACCT_CORE 4
+#define RACCT_RSS 5
+#define RACCT_MEMLOCK 6
+#define RACCT_NPROC 7
+#define RACCT_NOFILE 8
+#define RACCT_SBSIZE 9
+#define RACCT_VMEM 10
+#define RACCT_NPTS 11
+#define RACCT_SWAP 12
+#define RACCT_NTHR 13
+#define RACCT_MSGQQUEUED 14
+#define RACCT_MSGQSIZE 15
+#define RACCT_NMSGQ 16
+#define RACCT_NSEM 17
+#define RACCT_NSEMOP 18
+#define RACCT_NSHM 19
+#define RACCT_SHMSIZE 20
+#define RACCT_WALLCLOCK 21
+#define RACCT_MAX RACCT_WALLCLOCK
+
+/*
+ * Resource properties.
+ *
+ * Bit flags that are OR'ed together per resource in racct_types[] and
+ * tested with the racct_is_*() macros below.
+ */
+#define RACCT_IN_THOUSANDS 0x01
+#define RACCT_RECLAIMABLE 0x02
+#define RACCT_INHERITABLE 0x04
+#define RACCT_DENIABLE 0x08
+#define RACCT_SLOPPY 0x10
+
+extern int racct_types[];
+
+/*
+ * Each predicate below yields the resource's property flag (non-zero) when
+ * the property is set for resource X, and zero otherwise.
+ */
+
+/*
+ * Amount stored in r_resources[] is a thousand times bigger than what's
+ * visible to the userland.  It gets fixed up when retrieving resource
+ * usage or adding rules.
+ */
+#define racct_is_in_thousands(X) (racct_types[(X)] & RACCT_IN_THOUSANDS)
+
+/*
+ * Resource usage can drop, as opposed to only grow.
+ */
+#define racct_is_reclaimable(X) (racct_types[(X)] & RACCT_RECLAIMABLE)
+
+/*
+ * Children inherit resource usage.
+ */
+#define racct_is_inheritable(X) (racct_types[(X)] & RACCT_INHERITABLE)
+
+/*
+ * racct_{add,set}(9) can actually return an error and not update resource
+ * usage counters.  Note that even when a resource is not deniable,
+ * allocating it might cause signals to be sent by the RCTL code.
+ */
+#define racct_is_deniable(X) (racct_types[(X)] & RACCT_DENIABLE)
+
+/*
+ * Per-process resource usage information makes no sense, but per-credential
+ * one does.  Resources of this kind are usually allocated for a process,
+ * but freed using credentials.
+ */
+#define racct_is_sloppy(X) (racct_types[(X)] & RACCT_SLOPPY)
+
+/*
+ * The 'racct' structure defines resource consumption for a particular
+ * subject, such as process or jail.
+ *
+ * This structure must be filled with zeroes initially.
+ */
+struct racct {
+ /* Current usage of each resource, indexed by the RACCT_* resource id. */
+ int64_t r_resources[RACCT_MAX + 1];
+ /* List of rctl_rule_link; presumably maintained by the RCTL code - confirm. */
+ LIST_HEAD(, rctl_rule_link) r_rule_links;
+};
+
+int racct_add(struct proc *p, int resource, uint64_t amount);
+void racct_add_cred(struct ucred *cred, int resource, uint64_t amount);
+void racct_add_force(struct proc *p, int resource, uint64_t amount);
+int racct_set(struct proc *p, int resource, uint64_t amount);
+void racct_set_force(struct proc *p, int resource, uint64_t amount);
+void racct_sub(struct proc *p, int resource, uint64_t amount);
+void racct_sub_cred(struct ucred *cred, int resource, uint64_t amount);
+uint64_t racct_get_limit(struct proc *p, int resource);
+uint64_t racct_get_available(struct proc *p, int resource);
+
+void racct_create(struct racct **racctp);
+void racct_destroy(struct racct **racctp);
+
+int racct_proc_fork(struct proc *parent, struct proc *child);
+void racct_proc_exit(struct proc *p);
+
+void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
+ struct ucred *newcred);
+
+#endif /* !_RACCT_H_ */
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index 67af9b6..f17d95f 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -79,6 +79,8 @@ struct plimit {
int pl_refcnt; /* number of references */
};
+struct racct;
+
/*-
* Per uid resource consumption. This structure is used to track
* the total resource consumption (process count, socket buffer size,
@@ -99,6 +101,7 @@ struct uidinfo {
long ui_ptscnt; /* (b) number of pseudo-terminals */
uid_t ui_uid; /* (a) uid */
u_int ui_ref; /* (b) reference count */
+ struct racct *ui_racct; /* (a) resource accounting */
};
#define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx))
@@ -140,6 +143,8 @@ struct uidinfo
void uifree(struct uidinfo *uip);
void uihashinit(void);
void uihold(struct uidinfo *uip);
+void ui_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* _KERNEL */
#endif /* !_SYS_RESOURCEVAR_H_ */
OpenPOWER on IntegriCloud