summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--share/man/man9/Makefile3
-rw-r--r--share/man/man9/zone.917
-rw-r--r--sys/kern/kern_mbuf.c78
-rw-r--r--sys/vm/uma.h13
-rw-r--r--sys/vm/uma_core.c20
-rw-r--r--sys/vm/uma_int.h4
6 files changed, 130 insertions, 5 deletions
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index 30cb503..c1399a3 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -1911,6 +1911,7 @@ MLINKS+=zone.9 uma.9 \
zone.9 uma_zone_get_cur.9 \
zone.9 uma_zone_get_max.9 \
zone.9 uma_zone_set_max.9 \
- zone.9 uma_zone_set_warning.9
+ zone.9 uma_zone_set_warning.9 \
+ zone.9 uma_zone_set_maxaction.9
.include <bsd.prog.mk>
diff --git a/share/man/man9/zone.9 b/share/man/man9/zone.9
index 2df14b02..12226d1 100644
--- a/share/man/man9/zone.9
+++ b/share/man/man9/zone.9
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd February 7, 2014
+.Dd December 20, 2015
.Dt ZONE 9
.Os
.Sh NAME
@@ -39,7 +39,8 @@
.Nm uma_zone_set_max,
.Nm uma_zone_get_max,
.Nm uma_zone_get_cur,
-.Nm uma_zone_set_warning
+.Nm uma_zone_set_warning,
+.Nm uma_zone_set_maxaction
.Nd zone allocator
.Sh SYNOPSIS
.In sys/param.h
@@ -71,6 +72,8 @@
.Fn uma_zone_get_cur "uma_zone_t zone"
.Ft void
.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
+.Ft void
+.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
.In sys/sysctl.h
.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
@@ -307,13 +310,21 @@ The
.Fn uma_zone_set_warning
function sets a warning that will be printed on the system console when the
given zone becomes full and fails to allocate an item.
-The warning will be printed not often than every five minutes.
+The warning will be printed no more often than every five minutes.
Warnings can be turned off globally by setting the
.Va vm.zone_warnings
sysctl tunable to
.Va 0 .
.Pp
The
+.Fn uma_zone_set_maxaction
+function sets a function that will be called when the given zone becomes full
+and fails to allocate an item.
+The function will be called with the zone locked.
+The caller of the allocation function may also hold additional locks, so
+this function should do very little work (similar to a signal handler).
+.Pp
+The
.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
macro declares a static
.Xr sysctl
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index 0648a5d..a864c94 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -32,11 +32,14 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/malloc.h>
+#include <sys/types.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -272,6 +275,12 @@ uma_zone_t zone_jumbo16;
uma_zone_t zone_ext_refcnt;
/*
+ * Callout to assist us in freeing mbufs.
+ */
+static struct callout mb_reclaim_callout;
+static struct mtx mb_reclaim_callout_mtx;
+
+/*
* Local prototypes.
*/
static int mb_ctor_mbuf(void *, int, void *, int);
@@ -285,6 +294,7 @@ static void mb_zfini_pack(void *, int);
static void mb_reclaim(void *);
static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
+static void mb_maxaction(uma_zone_t);
/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
@@ -310,6 +320,7 @@ mbuf_init(void *dummy)
if (nmbufs > 0)
nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
+ uma_zone_set_maxaction(zone_mbuf, mb_maxaction);
zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -322,6 +333,7 @@ mbuf_init(void *dummy)
if (nmbclusters > 0)
nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
+ uma_zone_set_maxaction(zone_clust, mb_maxaction);
zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
@@ -338,6 +350,7 @@ mbuf_init(void *dummy)
if (nmbjumbop > 0)
nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
+ uma_zone_set_maxaction(zone_jumbop, mb_maxaction);
zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -351,6 +364,7 @@ mbuf_init(void *dummy)
if (nmbjumbo9 > 0)
nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
+ uma_zone_set_maxaction(zone_jumbo9, mb_maxaction);
zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -364,6 +378,7 @@ mbuf_init(void *dummy)
if (nmbjumbo16 > 0)
nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
+ uma_zone_set_maxaction(zone_jumbo16, mb_maxaction);
zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
NULL, NULL,
@@ -372,6 +387,11 @@ mbuf_init(void *dummy)
/* uma_prealloc() goes here... */
+ /* Initialize the mb_reclaim() callout. */
+ mtx_init(&mb_reclaim_callout_mtx, "mb_reclaim_callout_mtx", NULL,
+ MTX_DEF);
+ callout_init(&mb_reclaim_callout, 1);
+
/*
* Hook event handler for low-memory situation, used to
* drain protocols and push data back to the caches (UMA
@@ -678,3 +698,61 @@ mb_reclaim(void *junk)
if (pr->pr_drain != NULL)
(*pr->pr_drain)();
}
+
+/*
+ * Handler for mb_reclaim_callout, which mb_maxaction() (below) arms when
+ * we hit the maximum for a zone.  Calls mb_reclaim() with
+ * mb_reclaim_callout_mtx dropped, then marks the callout inactive so
+ * that mb_maxaction() is able to schedule it again.
+ */
+static void
+mb_reclaim_timer(void *junk __unused)
+{
+
+ mtx_lock(&mb_reclaim_callout_mtx);
+
+ /*
+ * Avoid running this function extra times by skipping this invocation
+ * if the callout has already been rescheduled or is no longer active.
+ */
+ if (callout_pending(&mb_reclaim_callout) ||
+ !callout_active(&mb_reclaim_callout)) {
+ mtx_unlock(&mb_reclaim_callout_mtx);
+ return;
+ }
+ mtx_unlock(&mb_reclaim_callout_mtx);
+
+ mb_reclaim(NULL);
+
+ mtx_lock(&mb_reclaim_callout_mtx);
+ callout_deactivate(&mb_reclaim_callout);
+ mtx_unlock(&mb_reclaim_callout_mtx);
+}
+
+/*
+ * This function is called when we hit the maximum for a zone; the zone
+ * argument itself is unused.
+ *
+ * The thread calling this function holds the zone lock, so instead of
+ * running the protocol drain routines (to free up some mbufs) here, we
+ * schedule mb_reclaim_timer() via the callout and never block on the mutex.
+ */
+static void
+mb_maxaction(uma_zone_t zone __unused)
+{
+
+ /*
+ * If we can't immediately obtain the lock, either the callout
+ * is currently running, or another thread is scheduling the
+ * callout.
+ */
+ if (!mtx_trylock(&mb_reclaim_callout_mtx))
+ return;
+
+ /* If not already scheduled/running, arm the callout for one tick. */
+ if (!callout_active(&mb_reclaim_callout)) {
+ callout_reset(&mb_reclaim_callout, 1, mb_reclaim_timer, NULL);
+ }
+
+ mtx_unlock(&mb_reclaim_callout_mtx);
+}
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index d3e0658..d218e60 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -521,6 +521,19 @@ int uma_zone_get_max(uma_zone_t zone);
void uma_zone_set_warning(uma_zone_t zone, const char *warning);
/*
+ * Sets a function to run when limit is reached
+ *
+ * Arguments:
+ * zone The zone to which this applies
+ * fx The function to run
+ *
+ * Returns:
+ * Nothing
+ */
+typedef void (*uma_maxaction_t)(uma_zone_t);
+void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t);
+
+/*
* Obtains the approximate current number of items allocated from a zone
*
* Arguments:
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 3a0a799..4600589 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -431,6 +431,13 @@ zone_log_warning(uma_zone_t zone)
printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
}
+static inline void
+zone_maxaction(uma_zone_t zone)
+{
+ if (zone->uz_maxaction) /* hook is optional; skip when none is set */
+ (*zone->uz_maxaction)(zone); /* the zone itself is the only argument */
+}
+
static void
zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
{
@@ -1578,6 +1585,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
zone->uz_flags = 0;
zone->uz_warning = NULL;
timevalclear(&zone->uz_ratecheck);
+ zone->uz_maxaction = NULL;
keg = arg->keg;
ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
@@ -2382,6 +2390,7 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
zone->uz_flags |= UMA_ZFLAG_FULL;
zone_log_warning(zone);
+ zone_maxaction(zone);
}
if (flags & M_NOWAIT)
break;
@@ -2501,6 +2510,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
zone->uz_flags |= UMA_ZFLAG_FULL;
zone->uz_sleeps++;
zone_log_warning(zone);
+ zone_maxaction(zone);
msleep(zone, zone->uz_lockptr, PVM,
"zonelimit", hz/100);
zone->uz_flags &= ~UMA_ZFLAG_FULL;
@@ -3007,6 +3017,16 @@ uma_zone_set_warning(uma_zone_t zone, const char *warning)
}
/* See uma.h */
+void
+uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
+{
+
+ ZONE_LOCK(zone); /* update the hook under the zone lock */
+ zone->uz_maxaction = maxaction; /* read back by zone_maxaction() */
+ ZONE_UNLOCK(zone);
+}
+
+/* See uma.h */
int
uma_zone_get_cur(uma_zone_t zone)
{
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index ad2a405..5d7ecd3 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -303,10 +303,12 @@ struct uma_zone {
uint16_t uz_count; /* Amount of items in full bucket */
uint16_t uz_count_min; /* Minimal amount of items there */
- /* The next three fields are used to print a rate-limited warnings. */
+ /* The next two fields are used to print rate-limited warnings. */
const char *uz_warning; /* Warning to print on failure */
struct timeval uz_ratecheck; /* Warnings rate-limiting */
+ uma_maxaction_t uz_maxaction; /* Function to run when at limit */
+
/*
* This HAS to be the last item because we adjust the zone size
* based on NCPU and then allocate the space for the zones.
OpenPOWER on IntegriCloud