author	bmilekic <bmilekic@FreeBSD.org>	2001-06-22 06:35:32 +0000
committer	bmilekic <bmilekic@FreeBSD.org>	2001-06-22 06:35:32 +0000
commit	5d710b296b16892625e76899c986dd7697754050 (patch)
tree	d56e06b359d94f82a565ce2544371429eb0c93ae /sys
parent	44d1723f45fa955b68fb90fc3e7604ceeea6f0d3 (diff)
download	FreeBSD-src-5d710b296b16892625e76899c986dd7697754050.zip
	FreeBSD-src-5d710b296b16892625e76899c986dd7697754050.tar.gz
Introduce numerous SMP friendly changes to the mbuf allocator. Namely,
introduce a modified allocation mechanism for mbufs and mbuf clusters; one
which can scale under SMP and which offers the possibility of resource
reclamation to be implemented in the future. Notable advantages:

 o Reduce contention for SMP by offering per-CPU pools and locks.
 o Better use of data cache due to per-CPU pools.
 o Much less code cache pollution due to excessively large allocation macros.
 o Framework for `grouping' objects from the same page together so as to be
   able to possibly free wired-down pages back to the system if they are no
   longer needed by the network stacks.

Additional things changed with this addition:

 - Moved some mbuf specific declarations and initializations from
   sys/conf/param.c into mbuf-specific code where they belong.

 - m_getclr() has been renamed to m_get_clrd() because the old name is
   really confusing. m_getclr() HAS been preserved, though, and is defined
   to the new name. No tree sweep has been done "to change the interface,"
   as the old name will continue to be supported and is not deprecated.
   The change was merely done because m_getclr() sounds too much like
   "m_get a cluster."

 - TEMPORARILY disabled mbtypes statistics displaying in netstat(1) and
   systat(1) (see TODO below).

 - Fixed systat(1) to display the number of "free mbufs" based on the new
   per-CPU stat structures.

 - Fixed netstat(1) to display new per-CPU stats based on sysctl-exported
   per-CPU stat structures. All information is fetched via sysctl.

TODO (in order of priority):

 - Re-enable mbtypes statistics in both netstat(1) and systat(1) after
   introducing an SMP-friendly way to collect the mbtypes stats under the
   already introduced per-CPU locks (i.e. hopefully don't use atomic() - it
   seems too costly for a mere stat update, especially when other locks are
   already present).

 - Optionally have systat(1) display not only "total free mbufs" but also
   "total free mbufs per CPU pool."

 - Fix minor length-fetching issues in netstat(1) related to the recently
   re-enabled option to read mbuf stats from a core file.

 - Move reference counters at least for mbuf clusters into an unused portion
   of the cluster itself, to save space and the need to allocate a counter.

 - Look into introducing resource freeing possibly from a kproc.

Reviewed by (in parts): jlemon, jake, silby, terry
Tested by: jlemon (Intel & Alpha), mjacob (Intel & Alpha)
Preliminary performance measurements: jlemon (and me, obviously)
URL: http://people.freebsd.org/~bmilekic/mb_alloc/
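As a rough illustration of the design described above (per-CPU pools with a
shared "general" pool as fallback), the following stand-alone, user-space C
sketch models the allocation and free paths. It is illustrative only: the
names (pool_get(), pool_put(), NPOOL, POOLSIZE, OBJSIZE) are hypothetical,
and it does not reproduce the bucket, starvation, or condition-variable
machinery of the real allocator introduced in sys/kern/subr_mbuf.c below.

/* Illustrative user-space model -- build with: cc -pthread -o pcpu_pool pcpu_pool.c */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

#define NPOOL    4		/* stand-in for the number of CPUs */
#define POOLSIZE 64		/* stand-in for mbuf_limit/clust_limit */
#define OBJSIZE  256		/* stand-in for MSIZE or MCLBYTES */

struct pool {
	pthread_mutex_t lock;
	void *free[POOLSIZE];
	int nfree;
};

static struct pool percpu[NPOOL];	/* the per-CPU containers */
static struct pool general;		/* the "general system" container */

static void *
pool_get(int cpu)
{
	struct pool *p = &percpu[cpu];
	void *obj = NULL;

	/* Fast path: only code running on this "CPU" contends here. */
	pthread_mutex_lock(&p->lock);
	if (p->nfree > 0)
		obj = p->free[--p->nfree];
	pthread_mutex_unlock(&p->lock);
	if (obj != NULL)
		return (obj);

	/* Slow path: fall back to the shared general container. */
	pthread_mutex_lock(&general.lock);
	if (general.nfree > 0)
		obj = general.free[--general.nfree];
	pthread_mutex_unlock(&general.lock);
	if (obj != NULL)
		return (obj);

	/* Last resort: get a fresh object from the underlying allocator. */
	return (malloc(OBJSIZE));
}

static void
pool_put(int cpu, void *obj)
{
	struct pool *p = &percpu[cpu];

	if (obj == NULL)
		return;

	/* Free to the local container; overflow to the general one. */
	pthread_mutex_lock(&p->lock);
	if (p->nfree < POOLSIZE) {
		p->free[p->nfree++] = obj;
		obj = NULL;
	}
	pthread_mutex_unlock(&p->lock);
	if (obj == NULL)
		return;

	pthread_mutex_lock(&general.lock);
	if (general.nfree < POOLSIZE) {
		general.free[general.nfree++] = obj;
		obj = NULL;
	}
	pthread_mutex_unlock(&general.lock);
	if (obj != NULL)
		free(obj);		/* both containers full: release it */
}

int
main(void)
{
	void *obj;
	int i;

	pthread_mutex_init(&general.lock, NULL);
	for (i = 0; i < NPOOL; i++)
		pthread_mutex_init(&percpu[i].lock, NULL);

	obj = pool_get(0);
	pool_put(0, obj);
	printf("allocated and freed one object through the per-CPU pool\n");
	return (0);
}

The point of the fast path is that a thread normally contends only for its
own CPU's lock; the shared lock is touched only when the local pool is
exhausted or overfull.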
Diffstat (limited to 'sys')
-rw-r--r--	sys/conf/files	1
-rw-r--r--	sys/conf/param.c	13
-rw-r--r--	sys/kern/kern_malloc.c	9
-rw-r--r--	sys/kern/subr_mbuf.c	1029
-rw-r--r--	sys/kern/subr_param.c	13
-rw-r--r--	sys/kern/uipc_mbuf.c	469
-rw-r--r--	sys/sys/mbuf.h	533
-rw-r--r--	sys/sys/sysctl.h	2
-rw-r--r--	sys/vm/vm_kern.c	14
-rw-r--r--	sys/vm/vm_kern.h	2
-rw-r--r--	sys/vm/vm_map.c	10
-rw-r--r--	sys/vm/vm_object.c	2
12 files changed, 1198 insertions, 899 deletions
diff --git a/sys/conf/files b/sys/conf/files
index d39e829..0a0970a 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -796,6 +796,7 @@ kern/subr_diskslice.c standard
kern/subr_eventhandler.c standard
kern/subr_kobj.c standard
kern/subr_log.c standard
+kern/subr_mbuf.c standard
kern/subr_mchain.c optional libmchain
kern/subr_module.c standard
kern/subr_pcpu.c standard
diff --git a/sys/conf/param.c b/sys/conf/param.c
index 333fdc3..704e3f5 100644
--- a/sys/conf/param.c
+++ b/sys/conf/param.c
@@ -64,17 +64,10 @@ int tickadj = howmany(30000, 60 * HZ); /* can adjust 30ms in 60s */
#define MAXFILES (NPROC*2)
#endif
int maxproc = NPROC; /* maximum # of processes */
-int maxprocperuid = NPROC-1; /* maximum # of processes per user */
-int maxfiles = MAXFILES; /* system wide open files limit */
-int maxfilesperproc = MAXFILES; /* per-process open files limit */
+int maxprocperuid = NPROC-1; /* max # of procs per user */
+int maxfiles = MAXFILES; /* sys. wide open files limit */
+int maxfilesperproc = MAXFILES; /* per-proc open files limit */
int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */
-int mbuf_wait = 32; /* mbuf sleep time in ticks */
-
-/* maximum # of sf_bufs (sendfile(2) zero-copy virtual buffers) */
-#ifndef NSFBUFS
-#define NSFBUFS (512 + MAXUSERS * 16)
-#endif
-int nsfbufs = NSFBUFS;
/*
* These may be set to nonzero here or by patching.
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index f460d53..5791925 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -474,8 +474,15 @@ kmeminit(dummy)
if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE))
vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;
+ /*
+ * In mb_init(), we set up submaps for mbufs and clusters, in which
+ * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
+ * respectively. Mathematically, this means that what we do here may
+ * amount to slightly more address space than we need for the submaps,
+ * but it never hurts to have an extra page in kmem_map.
+ */
npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
- sizeof(union mext_refcnt) + vm_kmem_size) / PAGE_SIZE;
+ sizeof(u_int) + vm_kmem_size) / PAGE_SIZE;
kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
(vm_size_t)(npg * sizeof(struct kmemusage)));
diff --git a/sys/kern/subr_mbuf.c b/sys/kern/subr_mbuf.c
new file mode 100644
index 0000000..e285484
--- /dev/null
+++ b/sys/kern/subr_mbuf.c
@@ -0,0 +1,1029 @@
+/*
+ * Copyright (c) 2001
+ * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_param.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Maximum number of PCPU containers. If you know what you're doing you could
+ * explicitly define MBALLOC_NCPU to be exactly the number of CPUs on your
+ * system during compilation, and thus prevent kernel structure bloat.
+ */
+#ifdef MBALLOC_NCPU
+#define NCPU MBALLOC_NCPU
+#else
+#define NCPU MAXCPU
+#endif
+
+/*
+ * The mbuf allocator is heavily based on Alfred Perlstein's
+ * (alfred@FreeBSD.org) "memcache" allocator which is itself based
+ * on concepts from several per-CPU memory allocators. The difference
+ * between this allocator and memcache is that, among other things:
+ *
+ * (i) We don't free back to the map from the free() routine - we leave the
+ * option of implementing lazy freeing (from a kproc) in the future.
+ *
+ * (ii) We allocate from separate sub-maps of kmem_map, thus limiting the
+ * maximum number of allocatable objects of a given type. Further,
+ * we handle blocking on a cv in the case that the map is starved and
+ * we have to rely solely on cached (circulating) objects.
+ *
+ * The mbuf allocator keeps all objects that it allocates in mb_buckets.
+ * The buckets keep a page worth of objects (an object can be an mbuf or an
+ * mbuf cluster) and facilitate moving larger sets of contiguous objects
+ * from the per-CPU lists to the main list for the given object. The buckets
+ * also have an added advantage in that after several moves from a per-CPU
+ * list to the main list and back to the per-CPU list, contiguous objects
+ * are kept together, thus trying to put the TLB cache to good use.
+ *
+ * The buckets are kept on singly-linked lists called "containers." A container
+ * is protected by a mutex lock in order to ensure consistency. The mutex lock
+ * itself is allocated separately and attached to the container at boot time,
+ * thus allowing for certain containers to share the same mutex lock. Per-CPU
+ * containers for mbufs and mbuf clusters all share the same per-CPU
+ * lock whereas the "general system" containers (i.e. the "main lists") for
+ * these objects share one global lock.
+ *
+ */
+struct mb_bucket {
+ SLIST_ENTRY(mb_bucket) mb_blist;
+ int mb_owner;
+ int mb_numfree;
+ void *mb_free[0];
+};
+
+struct mb_container {
+ SLIST_HEAD(mc_buckethd, mb_bucket) mc_bhead;
+ struct mtx *mc_lock;
+ int mc_numowner;
+ u_int mc_starved;
+ u_long *mc_objcount;
+ u_long *mc_numpgs;
+};
+
+struct mb_gen_list {
+ struct mb_container mb_cont;
+ struct cv mgl_mstarved;
+};
+
+struct mb_pcpu_list {
+ struct mb_container mb_cont;
+};
+
+/*
+ * Boot-time configurable object counts that will determine the maximum
+ * number of permitted objects in the mbuf and mcluster cases. In the
+ * ext counter (nmbcnt) case, it's just an indicator serving to scale
+ * kmem_map size properly - in other words, we may be allowed to allocate
+ * more than nmbcnt counters, whereas we will never be allowed to allocate
+ * more than nmbufs mbufs or nmbclusters mclusters.
+ * As for nsfbufs, it is used to indicate how many sendfile(2) buffers will be
+ * allocatable by the sfbuf allocator (found in uipc_syscalls.c)
+ */
+#ifndef NMBCLUSTERS
+#define NMBCLUSTERS (1024 + MAXUSERS * 16)
+#endif
+#ifndef NMBUFS
+#define NMBUFS (NMBCLUSTERS * 2)
+#endif
+#ifndef NSFBUFS
+#define NSFBUFS (512 + MAXUSERS * 16)
+#endif
+#ifndef NMBCNTS
+#define NMBCNTS (NMBCLUSTERS + NSFBUFS)
+#endif
+int nmbufs = NMBUFS;
+int nmbclusters = NMBCLUSTERS;
+int nmbcnt = NMBCNTS;
+int nsfbufs = NSFBUFS;
+TUNABLE_INT("kern.ipc.nmbufs", &nmbufs);
+TUNABLE_INT("kern.ipc.nmbclusters", &nmbclusters);
+TUNABLE_INT("kern.ipc.nmbcnt", &nmbcnt);
+TUNABLE_INT("kern.ipc.nsfbufs", &nsfbufs);
+
+/*
+ * Perform sanity checks of tunables declared above.
+ */
+static void
+tunable_mbinit(void *dummy)
+{
+ /*
+ * This has to be done before VM init.
+ */
+ if (nmbufs < nmbclusters * 2)
+ nmbufs = nmbclusters * 2;
+ if (nmbcnt < nmbclusters + nsfbufs)
+ nmbcnt = nmbclusters + nsfbufs;
+
+ return;
+}
+SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
+
+/*
+ * The freelist structures and mutex locks. The number statically declared
+ * here depends on the number of CPUs.
+ *
+ * We setup in such a way that all the objects (mbufs, clusters)
+ * share the same mutex lock. It has been established that we do not benefit
+ * from different locks for different objects, so we use the same lock,
+ * regardless of object type.
+ */
+struct mb_lstmngr {
+ struct mb_gen_list *ml_genlist;
+ struct mb_pcpu_list *ml_cntlst[NCPU];
+ struct mb_bucket **ml_btable;
+ vm_map_t ml_map;
+ vm_offset_t ml_mapbase;
+ vm_offset_t ml_maptop;
+ int ml_mapfull;
+ u_int ml_objsize;
+ u_int *ml_wmhigh;
+};
+struct mb_lstmngr mb_list_mbuf, mb_list_clust;
+struct mtx mbuf_gen, mbuf_pcpu[NCPU];
+
+/*
+ * Local macros for internal allocator structure manipulations.
+ */
+#define MB_GET_PCPU_LIST(mb_lst) (mb_lst)->ml_cntlst[PCPU_GET(cpuid)]
+
+#define MB_GET_PCPU_LIST_NUM(mb_lst, num) (mb_lst)->ml_cntlst[(num)]
+
+#define MB_GET_GEN_LIST(mb_lst) (mb_lst)->ml_genlist
+
+#define MB_LOCK_CONT(mb_cnt) mtx_lock((mb_cnt)->mb_cont.mc_lock)
+
+#define MB_UNLOCK_CONT(mb_cnt) mtx_unlock((mb_cnt)->mb_cont.mc_lock)
+
+#define MB_BUCKET_INDX(mb_obj, mb_lst) \
+ (int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) / PAGE_SIZE)
+
+#define MB_GET_OBJECT(mb_objp, mb_bckt, mb_lst) \
+{ \
+ struct mc_buckethd *_mchd = &((mb_lst)->mb_cont.mc_bhead); \
+ \
+ (mb_bckt)->mb_numfree--; \
+ (mb_objp) = (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)]; \
+ (*((mb_lst)->mb_cont.mc_objcount))--; \
+ if ((mb_bckt)->mb_numfree == 0) { \
+ SLIST_REMOVE_HEAD(_mchd, mb_blist); \
+ SLIST_NEXT((mb_bckt), mb_blist) = NULL; \
+ (mb_bckt)->mb_owner |= MB_BUCKET_FREE; \
+ } \
+}
+
+#define MB_PUT_OBJECT(mb_objp, mb_bckt, mb_lst) \
+ (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)] = (mb_objp); \
+ (mb_bckt)->mb_numfree++; \
+ (*((mb_lst)->mb_cont.mc_objcount))++;
+
+/*
+ * Ownership of buckets/containers is represented by integers. The PCPU
+ * lists range from 0 to NCPU-1. We need a free numerical id for the general
+ * list (we use NCPU). We also need a non-conflicting free bit to indicate
+ * that the bucket is free and removed from a container, while not losing
+ * the bucket's originating container id. We use the highest bit
+ * for the free marker.
+ */
+#define MB_GENLIST_OWNER (NCPU)
+#define MB_BUCKET_FREE (1 << (sizeof(int) * 8 - 1))
+
+/*
+ * sysctl(8) exported objects
+ */
+struct mbstat mbstat; /* General stats + infos. */
+struct mbpstat mb_statpcpu[NCPU+1]; /* PCPU + Gen. container alloc stats */
+int mbuf_wait = 64; /* Sleep time for wait code (ticks) */
+u_int mbuf_limit = 512; /* Upper lim. on # of mbufs per CPU */
+u_int clust_limit = 128; /* Upper lim. on # of clusts per CPU */
+SYSCTL_DECL(_kern_ipc);
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RD, &nmbclusters, 0,
+ "Maximum number of mbuf clusters available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
+ "Maximum number of mbufs available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
+ "Number used to scale kmem_map to ensure sufficient space for counters");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RD, &nsfbufs, 0,
+ "Maximum number of sendfile(2) sf_bufs available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0,
+ "Sleep time of mbuf subsystem wait allocations during exhaustion");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_limit, CTLFLAG_RW, &mbuf_limit, 0,
+ "Upper limit of number of mbufs allowed on each PCPU list");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_limit, CTLFLAG_RW, &clust_limit, 0,
+ "Upper limit of number of mbuf clusters allowed on each PCPU list");
+SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
+ "Mbuf general information and statistics");
+SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu,
+ sizeof(mb_statpcpu), "S,", "Mbuf allocator per CPU statistics");
+
+/*
+ * Prototypes of local allocator routines.
+ */
+static __inline void *mb_alloc(struct mb_lstmngr *, int);
+void *mb_alloc_wait(struct mb_lstmngr *);
+static __inline void mb_free(struct mb_lstmngr *, void *);
+static void mb_init(void *);
+struct mb_bucket *mb_pop_cont(struct mb_lstmngr *, int,
+ struct mb_pcpu_list *);
+void mb_reclaim(void);
+
+/*
+ * Initial allocation numbers. Each parameter represents the number of buckets
+ * of each object that will be placed initially in each PCPU container for
+ * said object.
+ */
+#define NMB_MBUF_INIT 4
+#define NMB_CLUST_INIT 16
+
+/*
+ * Initialize the mbuf subsystem.
+ *
+ * We sub-divide the kmem_map into several submaps; this way, we don't have
+ * to worry about artificially limiting the number of mbuf or mbuf cluster
+ * allocations, due to fear of one type of allocation "stealing" address
+ * space initially reserved for another.
+ *
+ * Setup both the general containers and all the PCPU containers. Populate
+ * the PCPU containers with initial numbers.
+ */
+MALLOC_DEFINE(M_MBUF, "mbufmgr", "mbuf subsystem management structures");
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mb_init, NULL)
+void
+mb_init(void *dummy)
+{
+ struct mb_pcpu_list *pcpu_cnt;
+ vm_size_t mb_map_size;
+ int i, j;
+
+ /*
+ * Setup all the submaps, for each type of object that we deal
+ * with in this allocator.
+ */
+ mb_map_size = (vm_size_t)(nmbufs * MSIZE);
+ mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
+ mb_list_mbuf.ml_btable = malloc((unsigned long)mb_map_size / PAGE_SIZE *
+ sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT);
+ if (mb_list_mbuf.ml_btable == NULL)
+ goto bad;
+ mb_list_mbuf.ml_map = kmem_suballoc(kmem_map,&(mb_list_mbuf.ml_mapbase),
+ &(mb_list_mbuf.ml_maptop), mb_map_size);
+ mb_list_mbuf.ml_mapfull = 0;
+ mb_list_mbuf.ml_objsize = MSIZE;
+ mb_list_mbuf.ml_wmhigh = &mbuf_limit;
+
+ mb_map_size = (vm_size_t)(nmbclusters * MCLBYTES);
+ mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
+ mb_list_clust.ml_btable = malloc((unsigned long)mb_map_size / PAGE_SIZE
+ * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT);
+ if (mb_list_clust.ml_btable == NULL)
+ goto bad;
+ mb_list_clust.ml_map = kmem_suballoc(kmem_map,
+ &(mb_list_clust.ml_mapbase), &(mb_list_clust.ml_maptop),
+ mb_map_size);
+ mb_list_clust.ml_mapfull = 0;
+ mb_list_clust.ml_objsize = MCLBYTES;
+ mb_list_clust.ml_wmhigh = &clust_limit;
+
+ /* XXX XXX XXX: mbuf_map->system_map = clust_map->system_map = 1 */
+
+ /*
+ * Allocate required general (global) containers for each object type.
+ */
+ mb_list_mbuf.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF,
+ M_NOWAIT);
+ mb_list_clust.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF,
+ M_NOWAIT);
+ if ((mb_list_mbuf.ml_genlist == NULL) ||
+ (mb_list_clust.ml_genlist == NULL))
+ goto bad;
+
+ /*
+ * Initialize condition variables and general container mutex locks.
+ */
+ mtx_init(&mbuf_gen, "mbuf subsystem general lists lock", 0);
+ cv_init(&(mb_list_mbuf.ml_genlist->mgl_mstarved), "mbuf pool starved");
+ cv_init(&(mb_list_clust.ml_genlist->mgl_mstarved),
+ "mcluster pool starved");
+ mb_list_mbuf.ml_genlist->mb_cont.mc_lock =
+ mb_list_clust.ml_genlist->mb_cont.mc_lock = &mbuf_gen;
+
+ /*
+ * Setup the general containers for each object.
+ */
+ mb_list_mbuf.ml_genlist->mb_cont.mc_numowner =
+ mb_list_clust.ml_genlist->mb_cont.mc_numowner = MB_GENLIST_OWNER;
+ mb_list_mbuf.ml_genlist->mb_cont.mc_starved =
+ mb_list_clust.ml_genlist->mb_cont.mc_starved = 0;
+ mb_list_mbuf.ml_genlist->mb_cont.mc_objcount =
+ &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbfree);
+ mb_list_clust.ml_genlist->mb_cont.mc_objcount =
+ &(mb_statpcpu[MB_GENLIST_OWNER].mb_clfree);
+ mb_list_mbuf.ml_genlist->mb_cont.mc_numpgs =
+ &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbpgs);
+ mb_list_clust.ml_genlist->mb_cont.mc_numpgs =
+ &(mb_statpcpu[MB_GENLIST_OWNER].mb_clpgs);
+ SLIST_INIT(&(mb_list_mbuf.ml_genlist->mb_cont.mc_bhead));
+ SLIST_INIT(&(mb_list_clust.ml_genlist->mb_cont.mc_bhead));
+
+ /*
+ * Initialize general mbuf statistics
+ */
+ mbstat.m_msize = MSIZE;
+ mbstat.m_mclbytes = MCLBYTES;
+ mbstat.m_minclsize = MINCLSIZE;
+ mbstat.m_mlen = MLEN;
+ mbstat.m_mhlen = MHLEN;
+
+ /*
+ * Allocate and initialize PCPU containers.
+ */
+ for (i = 0; i < mp_ncpus; i++) {
+ mb_list_mbuf.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list),
+ M_MBUF, M_NOWAIT);
+ mb_list_clust.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list),
+ M_MBUF, M_NOWAIT);
+ if ((mb_list_mbuf.ml_cntlst[i] == NULL) ||
+ (mb_list_clust.ml_cntlst[i] == NULL))
+ goto bad;
+
+ mtx_init(&mbuf_pcpu[i], "mbuf PCPU list lock", 0);
+ mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_lock =
+ mb_list_clust.ml_cntlst[i]->mb_cont.mc_lock = &mbuf_pcpu[i];
+
+ mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numowner =
+ mb_list_clust.ml_cntlst[i]->mb_cont.mc_numowner = i;
+ mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_starved =
+ mb_list_clust.ml_cntlst[i]->mb_cont.mc_starved = 0;
+ mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_objcount =
+ &(mb_statpcpu[i].mb_mbfree);
+ mb_list_clust.ml_cntlst[i]->mb_cont.mc_objcount =
+ &(mb_statpcpu[i].mb_clfree);
+ mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numpgs =
+ &(mb_statpcpu[i].mb_mbpgs);
+ mb_list_clust.ml_cntlst[i]->mb_cont.mc_numpgs =
+ &(mb_statpcpu[i].mb_clpgs);
+
+ SLIST_INIT(&(mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_bhead));
+ SLIST_INIT(&(mb_list_clust.ml_cntlst[i]->mb_cont.mc_bhead));
+
+ /*
+ * Perform initial allocations.
+ */
+ pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_mbuf, i);
+ MB_LOCK_CONT(pcpu_cnt);
+ for (j = 0; j < NMB_MBUF_INIT; j++) {
+ if (mb_pop_cont(&mb_list_mbuf, M_DONTWAIT, pcpu_cnt)
+ == NULL)
+ goto bad;
+ }
+ MB_UNLOCK_CONT(pcpu_cnt);
+
+ pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_clust, i);
+ MB_LOCK_CONT(pcpu_cnt);
+ for (j = 0; j < NMB_CLUST_INIT; j++) {
+ if (mb_pop_cont(&mb_list_clust, M_DONTWAIT, pcpu_cnt)
+ == NULL)
+ goto bad;
+ }
+ MB_UNLOCK_CONT(pcpu_cnt);
+ }
+
+ return;
+bad:
+ panic("mb_init(): failed to initialize mbuf subsystem!");
+}
+
+/*
+ * Populate a given mbuf PCPU container with a bucket full of fresh new
+ * buffers. Return a pointer to the new bucket (already in the container if
+ * successful), or return NULL on failure.
+ *
+ * LOCKING NOTES:
+ * PCPU container lock must be held when this is called.
+ * The lock is dropped here so that we can cleanly call the underlying VM
+ * code. If we fail, we return with no locks held. If we succeed (i.e. return
+ * non-NULL), we return with the PCPU lock held, ready for allocation from
+ * the returned bucket.
+ */
+struct mb_bucket *
+mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst)
+{
+ struct mb_bucket *bucket;
+ caddr_t p;
+ int i;
+
+ MB_UNLOCK_CONT(cnt_lst);
+ /*
+ * If our object's (finite) map is starved now (i.e. no more address
+ * space), bail out now.
+ */
+ if (mb_list->ml_mapfull)
+ return (NULL);
+
+ bucket = malloc(sizeof(struct mb_bucket) +
+ PAGE_SIZE / mb_list->ml_objsize * sizeof(void *), M_MBUF,
+ how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
+ if (bucket == NULL)
+ return (NULL);
+
+ p = (caddr_t)kmem_malloc(mb_list->ml_map, PAGE_SIZE,
+ how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
+ if (p == NULL) {
+ free(bucket, M_MBUF);
+ return (NULL);
+ }
+
+ bucket->mb_numfree = 0;
+ mb_list->ml_btable[MB_BUCKET_INDX(p, mb_list)] = bucket;
+ for (i = 0; i < (PAGE_SIZE / mb_list->ml_objsize); i++) {
+ bucket->mb_free[i] = p;
+ bucket->mb_numfree++;
+ p += mb_list->ml_objsize;
+ }
+
+ MB_LOCK_CONT(cnt_lst);
+ bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
+ SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), bucket, mb_blist);
+ (*(cnt_lst->mb_cont.mc_numpgs))++;
+ *(cnt_lst->mb_cont.mc_objcount) += bucket->mb_numfree;
+
+ return (bucket);
+}
+
+/*
+ * Allocate an mbuf-subsystem type object.
+ * The general case is very easy. Complications only arise if our PCPU
+ * container is empty. Things get worse if the PCPU container is empty,
+ * the general container is empty, and we've run out of address space
+ * in our map; then we try to block if we're willing to (M_TRYWAIT).
+ */
+static __inline
+void *
+mb_alloc(struct mb_lstmngr *mb_list, int how)
+{
+ struct mb_pcpu_list *cnt_lst;
+ struct mb_bucket *bucket;
+ void *m;
+
+ m = NULL;
+ cnt_lst = MB_GET_PCPU_LIST(mb_list);
+ MB_LOCK_CONT(cnt_lst);
+
+ if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != NULL) {
+ /*
+ * This is the easy allocation case. We just grab an object
+ * from a bucket in the PCPU container. At worst, we
+ * have just emptied the bucket and so we remove it
+ * from the container.
+ */
+ MB_GET_OBJECT(m, bucket, cnt_lst);
+ MB_UNLOCK_CONT(cnt_lst);
+ } else {
+ struct mb_gen_list *gen_list;
+
+ /*
+ * This is the less-common more difficult case. We must
+ * first verify if the general list has anything for us
+ * and if that also fails, we must allocate a page from
+ * the map and create a new bucket to place in our PCPU
+ * container (already locked). If the map is starved then
+ * we're really in for trouble, as we have to wait on
+ * the general container's condition variable.
+ */
+ gen_list = MB_GET_GEN_LIST(mb_list);
+ MB_LOCK_CONT(gen_list);
+
+ if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead)))
+ != NULL) {
+ /*
+ * Give ownership of the bucket to our CPU's
+ * container, but only actually put the bucket
+ * in the container if it doesn't become free
+ * upon removing an mbuf from it.
+ */
+ SLIST_REMOVE_HEAD(&(gen_list->mb_cont.mc_bhead),
+ mb_blist);
+ bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
+ (*(gen_list->mb_cont.mc_numpgs))--;
+ (*(cnt_lst->mb_cont.mc_numpgs))++;
+ *(gen_list->mb_cont.mc_objcount) -= bucket->mb_numfree;
+ bucket->mb_numfree--;
+ m = bucket->mb_free[(bucket->mb_numfree)];
+ if (bucket->mb_numfree == 0) {
+ SLIST_NEXT(bucket, mb_blist) = NULL;
+ bucket->mb_owner |= MB_BUCKET_FREE;
+ } else {
+ SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead),
+ bucket, mb_blist);
+ *(cnt_lst->mb_cont.mc_objcount) +=
+ bucket->mb_numfree;
+ }
+ MB_UNLOCK_CONT(gen_list);
+ MB_UNLOCK_CONT(cnt_lst);
+ } else {
+ /*
+ * We'll have to allocate a new page.
+ */
+ MB_UNLOCK_CONT(gen_list);
+ bucket = mb_pop_cont(mb_list, how, cnt_lst);
+ if (bucket != NULL) {
+ bucket->mb_numfree--;
+ m = bucket->mb_free[(bucket->mb_numfree)];
+ (*(cnt_lst->mb_cont.mc_objcount))--;
+ MB_UNLOCK_CONT(cnt_lst);
+ } else {
+ if (how == M_TRYWAIT) {
+ /*
+ * Absolute worst-case scenario. We block if
+ * we're willing to, but only after trying to
+ * steal from other lists.
+ */
+ mb_list->ml_mapfull = 1;
+ m = mb_alloc_wait(mb_list);
+ } else
+ /* XXX: No consistency. */
+ mbstat.m_drops++;
+ }
+ }
+ }
+
+ return (m);
+}
+
+/*
+ * This is the worst-case scenario called only if we're allocating with
+ * M_TRYWAIT. We first drain all the protocols, then try to find an mbuf
+ * by looking in every PCPU container. If we're still unsuccessful, we
+ * try the general container one last time and possibly block on our
+ * starved cv.
+ */
+void *
+mb_alloc_wait(struct mb_lstmngr *mb_list)
+{
+ struct mb_pcpu_list *cnt_lst;
+ struct mb_gen_list *gen_list;
+ struct mb_bucket *bucket;
+ void *m;
+ int i, cv_ret;
+
+ /*
+ * Try to reclaim mbuf-related objects (mbufs, clusters).
+ */
+ mb_reclaim();
+
+ /*
+ * Cycle all the PCPU containers. Increment starved counts if found
+ * empty.
+ */
+ for (i = 0; i < mp_ncpus; i++) {
+ cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, i);
+ MB_LOCK_CONT(cnt_lst);
+
+ /*
+ * If container is non-empty, get a single object from it.
+ * If empty, increment starved count.
+ */
+ if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) !=
+ NULL) {
+ MB_GET_OBJECT(m, bucket, cnt_lst);
+ MB_UNLOCK_CONT(cnt_lst);
+ mbstat.m_wait++; /* XXX: No consistency. */
+ return (m);
+ } else
+ cnt_lst->mb_cont.mc_starved++;
+
+ MB_UNLOCK_CONT(cnt_lst);
+ }
+
+ /*
+ * We're still here, so that means it's time to get the general
+ * container lock, check it one more time (now that mb_reclaim()
+ * has been called) and if we still get nothing, block on the cv.
+ */
+ gen_list = MB_GET_GEN_LIST(mb_list);
+ MB_LOCK_CONT(gen_list);
+ if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL) {
+ MB_GET_OBJECT(m, bucket, gen_list);
+ MB_UNLOCK_CONT(gen_list);
+ mbstat.m_wait++; /* XXX: No consistency. */
+ return (m);
+ }
+
+ gen_list->mb_cont.mc_starved++;
+ cv_ret = cv_timedwait(&(gen_list->mgl_mstarved),
+ gen_list->mb_cont.mc_lock, mbuf_wait);
+ gen_list->mb_cont.mc_starved--;
+
+ if ((cv_ret == 0) &&
+ ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL)) {
+ MB_GET_OBJECT(m, bucket, gen_list);
+ mbstat.m_wait++; /* XXX: No consistency. */
+ } else {
+ mbstat.m_drops++; /* XXX: No consistency. */
+ m = NULL;
+ }
+
+ MB_UNLOCK_CONT(gen_list);
+
+ return (m);
+}
+
+/*
+ * Free an object to its rightful container.
+ * In the very general case, this operation is really very easy.
+ * Complications arise primarily if:
+ * (a) We've hit the high limit on number of free objects allowed in
+ * our PCPU container.
+ * (b) We're in a critical situation where our container has been
+ * marked 'starved' and we need to issue wakeups on the starved
+ * condition variable.
+ * (c) Minor (odd) cases: our bucket has migrated while we were
+ * waiting for the lock; our bucket is in the general container;
+ * our bucket is empty.
+ */
+static __inline
+void
+mb_free(struct mb_lstmngr *mb_list, void *m)
+{
+ struct mb_pcpu_list *cnt_lst;
+ struct mb_gen_list *gen_list;
+ struct mb_bucket *bucket;
+ u_int owner;
+
+ bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list)];
+
+ /*
+ * Make sure that if after we lock the bucket's present container the
+ * bucket has migrated, that we drop the lock and get the new one.
+ */
+retry_lock:
+ owner = bucket->mb_owner & ~MB_BUCKET_FREE;
+ switch (owner) {
+ case MB_GENLIST_OWNER:
+ gen_list = MB_GET_GEN_LIST(mb_list);
+ MB_LOCK_CONT(gen_list);
+ if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) {
+ MB_UNLOCK_CONT(gen_list);
+ goto retry_lock;
+ }
+
+ /*
+ * If we're intended for the general container, this is
+ * real easy: no migrating required. The only `bogon'
+ * is that we're now contending with all the threads
+ * dealing with the general list, but this is expected.
+ */
+ MB_PUT_OBJECT(m, bucket, gen_list);
+ if (gen_list->mb_cont.mc_starved > 0)
+ cv_signal(&(gen_list->mgl_mstarved));
+ MB_UNLOCK_CONT(gen_list);
+ break;
+
+ default:
+ cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, owner);
+ MB_LOCK_CONT(cnt_lst);
+ if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) {
+ MB_UNLOCK_CONT(cnt_lst);
+ goto retry_lock;
+ }
+
+ MB_PUT_OBJECT(m, bucket, cnt_lst);
+
+ if (cnt_lst->mb_cont.mc_starved > 0) {
+ /*
+ * This is a tough case. It means that we've
+ * been flagged at least once to indicate that
+ * we're empty, and that the system is in a critical
+ * situation, so we ought to migrate at least one
+ * bucket over to the general container.
+ * There may or may not be a thread blocking on
+ * the starved condition variable, but chances
+ * are that one will eventually come up soon so
+ * it's better to migrate now than never.
+ */
+ gen_list = MB_GET_GEN_LIST(mb_list);
+ MB_LOCK_CONT(gen_list);
+ KASSERT((bucket->mb_owner & MB_BUCKET_FREE) != 0,
+ ("mb_free: corrupt bucket %p\n", bucket));
+ SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead),
+ bucket, mb_blist);
+ bucket->mb_owner = MB_GENLIST_OWNER;
+ (*(cnt_lst->mb_cont.mc_objcount))--;
+ (*(gen_list->mb_cont.mc_objcount))++;
+ (*(cnt_lst->mb_cont.mc_numpgs))--;
+ (*(gen_list->mb_cont.mc_numpgs))++;
+
+ /*
+ * Determine whether or not to keep transferring
+ * buckets to the general list or whether we've
+ * transferred enough already.
+ * We realize that although we may flag another
+ * bucket to be migrated to the general container
+ * that in the meantime, the thread that was
+ * blocked on the cv is already woken up and
+ * long gone. But in that case, the worst
+ * consequence is that we will end up migrating
+ * one bucket too many, which is really not a big
+ * deal, especially if we're close to a critical
+ * situation.
+ */
+ if (gen_list->mb_cont.mc_starved > 0) {
+ cnt_lst->mb_cont.mc_starved--;
+ cv_signal(&(gen_list->mgl_mstarved));
+ } else
+ cnt_lst->mb_cont.mc_starved = 0;
+
+ MB_UNLOCK_CONT(gen_list);
+ MB_UNLOCK_CONT(cnt_lst);
+ break;
+ }
+
+ if (*(cnt_lst->mb_cont.mc_objcount) > *(mb_list->ml_wmhigh)) {
+ /*
+ * We've hit the high limit of allowed numbers of mbufs
+ * on this PCPU list. We must now migrate a bucket
+ * over to the general container.
+ */
+ gen_list = MB_GET_GEN_LIST(mb_list);
+ MB_LOCK_CONT(gen_list);
+ if ((bucket->mb_owner & MB_BUCKET_FREE) == 0) {
+ bucket =
+ SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead));
+ SLIST_REMOVE_HEAD(&(cnt_lst->mb_cont.mc_bhead),
+ mb_blist);
+ }
+ SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead),
+ bucket, mb_blist);
+ bucket->mb_owner = MB_GENLIST_OWNER;
+ *(cnt_lst->mb_cont.mc_objcount) -= bucket->mb_numfree;
+ *(gen_list->mb_cont.mc_objcount) += bucket->mb_numfree;
+ (*(cnt_lst->mb_cont.mc_numpgs))--;
+ (*(gen_list->mb_cont.mc_numpgs))++;
+
+ MB_UNLOCK_CONT(gen_list);
+ MB_UNLOCK_CONT(cnt_lst);
+ break;
+ }
+
+ if (bucket->mb_owner & MB_BUCKET_FREE) {
+ SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead),
+ bucket, mb_blist);
+ bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
+ }
+
+ MB_UNLOCK_CONT(cnt_lst);
+ break;
+ }
+
+ return;
+}
+
+/*
+ * Drain protocols in hopes to free up some resources.
+ *
+ * LOCKING NOTES:
+ * No locks should be held when this is called. The drain routines have to
+ * presently acquire some locks which raises the possibility of lock order
+ * violation if we're holding any mutex if that mutex is acquired in reverse
+ * order relative to one of the locks in the drain routines.
+ */
+void
+mb_reclaim(void)
+{
+ struct domain *dp;
+ struct protosw *pr;
+
+/*
+ * XXX: Argh, we almost always trip here with witness turned on nowadays
+ * XXX: because we often come in with Giant held. For now, there's no way
+ * XXX: to avoid this.
+ */
+#ifdef WITNESS
+ KASSERT(witness_list(curproc) == 0,
+ ("mb_reclaim() called with locks held"));
+#endif
+
+ mbstat.m_drain++; /* XXX: No consistency. */
+
+ for (dp = domains; dp; dp = dp->dom_next)
+ for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+ if (pr->pr_drain)
+ (*pr->pr_drain)();
+
+}
+
+/*
+ * Local mbuf & cluster alloc macros and routines.
+ * Local macro and function names begin with an underscore ("_").
+ */
+void _mext_free(struct mbuf *);
+void _mclfree(struct mbuf *);
+
+#define _m_get(m, how, type) do { \
+ (m) = (struct mbuf *)mb_alloc(&mb_list_mbuf, (how)); \
+ if ((m) != NULL) { \
+ (m)->m_type = (type); \
+ (m)->m_next = NULL; \
+ (m)->m_nextpkt = NULL; \
+ (m)->m_data = (m)->m_dat; \
+ (m)->m_flags = 0; \
+ } \
+} while (0)
+
+#define _m_gethdr(m, how, type) do { \
+ (m) = (struct mbuf *)mb_alloc(&mb_list_mbuf, (how)); \
+ if ((m) != NULL) { \
+ (m)->m_type = (type); \
+ (m)->m_next = NULL; \
+ (m)->m_nextpkt = NULL; \
+ (m)->m_data = (m)->m_pktdat; \
+ (m)->m_flags = M_PKTHDR; \
+ (m)->m_pkthdr.rcvif = NULL; \
+ (m)->m_pkthdr.csum_flags = 0; \
+ (m)->m_pkthdr.aux = NULL; \
+ } \
+} while (0)
+
+/* XXX: Check for M_PKTHDR && m_pkthdr.aux is bogus... please fix (see KAME) */
+#define _m_free(m, n) do { \
+ (n) = (m)->m_next; \
+ if ((m)->m_flags & M_EXT) \
+ MEXTFREE((m)); \
+ if (((m)->m_flags & M_PKTHDR) != 0 && (m)->m_pkthdr.aux) { \
+ m_freem((m)->m_pkthdr.aux); \
+ (m)->m_pkthdr.aux = NULL; \
+ } \
+ mb_free(&mb_list_mbuf, (m)); \
+} while (0)
+
+#define _mext_init_ref(m) do { \
+ (m)->m_ext.ref_cnt = malloc(sizeof(u_int), M_MBUF, M_NOWAIT); \
+ if ((m)->m_ext.ref_cnt != NULL) { \
+ *((m)->m_ext.ref_cnt) = 0; \
+ MEXT_ADD_REF((m)); \
+ } \
+} while (0)
+
+#define _mext_dealloc_ref(m) \
+ free((m)->m_ext.ref_cnt, M_MBUF)
+
+void
+_mext_free(struct mbuf *mb)
+{
+
+ if (mb->m_ext.ext_type == EXT_CLUSTER)
+ mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf);
+ else
+ (*(mb->m_ext.ext_free))(mb->m_ext.ext_buf, mb->m_ext.ext_args);
+
+ _mext_dealloc_ref(mb);
+ return;
+}
+
+/* We only include this here to avoid making m_clget() excessively large
+ * due to too much inlined code. */
+void
+_mclfree(struct mbuf *mb)
+{
+
+ mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf);
+ mb->m_ext.ext_buf = NULL;
+ return;
+}
+
+/*
+ * Exported space allocation and de-allocation routines.
+ */
+struct mbuf *
+m_get(int how, int type)
+{
+ struct mbuf *mb;
+
+ _m_get(mb, how, type);
+ return (mb);
+}
+
+struct mbuf *
+m_gethdr(int how, int type)
+{
+ struct mbuf *mb;
+
+ _m_gethdr(mb, how, type);
+ return (mb);
+}
+
+struct mbuf *
+m_get_clrd(int how, int type)
+{
+ struct mbuf *mb;
+
+ _m_get(mb, how, type);
+
+ if (mb != NULL)
+ bzero(mtod(mb, caddr_t), MLEN);
+
+ return (mb);
+}
+
+struct mbuf *
+m_gethdr_clrd(int how, int type)
+{
+ struct mbuf *mb;
+
+ _m_gethdr(mb, how, type);
+
+ if (mb != NULL)
+ bzero(mtod(mb, caddr_t), MHLEN);
+
+ return (mb);
+}
+
+struct mbuf *
+m_free(struct mbuf *mb)
+{
+ struct mbuf *nb;
+
+ _m_free(mb, nb);
+ return (nb);
+}
+
+void
+m_clget(struct mbuf *mb, int how)
+{
+
+ mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, how);
+ if (mb->m_ext.ext_buf != NULL) {
+ _mext_init_ref(mb);
+ if (mb->m_ext.ref_cnt == NULL)
+ _mclfree(mb);
+ else {
+ mb->m_data = mb->m_ext.ext_buf;
+ mb->m_flags |= M_EXT;
+ mb->m_ext.ext_free = NULL;
+ mb->m_ext.ext_args = NULL;
+ mb->m_ext.ext_size = MCLBYTES;
+ mb->m_ext.ext_type = EXT_CLUSTER;
+ }
+ }
+ return;
+}
+
+void
+m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
+ void (*freef)(caddr_t, void *), void *args, short flags, int type)
+{
+
+ _mext_init_ref(mb);
+ if (mb->m_ext.ref_cnt != NULL) {
+ mb->m_flags |= (M_EXT | flags);
+ mb->m_ext.ext_buf = buf;
+ mb->m_data = mb->m_ext.ext_buf;
+ mb->m_ext.ext_size = size;
+ mb->m_ext.ext_free = freef;
+ mb->m_ext.ext_args = args;
+ mb->m_ext.ext_type = type;
+ }
+ return;
+}
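For reference, here is a minimal consumer-side sketch of the interface
exported by subr_mbuf.c above (m_gethdr(), m_clget(), m_free()). The helper
example_get_packet() is hypothetical and not part of this commit; it assumes
the usual kernel includes.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

/*
 * Allocate a packet header mbuf and attach a cluster to it using the
 * interface exported above.  With M_TRYWAIT, the allocator may block for
 * up to mbuf_wait ticks if both the PCPU and general containers are
 * exhausted.
 */
static struct mbuf *
example_get_packet(void)
{
	struct mbuf *m;

	m = m_gethdr(M_TRYWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m_clget(m, M_TRYWAIT);			/* attach an MCLBYTES cluster */
	if ((m->m_flags & M_EXT) == 0) {
		(void)m_free(m);		/* no cluster; free the lone mbuf */
		return (NULL);
	}
	m->m_len = m->m_pkthdr.len = 0;
	return (m);
}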
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index 333fdc3..704e3f5 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -64,17 +64,10 @@ int tickadj = howmany(30000, 60 * HZ); /* can adjust 30ms in 60s */
#define MAXFILES (NPROC*2)
#endif
int maxproc = NPROC; /* maximum # of processes */
-int maxprocperuid = NPROC-1; /* maximum # of processes per user */
-int maxfiles = MAXFILES; /* system wide open files limit */
-int maxfilesperproc = MAXFILES; /* per-process open files limit */
+int maxprocperuid = NPROC-1; /* max # of procs per user */
+int maxfiles = MAXFILES; /* sys. wide open files limit */
+int maxfilesperproc = MAXFILES; /* per-proc open files limit */
int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */
-int mbuf_wait = 32; /* mbuf sleep time in ticks */
-
-/* maximum # of sf_bufs (sendfile(2) zero-copy virtual buffers) */
-#ifndef NSFBUFS
-#define NSFBUFS (512 + MAXUSERS * 16)
-#endif
-int nsfbufs = NSFBUFS;
/*
* These may be set to nonzero here or by patching.
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index a4d3674..d68299c 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -37,48 +37,18 @@
#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
-#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
-#include <vm/vm.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-
-#ifndef NMBCLUSTERS
-#define NMBCLUSTERS (512 + MAXUSERS * 16)
-#endif
-
-static void mbinit(void *);
-SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
-
-struct mbuf *mbutl;
-struct mbstat mbstat;
-u_long mbtypes[MT_NTYPES];
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;
-int nmbclusters = NMBCLUSTERS;
-int nmbufs = NMBCLUSTERS * 4;
-int nmbcnt;
-u_long m_mballoc_wid = 0;
-u_long m_clalloc_wid = 0;
-
-/*
- * freelist header structures...
- * mbffree_lst, mclfree_lst, mcntfree_lst
- */
-struct mbffree_lst mmbfree;
-struct mclfree_lst mclfree;
-struct mcntfree_lst mcntfree;
-struct mtx mbuf_mtx;
/*
* sysctl(8) exported objects
@@ -91,419 +61,6 @@ SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
&max_datalen, 0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
- &mbuf_wait, 0, "");
-SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
-SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
- sizeof(mbtypes), "LU", "");
-SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
- &nmbclusters, 0, "Maximum number of mbuf clusters available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
- "Maximum number of mbufs available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
- "Maximum number of ext_buf counters available");
-
-TUNABLE_INT("kern.ipc.nmbclusters", &nmbclusters);
-TUNABLE_INT("kern.ipc.nmbufs", &nmbufs);
-TUNABLE_INT("kern.ipc.nmbcnt", &nmbcnt);
-
-static void m_reclaim(void);
-
-/* Initial allocation numbers */
-#define NCL_INIT 2
-#define NMB_INIT 16
-#define REF_INIT NMBCLUSTERS
-
-static void
-tunable_mbinit(void *dummy)
-{
-
- /*
- * Sanity checks and pre-initialization for non-constants.
- * This has to be done before VM initialization.
- */
- if (nmbufs < nmbclusters * 2)
- nmbufs = nmbclusters * 2;
- if (nmbcnt == 0)
- nmbcnt = EXT_COUNTERS;
-}
-SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
-
-/*
- * Full mbuf subsystem initialization done here.
- *
- * XXX: If ever we have system specific map setups to do, then move them to
- * machdep.c - for now, there is no reason for this stuff to go there.
- */
-static void
-mbinit(void *dummy)
-{
- vm_offset_t maxaddr;
- vm_size_t mb_map_size;
-
- /*
- * Setup the mb_map, allocate requested VM space.
- */
- mb_map_size = (vm_size_t)(nmbufs * MSIZE + nmbclusters * MCLBYTES +
- nmbcnt * sizeof(union mext_refcnt));
- mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
- mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
- mb_map_size);
- /* XXX XXX XXX: mb_map->system_map = 1; */
-
- /*
- * Initialize the free list headers, and setup locks for lists.
- */
- mmbfree.m_head = NULL;
- mclfree.m_head = NULL;
- mcntfree.m_head = NULL;
- mtx_init(&mbuf_mtx, "mbuf free list lock", MTX_DEF);
- cv_init(&mmbfree.m_starved, "mbuf free list starved cv");
- cv_init(&mclfree.m_starved, "mbuf cluster free list starved cv");
-
- /*
- * Initialize mbuf subsystem (sysctl exported) statistics structure.
- */
- mbstat.m_msize = MSIZE;
- mbstat.m_mclbytes = MCLBYTES;
- mbstat.m_minclsize = MINCLSIZE;
- mbstat.m_mlen = MLEN;
- mbstat.m_mhlen = MHLEN;
-
- /*
- * Perform some initial allocations.
- */
- mtx_lock(&mbuf_mtx);
- if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
- goto bad;
- if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
- goto bad;
- if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
- goto bad;
- mtx_unlock(&mbuf_mtx);
-
- return;
-bad:
- panic("mbinit: failed to initialize mbuf subsystem!");
-}
-
-/*
- * Allocate at least nmb reference count structs and place them
- * on the ref cnt free list.
- *
- * Must be called with the mcntfree lock held.
- */
-int
-m_alloc_ref(u_int nmb, int how)
-{
- caddr_t p;
- u_int nbytes;
- int i;
-
- /*
- * We don't cap the amount of memory that can be used
- * by the reference counters, like we do for mbufs and
- * mbuf clusters. In fact, we're absolutely sure that we
- * won't ever be going over our allocated space. We keep enough
- * space in mb_map to accomodate maximum values of allocatable
- * external buffers including, but not limited to, clusters.
- * (That's also why we won't have to have wait routines for
- * counters).
- *
- * If we're in here, we're absolutely certain to be returning
- * succesfully, as long as there is physical memory to accomodate
- * us. And if there isn't, but we're willing to wait, then
- * kmem_malloc() will do the only waiting needed.
- */
-
- nbytes = round_page(nmb * sizeof(union mext_refcnt));
- if (1 /* XXX: how == M_TRYWAIT */)
- mtx_unlock(&mbuf_mtx);
- if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
- M_WAITOK : M_NOWAIT)) == NULL) {
- if (1 /* XXX: how == M_TRYWAIT */)
- mtx_lock(&mbuf_mtx);
- return (0);
- }
- nmb = nbytes / sizeof(union mext_refcnt);
-
- /*
- * We don't let go of the mutex in order to avoid a race.
- * It is up to the caller to let go of the mutex.
- */
- if (1 /* XXX: how == M_TRYWAIT */)
- mtx_lock(&mbuf_mtx);
- for (i = 0; i < nmb; i++) {
- ((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
- mcntfree.m_head = (union mext_refcnt *)p;
- p += sizeof(union mext_refcnt);
- mbstat.m_refree++;
- }
- mbstat.m_refcnt += nmb;
-
- return (1);
-}
-
-/*
- * Allocate at least nmb mbufs and place on mbuf free list.
- *
- * Must be called with the mmbfree lock held.
- */
-int
-m_mballoc(int nmb, int how)
-{
- caddr_t p;
- int i;
- int nbytes;
-
- nbytes = round_page(nmb * MSIZE);
- nmb = nbytes / MSIZE;
-
- /*
- * If we've hit the mbuf limit, stop allocating from mb_map.
- * Also, once we run out of map space, it will be impossible to
- * get any more (nothing is ever freed back to the map).
- */
- if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs))
- return (0);
-
- if (1 /* XXX: how == M_TRYWAIT */)
- mtx_unlock(&mbuf_mtx);
- p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_TRYWAIT ?
- M_WAITOK : M_NOWAIT);
- if (1 /* XXX: how == M_TRYWAIT */) {
- mtx_lock(&mbuf_mtx);
- if (p == NULL)
- mbstat.m_wait++;
- }
-
- /*
- * Either the map is now full, or `how' is M_DONTWAIT and there
- * are no pages left.
- */
- if (p == NULL)
- return (0);
-
- /*
- * We don't let go of the mutex in order to avoid a race.
- * It is up to the caller to let go of the mutex when done
- * with grabbing the mbuf from the free list.
- */
- for (i = 0; i < nmb; i++) {
- ((struct mbuf *)p)->m_next = mmbfree.m_head;
- mmbfree.m_head = (struct mbuf *)p;
- p += MSIZE;
- }
- mbstat.m_mbufs += nmb;
- mbtypes[MT_FREE] += nmb;
- return (1);
-}
-
-/*
- * Once the mb_map has been exhausted and if the call to the allocation macros
- * (or, in some cases, functions) is with M_TRYWAIT, then it is necessary to
- * rely solely on reclaimed mbufs.
- *
- * Here we request for the protocols to free up some resources and, if we
- * still cannot get anything, then we wait for an mbuf to be freed for a
- * designated (mbuf_wait) time, at most.
- *
- * Must be called with the mmbfree mutex held.
- */
-struct mbuf *
-m_mballoc_wait(void)
-{
- struct mbuf *p = NULL;
-
- /*
- * See if we can drain some resources out of the protocols.
- * We drop the mmbfree mutex to avoid recursing into it in some of
- * the drain routines. Clearly, we're faced with a race here because
- * once something is freed during the drain, it may be grabbed right
- * from under us by some other thread. But we accept this possibility
- * in order to avoid a potentially large lock recursion and, more
- * importantly, to avoid a potential lock order reversal which may
- * result in deadlock (See comment above m_reclaim()).
- */
- mtx_unlock(&mbuf_mtx);
- m_reclaim();
-
- mtx_lock(&mbuf_mtx);
- _MGET(p, M_DONTWAIT);
-
- if (p == NULL) {
- int retval;
-
- m_mballoc_wid++;
- retval = cv_timedwait(&mmbfree.m_starved, &mbuf_mtx,
- mbuf_wait);
- m_mballoc_wid--;
-
- /*
- * If we got signaled (i.e. didn't time out), allocate.
- */
- if (retval == 0)
- _MGET(p, M_DONTWAIT);
- }
-
- if (p != NULL) {
- mbstat.m_wait++;
- if (mmbfree.m_head != NULL)
- MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved);
- }
-
- return (p);
-}
-
-/*
- * Allocate some number of mbuf clusters
- * and place on cluster free list.
- *
- * Must be called with the mclfree lock held.
- */
-int
-m_clalloc(int ncl, int how)
-{
- caddr_t p;
- int i;
- int npg_sz;
-
- npg_sz = round_page(ncl * MCLBYTES);
- ncl = npg_sz / MCLBYTES;
-
- /*
- * If the map is now full (nothing will ever be freed to it).
- * If we've hit the mcluster number limit, stop allocating from
- * mb_map.
- */
- if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters))
- return (0);
-
- if (1 /* XXX: how == M_TRYWAIT */)
- mtx_unlock(&mbuf_mtx);
- p = (caddr_t)kmem_malloc(mb_map, npg_sz,
- how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
- if (1 /* XXX: how == M_TRYWAIT */)
- mtx_lock(&mbuf_mtx);
-
- /*
- * Either the map is now full, or `how' is M_DONTWAIT and there
- * are no pages left.
- */
- if (p == NULL)
- return (0);
-
- for (i = 0; i < ncl; i++) {
- ((union mcluster *)p)->mcl_next = mclfree.m_head;
- mclfree.m_head = (union mcluster *)p;
- p += MCLBYTES;
- mbstat.m_clfree++;
- }
- mbstat.m_clusters += ncl;
- return (1);
-}
-
-/*
- * Once the mb_map submap has been exhausted and the allocation is called with
- * M_TRYWAIT, we rely on the mclfree list. If nothing is free, we will
- * block on a cv for a designated amount of time (mbuf_wait) or until we're
- * signaled due to sudden mcluster availability.
- *
- * Must be called with the mclfree lock held.
- */
-caddr_t
-m_clalloc_wait(void)
-{
- caddr_t p = NULL;
- int retval;
-
- m_clalloc_wid++;
- retval = cv_timedwait(&mclfree.m_starved, &mbuf_mtx, mbuf_wait);
- m_clalloc_wid--;
-
- /*
- * Now that we (think) that we've got something, try again.
- */
- if (retval == 0)
- _MCLALLOC(p, M_DONTWAIT);
-
- if (p != NULL) {
- mbstat.m_wait++;
- if (mclfree.m_head != NULL)
- MBWAKEUP(m_clalloc_wid, &mclfree.m_starved);
- }
-
- return (p);
-}
-
-/*
- * m_reclaim: drain protocols in hopes to free up some resources...
- *
- * XXX: No locks should be held going in here. The drain routines have
- * to presently acquire some locks which raises the possibility of lock
- * order violation if we're holding any mutex if that mutex is acquired in
- * reverse order relative to one of the locks in the drain routines.
- */
-static void
-m_reclaim(void)
-{
- struct domain *dp;
- struct protosw *pr;
-
-#ifdef WITNESS
- KASSERT(witness_list(curproc) == 0,
- ("m_reclaim called with locks held"));
-#endif
-
- for (dp = domains; dp; dp = dp->dom_next)
- for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
- if (pr->pr_drain)
- (*pr->pr_drain)();
- mbstat.m_drain++;
-}
-
-/*
- * Space allocation routines.
- * Some of these are also available as macros
- * for critical paths.
- */
-struct mbuf *
-m_get(int how, int type)
-{
- struct mbuf *m;
-
- MGET(m, how, type);
- return (m);
-}
-
-struct mbuf *
-m_gethdr(int how, int type)
-{
- struct mbuf *m;
-
- MGETHDR(m, how, type);
- return (m);
-}
-
-struct mbuf *
-m_getclr(int how, int type)
-{
- struct mbuf *m;
-
- MGET(m, how, type);
- if (m != NULL)
- bzero(mtod(m, caddr_t), MLEN);
- return (m);
-}
-
-struct mbuf *
-m_free(struct mbuf *m)
-{
- struct mbuf *n;
-
- MFREE(m, n);
- return (n);
-}
/*
* struct mbuf *
@@ -672,17 +229,13 @@ m_copym(struct mbuf *m, int off0, int len, int wait)
m = m->m_next;
np = &n->m_next;
}
- if (top == NULL) {
- mtx_lock(&mbuf_mtx);
- mbstat.m_mcfail++;
- mtx_unlock(&mbuf_mtx);
- }
+ if (top == NULL)
+ mbstat.m_mcfail++; /* XXX: No consistency. */
+
return (top);
nospace:
m_freem(top);
- mtx_lock(&mbuf_mtx);
- mbstat.m_mcfail++;
- mtx_unlock(&mbuf_mtx);
+ mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -741,9 +294,7 @@ m_copypacket(struct mbuf *m, int how)
return top;
nospace:
m_freem(top);
- mtx_lock(&mbuf_mtx);
- mbstat.m_mcfail++;
- mtx_unlock(&mbuf_mtx);
+ mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -844,9 +395,7 @@ m_dup(struct mbuf *m, int how)
nospace:
m_freem(top);
- mtx_lock(&mbuf_mtx);
- mbstat.m_mcfail++;
- mtx_unlock(&mbuf_mtx);
+ mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -1008,9 +557,7 @@ m_pullup(struct mbuf *n, int len)
return (m);
bad:
m_freem(n);
- mtx_lock(&mbuf_mtx);
- mbstat.m_mpfail++;
- mtx_unlock(&mbuf_mtx);
+ mbstat.m_mpfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -1157,7 +704,7 @@ m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
off -= mlen;
totlen += mlen;
if (m->m_next == NULL) {
- n = m_getclr(M_DONTWAIT, m->m_type);
+ n = m_get_clrd(M_DONTWAIT, m->m_type);
if (n == NULL)
goto out;
n->m_len = min(MLEN, len + off);
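Similarly, a hedged sketch of how a network driver might attach its own
buffer as external storage with m_extadd() and the EXT_NET_DRV type defined
in sys/sys/mbuf.h below. The names my_ext_free(), my_attach_buf(), MYBUFSIZE
and the 'sc' argument are hypothetical placeholders, not part of this commit.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

#define MYBUFSIZE	2048			/* hypothetical buffer size */

/* Hypothetical free routine: would return 'buf' to the driver's pool. */
static void
my_ext_free(caddr_t buf, void *args)
{
}

static struct mbuf *
my_attach_buf(caddr_t buf, void *sc)
{
	struct mbuf *m;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m_extadd(m, buf, MYBUFSIZE, my_ext_free, sc, 0, EXT_NET_DRV);
	if ((m->m_flags & M_EXT) == 0) {	/* ref. count allocation failed */
		(void)m_free(m);
		return (NULL);
	}
	return (m);
}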
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 0752ca4..ec496d4 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -37,42 +37,20 @@
#ifndef _SYS_MBUF_H_
#define _SYS_MBUF_H_
-#ifdef _KERNEL
-#include <sys/condvar.h> /* XXX */
-#include <sys/_lock.h>
-#include <sys/_mutex.h>
-#endif /* _KERNEL */
-
/*
* Mbufs are of a single size, MSIZE (machine/param.h), which
* includes overhead. An mbuf may add a single "mbuf cluster" of size
* MCLBYTES (also in machine/param.h), which has no additional overhead
* and is used instead of the internal data area; this is done when
- * at least MINCLSIZE of data must be stored.
+ * at least MINCLSIZE of data must be stored. Additionally, it is possible
+ * to allocate a separate buffer externally and attach it to the mbuf in
+ * a way similar to that of mbuf clusters.
*/
-
#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */
#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */
-
#define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */
#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */
-/*
- * Maximum number of allocatable counters for external buffers. This
- * ensures enough VM address space for the allocation of counters
- * in the extreme case where all possible external buffers are allocated.
- *
- * Note: When new types of external storage are allocated, EXT_COUNTERS
- * must be tuned accordingly. Practically, this isn't a big deal
- * as each counter is only a word long, so we can fit
- * (PAGE_SIZE / length of word) counters in a single page.
- *
- * XXX: Must increase this if using any of if_ti, if_wb, if_sk drivers,
- * or any other drivers which may manage their own buffers and
- * eventually attach them to mbufs.
- */
-#define EXT_COUNTERS (nmbclusters + nsfbufs)
-
#ifdef _KERNEL
/*
* Macros for type conversion
@@ -83,7 +61,9 @@
#define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1)))
#endif /* _KERNEL */
-/* header at beginning of each mbuf: */
+/*
+ * Header present at the beginning of every mbuf.
+ */
struct m_hdr {
struct mbuf *mh_next; /* next buffer in chain */
struct mbuf *mh_nextpkt; /* next chain in queue/record */
@@ -93,7 +73,9 @@ struct m_hdr {
short mh_flags; /* flags; see below */
};
-/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
+/*
+ * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
+ */
struct pkthdr {
struct ifnet *rcvif; /* rcv interface */
int len; /* total packet length */
@@ -105,17 +87,23 @@ struct pkthdr {
struct mbuf *aux; /* extra data buffer; ipsec/others */
};
-/* description of external storage mapped into mbuf, valid if M_EXT set */
+/*
+ * Description of external storage mapped into mbuf; valid only if M_EXT is set.
+ */
struct m_ext {
caddr_t ext_buf; /* start of buffer */
void (*ext_free) /* free routine if not the usual */
(caddr_t, void *);
void *ext_args; /* optional argument pointer */
u_int ext_size; /* size of buffer, for ext_free */
- union mext_refcnt *ref_cnt; /* pointer to ref count info */
+ u_int *ref_cnt; /* pointer to ref count info */
int ext_type; /* type of external storage */
};
+/*
+ * The core of the mbuf object along with some shortcut defines for
+ * practical purposes.
+ */
struct mbuf {
struct m_hdr m_hdr;
union {
@@ -141,7 +129,9 @@ struct mbuf {
#define m_pktdat M_dat.MH.MH_dat.MH_databuf
#define m_dat M_dat.M_databuf
-/* mbuf flags */
+/*
+ * mbuf flags
+ */
#define M_EXT 0x0001 /* has associated external storage */
#define M_PKTHDR 0x0002 /* start of record */
#define M_EOR 0x0004 /* end of record */
@@ -152,24 +142,32 @@ struct mbuf {
#define M_PROTO4 0x0080 /* protocol-specific */
#define M_PROTO5 0x0100 /* protocol-specific */
-/* mbuf pkthdr flags, also in m_flags */
+/*
+ * mbuf pkthdr flags (also stored in m_flags)
+ */
#define M_BCAST 0x0200 /* send/received as link-level broadcast */
#define M_MCAST 0x0400 /* send/received as link-level multicast */
#define M_FRAG 0x0800 /* packet is a fragment of a larger packet */
#define M_FIRSTFRAG 0x1000 /* packet is first fragment */
#define M_LASTFRAG 0x2000 /* packet is last fragment */
-/* external buffer types: identify ext_buf type */
+/*
+ * External buffer types: identify ext_buf type
+ */
#define EXT_CLUSTER 1 /* mbuf cluster */
#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */
#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */
#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */
-/* flags copied when copying m_pkthdr */
+/*
+ * Flags copied when copying m_pkthdr
+ */
#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \
M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG|M_RDONLY)
-/* flags indicating hw checksum support and sw checksum requirements */
+/*
+ * Flags indicating hw checksum support and sw checksum requirements
+ */
#define CSUM_IP 0x0001 /* will csum IP */
#define CSUM_TCP 0x0002 /* will csum TCP */
#define CSUM_UDP 0x0004 /* will csum UDP */
@@ -184,7 +182,9 @@ struct mbuf {
#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP)
#define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */
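Where drivers consume these flags is outside this diff; purely as a hedged illustration, a transmit path that defers TCP/UDP checksumming to capable hardware might look like the sketch below (the if_hwassist field on struct ifnet is assumed from contemporary FreeBSD, not defined here):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

/*
 * Illustrative sketch only (not part of this change): if the
 * interface advertises delayed-data checksum support, mark the
 * packet so the hardware computes the TCP/UDP checksum.
 */
static void
sketch_set_tx_csum(struct ifnet *ifp, struct mbuf *m)
{

	if (ifp->if_hwassist & CSUM_DELAY_DATA)
		m->m_pkthdr.csum_flags |= CSUM_TCP | CSUM_UDP;
}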
-/* mbuf types */
+/*
+ * mbuf types
+ */
#define MT_FREE 0 /* should be on free list */
#define MT_DATA 1 /* dynamic (data) allocation */
#define MT_HEADER 2 /* packet header */
@@ -209,17 +209,26 @@ struct mbuf {
#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
/*
- * mbuf statistics
+ * Mbuf and cluster allocation statistics PCPU structure.
+ */
+struct mbpstat {
+ u_long mb_mbfree;
+ u_long mb_mbpgs;
+ u_long mb_clfree;
+ u_long mb_clpgs;
+};
+
+/*
+ * General mbuf statistics structure.
+ * XXX: Modifications of these are not protected by any mutex locks nor by
+ * any atomic() manipulations. As a result, we may occasionally lose
+ * a count or two. Luckily, most of these fields remain static, and the
+ * ones that are manipulated are only touched in failure situations,
+ * which (hopefully) do not occur very often.
*/
struct mbstat {
- u_long m_mbufs; /* # mbufs obtained from page pool */
- u_long m_clusters; /* # clusters obtained from page pool */
- u_long m_clfree; /* # clusters on freelist (cache) */
- u_long m_refcnt; /* # ref counters obtained from page pool */
- u_long m_refree; /* # ref counters on freelist (cache) */
- u_long m_spare; /* spare field */
- u_long m_drops; /* times failed to find space */
- u_long m_wait; /* times waited for space */
+ u_long m_drops; /* times failed to allocate */
+ u_long m_wait; /* times successfully returned from wait */
u_long m_drain; /* times drained protocols for space */
u_long m_mcfail; /* times m_copym failed */
u_long m_mpfail; /* times m_pullup failed */
@@ -230,329 +239,75 @@ struct mbstat {
u_long m_mhlen; /* length of data in a header mbuf */
};
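A minimal userland sketch of how these structures are meant to be consumed, roughly what netstat(1)/systat(1) now do with the per-CPU stats; the sysctl MIB name used below is an assumption, not something this header guarantees:

#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>

#include <stdio.h>
#include <stdlib.h>

/*
 * Sketch: fetch the exported mbpstat array and sum the per-CPU free
 * counts.  "kern.ipc.mb_statpcpu" is a hypothetical MIB name here.
 */
int
main(void)
{
	struct mbpstat *pcpu;
	size_t len;
	u_long mbfree, clfree;
	int i, n;

	len = 0;
	if (sysctlbyname("kern.ipc.mb_statpcpu", NULL, &len, NULL, 0) == -1)
		return (1);
	if ((pcpu = malloc(len)) == NULL)
		return (1);
	if (sysctlbyname("kern.ipc.mb_statpcpu", pcpu, &len, NULL, 0) == -1)
		return (1);
	n = len / sizeof(*pcpu);
	mbfree = clfree = 0;
	for (i = 0; i < n; i++) {
		mbfree += pcpu[i].mb_mbfree;
		clfree += pcpu[i].mb_clfree;
	}
	printf("free mbufs: %lu, free clusters: %lu\n", mbfree, clfree);
	free(pcpu);
	return (0);
}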
-/* flags to m_get/MGET */
+/*
+ * Flags specifying how an allocation should be made.
+ * M_DONTWAIT means "don't block if nothing is available" whereas
+ * M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is
+ * available."
+ */
#define M_DONTWAIT 1
#define M_TRYWAIT 0
#define M_WAIT M_TRYWAIT /* XXX: Deprecated. */
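A brief usage sketch of the two semantics (the helper name is illustrative): pass M_DONTWAIT from contexts that cannot sleep, M_TRYWAIT where a bounded sleep is acceptable; either way the result may still be NULL:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

/*
 * Sketch: the caller picks the wait semantics.  M_DONTWAIT never
 * blocks; M_TRYWAIT may sleep for up to mbuf_wait ticks and can
 * still return NULL if the wait times out.
 */
static struct mbuf *
sketch_alloc_hdr(int can_sleep)
{

	return (m_gethdr(can_sleep ? M_TRYWAIT : M_DONTWAIT, MT_HEADER));
}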
-/*
- * Normal mbuf clusters are normally treated as character arrays
- * after allocation, but use the first word of the buffer as a free list
- * pointer while on the free list.
- */
-union mcluster {
- union mcluster *mcl_next;
- char mcl_buf[MCLBYTES];
-};
-
-/*
- * The m_ext object reference counter structure.
- */
-union mext_refcnt {
- union mext_refcnt *next_ref;
- u_int refcnt;
-};
-
#ifdef _KERNEL
/*
- * The freelists for mbufs and mbuf clusters include condition variables
- * that are used in cases of depletion/starvation.
- * The counter freelist does not require a condition variable as we never
- * expect to consume more than the reserved address space for counters.
- * All are presently protected by the mbuf_mtx lock.
- */
-struct mbffree_lst {
- struct mbuf *m_head;
- struct cv m_starved;
-};
-
-struct mclfree_lst {
- union mcluster *m_head;
- struct cv m_starved;
-};
-
-struct mcntfree_lst {
- union mext_refcnt *m_head;
-};
-
-/*
- * Signal a single instance (if any) blocked on a m_starved cv (i.e. an
- * instance waiting for an {mbuf, cluster} to be freed to the global
- * cache lists).
- *
- * Must be called with mbuf_mtx held.
- */
-#define MBWAKEUP(m_wid, m_cv) do { \
- if ((m_wid) > 0) \
- cv_signal((m_cv)); \
-} while (0)
-
-/*
- * mbuf external reference count management macros:
+ * mbuf external reference count management macros
*
* MEXT_IS_REF(m): true if (m) is not the only mbuf referencing
* the external buffer ext_buf
+ *
* MEXT_REM_REF(m): remove reference to m_ext object
+ *
* MEXT_ADD_REF(m): add reference to m_ext object already
* referred to by (m)
- * MEXT_INIT_REF(m): allocate and initialize an external
- * object reference counter for (m)
*/
-#define MEXT_IS_REF(m) ((m)->m_ext.ref_cnt->refcnt > 1)
+#define MEXT_IS_REF(m) (*((m)->m_ext.ref_cnt) > 1)
#define MEXT_REM_REF(m) do { \
- KASSERT((m)->m_ext.ref_cnt->refcnt > 0, ("m_ext refcnt < 0")); \
- atomic_subtract_int(&((m)->m_ext.ref_cnt->refcnt), 1); \
+ KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \
+ atomic_subtract_int((m)->m_ext.ref_cnt, 1); \
} while(0)
-#define MEXT_ADD_REF(m) atomic_add_int(&((m)->m_ext.ref_cnt->refcnt), 1)
-
-#define _MEXT_ALLOC_CNT(m_cnt, how) do { \
- union mext_refcnt *__mcnt; \
- \
- mtx_lock(&mbuf_mtx); \
- if (mcntfree.m_head == NULL) \
- m_alloc_ref(1, (how)); \
- __mcnt = mcntfree.m_head; \
- if (__mcnt != NULL) { \
- mcntfree.m_head = __mcnt->next_ref; \
- mbstat.m_refree--; \
- __mcnt->refcnt = 0; \
- } \
- mtx_unlock(&mbuf_mtx); \
- (m_cnt) = __mcnt; \
-} while (0)
-
-#define _MEXT_DEALLOC_CNT(m_cnt) do { \
- union mext_refcnt *__mcnt = (m_cnt); \
- \
- mtx_lock(&mbuf_mtx); \
- __mcnt->next_ref = mcntfree.m_head; \
- mcntfree.m_head = __mcnt; \
- mbstat.m_refree++; \
- mtx_unlock(&mbuf_mtx); \
-} while (0)
-
-#define MEXT_INIT_REF(m, how) do { \
- struct mbuf *__mmm = (m); \
- \
- _MEXT_ALLOC_CNT(__mmm->m_ext.ref_cnt, (how)); \
- if (__mmm->m_ext.ref_cnt != NULL) \
- MEXT_ADD_REF(__mmm); \
-} while (0)
+#define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1)
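A minimal sketch of what these macros amount to when an external buffer is shared between two mbufs; real code would normally go through m_copym()/m_copypacket(), so the helper below is illustrative only:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <machine/atomic.h>

/*
 * Sketch: share one external buffer (and its plain-u_int reference
 * counter) between two mbufs.  The second holder simply bumps the
 * shared count with an atomic increment.
 */
static void
sketch_share_ext(struct mbuf *src, struct mbuf *dst)
{

	dst->m_ext = src->m_ext;	/* share ext_buf and ref_cnt */
	dst->m_data = src->m_data;
	dst->m_flags |= M_EXT;
	MEXT_ADD_REF(dst);		/* atomic increment of *ref_cnt */
}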
/*
- * mbuf allocation/deallocation macros:
- *
- * MGET(struct mbuf *m, int how, int type)
- * allocates an mbuf and initializes it to contain internal data.
- *
- * MGETHDR(struct mbuf *m, int how, int type)
- * allocates an mbuf and initializes it to contain a packet header
- * and internal data.
- */
-/*
- * Lower-level macros for MGET(HDR)... Not to be used outside the
- * subsystem ("non-exportable" macro names are prepended with "_").
+ * mbuf, cluster, and external object allocation macros
+ * (for compatibility purposes)
*/
-#define _MGET_SETUP(m_set, m_set_type) do { \
- (m_set)->m_type = (m_set_type); \
- (m_set)->m_next = NULL; \
- (m_set)->m_nextpkt = NULL; \
- (m_set)->m_data = (m_set)->m_dat; \
- (m_set)->m_flags = 0; \
-} while (0)
+#define MGET(m, how, type) \
+ (m) = m_get((how), (type))
-#define _MGET(m_mget, m_get_how) do { \
- if (mmbfree.m_head == NULL) \
- m_mballoc(1, (m_get_how)); \
- (m_mget) = mmbfree.m_head; \
- if ((m_mget) != NULL) { \
- mmbfree.m_head = (m_mget)->m_next; \
- mbtypes[MT_FREE]--; \
- } else { \
- if ((m_get_how) == M_TRYWAIT) \
- (m_mget) = m_mballoc_wait(); \
- } \
-} while (0)
+#define MGETHDR(m, how, type) \
+ (m) = m_gethdr((how), (type))
-#define MGET(m, how, type) do { \
- struct mbuf *_mm; \
- int _mhow = (how); \
- int _mtype = (type); \
- \
- mtx_lock(&mbuf_mtx); \
- _MGET(_mm, _mhow); \
- if (_mm != NULL) { \
- mbtypes[_mtype]++; \
- mtx_unlock(&mbuf_mtx); \
- _MGET_SETUP(_mm, _mtype); \
- } else { \
- mbstat.m_drops++; \
- mtx_unlock(&mbuf_mtx); \
- } \
- (m) = _mm; \
-} while (0)
+#define MCLGET(m, how) \
+ m_clget((m), (how))
-#define _MGETHDR_SETUP(m_set, m_set_type) do { \
- (m_set)->m_type = (m_set_type); \
- (m_set)->m_next = NULL; \
- (m_set)->m_nextpkt = NULL; \
- (m_set)->m_data = (m_set)->m_pktdat; \
- (m_set)->m_flags = M_PKTHDR; \
- (m_set)->m_pkthdr.rcvif = NULL; \
- (m_set)->m_pkthdr.csum_flags = 0; \
- (m_set)->m_pkthdr.aux = NULL; \
-} while (0)
+#define MEXTADD(m, buf, size, free, args, flags, type) \
+ m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type))
-#define MGETHDR(m, how, type) do { \
- struct mbuf *_mm; \
- int _mhow = (how); \
- int _mtype = (type); \
- \
- mtx_lock(&mbuf_mtx); \
- _MGET(_mm, _mhow); \
- if (_mm != NULL) { \
- mbtypes[_mtype]++; \
- mtx_unlock(&mbuf_mtx); \
- _MGETHDR_SETUP(_mm, _mtype); \
- } else { \
- mbstat.m_drops++; \
- mtx_unlock(&mbuf_mtx); \
- } \
- (m) = _mm; \
-} while (0)
-
-/*
- * mbuf external storage macros:
- *
- * MCLGET allocates and refers an mcluster to an mbuf
- * MEXTADD sets up pre-allocated external storage and refers to mbuf
- * MEXTFREE removes reference to external object and frees it if
- * necessary
- */
-#define _MCLALLOC(p, how) do { \
- caddr_t _mp; \
- int _mhow = (how); \
- \
- if (mclfree.m_head == NULL) \
- m_clalloc(1, _mhow); \
- _mp = (caddr_t)mclfree.m_head; \
- if (_mp != NULL) { \
- mbstat.m_clfree--; \
- mclfree.m_head = ((union mcluster *)_mp)->mcl_next; \
- } else { \
- if (_mhow == M_TRYWAIT) \
- _mp = m_clalloc_wait(); \
- } \
- (p) = _mp; \
-} while (0)
-
-#define MCLGET(m, how) do { \
- struct mbuf *_mm = (m); \
- \
- mtx_lock(&mbuf_mtx); \
- _MCLALLOC(_mm->m_ext.ext_buf, (how)); \
- if (_mm->m_ext.ext_buf != NULL) { \
- mtx_unlock(&mbuf_mtx); \
- MEXT_INIT_REF(_mm, (how)); \
- if (_mm->m_ext.ref_cnt == NULL) { \
- _MCLFREE(_mm->m_ext.ext_buf); \
- _mm->m_ext.ext_buf = NULL; \
- } else { \
- _mm->m_data = _mm->m_ext.ext_buf; \
- _mm->m_flags |= M_EXT; \
- _mm->m_ext.ext_free = NULL; \
- _mm->m_ext.ext_args = NULL; \
- _mm->m_ext.ext_size = MCLBYTES; \
- _mm->m_ext.ext_type = EXT_CLUSTER; \
- } \
- } else { \
- mbstat.m_drops++; \
- mtx_unlock(&mbuf_mtx); \
- } \
+#define MFREE(m, n) do { \
+ (n) = m_free((m)); \
+ (m) = NULL; \
} while (0)
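Existing consumers keep using the macro names; a short sketch of the unchanged calling convention, now backed by m_gethdr()/m_clget() (helper name and error handling are illustrative):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

/*
 * Sketch: legacy-style allocation through the compatibility macros.
 * Unmodified drivers keep compiling against this interface.
 */
static struct mbuf *
sketch_legacy_alloc(void)
{
	struct mbuf *m;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	MCLGET(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_free(m);
		return (NULL);
	}
	m->m_len = m->m_pkthdr.len = 0;	/* caller fills in data/lengths */
	return (m);
}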
-#define MEXTADD(m, buf, size, free, args, flags, type) do { \
- struct mbuf *_mm = (m); \
- \
- MEXT_INIT_REF(_mm, M_TRYWAIT); \
- if (_mm->m_ext.ref_cnt != NULL) { \
- _mm->m_flags |= (M_EXT | (flags)); \
- _mm->m_ext.ext_buf = (caddr_t)(buf); \
- _mm->m_data = _mm->m_ext.ext_buf; \
- _mm->m_ext.ext_size = (size); \
- _mm->m_ext.ext_free = (free); \
- _mm->m_ext.ext_args = (args); \
- _mm->m_ext.ext_type = (type); \
- } \
-} while (0)
+#define m_getclr m_get_clrd
-#define _MCLFREE(p) do { \
- union mcluster *_mp = (union mcluster *)(p); \
- \
- mtx_lock(&mbuf_mtx); \
- _mp->mcl_next = mclfree.m_head; \
- mclfree.m_head = _mp; \
- mbstat.m_clfree++; \
- MBWAKEUP(m_clalloc_wid, &mclfree.m_starved); \
- mtx_unlock(&mbuf_mtx); \
-} while (0)
-
-/* MEXTFREE:
+/*
+ * MEXTFREE(m): disassociate (and possibly free) an external object from `m'
+ *
* If the atomic_cmpset_int() returns 0, then we effectively do nothing
* in terms of "cleaning up" (freeing the ext buf and ref. counter) as
* this means that either there are still references, or another thread
* is taking care of the clean-up.
*/
#define MEXTFREE(m) do { \
- struct mbuf *_mmm = (m); \
- \
- MEXT_REM_REF(_mmm); \
- if (atomic_cmpset_int(&_mmm->m_ext.ref_cnt->refcnt, 0, 1)) { \
- if (_mmm->m_ext.ext_type != EXT_CLUSTER) { \
- (*(_mmm->m_ext.ext_free))(_mmm->m_ext.ext_buf, \
- _mmm->m_ext.ext_args); \
- } else \
- _MCLFREE(_mmm->m_ext.ext_buf); \
- _MEXT_DEALLOC_CNT(_mmm->m_ext.ref_cnt); \
- } \
- _mmm->m_flags &= ~M_EXT; \
-} while (0)
-
-/*
- * MFREE(struct mbuf *m, struct mbuf *n)
- * Free a single mbuf and associated external storage.
- * Place the successor, if any, in n.
- *
- * we do need to check non-first mbuf for m_aux, since some of existing
- * code does not call M_PREPEND properly.
- * (example: call to bpf_mtap from drivers)
- */
-#define MFREE(m, n) do { \
- struct mbuf *_mm = (m); \
- struct mbuf *_aux; \
+ struct mbuf *_mb = (m); \
\
- KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf")); \
- if (_mm->m_flags & M_EXT) \
- MEXTFREE(_mm); \
- mtx_lock(&mbuf_mtx); \
- mbtypes[_mm->m_type]--; \
- if ((_mm->m_flags & M_PKTHDR) != 0 && _mm->m_pkthdr.aux) { \
- _aux = _mm->m_pkthdr.aux; \
- _mm->m_pkthdr.aux = NULL; \
- } else { \
- _aux = NULL; \
- } \
- _mm->m_type = MT_FREE; \
- mbtypes[MT_FREE]++; \
- (n) = _mm->m_next; \
- _mm->m_next = mmbfree.m_head; \
- mmbfree.m_head = _mm; \
- MBWAKEUP(m_mballoc_wid, &mmbfree.m_starved); \
- mtx_unlock(&mbuf_mtx); \
- if (_aux) \
- m_freem(_aux); \
+ MEXT_REM_REF(_mb); \
+ if (atomic_cmpset_int(_mb->m_ext.ref_cnt, 0, 1)) \
+ _mext_free(_mb); \
+ _mb->m_flags &= ~M_EXT; \
} while (0)
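To make the reference-counting behaviour concrete, here is a hedged sketch of two packet mbufs sharing one cluster; only the final free wins the cmpset inside MEXTFREE() and actually returns the cluster:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

/*
 * Sketch: m_copypacket() shares the first mbuf's cluster, so the
 * cluster outlives the first m_freem() and is only returned to the
 * pool when the last reference is dropped.
 */
static void
sketch_shared_cluster_free(void)
{
	struct mbuf *a, *b;

	MGETHDR(a, M_DONTWAIT, MT_DATA);
	if (a == NULL)
		return;
	MCLGET(a, M_DONTWAIT);
	if ((a->m_flags & M_EXT) == 0) {
		m_free(a);
		return;
	}
	a->m_len = a->m_pkthdr.len = MCLBYTES;
	b = m_copypacket(a, M_DONTWAIT);  /* shares a's cluster if it succeeds */
	m_freem(a);	/* drops a reference; cluster survives if b holds one */
	if (b != NULL)
		m_freem(b);	/* last reference: cluster goes back to the pool */
}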
/*
@@ -570,8 +325,8 @@ struct mcntfree_lst {
* aux pointer will be moved to `to'.
*/
#define M_COPY_PKTHDR(to, from) do { \
- struct mbuf *_mfrom = (from); \
- struct mbuf *_mto = (to); \
+ struct mbuf *_mfrom = (from); \
+ struct mbuf *_mto = (to); \
\
_mto->m_data = _mto->m_pktdat; \
_mto->m_flags = _mfrom->m_flags & M_COPYFLAGS; \
@@ -621,10 +376,10 @@ struct mcntfree_lst {
* set to NULL.
*/
#define M_PREPEND(m, plen, how) do { \
- struct mbuf **_mmp = &(m); \
- struct mbuf *_mm = *_mmp; \
- int _mplen = (plen); \
- int __mhow = (how); \
+ struct mbuf **_mmp = &(m); \
+ struct mbuf *_mm = *_mmp; \
+ int _mplen = (plen); \
+ int __mhow = (how); \
\
if (M_LEADINGSPACE(_mm) >= _mplen) { \
_mm->m_data -= _mplen; \
@@ -639,16 +394,7 @@ struct mcntfree_lst {
/*
* change mbuf to new type
*/
-#define MCHTYPE(m, t) do { \
- struct mbuf *_mm = (m); \
- int _mt = (t); \
- \
- mtx_lock(&mbuf_mtx); \
- mbtypes[_mm->m_type]--; \
- mbtypes[_mt]++; \
- mtx_unlock(&mbuf_mtx); \
- _mm->m_type = (_mt); \
-} while (0)
+#define MCHTYPE(m, t) (m)->m_type = (t)
/* length to m_copy to copy all */
#define M_COPYALL 1000000000
@@ -665,55 +411,46 @@ struct mauxtag {
void* p;
};
-extern u_long m_clalloc_wid; /* mbuf cluster wait count */
-extern u_long m_mballoc_wid; /* mbuf wait count */
-extern int max_datalen; /* MHLEN - max_hdr */
-extern int max_hdr; /* largest link+protocol header */
-extern int max_linkhdr; /* largest link-level header */
-extern int max_protohdr; /* largest protocol header */
-extern struct mbstat mbstat;
-extern u_long mbtypes[MT_NTYPES]; /* per-type mbuf allocations */
-extern int mbuf_wait; /* mbuf sleep time */
-extern struct mtx mbuf_mtx;
-extern struct mbuf *mbutl; /* virtual address of mclusters */
-extern struct mclfree_lst mclfree;
-extern struct mcntfree_lst mcntfree;
-extern struct mbffree_lst mmbfree;
-extern int nmbclusters;
-extern int nmbcnt;
-extern int nmbufs;
-extern int nsfbufs;
-
-void m_adj(struct mbuf *, int);
-int m_alloc_ref(u_int, int);
-struct mbuf *m_aux_add2 __P((struct mbuf *, int, int, void *));
-struct mbuf *m_aux_find2 __P((struct mbuf *, int, int, void *));
-struct mbuf *m_aux_add(struct mbuf *, int, int);
-void m_aux_delete(struct mbuf *, struct mbuf *);
-struct mbuf *m_aux_find(struct mbuf *, int, int);
-void m_cat(struct mbuf *, struct mbuf *);
-int m_clalloc(int, int);
-caddr_t m_clalloc_wait(void);
-void m_copyback(struct mbuf *, int, int, caddr_t);
-void m_copydata(struct mbuf *, int, int, caddr_t);
-struct mbuf *m_copym(struct mbuf *, int, int, int);
-struct mbuf *m_copypacket(struct mbuf *, int);
-struct mbuf *m_devget(char *, int, int, struct ifnet *,
- void (*copy)(char *, caddr_t, u_int));
-struct mbuf *m_dup(struct mbuf *, int);
-struct mbuf *m_free(struct mbuf *);
-void m_freem(struct mbuf *);
-struct mbuf *m_get(int, int);
-struct mbuf *m_getclr(int, int);
-struct mbuf *m_gethdr(int, int);
-struct mbuf *m_getm(struct mbuf *, int, int, int);
-int m_mballoc(int, int);
-struct mbuf *m_mballoc_wait(void);
-struct mbuf *m_prepend(struct mbuf *, int, int);
-void m_print(const struct mbuf *m);
-struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
-struct mbuf *m_pullup(struct mbuf *, int);
-struct mbuf *m_split(struct mbuf *, int, int);
+extern int max_datalen; /* MHLEN - max_hdr */
+extern int max_hdr; /* largest link + protocol header */
+extern int max_linkhdr; /* largest link-level header */
+extern int max_protohdr; /* largest protocol header */
+extern struct mbpstat mb_statpcpu[]; /* Per-CPU allocation stats. */
+extern struct mbstat mbstat; /* General mbuf stats/infos. */
+extern int nmbclusters; /* Maximum number of clusters */
+extern int nmbcnt; /* Scale kmem_map for counter space */
+extern int nmbufs; /* Maximum number of mbufs */
+extern int nsfbufs; /* Number of sendfile(2) bufs */
+
+void m_adj(struct mbuf *, int);
+struct mbuf *m_aux_add(struct mbuf *, int, int);
+struct mbuf *m_aux_add2(struct mbuf *, int, int, void *);
+void m_aux_delete(struct mbuf *, struct mbuf *);
+struct mbuf *m_aux_find(struct mbuf *, int, int);
+struct mbuf *m_aux_find2(struct mbuf *, int, int, void *);
+void m_cat(struct mbuf *, struct mbuf *);
+void m_clget(struct mbuf *, int);
+void m_extadd(struct mbuf *, caddr_t, u_int,
+ void (*free)(caddr_t, void *), void *, short, int);
+void m_copyback(struct mbuf *, int, int, caddr_t);
+void m_copydata(struct mbuf *, int, int, caddr_t);
+struct mbuf *m_copym(struct mbuf *, int, int, int);
+struct mbuf *m_copypacket(struct mbuf *, int);
+struct mbuf *m_devget(char *, int, int, struct ifnet *,
+ void (*copy)(char *, caddr_t, u_int));
+struct mbuf *m_dup(struct mbuf *, int);
+struct mbuf *m_free(struct mbuf *);
+void m_freem(struct mbuf *);
+struct mbuf *m_get(int, int);
+struct mbuf *m_get_clrd(int, int);
+struct mbuf *m_gethdr(int, int);
+struct mbuf *m_gethdr_clrd(int, int);
+struct mbuf *m_getm(struct mbuf *, int, int, int);
+struct mbuf *m_prepend(struct mbuf *, int, int);
+void m_print(const struct mbuf *m);
+struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
+struct mbuf *m_pullup(struct mbuf *, int);
+struct mbuf *m_split(struct mbuf *, int, int);
#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */
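As a usage note for the external-storage interface declared above, a hedged sketch of a driver attaching its own buffer as EXT_NET_DRV storage via MEXTADD(); the buffer pool, the free routine, and the failure check are assumptions, not code from this commit:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

/*
 * Sketch: hypothetical free routine that would hand `buf' back to a
 * driver-private buffer pool.
 */
static void
sketch_drv_free(caddr_t buf, void *arg)
{

	/* Return `buf' to the driver's private buffer pool. */
}

/*
 * Sketch: attach a driver-supplied receive buffer as external
 * storage of type EXT_NET_DRV.  Whether m_extadd() can fail to set
 * M_EXT is treated defensively here.
 */
static struct mbuf *
sketch_attach_drv_buf(caddr_t buf, u_int size)
{
	struct mbuf *m;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	MEXTADD(m, buf, size, sketch_drv_free, NULL, 0, EXT_NET_DRV);
	if ((m->m_flags & M_EXT) == 0) {
		m_free(m);
		return (NULL);
	}
	m->m_len = m->m_pkthdr.len = size;
	return (m);
}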
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index 307b25d..cb19d94 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -402,8 +402,6 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define KIPC_MAX_PROTOHDR 5 /* int: max length of network header */
#define KIPC_MAX_HDR 6 /* int: max total length of headers */
#define KIPC_MAX_DATALEN 7 /* int: max length of data? */
-#define KIPC_MBSTAT 8 /* struct: mbuf usage statistics */
-#define KIPC_NMBCLUSTERS 9 /* int: maximum mbuf clusters */
/*
* CTL_HW identifiers
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 08ee486..38f969e 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -89,8 +89,6 @@ vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t clean_map=0;
vm_map_t buffer_map=0;
-vm_map_t mb_map=0;
-int mb_map_full=0;
/*
* kmem_alloc_pageable:
@@ -331,6 +329,9 @@ kmem_suballoc(parent, min, max, size)
*
* NOTE: This routine is not supposed to block if M_NOWAIT is set, but
* I have not verified that it actually does not block.
+ *
+ * `map' is ONLY allowed to be kmem_map or one of the mbuf submaps, to
+ * which memory is never freed back.
*/
vm_offset_t
kmem_malloc(map, size, flags)
@@ -344,9 +345,6 @@ kmem_malloc(map, size, flags)
vm_page_t m;
int hadvmlock;
- if (map != kmem_map && map != mb_map)
- panic("kmem_malloc: map != {kmem,mb}_map");
-
hadvmlock = mtx_owned(&vm_mtx);
if (!hadvmlock)
mtx_lock(&vm_mtx);
@@ -362,9 +360,9 @@ kmem_malloc(map, size, flags)
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
vm_map_unlock(map);
- if (map == mb_map) {
- mb_map_full = TRUE;
- printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
+ if (map != kmem_map) {
+ printf("Out of mbuf address space!\n");
+ printf("Consider increasing NMBCLUSTERS\n");
goto bad;
}
if ((flags & M_NOWAIT) == 0)
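Since mb_map is gone, kmem_malloc() now expects either kmem_map or an mbuf submap created by the allocator itself; a hedged sketch of that setup using kmem_suballoc() (map name and sizing are illustrative, not the actual subr_mbuf.c code):

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>

/*
 * Sketch: carve a dedicated cluster submap out of kmem_map and pull
 * wired pages from it with kmem_malloc().  Nothing is ever freed
 * back to the submap, per the comment in kmem_malloc() above.
 */
static vm_map_t mb_cluster_map;			/* hypothetical submap */
static vm_offset_t cl_minaddr, cl_maxaddr;

static void
sketch_setup_cluster_map(void)
{

	mb_cluster_map = kmem_suballoc(kmem_map, &cl_minaddr, &cl_maxaddr,
	    (vm_size_t)nmbclusters * MCLBYTES);
}

static caddr_t
sketch_grab_cluster_page(void)
{

	return ((caddr_t)kmem_malloc(mb_cluster_map, PAGE_SIZE, M_NOWAIT));
}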
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
index a962c17..b615a2e 100644
--- a/sys/vm/vm_kern.h
+++ b/sys/vm/vm_kern.h
@@ -71,8 +71,6 @@
extern vm_map_t buffer_map;
extern vm_map_t kernel_map;
extern vm_map_t kmem_map;
-extern vm_map_t mb_map;
-extern int mb_map_full;
extern vm_map_t clean_map;
extern vm_map_t exec_map;
extern u_int vm_kmem_size;
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 5ee6762..229a822 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -726,14 +726,14 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
mtx_assert(&vm_mtx, MA_OWNED);
start = *addr;
- if (map == kmem_map || map == mb_map)
+ if (map == kmem_map)
s = splvm();
vm_map_lock(map);
if (find_space) {
if (vm_map_findspace(map, start, length, addr)) {
vm_map_unlock(map);
- if (map == kmem_map || map == mb_map)
+ if (map == kmem_map)
splx(s);
return (KERN_NO_SPACE);
}
@@ -743,7 +743,7 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
start, start + length, prot, max, cow);
vm_map_unlock(map);
- if (map == kmem_map || map == mb_map)
+ if (map == kmem_map)
splx(s);
return (result);
@@ -1951,7 +1951,7 @@ vm_map_remove(map, start, end)
int result, s = 0;
mtx_assert(&vm_mtx, MA_OWNED);
- if (map == kmem_map || map == mb_map)
+ if (map == kmem_map)
s = splvm();
vm_map_lock(map);
@@ -1959,7 +1959,7 @@ vm_map_remove(map, start, end)
result = vm_map_delete(map, start, end);
vm_map_unlock(map);
- if (map == kmem_map || map == mb_map)
+ if (map == kmem_map)
splx(s);
return (result);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index e162f94..9717325 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1696,8 +1696,6 @@ vm_object_in_map( object)
return 1;
if( _vm_object_in_map( buffer_map, object, 0))
return 1;
- if( _vm_object_in_map( mb_map, object, 0))
- return 1;
return 0;
}