author    andre <andre@FreeBSD.org>    2005-11-02 16:20:36 +0000
committer andre <andre@FreeBSD.org>    2005-11-02 16:20:36 +0000
commit    b53d0a6c803e7602549b842ef5a0491dfb45d5a1 (patch)
tree      ee80c56a57b4230c8d1731f681fa5aa5c8973a33 /sys
parent    62e84272e80b5648260ea53141f9f07961cad894 (diff)
Mandatory mbuf cluster reference counting and groundwork for UMA
based jumbo 9k and jumbo 16k cluster support.

All mbufs with external storage attached are now mandatorily reference
counted.  For clusters and jumbo clusters UMA provides the refcount
storage directly; it does not have to be allocated separately.  Any
other type of external storage gets its own refcount allocated from a
UMA mbuf-refcount zone instead of normal kernel malloc.

The refcount API, MEXT_ADD_REF() and MEXT_REM_REF(), is no longer
publicly accessible.  The proper m_* functions have to be used.

mb_ctor_clust() and mb_dtor_clust() both handle normal 2k as well as
9k and 16k clusters.

Clusters and jumbo clusters may be obtained without attaching them
immediately to an mbuf.  This is for high-performance cluster
allocation in network drivers where mbufs are attached after the
cluster has been filled.

Tested by:	rwatson
Sponsored by:	TCP/IP Optimizations Fundraise 2005
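The last item, obtaining a bare cluster before any mbuf exists, is the
one aimed at driver authors.  A minimal sketch of that receive-path
pattern, assuming the zones and constructor behavior added below; the
descriptor-ring step and the helper name are hypothetical:

/*
 * Hypothetical RX-refill helper.  Passing NULL as the mbuf argument
 * to uma_zalloc_arg() makes mb_ctor_clust() initialize only the
 * reference count, so the bare cluster can be handed to the NIC and
 * filled by DMA before an mbuf is attached to it.
 */
static void *
rxq_refill_slot(void)
{
	void *buf;

	buf = uma_zalloc_arg(zone_jumbo9, NULL, M_NOWAIT);
	if (buf == NULL)
		return (NULL);		/* zone limit reached, retry later */
	/* ... program the RX descriptor ring with 'buf' ... */
	return (buf);
}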
Diffstat (limited to 'sys')
-rw-r--r--sys/kern/kern_mbuf.c193
-rw-r--r--sys/kern/uipc_mbuf.c188
-rw-r--r--sys/sys/mbuf.h65
-rw-r--r--sys/sys/param.h3
4 files changed, 270 insertions, 179 deletions
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index 5e5f9af..e02acca 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -78,9 +78,25 @@ __FBSDID("$FreeBSD$");
* [ Cluster Slabs ] |
* | [ Mbuf Slabs ]
* \____________(VM)_________________/
+ *
+ *
+ * Whenever an object is allocated with uma_zalloc() out of
+ * one of the Zones, its _ctor_ function is executed.  Likewise,
+ * on any deallocation through uma_zfree() the _dtor_ function
+ * is executed.
+ *
+ * Caches are per-CPU and are filled from the Master Zone.
+ *
+ * Whenever an object is allocated from the underlying global
+ * memory pool it gets pre-initialized with the _zinit_ functions.
+ * When the Kegs are overfull, objects get decommissioned with the
+ * _zfini_ functions and freed back to the global memory pool.
+ *
*/
-int nmbclusters;
+int nmbclusters; /* limits number of mbuf clusters */
+int nmbjumbo9; /* limits number of 9k jumbo clusters */
+int nmbjumbo16; /* limits number of 16k jumbo clusters */
struct mbstat mbstat;
static void
@@ -94,8 +110,13 @@ tunable_mbinit(void *dummy)
SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
SYSCTL_DECL(_kern_ipc);
+/* XXX: These should be tunables. Can't change UMA limits on the fly. */
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0,
"Maximum number of mbuf clusters allowed");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbjumbo9, CTLFLAG_RW, &nmbjumbo9, 0,
+ "Maximum number of mbuf 9k jumbo clusters allowed");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbjumbo16, CTLFLAG_RW, &nmbjumbo16, 0,
+ "Maximum number of mbuf 16k jumbo clusters allowed");
SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
"Mbuf general information and statistics");
@@ -105,6 +126,9 @@ SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
uma_zone_t zone_mbuf;
uma_zone_t zone_clust;
uma_zone_t zone_pack;
+uma_zone_t zone_jumbo9;
+uma_zone_t zone_jumbo16;
+uma_zone_t zone_ext_refcnt;
/*
* Local prototypes.
@@ -113,10 +137,10 @@ static int mb_ctor_mbuf(void *, int, void *, int);
static int mb_ctor_clust(void *, int, void *, int);
static int mb_ctor_pack(void *, int, void *, int);
static void mb_dtor_mbuf(void *, int, void *);
-static void mb_dtor_clust(void *, int, void *); /* XXX */
-static void mb_dtor_pack(void *, int, void *); /* XXX */
-static int mb_init_pack(void *, int, int);
-static void mb_fini_pack(void *, int);
+static void mb_dtor_clust(void *, int, void *);
+static void mb_dtor_pack(void *, int, void *);
+static int mb_zinit_pack(void *, int, int);
+static void mb_zfini_pack(void *, int);
static void mb_reclaim(void *);
static void mbuf_init(void *);
@@ -135,26 +159,58 @@ mbuf_init(void *dummy)
/*
* Configure UMA zones for Mbufs, Clusters, and Packets.
*/
- zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, mb_ctor_mbuf,
- mb_dtor_mbuf,
+ zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
+ mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
- trash_init, trash_fini, MSIZE - 1, UMA_ZONE_MAXBUCKET);
+ trash_init, trash_fini,
#else
- NULL, NULL, MSIZE - 1, UMA_ZONE_MAXBUCKET);
+ NULL, NULL,
#endif
+ MSIZE - 1, UMA_ZONE_MAXBUCKET);
+
zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
- mb_ctor_clust,
+ mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
- mb_dtor_clust, trash_init, trash_fini, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ trash_init, trash_fini,
#else
- mb_dtor_clust, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ NULL, NULL,
#endif
+ UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
if (nmbclusters > 0)
uma_zone_set_max(zone_clust, nmbclusters);
+
zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
- mb_dtor_pack, mb_init_pack, mb_fini_pack, zone_mbuf);
+ mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
+
+	/* Make jumbo frame zones too: 9k and 16k. */
+ zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
+ mb_ctor_clust, mb_dtor_clust,
+#ifdef INVARIANTS
+ trash_init, trash_fini,
+#else
+ NULL, NULL,
+#endif
+ UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ if (nmbjumbo9 > 0)
+ uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+
+ zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
+ mb_ctor_clust, mb_dtor_clust,
+#ifdef INVARIANTS
+ trash_init, trash_fini,
+#else
+ NULL, NULL,
+#endif
+ UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ if (nmbjumbo16 > 0)
+ uma_zone_set_max(zone_jumbo16, nmbjumbo16);
- /* uma_prealloc() goes here */
+ zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
+ NULL, NULL,
+ NULL, NULL,
+ UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
+
+ /* uma_prealloc() goes here... */
/*
* Hook event handler for low-memory situation, used to
@@ -189,7 +245,7 @@ mbuf_init(void *dummy)
*
* The 'arg' pointer points to a mb_args structure which
* contains call-specific information required to support the
- * mbuf allocation API.
+ * mbuf allocation API. See mbuf.h.
*/
static int
mb_ctor_mbuf(void *mem, int size, void *arg, int how)
@@ -210,14 +266,25 @@ mb_ctor_mbuf(void *mem, int size, void *arg, int how)
flags = args->flags;
type = args->type;
- m->m_type = type;
+ /*
+ * The mbuf is initialized later. The caller has the
+ * responsibility to set up any MAC labels too.
+ */
+ if (type == MT_NOINIT)
+ return (0);
+
m->m_next = NULL;
m->m_nextpkt = NULL;
+ m->m_len = 0;
m->m_flags = flags;
+ m->m_type = type;
if (flags & M_PKTHDR) {
m->m_data = m->m_pktdat;
m->m_pkthdr.rcvif = NULL;
+ m->m_pkthdr.len = 0;
+ m->m_pkthdr.header = NULL;
m->m_pkthdr.csum_flags = 0;
+ m->m_pkthdr.csum_data = 0;
SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
/* If the label init fails, fail the alloc */
@@ -231,7 +298,7 @@ mb_ctor_mbuf(void *mem, int size, void *arg, int how)
}
/*
- * The Mbuf master zone and Packet secondary zone destructor.
+ * The Mbuf master zone destructor.
*/
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
@@ -241,12 +308,15 @@ mb_dtor_mbuf(void *mem, int size, void *arg)
m = (struct mbuf *)mem;
if ((m->m_flags & M_PKTHDR) != 0)
m_tag_delete_chain(m, NULL);
+ KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
#ifdef INVARIANTS
trash_dtor(mem, size, arg);
#endif
}
-/* XXX Only because of stats */
+/*
+ * The Mbuf Packet zone destructor.
+ */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
@@ -255,41 +325,81 @@ mb_dtor_pack(void *mem, int size, void *arg)
m = (struct mbuf *)mem;
if ((m->m_flags & M_PKTHDR) != 0)
m_tag_delete_chain(m, NULL);
+
+ /* Make sure we've got a clean cluster back. */
+ KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
+ KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
+ KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
+ KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__));
+ KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
+ KASSERT(m->m_ext.ext_type == EXT_CLUSTER, ("%s: ext_type != EXT_CLUSTER", __func__));
+ KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
#ifdef INVARIANTS
trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
}
/*
- * The Cluster zone constructor.
+ * The Cluster and Jumbo[9|16] zone constructor.
*
* Here the 'arg' pointer points to the Mbuf which we
- * are configuring cluster storage for.
+ * are configuring cluster storage for. If 'arg' is
+ * NULL we allocate just the cluster without attaching
+ * it to an mbuf. See mbuf.h.
*/
static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
struct mbuf *m;
+ u_int *refcnt;
+ int type = 0;
#ifdef INVARIANTS
trash_ctor(mem, size, arg, how);
#endif
m = (struct mbuf *)arg;
- m->m_ext.ext_buf = (caddr_t)mem;
- m->m_data = m->m_ext.ext_buf;
- m->m_flags |= M_EXT;
- m->m_ext.ext_free = NULL;
- m->m_ext.ext_args = NULL;
- m->m_ext.ext_size = MCLBYTES;
- m->m_ext.ext_type = EXT_CLUSTER;
- m->m_ext.ref_cnt = NULL; /* Lazy counter assign. */
+ if (m != NULL) {
+ switch (size) {
+ case MCLBYTES:
+ type = EXT_CLUSTER;
+ break;
+ case MJUM9BYTES:
+ type = EXT_JUMBO9;
+ break;
+ case MJUM16BYTES:
+ type = EXT_JUMBO16;
+ break;
+ default:
+ panic("unknown cluster size");
+ break;
+ }
+ m->m_ext.ext_buf = (caddr_t)mem;
+ m->m_data = m->m_ext.ext_buf;
+ m->m_flags |= M_EXT;
+ m->m_ext.ext_free = NULL;
+ m->m_ext.ext_args = NULL;
+ m->m_ext.ext_size = size;
+ m->m_ext.ext_type = type;
+ m->m_ext.ref_cnt = uma_find_refcnt(zone_clust, mem);
+ *m->m_ext.ref_cnt = 1;
+ } else {
+ refcnt = uma_find_refcnt(zone_clust, mem);
+ *refcnt = 1;
+ }
return (0);
}
-/* XXX */
+/*
+ * The Mbuf Cluster zone destructor.
+ */
static void
mb_dtor_clust(void *mem, int size, void *arg)
{
+ u_int *refcnt;
+
+ refcnt = uma_find_refcnt(zone_clust, mem);
+ KASSERT(*refcnt == 1, ("%s: refcnt incorrect %u", __func__, *refcnt));
+ *refcnt = 0;
#ifdef INVARIANTS
trash_dtor(mem, size, arg);
#endif
@@ -297,15 +407,14 @@ mb_dtor_clust(void *mem, int size, void *arg)
/*
* The Packet secondary zone's init routine, executed on the
- * object's transition from keg slab to zone cache.
+ * object's transition from mbuf keg slab to zone cache.
*/
static int
-mb_init_pack(void *mem, int size, int how)
+mb_zinit_pack(void *mem, int size, int how)
{
struct mbuf *m;
- m = (struct mbuf *)mem;
- m->m_ext.ext_buf = NULL;
+ m = (struct mbuf *)mem; /* m is virgin. */
uma_zalloc_arg(zone_clust, m, how);
if (m->m_ext.ext_buf == NULL)
return (ENOMEM);
@@ -320,7 +429,7 @@ mb_init_pack(void *mem, int size, int how)
* object's transition from zone cache to keg slab.
*/
static void
-mb_fini_pack(void *mem, int size)
+mb_zfini_pack(void *mem, int size)
{
struct mbuf *m;
@@ -329,7 +438,6 @@ mb_fini_pack(void *mem, int size)
trash_fini(m->m_ext.ext_buf, MCLBYTES);
#endif
uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
- m->m_ext.ext_buf = NULL;
#ifdef INVARIANTS
trash_dtor(mem, size, NULL);
#endif
@@ -357,20 +465,19 @@ mb_ctor_pack(void *mem, int size, void *arg, int how)
#ifdef INVARIANTS
trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif
- m->m_type = type;
m->m_next = NULL;
m->m_nextpkt = NULL;
m->m_data = m->m_ext.ext_buf;
- m->m_flags = flags|M_EXT;
- m->m_ext.ext_free = NULL;
- m->m_ext.ext_args = NULL;
- m->m_ext.ext_size = MCLBYTES;
- m->m_ext.ext_type = EXT_PACKET;
- m->m_ext.ref_cnt = NULL; /* Lazy counter assign. */
+ m->m_len = 0;
+ m->m_flags = (flags | M_EXT);
+ m->m_type = type;
if (flags & M_PKTHDR) {
m->m_pkthdr.rcvif = NULL;
+ m->m_pkthdr.len = 0;
+ m->m_pkthdr.header = NULL;
m->m_pkthdr.csum_flags = 0;
+ m->m_pkthdr.csum_data = 0;
SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
/* If the label init fails, fail the alloc */
@@ -379,6 +486,8 @@ mb_ctor_pack(void *mem, int size, void *arg, int how)
return (error);
#endif
}
+ /* m_ext is already initialized. */
+
return (0);
}
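The block comment this file now carries describes a four-hook object
lifecycle: _zinit_/_zfini_ run when objects cross between the Keg
(global pool) and the Zone cache, while _ctor_/_dtor_ run on every
uma_zalloc()/uma_zfree().  A toy zone as a hedged illustration of that
split; every 'foo' name here is invented:

/*
 * foo_zinit()/foo_zfini() do the expensive one-time work when objects
 * enter or leave the zone cache; foo_ctor()/foo_dtor() do the cheap
 * per-allocation work.
 */
struct foo {
	int	f_state;
};

static uma_zone_t foo_zone;

static int
foo_ctor(void *mem, int size, void *arg, int how)
{
	struct foo *f = mem;

	f->f_state = 1;			/* cheap, runs on every alloc */
	return (0);
}

static void
foo_dtor(void *mem, int size, void *arg)
{
	struct foo *f = mem;

	f->f_state = 0;			/* cheap, runs on every free */
}

static int
foo_zinit(void *mem, int size, int how)
{
	bzero(mem, size);		/* expensive, keg -> cache only */
	return (0);
}

static void
foo_zfini(void *mem, int size)
{
	/* undo foo_zinit() work, cache -> keg only */
}

static void
foo_init(void *dummy)
{
	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    foo_ctor, foo_dtor, foo_zinit, foo_zfini,
	    UMA_ALIGN_PTR, 0);
}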
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index 542cb46..9d90e01 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -86,11 +86,6 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
#endif
/*
- * Malloc-type for external ext_buf ref counts.
- */
-static MALLOC_DEFINE(M_MBUF, "mbextcnt", "mbuf external ref counts");
-
-/*
* Allocate a given length worth of mbufs and/or clusters (whatever fits
* best) and return a pointer to the top of the allocated chain. If an
* existing mbuf chain is provided, then we will append the new chain
@@ -192,16 +187,10 @@ void
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
void (*freef)(void *, void *), void *args, int flags, int type)
{
- u_int *ref_cnt = NULL;
-
- /* XXX Shouldn't be adding EXT_CLUSTER with this API */
- if (type == EXT_CLUSTER)
- ref_cnt = (u_int *)uma_find_refcnt(zone_clust,
- mb->m_ext.ext_buf);
- else if (type == EXT_EXTREF)
- ref_cnt = __DEVOLATILE(u_int *, mb->m_ext.ref_cnt);
- mb->m_ext.ref_cnt = (ref_cnt == NULL) ?
- malloc(sizeof(u_int), M_MBUF, M_NOWAIT) : (u_int *)ref_cnt;
+ KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
+
+ if (type != EXT_EXTREF)
+ mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT);
if (mb->m_ext.ref_cnt != NULL) {
*(mb->m_ext.ref_cnt) = 1;
mb->m_flags |= (M_EXT | flags);
@@ -216,60 +205,86 @@ m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
/*
* Non-directly-exported function to clean up after mbufs with M_EXT
- * storage attached to them if the reference count hits 0.
+ * storage attached to them if the reference count hits 1.
*/
void
mb_free_ext(struct mbuf *m)
{
- u_int cnt;
- int dofree;
-
- /* Account for lazy ref count assign. */
- if (m->m_ext.ref_cnt == NULL)
- dofree = 1;
- else
- dofree = 0;
-
- /*
- * This is tricky. We need to make sure to decrement the
- * refcount in a safe way but to also clean up if we're the
- * last reference. This method seems to do it without race.
- */
- while (dofree == 0) {
- cnt = *(m->m_ext.ref_cnt);
- if (atomic_cmpset_int(m->m_ext.ref_cnt, cnt, cnt - 1)) {
- if (cnt == 1)
- dofree = 1;
- break;
- }
- }
-
- if (dofree) {
- /*
- * Do the free, should be safe.
- */
- if (m->m_ext.ext_type == EXT_PACKET) {
+ KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
+ KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
+
+ /* Free attached storage if this mbuf is the only reference to it. */
+ if (*(m->m_ext.ref_cnt) == 1 ||
+ atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
+ switch (m->m_ext.ext_type) {
+ case EXT_CLUSTER:
uma_zfree(zone_pack, m);
- return;
- } else if (m->m_ext.ext_type == EXT_CLUSTER) {
- uma_zfree(zone_clust, m->m_ext.ext_buf);
- m->m_ext.ext_buf = NULL;
- } else {
+ return; /* Job done. */
+ break;
+ case EXT_JUMBO9:
+ uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
+ break;
+ case EXT_JUMBO16:
+ uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
+ break;
+ case EXT_SFBUF:
+ case EXT_NET_DRV:
+ case EXT_MOD_TYPE:
+ case EXT_DISPOSABLE:
+ *(m->m_ext.ref_cnt) = 0;
+ uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
+ m->m_ext.ref_cnt));
+ /* FALLTHROUGH */
+ case EXT_EXTREF:
+ KASSERT(m->m_ext.ext_free != NULL,
+ ("%s: ext_free not set", __func__));
(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
m->m_ext.ext_args);
- if (m->m_ext.ext_type != EXT_EXTREF) {
- if (m->m_ext.ref_cnt != NULL)
- free(__DEVOLATILE(u_int *,
- m->m_ext.ref_cnt), M_MBUF);
- m->m_ext.ref_cnt = NULL;
- }
- m->m_ext.ext_buf = NULL;
+ break;
+ default:
+ KASSERT(m->m_ext.ext_type == 0,
+ ("%s: unknown ext_type", __func__));
}
}
+ /*
+ * Free this mbuf back to the mbuf zone with all m_ext
+ * information purged.
+ */
+ m->m_ext.ext_buf = NULL;
+ m->m_ext.ext_free = NULL;
+ m->m_ext.ext_args = NULL;
+ m->m_ext.ref_cnt = NULL;
+ m->m_ext.ext_size = 0;
+ m->m_ext.ext_type = 0;
+ m->m_flags &= ~M_EXT;
uma_zfree(zone_mbuf, m);
}
/*
+ * Attach the cluster from *m to *n, set up m_ext in *n
+ * and bump the refcount of the cluster.
+ */
+static void
+mb_dupcl(struct mbuf *n, struct mbuf *m)
+{
+ KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
+ KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
+ KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
+
+ if (*(m->m_ext.ref_cnt) == 1)
+ *(m->m_ext.ref_cnt) += 1;
+ else
+ atomic_add_int(m->m_ext.ref_cnt, 1);
+ n->m_ext.ext_buf = m->m_ext.ext_buf;
+ n->m_ext.ext_free = m->m_ext.ext_free;
+ n->m_ext.ext_args = m->m_ext.ext_args;
+ n->m_ext.ext_size = m->m_ext.ext_size;
+ n->m_ext.ref_cnt = m->m_ext.ref_cnt;
+ n->m_ext.ext_type = m->m_ext.ext_type;
+ n->m_flags |= M_EXT;
+}
+
+/*
* Clean up mbuf (chain) from any tags and packet headers.
* If "all" is set then the first mbuf in the chain will be
* cleaned too.
@@ -533,10 +548,7 @@ m_copym(struct mbuf *m, int off0, int len, int wait)
n->m_len = min(len, m->m_len - off);
if (m->m_flags & M_EXT) {
n->m_data = m->m_data + off;
- n->m_ext = m->m_ext;
- n->m_flags |= M_EXT;
- MEXT_ADD_REF(m);
- n->m_ext.ref_cnt = m->m_ext.ref_cnt;
+ mb_dupcl(n, m);
} else
bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
(u_int)n->m_len);
@@ -578,9 +590,9 @@ struct mbuf *
m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len,
int prep, int how)
{
- struct mbuf *mm, *x, *z;
+ struct mbuf *mm, *x, *z, *prev = NULL;
caddr_t p;
- int i, mlen, nlen = 0;
+ int i, nlen = 0;
caddr_t buf[MLEN];
KASSERT(m != NULL && n != NULL, ("m_copymdata, no target or source"));
@@ -588,25 +600,13 @@ m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len,
KASSERT(len >= 0, ("m_copymdata, negative len %d", len));
KASSERT(prep == 0 || prep == 1, ("m_copymdata, unknown direction %d", prep));
- /* Make sure environment is sane. */
- for (z = m; z != NULL; z = z->m_next) {
- mlen += z->m_len;
- if (!M_WRITABLE(z)) {
- /* Make clusters writeable. */
- if (z->m_flags & M_RDONLY)
- return NULL; /* Can't handle ext ref. */
- x = m_getcl(how, MT_DATA, 0);
- if (!x)
- return NULL;
- bcopy(z->m_ext.ext_buf, x->m_ext.ext_buf, x->m_ext.ext_size);
- p = x->m_ext.ext_buf + (z->m_data - z->m_ext.ext_buf);
- MEXT_REM_REF(z); /* XXX */
- z->m_data = p;
- x->m_flags &= ~M_EXT;
- (void)m_free(x);
+ mm = m;
+ if (!prep) {
+ while(mm->m_next) {
+ prev = mm;
+ mm = mm->m_next;
}
}
- mm = prep ? m : z;
for (z = n; z != NULL; z = z->m_next)
nlen += z->m_len;
if (len == M_COPYALL)
@@ -614,6 +614,21 @@ m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len,
if (off + len > nlen || len < 1)
return NULL;
+ if (!M_WRITABLE(mm)) {
+ /* XXX: Use proper m_xxx function instead. */
+ x = m_getcl(how, MT_DATA, mm->m_flags);
+ if (x == NULL)
+ return NULL;
+ bcopy(mm->m_ext.ext_buf, x->m_ext.ext_buf, x->m_ext.ext_size);
+ p = x->m_ext.ext_buf + (mm->m_data - mm->m_ext.ext_buf);
+ x->m_data = p;
+ mm->m_next = NULL;
+ if (mm != m)
+ prev->m_next = x;
+ m_free(mm);
+ mm = x;
+ }
+
/*
* Append/prepend the data. Allocating mbufs as necessary.
*/
@@ -726,10 +741,7 @@ m_copypacket(struct mbuf *m, int how)
n->m_len = m->m_len;
if (m->m_flags & M_EXT) {
n->m_data = m->m_data;
- n->m_ext = m->m_ext;
- n->m_flags |= M_EXT;
- MEXT_ADD_REF(m);
- n->m_ext.ref_cnt = m->m_ext.ref_cnt;
+ mb_dupcl(n, m);
} else {
n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
@@ -747,10 +759,7 @@ m_copypacket(struct mbuf *m, int how)
n->m_len = m->m_len;
if (m->m_flags & M_EXT) {
n->m_data = m->m_data;
- n->m_ext = m->m_ext;
- n->m_flags |= M_EXT;
- MEXT_ADD_REF(m);
- n->m_ext.ref_cnt = m->m_ext.ref_cnt;
+ mb_dupcl(n, m);
} else {
bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
}
@@ -1133,11 +1142,8 @@ m_split(struct mbuf *m0, int len0, int wait)
}
extpacket:
if (m->m_flags & M_EXT) {
- n->m_flags |= M_EXT;
- n->m_ext = m->m_ext;
- MEXT_ADD_REF(m);
- n->m_ext.ref_cnt = m->m_ext.ref_cnt;
n->m_data = m->m_data + len;
+ mb_dupcl(n, m);
} else {
bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
}
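With the malloc-based refcount path removed, external storage that UMA
does not manage gets its counter from zone_ext_refcnt inside
m_extadd().  A sketch of a driver attaching a private buffer under the
new rules; the my_* names and the private pool are hypothetical:

/*
 * my_buf_free() is invoked by mb_free_ext() when the last reference
 * goes away; the refcount itself is returned to zone_ext_refcnt by
 * mb_free_ext(), not by the driver.
 */
static void
my_buf_free(void *buf, void *args)
{
	/* return 'buf' to the driver's private pool */
}

static struct mbuf *
my_buf_attach(caddr_t buf, u_int len, void *sc)
{
	struct mbuf *m;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m_extadd(m, buf, len, my_buf_free, sc, 0, EXT_NET_DRV);
	if ((m->m_flags & M_EXT) == 0) {
		/* refcount allocation failed; buf is still ours */
		m_free(m);
		return (NULL);
	}
	return (m);
}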
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 91ab0b2..262a02e 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -185,7 +185,8 @@ struct mbuf {
*/
#define EXT_CLUSTER 1 /* mbuf cluster */
#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */
-#define EXT_PACKET 3 /* came out of Packet zone */
+#define EXT_JUMBO9 3 /* jumbo cluster 9216 bytes */
+#define EXT_JUMBO16 4 /* jumbo cluster 16384 bytes */
#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */
#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */
#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */
@@ -242,6 +243,9 @@ struct mbuf {
#define MT_OOBDATA 15 /* expedited data */
#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
+#define MT_NOINIT 255 /* Not a type but a flag to allocate
+ a non-initialized mbuf */
+
/*
* General mbuf allocator statistics structure.
*/
@@ -295,53 +299,19 @@ struct mbstat {
#define MBUF_MEM_NAME "mbuf"
#define MBUF_CLUSTER_MEM_NAME "mbuf_cluster"
#define MBUF_PACKET_MEM_NAME "mbuf_packet"
+#define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k"
+#define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k"
#define MBUF_TAG_MEM_NAME "mbuf_tag"
+#define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt"
#ifdef _KERNEL
-/*-
- * mbuf external reference count management macros.
- *
- * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing
- * the external buffer ext_buf.
- *
- * MEXT_REM_REF(m): remove reference to m_ext object.
- *
- * MEXT_ADD_REF(m): add reference to m_ext object already
- * referred to by (m). XXX Note that it is VERY important that you
- * always set the second mbuf's m_ext.ref_cnt to point to the first
- * one's (i.e., n->m_ext.ref_cnt = m->m_ext.ref_cnt) AFTER you run
- * MEXT_ADD_REF(m). This is because m might have a lazy initialized
- * ref_cnt (NULL) before this is run and it will only be looked up
- * from here. We should make MEXT_ADD_REF() always take two mbufs
- * as arguments so that it can take care of this itself.
- */
-#define MEXT_IS_REF(m) (((m)->m_ext.ref_cnt != NULL) \
- && (*((m)->m_ext.ref_cnt) > 1))
-
-#define MEXT_REM_REF(m) do { \
- KASSERT((m)->m_ext.ref_cnt != NULL, ("m_ext refcnt lazy NULL")); \
- KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \
- atomic_subtract_int((m)->m_ext.ref_cnt, 1); \
-} while(0)
-
-#define MEXT_ADD_REF(m) do { \
- if ((m)->m_ext.ref_cnt == NULL) { \
- KASSERT((m)->m_ext.ext_type == EXT_CLUSTER || \
- (m)->m_ext.ext_type == EXT_PACKET, \
- ("Unexpected mbuf type has lazy refcnt")); \
- (m)->m_ext.ref_cnt = (u_int *)uma_find_refcnt( \
- zone_clust, (m)->m_ext.ext_buf); \
- *((m)->m_ext.ref_cnt) = 2; \
- } else \
- atomic_add_int((m)->m_ext.ref_cnt, 1); \
-} while (0)
#ifdef WITNESS
#define MBUF_CHECKSLEEP(how) do { \
if (how == M_WAITOK) \
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \
"Sleeping in \"%s\"", __func__); \
-} while(0)
+} while (0)
#else
#define MBUF_CHECKSLEEP(how)
#endif
@@ -355,6 +325,9 @@ struct mbstat {
extern uma_zone_t zone_mbuf;
extern uma_zone_t zone_clust;
extern uma_zone_t zone_pack;
+extern uma_zone_t zone_jumbo9;
+extern uma_zone_t zone_jumbo16;
+extern uma_zone_t zone_ext_refcnt;
static __inline struct mbuf *m_get(int how, short type);
static __inline struct mbuf *m_gethdr(int how, short type);
@@ -420,9 +393,6 @@ m_free(struct mbuf *m)
{
struct mbuf *n = m->m_next;
-#ifdef INVARIANTS
- m->m_flags |= M_FREELIST;
-#endif
if (m->m_flags & M_EXT)
mb_free_ext(m);
else
@@ -434,7 +404,8 @@ static __inline
void
m_clget(struct mbuf *m, int how)
{
-
+ if (m->m_flags & M_EXT)
+ printf("%s: %p mbuf already has cluster\n", __func__, m);
m->m_ext.ext_buf = NULL;
uma_zalloc_arg(zone_clust, m, how);
}
@@ -463,8 +434,9 @@ m_chtype(struct mbuf *m, short new_type)
* can be both the local data payload, or an external buffer area,
* depending on whether M_EXT is set).
*/
-#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && (!((m)->m_flags \
- & M_EXT) || !MEXT_IS_REF(m)))
+#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \
+ (!(((m)->m_flags & M_EXT)) || \
+ (*((m)->m_ext.ref_cnt) == 1)) ) \
/* Check if the supplied mbuf has a packet header, or else panic. */
#define M_ASSERTPKTHDR(m) \
@@ -472,8 +444,9 @@ m_chtype(struct mbuf *m, short new_type)
("%s: no mbuf packet header!", __func__))
/* Ensure that the supplied mbuf is a valid, non-free mbuf. */
+/* XXX: Broken at the moment. Need some UMA magic to make it work again. */
#define M_ASSERTVALID(m) \
- KASSERT((((struct mbuf *)m)->m_flags & M_FREELIST) == 0, \
+ KASSERT((((struct mbuf *)m)->m_flags & 0) == 0, \
("%s: attempted use of a free mbuf!", __func__))
/*
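Under mandatory reference counting M_WRITABLE() reduces to a flag test
plus a plain load of *ref_cnt, so the usual copy-before-write idiom
stays cheap.  A hedged sketch; using m_dup() as the fallback here is
just one option among several:

/*
 * Ensure *mp may be modified in place; replace it with a private
 * deep copy if the data area is shared or read-only.
 */
static int
make_writable(struct mbuf **mp)
{
	struct mbuf *n;

	if (M_WRITABLE(*mp))
		return (0);
	n = m_dup(*mp, M_DONTWAIT);	/* copies into fresh clusters */
	if (n == NULL)
		return (ENOBUFS);
	m_freem(*mp);
	*mp = n;
	return (0);
}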
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 8bda6ae..3116108 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -147,6 +147,9 @@
#define MCLBYTES (1 << MCLSHIFT) /* size of an mbuf cluster */
+#define MJUM9BYTES (9 * 1024) /* jumbo frame 9k */
+#define MJUM16BYTES (16 * 1024) /* jumbo frame 16k */
+
/*
* Some macros for units conversion
*/
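For scale: a jumbo Ethernet frame carries up to 9000 bytes of payload
plus link-layer overhead, which fits in MJUM9BYTES with room to spare,
while MJUM16BYTES covers a full 16k.  A small sketch of picking the
smallest zone that holds a given frame; the helper name is invented:

/*
 * Map a receive frame length to the smallest suitable cluster zone.
 */
static uma_zone_t
cluster_zone_for(u_int framelen)
{
	if (framelen <= MCLBYTES)
		return (zone_clust);
	if (framelen <= MJUM9BYTES)
		return (zone_jumbo9);
	if (framelen <= MJUM16BYTES)
		return (zone_jumbo16);
	return (NULL);		/* too big for a single cluster */
}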