summaryrefslogtreecommitdiffstats
path: root/sys/kern/uipc_mbuf.c
diff options
context:
space:
mode:
authorglebius <glebius@FreeBSD.org>2014-07-11 19:40:50 +0000
committerglebius <glebius@FreeBSD.org>2014-07-11 19:40:50 +0000
commit1b591cdaa850f86c2091fced7ba790ac0bbef35a (patch)
treed39235ea546a9d1310ed21dec1dc4edd42980c35 /sys/kern/uipc_mbuf.c
parentd1d1e8d008a8740852b8d9f8e4db7db45c6e368c (diff)
downloadFreeBSD-src-1b591cdaa850f86c2091fced7ba790ac0bbef35a.zip
FreeBSD-src-1b591cdaa850f86c2091fced7ba790ac0bbef35a.tar.gz
Improve reference counting of EXT_SFBUF pages attached to mbufs.
o Do not use the UMA refcount zone. The problem with this zone is that several refcounting words (16 on amd64) share the same cache line, and issuing atomic(9) updates on them creates cache line contention. Also, allocating and freeing them costs extra CPU cycles. Instead, refcount the page directly via vm_page_wire() and the sfbuf via sf_buf_alloc(sf_buf_page(sf)) [1]. o Call the refcounting/freeing function for EXT_SFBUF via a direct function call, instead of a function pointer. This removes a barrier for the CPU branch predictor. o Do not clean up the mbuf to be freed in mb_free_ext() merely to satisfy the assertion in mb_dtor_mbuf(). Remove the assertion from mb_dtor_mbuf(). Use bcopy() instead of manual assignments to copy m_ext in mb_dupcl(). [1] This has some problems for now. Using sf_buf_alloc() merely to increase the refcount is expensive, and is broken on sparc64. To be fixed. Sponsored by: Netflix Sponsored by: Nginx, Inc.
Diffstat (limited to 'sys/kern/uipc_mbuf.c')
-rw-r--r--sys/kern/uipc_mbuf.c79
1 files changed, 39 insertions, 40 deletions
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index 8a56227..36432e0 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -287,19 +287,31 @@ m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
void
mb_free_ext(struct mbuf *m)
{
- int skipmbuf;
+ int freembuf;
- KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
- KASSERT(m->m_ext.ext_cnt != NULL, ("%s: ext_cnt not set", __func__));
+ KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
/*
- * check if the header is embedded in the cluster
+ * Check if the header is embedded in the cluster.
*/
- skipmbuf = (m->m_flags & M_NOFREE);
+ freembuf = (m->m_flags & M_NOFREE) ? 0 : 1;
+
+ switch (m->m_ext.ext_type) {
+ case EXT_SFBUF:
+ sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
+ break;
+ default:
+ KASSERT(m->m_ext.ext_cnt != NULL,
+ ("%s: no refcounting pointer on %p", __func__, m));
+ /*
+ * Free attached storage if this mbuf is the only
+ * reference to it.
+ */
+ if (*(m->m_ext.ext_cnt) != 1) {
+ if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1)
+ break;
+ }
- /* Free attached storage if this mbuf is the only reference to it. */
- if (*(m->m_ext.ext_cnt) == 1 ||
- atomic_fetchadd_int(m->m_ext.ext_cnt, -1) == 1) {
switch (m->m_ext.ext_type) {
case EXT_PACKET: /* The packet zone is special. */
if (*(m->m_ext.ext_cnt) == 0)
@@ -318,7 +330,6 @@ mb_free_ext(struct mbuf *m)
case EXT_JUMBO16:
uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
break;
- case EXT_SFBUF:
case EXT_NET_DRV:
case EXT_MOD_TYPE:
case EXT_DISPOSABLE:
@@ -337,23 +348,9 @@ mb_free_ext(struct mbuf *m)
("%s: unknown ext_type", __func__));
}
}
- if (skipmbuf)
- return;
- /*
- * Free this mbuf back to the mbuf zone with all m_ext
- * information purged.
- */
- m->m_ext.ext_buf = NULL;
- m->m_ext.ext_free = NULL;
- m->m_ext.ext_arg1 = NULL;
- m->m_ext.ext_arg2 = NULL;
- m->m_ext.ext_cnt = NULL;
- m->m_ext.ext_size = 0;
- m->m_ext.ext_type = 0;
- m->m_ext.ext_flags = 0;
- m->m_flags &= ~M_EXT;
- uma_zfree(zone_mbuf, m);
+ if (freembuf)
+ uma_zfree(zone_mbuf, m);
}
/*
@@ -363,22 +360,24 @@ mb_free_ext(struct mbuf *m)
static void
mb_dupcl(struct mbuf *n, struct mbuf *m)
{
- KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
- KASSERT(m->m_ext.ext_cnt != NULL, ("%s: ext_cnt not set", __func__));
- KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
- if (*(m->m_ext.ext_cnt) == 1)
- *(m->m_ext.ext_cnt) += 1;
- else
- atomic_add_int(m->m_ext.ext_cnt, 1);
- n->m_ext.ext_buf = m->m_ext.ext_buf;
- n->m_ext.ext_free = m->m_ext.ext_free;
- n->m_ext.ext_arg1 = m->m_ext.ext_arg1;
- n->m_ext.ext_arg2 = m->m_ext.ext_arg2;
- n->m_ext.ext_size = m->m_ext.ext_size;
- n->m_ext.ext_cnt = m->m_ext.ext_cnt;
- n->m_ext.ext_type = m->m_ext.ext_type;
- n->m_ext.ext_flags = m->m_ext.ext_flags;
+ KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
+ KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));
+
+ switch (m->m_ext.ext_type) {
+ case EXT_SFBUF:
+ sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
+ break;
+ default:
+ KASSERT(m->m_ext.ext_cnt != NULL,
+ ("%s: no refcounting pointer on %p", __func__, m));
+ if (*(m->m_ext.ext_cnt) == 1)
+ *(m->m_ext.ext_cnt) += 1;
+ else
+ atomic_add_int(m->m_ext.ext_cnt, 1);
+ }
+
+ bcopy(&m->m_ext, &n->m_ext, sizeof(m->m_ext));
n->m_flags |= M_EXT;
n->m_flags |= m->m_flags & M_RDONLY;
}
OpenPOWER on IntegriCloud