summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--share/man/man9/mbuf.924
-rw-r--r--sys/kern/uipc_mbuf.c153
-rw-r--r--sys/netipsec/ipsec.h1
-rw-r--r--sys/netipsec/ipsec_mbuf.c149
-rw-r--r--sys/netipsec/xform_ah.c2
-rw-r--r--sys/netipsec/xform_esp.c2
-rw-r--r--sys/netipsec/xform_ipcomp.c2
-rw-r--r--sys/sys/mbuf.h1
8 files changed, 180 insertions, 154 deletions
diff --git a/share/man/man9/mbuf.9 b/share/man/man9/mbuf.9
index 1074d85..e80df0d 100644
--- a/share/man/man9/mbuf.9
+++ b/share/man/man9/mbuf.9
@@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd November 18, 2005
+.Dd March 15, 2006
.Dt MBUF 9
.Os
.\"
@@ -132,6 +132,8 @@
.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
.Ft struct mbuf *
.Fn m_defrag "struct mbuf *m0" "int how"
+.Ft struct mbuf *
+.Fn m_unshare "struct mbuf *m0" "int how"
.\"
.Sh DESCRIPTION
An
@@ -886,6 +888,26 @@ depending on the caller's preference.
This function is especially useful in network drivers, where
certain long mbuf chains must be shortened before being added
to TX descriptor lists.
+.It Fn m_unshare m0 how
+Create a version of the specified mbuf chain whose
+contents can be safely modified without affecting other users.
+If allocation fails and this operation cannot be completed,
+.Dv NULL
+will be returned.
+The original mbuf chain is always reclaimed and the reference
+count of any shared mbuf clusters is decremented.
+.Fa how
+should be either
+.Dv M_TRYWAIT
+or
+.Dv M_DONTWAIT ,
+depending on the caller's preference.
+As a side-effect of this process the returned
+mbuf chain may be compacted.
+.Pp
+This function is especially useful in the transmit path of
+network code, when data must be encrypted or otherwise
+altered prior to transmission.
.El
.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
This section currently applies to TCP/IP only.
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index 887db00..5c4c5bc 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -1679,3 +1679,156 @@ m_align(struct mbuf *m, int len)
adjust = MLEN - len;
m->m_data += adjust &~ (sizeof(long)-1);
}
+
+/*
+ * Create a writable copy of the mbuf chain. While doing this
+ * we compact the chain with a goal of producing a chain with
+ * at most two mbufs. The second mbuf in this chain is likely
+ * to be a cluster. The primary purpose of this work is to create
+ * a writable packet for encryption, compression, etc. The
+ * secondary goal is to linearize the data so the data can be
+ * passed to crypto hardware in the most efficient manner possible.
+ *
+ * The chain is edited in place: mbufs without M_EXT and mbufs for
+ * which M_WRITABLE() is true stay in the chain (or are merged into
+ * the preceding M_EXT mbuf when they fit in its trailing space);
+ * only M_EXT mbufs that are not writable are replaced by freshly
+ * allocated copies.
+ *
+ * m0 is the head of the chain to process. how is handed unchanged
+ * to the allocators used below (MGETHDR, MCLGET and m_getcl).
+ *
+ * Returns the head of the resulting chain, which differs from m0
+ * when the first mbuf had to be replaced. If any allocation fails
+ * the whole chain is released with m_freem() and NULL is returned,
+ * so the caller must not touch m0 afterwards.
+ */
+struct mbuf *
+m_unshare(struct mbuf *m0, int how)
+{
+ struct mbuf *m, *mprev;
+ struct mbuf *n, *mfirst, *mlast;
+ int len, off;
+
+ mprev = NULL;
+ /*
+ * NB: the loop advances through mprev->m_next rather than
+ * m->m_next because m may already have been freed by the
+ * time the step expression runs. Every path through the
+ * body leaves mprev pointing at a live mbuf.
+ */
+ for (m = m0; m != NULL; m = mprev->m_next) {
+ /*
+ * Regular mbufs are ignored unless there's a cluster
+ * in front of them that we can use to coalesce. We do
+ * the latter mainly so later clusters can be coalesced
+ * also w/o having to handle them specially (i.e. convert
+ * mbuf+cluster -> cluster). This optimization is heavily
+ * influenced by the assumption that we're running over
+ * Ethernet where MCLBYTES is large enough that the max
+ * packet size will permit lots of coalescing into a
+ * single cluster. This in turn permits efficient
+ * crypto operations, especially when using hardware.
+ */
+ if ((m->m_flags & M_EXT) == 0) {
+ if (mprev && (mprev->m_flags & M_EXT) &&
+ m->m_len <= M_TRAILINGSPACE(mprev)) {
+ /* XXX: this ignores mbuf types */
+ memcpy(mtod(mprev, caddr_t) + mprev->m_len,
+ mtod(m, caddr_t), m->m_len);
+ mprev->m_len += m->m_len;
+ mprev->m_next = m->m_next; /* unlink from chain */
+ m_free(m); /* reclaim mbuf */
+#if 0
+ newipsecstat.ips_mbcoalesced++;
+#endif
+ } else {
+ mprev = m;
+ }
+ continue;
+ }
+ /*
+ * Writable mbufs are left alone (for now).
+ */
+ if (M_WRITABLE(m)) {
+ mprev = m;
+ continue;
+ }
+
+ /*
+ * Not writable, replace with a copy or coalesce with
+ * the previous mbuf if possible (since we have to copy
+ * it anyway, we try to reduce the number of mbufs and
+ * clusters so that future work is easier).
+ */
+ KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
+ /* NB: we only coalesce into a cluster or larger */
+ if (mprev != NULL && (mprev->m_flags & M_EXT) &&
+ m->m_len <= M_TRAILINGSPACE(mprev)) {
+ /* XXX: this ignores mbuf types */
+ memcpy(mtod(mprev, caddr_t) + mprev->m_len,
+ mtod(m, caddr_t), m->m_len);
+ mprev->m_len += m->m_len;
+ mprev->m_next = m->m_next; /* unlink from chain */
+ m_free(m); /* reclaim mbuf */
+#if 0
+ newipsecstat.ips_clcoalesced++;
+#endif
+ continue;
+ }
+
+ /*
+ * Allocate new space to hold the copy...
+ */
+ /* XXX why can M_PKTHDR be set past the first mbuf? */
+ if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
+ /*
+ * NB: if a packet header is present we must
+ * allocate the mbuf separately from any cluster
+ * because M_MOVE_PKTHDR will smash the data
+ * pointer and drop the M_EXT marker.
+ */
+ MGETHDR(n, how, m->m_type);
+ if (n == NULL) {
+ m_freem(m0);
+ return (NULL);
+ }
+ M_MOVE_PKTHDR(n, m);
+ MCLGET(n, how);
+ /* MCLGET reports failure by leaving M_EXT clear. */
+ if ((n->m_flags & M_EXT) == 0) {
+ m_free(n);
+ m_freem(m0);
+ return (NULL);
+ }
+ } else {
+ /*
+ * NOTE(review): m->m_flags is passed to m_getcl()
+ * unfiltered; confirm that flags of the unwritable
+ * original (e.g. M_RDONLY) cannot end up on the
+ * replacement mbuf.
+ */
+ n = m_getcl(how, m->m_type, m->m_flags);
+ if (n == NULL) {
+ m_freem(m0);
+ return (NULL);
+ }
+ }
+ /*
+ * ... and copy the data. We deal with jumbo mbufs
+ * (i.e. m_len > MCLBYTES) by splitting them into
+ * clusters. We could just malloc a buffer and make
+ * it external but too many device drivers don't know
+ * how to break up the non-contiguous memory when
+ * doing DMA.
+ */
+ len = m->m_len; /* bytes still to be copied out of m */
+ off = 0; /* offset into m of the next byte to copy */
+ mfirst = n; /* first mbuf of the replacement run */
+ mlast = NULL; /* most recently filled mbuf of the run */
+ for (;;) {
+ int cc = min(len, MCLBYTES);
+ memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
+ n->m_len = cc;
+ if (mlast != NULL)
+ mlast->m_next = n;
+ mlast = n;
+#if 0
+ newipsecstat.ips_clcopied++;
+#endif
+
+ len -= cc;
+ if (len <= 0)
+ break;
+ off += cc;
+
+ n = m_getcl(how, m->m_type, m->m_flags);
+ if (n == NULL) {
+ /*
+ * The partial replacement run is not linked
+ * into the chain yet, so it has to be freed
+ * separately from m0.
+ */
+ m_freem(mfirst);
+ m_freem(m0);
+ return (NULL);
+ }
+ }
+ /* Hook the end of the replacement run to the rest of the chain. */
+ n->m_next = m->m_next;
+ if (mprev == NULL)
+ m0 = mfirst; /* new head of chain */
+ else
+ mprev->m_next = mfirst; /* replace old mbuf */
+ m_free(m); /* release old mbuf */
+ /*
+ * Resume after the first new mbuf; any further mbufs of the
+ * replacement run are visited by the following iterations.
+ */
+ mprev = mfirst;
+ }
+ return (m0);
+}
diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h
index e08b94a..8fd1068 100644
--- a/sys/netipsec/ipsec.h
+++ b/sys/netipsec/ipsec.h
@@ -410,7 +410,6 @@ extern struct mbuf *ipsec_copypkt __P((struct mbuf *));
extern void m_checkalignment(const char* where, struct mbuf *m0,
int off, int len);
-extern struct mbuf *m_clone(struct mbuf *m0);
extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off);
extern caddr_t m_pad(struct mbuf *m, int n);
extern int m_striphdr(struct mbuf *m, int skip, int hlen);
diff --git a/sys/netipsec/ipsec_mbuf.c b/sys/netipsec/ipsec_mbuf.c
index ade7d33..b63a5af 100644
--- a/sys/netipsec/ipsec_mbuf.c
+++ b/sys/netipsec/ipsec_mbuf.c
@@ -43,155 +43,6 @@
#include <netipsec/ipsec.h>
/*
- * Create a writable copy of the mbuf chain. While doing this
- * we compact the chain with a goal of producing a chain with
- * at most two mbufs. The second mbuf in this chain is likely
- * to be a cluster. The primary purpose of this work is to create
- * a writable packet for encryption, compression, etc. The
- * secondary goal is to linearize the data so the data can be
- * passed to crypto hardware in the most efficient manner possible.
- */
-struct mbuf *
-m_clone(struct mbuf *m0)
-{
- struct mbuf *m, *mprev;
- struct mbuf *n, *mfirst, *mlast;
- int len, off;
-
- IPSEC_ASSERT(m0 != NULL, ("null mbuf"));
-
- mprev = NULL;
- for (m = m0; m != NULL; m = mprev->m_next) {
- /*
- * Regular mbufs are ignored unless there's a cluster
- * in front of it that we can use to coalesce. We do
- * the latter mainly so later clusters can be coalesced
- * also w/o having to handle them specially (i.e. convert
- * mbuf+cluster -> cluster). This optimization is heavily
- * influenced by the assumption that we're running over
- * Ethernet where MCLBYTES is large enough that the max
- * packet size will permit lots of coalescing into a
- * single cluster. This in turn permits efficient
- * crypto operations, especially when using hardware.
- */
- if ((m->m_flags & M_EXT) == 0) {
- if (mprev && (mprev->m_flags & M_EXT) &&
- m->m_len <= M_TRAILINGSPACE(mprev)) {
- /* XXX: this ignores mbuf types */
- memcpy(mtod(mprev, caddr_t) + mprev->m_len,
- mtod(m, caddr_t), m->m_len);
- mprev->m_len += m->m_len;
- mprev->m_next = m->m_next; /* unlink from chain */
- m_free(m); /* reclaim mbuf */
- newipsecstat.ips_mbcoalesced++;
- } else {
- mprev = m;
- }
- continue;
- }
- /*
- * Writable mbufs are left alone (for now).
- */
- if (M_WRITABLE(m)) {
- mprev = m;
- continue;
- }
-
- /*
- * Not writable, replace with a copy or coalesce with
- * the previous mbuf if possible (since we have to copy
- * it anyway, we try to reduce the number of mbufs and
- * clusters so that future work is easier).
- */
- IPSEC_ASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
- /* NB: we only coalesce into a cluster or larger */
- if (mprev != NULL && (mprev->m_flags & M_EXT) &&
- m->m_len <= M_TRAILINGSPACE(mprev)) {
- /* XXX: this ignores mbuf types */
- memcpy(mtod(mprev, caddr_t) + mprev->m_len,
- mtod(m, caddr_t), m->m_len);
- mprev->m_len += m->m_len;
- mprev->m_next = m->m_next; /* unlink from chain */
- m_free(m); /* reclaim mbuf */
- newipsecstat.ips_clcoalesced++;
- continue;
- }
-
- /*
- * Allocate new space to hold the copy...
- */
- /* XXX why can M_PKTHDR be set past the first mbuf? */
- if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
- /*
- * NB: if a packet header is present we must
- * allocate the mbuf separately from any cluster
- * because M_MOVE_PKTHDR will smash the data
- * pointer and drop the M_EXT marker.
- */
- MGETHDR(n, M_DONTWAIT, m->m_type);
- if (n == NULL) {
- m_freem(m0);
- return (NULL);
- }
- M_MOVE_PKTHDR(n, m);
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- m_freem(m0);
- return (NULL);
- }
- } else {
- n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
- if (n == NULL) {
- m_freem(m0);
- return (NULL);
- }
- }
- /*
- * ... and copy the data. We deal with jumbo mbufs
- * (i.e. m_len > MCLBYTES) by splitting them into
- * clusters. We could just malloc a buffer and make
- * it external but too many device drivers don't know
- * how to break up the non-contiguous memory when
- * doing DMA.
- */
- len = m->m_len;
- off = 0;
- mfirst = n;
- mlast = NULL;
- for (;;) {
- int cc = min(len, MCLBYTES);
- memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
- n->m_len = cc;
- if (mlast != NULL)
- mlast->m_next = n;
- mlast = n;
- newipsecstat.ips_clcopied++;
-
- len -= cc;
- if (len <= 0)
- break;
- off += cc;
-
- n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
- if (n == NULL) {
- m_freem(mfirst);
- m_freem(m0);
- return (NULL);
- }
- }
- n->m_next = m->m_next;
- if (mprev == NULL)
- m0 = mfirst; /* new head of chain */
- else
- mprev->m_next = mfirst; /* replace old mbuf */
- m_free(m); /* release old mbuf */
- mprev = mfirst;
- }
- return (m0);
-}
-
-/*
* Make space for a new header of length hlen at skip bytes
* into the packet. When doing this we allocate new mbufs only
* when absolutely necessary. The mbuf where the new header
diff --git a/sys/netipsec/xform_ah.c b/sys/netipsec/xform_ah.c
index 9b830d0..3b1f665 100644
--- a/sys/netipsec/xform_ah.c
+++ b/sys/netipsec/xform_ah.c
@@ -942,7 +942,7 @@ ah_output(
/* Update the counters. */
ahstat.ahs_obytes += m->m_pkthdr.len - skip;
- m = m_clone(m);
+ m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__,
ipsec_address(&sav->sah->saidx.dst),
diff --git a/sys/netipsec/xform_esp.c b/sys/netipsec/xform_esp.c
index 7fe303e..fdcee7f 100644
--- a/sys/netipsec/xform_esp.c
+++ b/sys/netipsec/xform_esp.c
@@ -713,7 +713,7 @@ esp_output(
/* Update the counters. */
espstat.esps_obytes += m->m_pkthdr.len - skip;
- m = m_clone(m);
+ m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__,
ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi)));
diff --git a/sys/netipsec/xform_ipcomp.c b/sys/netipsec/xform_ipcomp.c
index 90e7489..1ad6c49 100644
--- a/sys/netipsec/xform_ipcomp.c
+++ b/sys/netipsec/xform_ipcomp.c
@@ -385,7 +385,7 @@ ipcomp_output(
/* Update the counters */
ipcompstat.ipcomps_obytes += m->m_pkthdr.len - skip;
- m = m_clone(m);
+ m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
ipcompstat.ipcomps_hdrops++;
DPRINTF(("%s: cannot clone mbuf chain, IPCA %s/%08lx\n",
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 4fb1ae8..fee3197 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -677,6 +677,7 @@ struct mbuf *m_pullup(struct mbuf *, int);
int m_sanity(struct mbuf *, int);
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_uiotombuf(struct uio *, int, int, int);
+struct mbuf *m_unshare(struct mbuf *, int how);
/*-
* Network packets may have annotations attached by affixing a list
OpenPOWER on IntegriCloud