summaryrefslogtreecommitdiffstats
path: root/sys/kern
diff options
context:
space:
mode:
authorsam <sam@FreeBSD.org>2003-10-28 05:47:40 +0000
committersam <sam@FreeBSD.org>2003-10-28 05:47:40 +0000
commit39ba2e1c90c5d5fc0d01568719c540b15001528d (patch)
treeec5f03665816c252b1006ac692945285f1783b9b /sys/kern
parenta68a195ad4d4f0bbdd17a20f3ce13c36224a00d7 (diff)
downloadFreeBSD-src-39ba2e1c90c5d5fc0d01568719c540b15001528d.zip
FreeBSD-src-39ba2e1c90c5d5fc0d01568719c540b15001528d.tar.gz
Speed up stream socket recv handling by tracking the tail of
the mbuf chain instead of walking the list for each append. Submitted by: ps/jayanth. Obtained from: NetBSD (Jason Thorpe)
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/uipc_sockbuf.c162
-rw-r--r--sys/kern/uipc_socket.c55
-rw-r--r--sys/kern/uipc_socket2.c162
3 files changed, 338 insertions, 41 deletions
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 665b672..d64198d 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -468,6 +468,60 @@ sbrelease(sb, so)
* or sbdroprecord() when the data is acknowledged by the peer.
*/
+#ifdef SOCKBUF_DEBUG
+/*
+ * Debug check (SOCKBUF_DEBUG only): verify that sb->sb_lastrecord is the
+ * final record on the socket buffer's m_nextpkt chain.
+ *
+ * The chain is walked from sb->sb_mb to its last record; for an empty
+ * buffer the walk yields NULL, and sb_lastrecord is compared against that.
+ * On a mismatch the record chain is printed and the kernel panics, naming
+ * the file and line passed in by the caller.
+ */
+void
+sblastrecordchk(struct sockbuf *sb, const char *file, int line)
+{
+	struct mbuf *m = sb->sb_mb;
+
+	/* Find the last record by following the m_nextpkt links. */
+	while (m && m->m_nextpkt)
+		m = m->m_nextpkt;
+
+	if (m != sb->sb_lastrecord) {
+		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
+		    __func__, sb->sb_mb, sb->sb_lastrecord, m);
+		printf("packet chain:\n");
+		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
+			printf("\t%p\n", m);
+		/* line is a signed int, so it is formatted with %d. */
+		panic("%s from %s:%d", __func__, file, line);
+	}
+}
+
+/*
+ * Debug check (SOCKBUF_DEBUG only): verify that sb->sb_mbtail is the last
+ * mbuf of the last record in the socket buffer.
+ *
+ * The last record is found by following m_nextpkt from sb->sb_mb, then the
+ * last mbuf of that record by following m_next; for an empty buffer the
+ * walk yields NULL.  On a mismatch every record is printed, one per line
+ * with all of its mbufs, and the kernel panics, naming the file and line
+ * passed in by the caller.
+ */
+void
+sblastmbufchk(struct sockbuf *sb, const char *file, int line)
+{
+	struct mbuf *m = sb->sb_mb;
+	struct mbuf *n;
+
+	/* Advance to the last record ... */
+	while (m && m->m_nextpkt)
+		m = m->m_nextpkt;
+
+	/* ... and then to the last mbuf within it. */
+	while (m && m->m_next)
+		m = m->m_next;
+
+	if (m != sb->sb_mbtail) {
+		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
+		    __func__, sb->sb_mb, sb->sb_mbtail, m);
+		printf("packet tree:\n");
+		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
+			printf("\t");
+			for (n = m; n != NULL; n = n->m_next)
+				printf("%p ", n);
+			printf("\n");
+		}
+		/* line is a signed int, so it is formatted with %d. */
+		panic("%s from %s:%d", __func__, file, line);
+	}
+}
+#endif /* SOCKBUF_DEBUG */
+
+/*
+ * Link record m0 onto the end of the socket buffer's record list: when no
+ * last record is tracked, m0 becomes the head (sb_mb); otherwise it is
+ * hooked after the current last record.  Either way m0 is then recorded
+ * as sb_lastrecord.  Both macro arguments are expanded more than once.
+ */
+#define SBLINKRECORD(sb, m0) do { \
+	if ((sb)->sb_lastrecord == NULL) \
+		(sb)->sb_mb = (m0); \
+	else \
+		(sb)->sb_lastrecord->m_nextpkt = (m0); \
+	(sb)->sb_lastrecord = (m0); \
+} while (/*CONSTCOND*/0)
+
/*
* Append mbuf chain m to the last record in the
* socket buffer sb. The additional space associated
@@ -483,6 +537,7 @@ sbappend(sb, m)
if (m == 0)
return;
+ SBLASTRECORDCHK(sb);
n = sb->sb_mb;
if (n) {
while (n->m_nextpkt)
@@ -493,8 +548,53 @@ sbappend(sb, m)
return;
}
} while (n->m_next && (n = n->m_next));
+ } else {
+ /*
+ * XXX Would like to simply use sb_mbtail here, but
+ * XXX I need to verify that I won't miss an EOR that
+ * XXX way.
+ */
+ if ((n = sb->sb_lastrecord) != NULL) {
+ do {
+ if (n->m_flags & M_EOR) {
+ sbappendrecord(sb, m); /* XXXXXX!!!! */
+ return;
+ }
+ } while (n->m_next && (n = n->m_next));
+ } else {
+ /*
+ * If this is the first record in the socket buffer,
+ * it's also the last record.
+ */
+ sb->sb_lastrecord = m;
+ }
}
sbcompress(sb, m, n);
+ SBLASTRECORDCHK(sb);
+}
+
+/*
+ * This version of sbappend() should only be used when the caller
+ * absolutely knows that there will never be more than one record
+ * in the socket buffer, that is, a stream protocol (such as TCP).
+ *
+ * The chain m is handed to sbcompress() with the tracked tail
+ * sb_mbtail as the mbuf to append after, so the existing chain is
+ * not walked in this function.  Afterwards sb_lastrecord is reset
+ * to sb_mb.
+ *
+ * NOTE(review): m is dereferenced by the first KASSERT without a
+ * NULL check, whereas sbappend() returns early when m == 0; callers
+ * presumably must pass a non-NULL chain -- confirm.
+ */
+void
+sbappendstream(struct sockbuf *sb, struct mbuf *m)
+{
+
+ KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); /* m has no following record */
+ KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); /* head record is also the last */
+
+ SBLASTMBUFCHK(sb);
+
+#ifdef MBUFTRACE
+ m_claim(m, sb->sb_mowner);
+#endif
+
+ sbcompress(sb, m, sb->sb_mbtail); /* append after the tracked tail mbuf */
+
+ sb->sb_lastrecord = sb->sb_mb; /* single record: head is the last record */
+ SBLASTRECORDCHK(sb);
}
#ifdef SOCKBUF_DEBUG
@@ -516,7 +616,7 @@ sbcheck(sb)
}
}
if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
- printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
+ printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
mbcnt, sb->sb_mbcnt);
panic("sbcheck");
}
@@ -545,6 +645,8 @@ sbappendrecord(sb, m0)
* Note this permits zero length records.
*/
sballoc(sb, m0);
+ SBLASTRECORDCHK(sb);
+ SBLINKRECORD(sb, m0);
if (m)
m->m_nextpkt = m0;
else
@@ -616,7 +718,7 @@ sbappendaddr(sb, asa, m0, control)
struct sockaddr *asa;
struct mbuf *m0, *control;
{
- struct mbuf *m, *n;
+ struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
@@ -640,15 +742,16 @@ sbappendaddr(sb, asa, m0, control)
else
control = m0;
m->m_next = control;
- for (n = m; n; n = n->m_next)
+ for (n = m; n->m_next != NULL; n = n->m_next)
sballoc(sb, n);
- n = sb->sb_mb;
- if (n) {
- while (n->m_nextpkt)
- n = n->m_nextpkt;
- n->m_nextpkt = m;
- } else
- sb->sb_mb = m;
+ sballoc(sb, n);
+ nlast = n;
+ SBLINKRECORD(sb, m);
+
+ sb->sb_mbtail = nlast;
+ SBLASTMBUFCHK(sb);
+
+ SBLASTRECORDCHK(sb);
return (1);
}
@@ -657,7 +760,7 @@ sbappendcontrol(sb, m0, control)
struct sockbuf *sb;
struct mbuf *control, *m0;
{
- struct mbuf *m, *n;
+ struct mbuf *m, *n, *mlast;
int space;
if (control == 0)
@@ -666,15 +769,19 @@ sbappendcontrol(sb, m0, control)
if (space > sbspace(sb))
return (0);
n->m_next = m0; /* concatenate data to control */
- for (m = control; m; m = m->m_next)
+
+ SBLASTRECORDCHK(sb);
+
+ for (m = control; m->m_next; m = m->m_next)
sballoc(sb, m);
- n = sb->sb_mb;
- if (n) {
- while (n->m_nextpkt)
- n = n->m_nextpkt;
- n->m_nextpkt = control;
- } else
- sb->sb_mb = control;
+ sballoc(sb, m);
+ mlast = m;
+ SBLINKRECORD(sb, control);
+
+ sb->sb_mbtail = mlast;
+ SBLASTMBUFCHK(sb);
+
+ SBLASTRECORDCHK(sb);
return (1);
}
@@ -697,6 +804,8 @@ sbcompress(sb, m, n)
(eor == 0 ||
(((o = m->m_next) || (o = n)) &&
o->m_type == m->m_type))) {
+ if (sb->sb_lastrecord == m)
+ sb->sb_lastrecord = m->m_next;
m = m_free(m);
continue;
}
@@ -720,6 +829,7 @@ sbcompress(sb, m, n)
n->m_next = m;
else
sb->sb_mb = m;
+ sb->sb_mbtail = m;
sballoc(sb, m);
n = m;
m->m_flags &= ~M_EOR;
@@ -732,6 +842,7 @@ sbcompress(sb, m, n)
else
printf("semi-panic: sbcompress\n");
}
+ SBLASTMBUFCHK(sb);
}
/*
@@ -800,6 +911,18 @@ sbdrop(sb, len)
m->m_nextpkt = next;
} else
sb->sb_mb = next;
+ /*
+ * First part is an inline SB_EMPTY_FIXUP(). Second part
+ * makes sure sb_lastrecord is up-to-date if we dropped
+ * part of the last record.
+ */
+ m = sb->sb_mb;
+ if (m == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (m->m_nextpkt == NULL) {
+ sb->sb_lastrecord = m;
+ }
}
/*
@@ -820,6 +943,7 @@ sbdroprecord(sb)
m = m_free(m);
} while (m);
}
+ SB_EMPTY_FIXUP(sb);
}
/*
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 7d12c8f..4d4eb63 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -888,6 +888,8 @@ restart:
error = EWOULDBLOCK;
goto release;
}
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
sbunlock(&so->so_rcv);
error = sbwait(&so->so_rcv);
splx(s);
@@ -898,6 +900,8 @@ restart:
dontblock:
if (uio->uio_td)
uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
nextrecord = m->m_nextpkt;
if (pr->pr_flags & PR_ADDR) {
KASSERT(m->m_type == MT_SONAME,
@@ -939,12 +943,32 @@ dontblock:
}
}
if (m) {
- if ((flags & MSG_PEEK) == 0)
+ if ((flags & MSG_PEEK) == 0) {
m->m_nextpkt = nextrecord;
+ /*
+ * If nextrecord == NULL (this is a single chain),
+ * then sb_lastrecord may not be valid here if m
+ * was changed earlier.
+ */
+ if (nextrecord == NULL) {
+ KASSERT(so->so_rcv.sb_mb == m,
+ ("receive tailq 1"));
+ so->so_rcv.sb_lastrecord = m;
+ }
+ }
type = m->m_type;
if (type == MT_OOBDATA)
flags |= MSG_OOB;
+ } else {
+ if ((flags & MSG_PEEK) == 0) {
+ KASSERT(so->so_rcv.sb_mb == m,("receive tailq 2"));
+ so->so_rcv.sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ }
}
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
+
moff = 0;
offset = 0;
while (m && uio->uio_resid > 0 && error == 0) {
@@ -971,6 +995,8 @@ dontblock:
* block interrupts again.
*/
if (mp == 0) {
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
splx(s);
#ifdef ZERO_COPY_SOCKETS
if (so_zero_copy_receive) {
@@ -1018,8 +1044,16 @@ dontblock:
so->so_rcv.sb_mb = m_free(m);
m = so->so_rcv.sb_mb;
}
- if (m)
+ if (m) {
m->m_nextpkt = nextrecord;
+ if (nextrecord == NULL)
+ so->so_rcv.sb_lastrecord = m;
+ } else {
+ so->so_rcv.sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ }
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
}
} else {
if (flags & MSG_PEEK)
@@ -1064,6 +1098,8 @@ dontblock:
*/
if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
error = sbwait(&so->so_rcv);
if (error) {
sbunlock(&so->so_rcv);
@@ -1082,8 +1118,21 @@ dontblock:
(void) sbdroprecord(&so->so_rcv);
}
if ((flags & MSG_PEEK) == 0) {
- if (m == 0)
+ if (m == 0) {
+ /*
+ * First part is an inline SB_EMPTY_FIXUP(). Second
+ * part makes sure sb_lastrecord is up-to-date if
+ * there is still data in the socket buffer.
+ */
so->so_rcv.sb_mb = nextrecord;
+ if (so->so_rcv.sb_mb == NULL) {
+ so->so_rcv.sb_mbtail = NULL;
+ so->so_rcv.sb_lastrecord = NULL;
+ } else if (nextrecord->m_nextpkt == NULL)
+ so->so_rcv.sb_lastrecord = nextrecord;
+ }
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
}
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
index 665b672..d64198d 100644
--- a/sys/kern/uipc_socket2.c
+++ b/sys/kern/uipc_socket2.c
@@ -468,6 +468,60 @@ sbrelease(sb, so)
* or sbdroprecord() when the data is acknowledged by the peer.
*/
+#ifdef SOCKBUF_DEBUG
+/*
+ * Debug check (SOCKBUF_DEBUG only): verify that sb->sb_lastrecord is the
+ * final record on the socket buffer's m_nextpkt chain.
+ *
+ * The chain is walked from sb->sb_mb to its last record; for an empty
+ * buffer the walk yields NULL, and sb_lastrecord is compared against that.
+ * On a mismatch the record chain is printed and the kernel panics, naming
+ * the file and line passed in by the caller.
+ */
+void
+sblastrecordchk(struct sockbuf *sb, const char *file, int line)
+{
+	struct mbuf *m = sb->sb_mb;
+
+	/* Find the last record by following the m_nextpkt links. */
+	while (m && m->m_nextpkt)
+		m = m->m_nextpkt;
+
+	if (m != sb->sb_lastrecord) {
+		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
+		    __func__, sb->sb_mb, sb->sb_lastrecord, m);
+		printf("packet chain:\n");
+		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
+			printf("\t%p\n", m);
+		/* line is a signed int, so it is formatted with %d. */
+		panic("%s from %s:%d", __func__, file, line);
+	}
+}
+
+/*
+ * Debug check (SOCKBUF_DEBUG only): verify that sb->sb_mbtail is the last
+ * mbuf of the last record in the socket buffer.
+ *
+ * The last record is found by following m_nextpkt from sb->sb_mb, then the
+ * last mbuf of that record by following m_next; for an empty buffer the
+ * walk yields NULL.  On a mismatch every record is printed, one per line
+ * with all of its mbufs, and the kernel panics, naming the file and line
+ * passed in by the caller.
+ */
+void
+sblastmbufchk(struct sockbuf *sb, const char *file, int line)
+{
+	struct mbuf *m = sb->sb_mb;
+	struct mbuf *n;
+
+	/* Advance to the last record ... */
+	while (m && m->m_nextpkt)
+		m = m->m_nextpkt;
+
+	/* ... and then to the last mbuf within it. */
+	while (m && m->m_next)
+		m = m->m_next;
+
+	if (m != sb->sb_mbtail) {
+		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
+		    __func__, sb->sb_mb, sb->sb_mbtail, m);
+		printf("packet tree:\n");
+		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
+			printf("\t");
+			for (n = m; n != NULL; n = n->m_next)
+				printf("%p ", n);
+			printf("\n");
+		}
+		/* line is a signed int, so it is formatted with %d. */
+		panic("%s from %s:%d", __func__, file, line);
+	}
+}
+#endif /* SOCKBUF_DEBUG */
+
+/*
+ * Link record m0 onto the end of the socket buffer's record list: when no
+ * last record is tracked, m0 becomes the head (sb_mb); otherwise it is
+ * hooked after the current last record.  Either way m0 is then recorded
+ * as sb_lastrecord.  Both macro arguments are expanded more than once.
+ */
+#define SBLINKRECORD(sb, m0) do { \
+	if ((sb)->sb_lastrecord == NULL) \
+		(sb)->sb_mb = (m0); \
+	else \
+		(sb)->sb_lastrecord->m_nextpkt = (m0); \
+	(sb)->sb_lastrecord = (m0); \
+} while (/*CONSTCOND*/0)
+
/*
* Append mbuf chain m to the last record in the
* socket buffer sb. The additional space associated
@@ -483,6 +537,7 @@ sbappend(sb, m)
if (m == 0)
return;
+ SBLASTRECORDCHK(sb);
n = sb->sb_mb;
if (n) {
while (n->m_nextpkt)
@@ -493,8 +548,53 @@ sbappend(sb, m)
return;
}
} while (n->m_next && (n = n->m_next));
+ } else {
+ /*
+ * XXX Would like to simply use sb_mbtail here, but
+ * XXX I need to verify that I won't miss an EOR that
+ * XXX way.
+ */
+ if ((n = sb->sb_lastrecord) != NULL) {
+ do {
+ if (n->m_flags & M_EOR) {
+ sbappendrecord(sb, m); /* XXXXXX!!!! */
+ return;
+ }
+ } while (n->m_next && (n = n->m_next));
+ } else {
+ /*
+ * If this is the first record in the socket buffer,
+ * it's also the last record.
+ */
+ sb->sb_lastrecord = m;
+ }
}
sbcompress(sb, m, n);
+ SBLASTRECORDCHK(sb);
+}
+
+/*
+ * This version of sbappend() should only be used when the caller
+ * absolutely knows that there will never be more than one record
+ * in the socket buffer, that is, a stream protocol (such as TCP).
+ *
+ * The chain m is handed to sbcompress() with the tracked tail
+ * sb_mbtail as the mbuf to append after, so the existing chain is
+ * not walked in this function.  Afterwards sb_lastrecord is reset
+ * to sb_mb.
+ *
+ * NOTE(review): m is dereferenced by the first KASSERT without a
+ * NULL check, whereas sbappend() returns early when m == 0; callers
+ * presumably must pass a non-NULL chain -- confirm.
+ */
+void
+sbappendstream(struct sockbuf *sb, struct mbuf *m)
+{
+
+ KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); /* m has no following record */
+ KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); /* head record is also the last */
+
+ SBLASTMBUFCHK(sb);
+
+#ifdef MBUFTRACE
+ m_claim(m, sb->sb_mowner);
+#endif
+
+ sbcompress(sb, m, sb->sb_mbtail); /* append after the tracked tail mbuf */
+
+ sb->sb_lastrecord = sb->sb_mb; /* single record: head is the last record */
+ SBLASTRECORDCHK(sb);
}
#ifdef SOCKBUF_DEBUG
@@ -516,7 +616,7 @@ sbcheck(sb)
}
}
if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
- printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
+ printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
mbcnt, sb->sb_mbcnt);
panic("sbcheck");
}
@@ -545,6 +645,8 @@ sbappendrecord(sb, m0)
* Note this permits zero length records.
*/
sballoc(sb, m0);
+ SBLASTRECORDCHK(sb);
+ SBLINKRECORD(sb, m0);
if (m)
m->m_nextpkt = m0;
else
@@ -616,7 +718,7 @@ sbappendaddr(sb, asa, m0, control)
struct sockaddr *asa;
struct mbuf *m0, *control;
{
- struct mbuf *m, *n;
+ struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
@@ -640,15 +742,16 @@ sbappendaddr(sb, asa, m0, control)
else
control = m0;
m->m_next = control;
- for (n = m; n; n = n->m_next)
+ for (n = m; n->m_next != NULL; n = n->m_next)
sballoc(sb, n);
- n = sb->sb_mb;
- if (n) {
- while (n->m_nextpkt)
- n = n->m_nextpkt;
- n->m_nextpkt = m;
- } else
- sb->sb_mb = m;
+ sballoc(sb, n);
+ nlast = n;
+ SBLINKRECORD(sb, m);
+
+ sb->sb_mbtail = nlast;
+ SBLASTMBUFCHK(sb);
+
+ SBLASTRECORDCHK(sb);
return (1);
}
@@ -657,7 +760,7 @@ sbappendcontrol(sb, m0, control)
struct sockbuf *sb;
struct mbuf *control, *m0;
{
- struct mbuf *m, *n;
+ struct mbuf *m, *n, *mlast;
int space;
if (control == 0)
@@ -666,15 +769,19 @@ sbappendcontrol(sb, m0, control)
if (space > sbspace(sb))
return (0);
n->m_next = m0; /* concatenate data to control */
- for (m = control; m; m = m->m_next)
+
+ SBLASTRECORDCHK(sb);
+
+ for (m = control; m->m_next; m = m->m_next)
sballoc(sb, m);
- n = sb->sb_mb;
- if (n) {
- while (n->m_nextpkt)
- n = n->m_nextpkt;
- n->m_nextpkt = control;
- } else
- sb->sb_mb = control;
+ sballoc(sb, m);
+ mlast = m;
+ SBLINKRECORD(sb, control);
+
+ sb->sb_mbtail = mlast;
+ SBLASTMBUFCHK(sb);
+
+ SBLASTRECORDCHK(sb);
return (1);
}
@@ -697,6 +804,8 @@ sbcompress(sb, m, n)
(eor == 0 ||
(((o = m->m_next) || (o = n)) &&
o->m_type == m->m_type))) {
+ if (sb->sb_lastrecord == m)
+ sb->sb_lastrecord = m->m_next;
m = m_free(m);
continue;
}
@@ -720,6 +829,7 @@ sbcompress(sb, m, n)
n->m_next = m;
else
sb->sb_mb = m;
+ sb->sb_mbtail = m;
sballoc(sb, m);
n = m;
m->m_flags &= ~M_EOR;
@@ -732,6 +842,7 @@ sbcompress(sb, m, n)
else
printf("semi-panic: sbcompress\n");
}
+ SBLASTMBUFCHK(sb);
}
/*
@@ -800,6 +911,18 @@ sbdrop(sb, len)
m->m_nextpkt = next;
} else
sb->sb_mb = next;
+ /*
+ * First part is an inline SB_EMPTY_FIXUP(). Second part
+ * makes sure sb_lastrecord is up-to-date if we dropped
+ * part of the last record.
+ */
+ m = sb->sb_mb;
+ if (m == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (m->m_nextpkt == NULL) {
+ sb->sb_lastrecord = m;
+ }
}
/*
@@ -820,6 +943,7 @@ sbdroprecord(sb)
m = m_free(m);
} while (m);
}
+ SB_EMPTY_FIXUP(sb);
}
/*
OpenPOWER on IntegriCloud