diff options
author | sam <sam@FreeBSD.org> | 2003-10-28 05:47:40 +0000 |
---|---|---|
committer | sam <sam@FreeBSD.org> | 2003-10-28 05:47:40 +0000 |
commit | 39ba2e1c90c5d5fc0d01568719c540b15001528d (patch) | |
tree | ec5f03665816c252b1006ac692945285f1783b9b /sys/kern | |
parent | a68a195ad4d4f0bbdd17a20f3ce13c36224a00d7 (diff) | |
download | FreeBSD-src-39ba2e1c90c5d5fc0d01568719c540b15001528d.zip FreeBSD-src-39ba2e1c90c5d5fc0d01568719c540b15001528d.tar.gz |
Speed up stream socket recv handling by tracking the tail of
the mbuf chain instead of walking the list for each append.
Submitted by: ps/jayanth
Obtained from: netbsd (jason thorpe)
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/uipc_sockbuf.c | 162 | ||||
-rw-r--r-- | sys/kern/uipc_socket.c | 55 | ||||
-rw-r--r-- | sys/kern/uipc_socket2.c | 162 |
3 files changed, 338 insertions, 41 deletions
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index 665b672..d64198d 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -468,6 +468,60 @@ sbrelease(sb, so) * or sbdroprecord() when the data is acknowledged by the peer. */ +#ifdef SOCKBUF_DEBUG +void +sblastrecordchk(struct sockbuf *sb, const char *file, int line) +{ + struct mbuf *m = sb->sb_mb; + + while (m && m->m_nextpkt) + m = m->m_nextpkt; + + if (m != sb->sb_lastrecord) { + printf("%s: sb_mb %p sb_lastrecord %p last %p\n", + __func__, sb->sb_mb, sb->sb_lastrecord, m); + printf("packet chain:\n"); + for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) + printf("\t%p\n", m); + panic("%s from %s:%u", __func__, file, line); + } +} + +void +sblastmbufchk(struct sockbuf *sb, const char *file, int line) +{ + struct mbuf *m = sb->sb_mb; + struct mbuf *n; + + while (m && m->m_nextpkt) + m = m->m_nextpkt; + + while (m && m->m_next) + m = m->m_next; + + if (m != sb->sb_mbtail) { + printf("%s: sb_mb %p sb_mbtail %p last %p\n", + __func__, sb->sb_mb, sb->sb_mbtail, m); + printf("packet tree:\n"); + for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { + printf("\t"); + for (n = m; n != NULL; n = n->m_next) + printf("%p ", n); + printf("\n"); + } + panic("%s from %s:%u", __func__, file, line); + } +} +#endif /* SOCKBUF_DEBUG */ + +#define SBLINKRECORD(sb, m0) do { \ + if ((sb)->sb_lastrecord != NULL) \ + (sb)->sb_lastrecord->m_nextpkt = (m0); \ + else \ + (sb)->sb_mb = (m0); \ + (sb)->sb_lastrecord = (m0); \ +} while (/*CONSTCOND*/0) + /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated @@ -483,6 +537,7 @@ sbappend(sb, m) if (m == 0) return; + SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { while (n->m_nextpkt) @@ -493,8 +548,53 @@ sbappend(sb, m) return; } } while (n->m_next && (n = n->m_next)); + } else { + /* + * XXX Would like to simply use sb_mbtail here, but + * XXX I need to verify that I won't miss an EOR that + * XXX way. 
+ */ + if ((n = sb->sb_lastrecord) != NULL) { + do { + if (n->m_flags & M_EOR) { + sbappendrecord(sb, m); /* XXXXXX!!!! */ + return; + } + } while (n->m_next && (n = n->m_next)); + } else { + /* + * If this is the first record in the socket buffer, + * it's also the last record. + */ + sb->sb_lastrecord = m; + } } sbcompress(sb, m, n); + SBLASTRECORDCHK(sb); +} + +/* + * This version of sbappend() should only be used when the caller + * absolutely knows that there will never be more than one record + * in the socket buffer, that is, a stream protocol (such as TCP). + */ +void +sbappendstream(struct sockbuf *sb, struct mbuf *m) +{ + + KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); + KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); + + SBLASTMBUFCHK(sb); + +#ifdef MBUFTRACE + m_claim(m, sb->sb_mowner); +#endif + + sbcompress(sb, m, sb->sb_mbtail); + + sb->sb_lastrecord = sb->sb_mb; + SBLASTRECORDCHK(sb); } #ifdef SOCKBUF_DEBUG @@ -516,7 +616,7 @@ sbcheck(sb) } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { - printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, + printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); panic("sbcheck"); } @@ -545,6 +645,8 @@ sbappendrecord(sb, m0) * Note this permits zero length records. 
*/ sballoc(sb, m0); + SBLASTRECORDCHK(sb); + SBLINKRECORD(sb, m0); if (m) m->m_nextpkt = m0; else @@ -616,7 +718,7 @@ sbappendaddr(sb, asa, m0, control) struct sockaddr *asa; struct mbuf *m0, *control; { - struct mbuf *m, *n; + struct mbuf *m, *n, *nlast; int space = asa->sa_len; if (m0 && (m0->m_flags & M_PKTHDR) == 0) @@ -640,15 +742,16 @@ sbappendaddr(sb, asa, m0, control) else control = m0; m->m_next = control; - for (n = m; n; n = n->m_next) + for (n = m; n->m_next != NULL; n = n->m_next) sballoc(sb, n); - n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = m; - } else - sb->sb_mb = m; + sballoc(sb, n); + nlast = n; + SBLINKRECORD(sb, m); + + sb->sb_mbtail = nlast; + SBLASTMBUFCHK(sb); + + SBLASTRECORDCHK(sb); return (1); } @@ -657,7 +760,7 @@ sbappendcontrol(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { - struct mbuf *m, *n; + struct mbuf *m, *n, *mlast; int space; if (control == 0) @@ -666,15 +769,19 @@ sbappendcontrol(sb, m0, control) if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ - for (m = control; m; m = m->m_next) + + SBLASTRECORDCHK(sb); + + for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); - n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = control; - } else - sb->sb_mb = control; + sballoc(sb, m); + mlast = m; + SBLINKRECORD(sb, control); + + sb->sb_mbtail = mlast; + SBLASTMBUFCHK(sb); + + SBLASTRECORDCHK(sb); return (1); } @@ -697,6 +804,8 @@ sbcompress(sb, m, n) (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { + if (sb->sb_lastrecord == m) + sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } @@ -720,6 +829,7 @@ sbcompress(sb, m, n) n->m_next = m; else sb->sb_mb = m; + sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; @@ -732,6 +842,7 @@ sbcompress(sb, m, n) else printf("semi-panic: sbcompress\n"); } + SBLASTMBUFCHK(sb); } /* @@ -800,6 +911,18 @@ sbdrop(sb, len) 
m->m_nextpkt = next; } else sb->sb_mb = next; + /* + * First part is an inline SB_EMPTY_FIXUP(). Second part + * makes sure sb_lastrecord is up-to-date if we dropped + * part of the last record. + */ + m = sb->sb_mb; + if (m == NULL) { + sb->sb_mbtail = NULL; + sb->sb_lastrecord = NULL; + } else if (m->m_nextpkt == NULL) { + sb->sb_lastrecord = m; + } } /* @@ -820,6 +943,7 @@ sbdroprecord(sb) m = m_free(m); } while (m); } + SB_EMPTY_FIXUP(sb); } /* diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 7d12c8f..4d4eb63 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -888,6 +888,8 @@ restart: error = EWOULDBLOCK; goto release; } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); @@ -898,6 +900,8 @@ restart: dontblock: if (uio->uio_td) uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++; + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, @@ -939,12 +943,32 @@ dontblock: } } if (m) { - if ((flags & MSG_PEEK) == 0) + if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; + /* + * If nextrecord == NULL (this is a single chain), + * then sb_lastrecord may not be valid here if m + * was changed earlier. + */ + if (nextrecord == NULL) { + KASSERT(so->so_rcv.sb_mb == m, + ("receive tailq 1")); + so->so_rcv.sb_lastrecord = m; + } + } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; + } else { + if ((flags & MSG_PEEK) == 0) { + KASSERT(so->so_rcv.sb_mb == m,("receive tailq 2")); + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); + moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { @@ -971,6 +995,8 @@ dontblock: * block interrupts again. 
*/ if (mp == 0) { + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); splx(s); #ifdef ZERO_COPY_SOCKETS if (so_zero_copy_receive) { @@ -1018,8 +1044,16 @@ dontblock: so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } - if (m) + if (m) { m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_lastrecord = m; + } else { + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); } } else { if (flags & MSG_PEEK) @@ -1064,6 +1098,8 @@ dontblock: */ if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); @@ -1082,8 +1118,21 @@ dontblock: (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { - if (m == 0) + if (m == 0) { + /* + * First part is an inline SB_EMPTY_FIXUP(). Second + * part makes sure sb_lastrecord is up-to-date if + * there is still data in the socket buffer. + */ so->so_rcv.sb_mb = nextrecord; + if (so->so_rcv.sb_mb == NULL) { + so->so_rcv.sb_mbtail = NULL; + so->so_rcv.sb_lastrecord = NULL; + } else if (nextrecord->m_nextpkt == NULL) + so->so_rcv.sb_lastrecord = nextrecord; + } + SBLASTRECORDCHK(&so->so_rcv); + SBLASTMBUFCHK(&so->so_rcv); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); } diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 665b672..d64198d 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -468,6 +468,60 @@ sbrelease(sb, so) * or sbdroprecord() when the data is acknowledged by the peer. 
*/ +#ifdef SOCKBUF_DEBUG +void +sblastrecordchk(struct sockbuf *sb, const char *file, int line) +{ + struct mbuf *m = sb->sb_mb; + + while (m && m->m_nextpkt) + m = m->m_nextpkt; + + if (m != sb->sb_lastrecord) { + printf("%s: sb_mb %p sb_lastrecord %p last %p\n", + __func__, sb->sb_mb, sb->sb_lastrecord, m); + printf("packet chain:\n"); + for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) + printf("\t%p\n", m); + panic("%s from %s:%u", __func__, file, line); + } +} + +void +sblastmbufchk(struct sockbuf *sb, const char *file, int line) +{ + struct mbuf *m = sb->sb_mb; + struct mbuf *n; + + while (m && m->m_nextpkt) + m = m->m_nextpkt; + + while (m && m->m_next) + m = m->m_next; + + if (m != sb->sb_mbtail) { + printf("%s: sb_mb %p sb_mbtail %p last %p\n", + __func__, sb->sb_mb, sb->sb_mbtail, m); + printf("packet tree:\n"); + for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { + printf("\t"); + for (n = m; n != NULL; n = n->m_next) + printf("%p ", n); + printf("\n"); + } + panic("%s from %s:%u", __func__, file, line); + } +} +#endif /* SOCKBUF_DEBUG */ + +#define SBLINKRECORD(sb, m0) do { \ + if ((sb)->sb_lastrecord != NULL) \ + (sb)->sb_lastrecord->m_nextpkt = (m0); \ + else \ + (sb)->sb_mb = (m0); \ + (sb)->sb_lastrecord = (m0); \ +} while (/*CONSTCOND*/0) + /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated @@ -483,6 +537,7 @@ sbappend(sb, m) if (m == 0) return; + SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { while (n->m_nextpkt) @@ -493,8 +548,53 @@ sbappend(sb, m) return; } } while (n->m_next && (n = n->m_next)); + } else { + /* + * XXX Would like to simply use sb_mbtail here, but + * XXX I need to verify that I won't miss an EOR that + * XXX way. + */ + if ((n = sb->sb_lastrecord) != NULL) { + do { + if (n->m_flags & M_EOR) { + sbappendrecord(sb, m); /* XXXXXX!!!! 
*/ + return; + } + } while (n->m_next && (n = n->m_next)); + } else { + /* + * If this is the first record in the socket buffer, + * it's also the last record. + */ + sb->sb_lastrecord = m; + } } sbcompress(sb, m, n); + SBLASTRECORDCHK(sb); +} + +/* + * This version of sbappend() should only be used when the caller + * absolutely knows that there will never be more than one record + * in the socket buffer, that is, a stream protocol (such as TCP). + */ +void +sbappendstream(struct sockbuf *sb, struct mbuf *m) +{ + + KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); + KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); + + SBLASTMBUFCHK(sb); + +#ifdef MBUFTRACE + m_claim(m, sb->sb_mowner); +#endif + + sbcompress(sb, m, sb->sb_mbtail); + + sb->sb_lastrecord = sb->sb_mb; + SBLASTRECORDCHK(sb); } #ifdef SOCKBUF_DEBUG @@ -516,7 +616,7 @@ sbcheck(sb) } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { - printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, + printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); panic("sbcheck"); } @@ -545,6 +645,8 @@ sbappendrecord(sb, m0) * Note this permits zero length records. 
*/ sballoc(sb, m0); + SBLASTRECORDCHK(sb); + SBLINKRECORD(sb, m0); if (m) m->m_nextpkt = m0; else @@ -616,7 +718,7 @@ sbappendaddr(sb, asa, m0, control) struct sockaddr *asa; struct mbuf *m0, *control; { - struct mbuf *m, *n; + struct mbuf *m, *n, *nlast; int space = asa->sa_len; if (m0 && (m0->m_flags & M_PKTHDR) == 0) @@ -640,15 +742,16 @@ sbappendaddr(sb, asa, m0, control) else control = m0; m->m_next = control; - for (n = m; n; n = n->m_next) + for (n = m; n->m_next != NULL; n = n->m_next) sballoc(sb, n); - n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = m; - } else - sb->sb_mb = m; + sballoc(sb, n); + nlast = n; + SBLINKRECORD(sb, m); + + sb->sb_mbtail = nlast; + SBLASTMBUFCHK(sb); + + SBLASTRECORDCHK(sb); return (1); } @@ -657,7 +760,7 @@ sbappendcontrol(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { - struct mbuf *m, *n; + struct mbuf *m, *n, *mlast; int space; if (control == 0) @@ -666,15 +769,19 @@ sbappendcontrol(sb, m0, control) if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ - for (m = control; m; m = m->m_next) + + SBLASTRECORDCHK(sb); + + for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); - n = sb->sb_mb; - if (n) { - while (n->m_nextpkt) - n = n->m_nextpkt; - n->m_nextpkt = control; - } else - sb->sb_mb = control; + sballoc(sb, m); + mlast = m; + SBLINKRECORD(sb, control); + + sb->sb_mbtail = mlast; + SBLASTMBUFCHK(sb); + + SBLASTRECORDCHK(sb); return (1); } @@ -697,6 +804,8 @@ sbcompress(sb, m, n) (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { + if (sb->sb_lastrecord == m) + sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } @@ -720,6 +829,7 @@ sbcompress(sb, m, n) n->m_next = m; else sb->sb_mb = m; + sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; @@ -732,6 +842,7 @@ sbcompress(sb, m, n) else printf("semi-panic: sbcompress\n"); } + SBLASTMBUFCHK(sb); } /* @@ -800,6 +911,18 @@ sbdrop(sb, len) 
m->m_nextpkt = next; } else sb->sb_mb = next; + /* + * First part is an inline SB_EMPTY_FIXUP(). Second part + * makes sure sb_lastrecord is up-to-date if we dropped + * part of the last record. + */ + m = sb->sb_mb; + if (m == NULL) { + sb->sb_mbtail = NULL; + sb->sb_lastrecord = NULL; + } else if (m->m_nextpkt == NULL) { + sb->sb_lastrecord = m; + } } /* @@ -820,6 +943,7 @@ sbdroprecord(sb) m = m_free(m); } while (m); } + SB_EMPTY_FIXUP(sb); } /* |