diff options
-rw-r--r-- | sys/kern/uipc_sockbuf.c | 264 | ||||
-rw-r--r-- | sys/kern/uipc_socket.c | 38 | ||||
-rw-r--r-- | sys/kern/uipc_socket2.c | 264 | ||||
-rw-r--r-- | sys/kern/uipc_usrreq.c | 11 | ||||
-rw-r--r-- | sys/netkey/keysock.c | 2 | ||||
-rw-r--r-- | sys/sys/socketvar.h | 48 |
6 files changed, 558 insertions, 69 deletions
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index 0e4e99b..7f18ba8 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -197,9 +197,9 @@ soisdisconnected(so) SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_state |= SBS_CANTSENDMORE; + sbdrop_locked(&so->so_snd, so->so_snd.sb_cc); SOCKBUF_UNLOCK(&so->so_snd); wakeup(&so->so_timeo); - sbdrop(&so->so_snd, so->so_snd.sb_cc); sowwakeup(so); sorwakeup(so); } @@ -296,23 +296,48 @@ sonewconn(head, connstatus) * protocol when it detects that the peer will send no more data. * Data queued for reading in the socket may yet be read. */ +void +socantsendmore_locked(so) + struct socket *so; +{ + + SOCKBUF_LOCK_ASSERT(&so->so_snd); + + so->so_snd.sb_state |= SBS_CANTSENDMORE; + sowwakeup_locked(so); + mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); +} void socantsendmore(so) struct socket *so; { - so->so_snd.sb_state |= SBS_CANTSENDMORE; - sowwakeup(so); + SOCKBUF_LOCK(&so->so_snd); + socantsendmore_locked(so); + mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); } void -socantrcvmore(so) +socantrcvmore_locked(so) struct socket *so; { + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + so->so_rcv.sb_state |= SBS_CANTRCVMORE; - sorwakeup(so); + sorwakeup_locked(so); + mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); +} + +void +socantrcvmore(so) + struct socket *so; +{ + + SOCKBUF_LOCK(&so->so_rcv); + socantrcvmore_locked(so); + mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); } /* @@ -356,9 +381,16 @@ sb_lock(sb) } /* - * Wakeup processes waiting on a socket buffer. - * Do asynchronous notification via SIGIO - * if the socket has the SS_ASYNC flag set. + * Wakeup processes waiting on a socket buffer. Do asynchronous + * notification via SIGIO if the socket has the SS_ASYNC flag set. + * + * Called with the socket buffer lock held; will release the lock by the end + * of the function. This allows the caller to acquire the socket buffer lock + * while testing for the need for various sorts of wakeup and hold it through + * to the point where it's no longer required. We currently hold the lock + * through calls out to other subsystems (with the exception of kqueue), and + * then release it to avoid lock order issues. It's not clear that's + * correct. */ void sowakeup(so, sb) @@ -366,19 +398,23 @@ sowakeup(so, sb) register struct sockbuf *sb; { + SOCKBUF_LOCK_ASSERT(sb); + selwakeuppri(&sb->sb_sel, PSOCK); sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_cc); } + KNOTE(&sb->sb_sel.si_note, 0); + SOCKBUF_UNLOCK(sb); if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); if (sb->sb_flags & SB_UPCALL) (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); if (sb->sb_flags & SB_AIO) aio_swake(so, sb); - KNOTE(&sb->sb_sel.si_note, 0); + mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); } /* @@ -500,17 +536,29 @@ sbreserve(sb, cc, so, td) * Free mbufs held by a socket, and reserved mbuf space. */ void -sbrelease(sb, so) +sbrelease_locked(sb, so) struct sockbuf *sb; struct socket *so; { - sbflush(sb); + SOCKBUF_LOCK_ASSERT(sb); + + sbflush_locked(sb); (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY); sb->sb_mbmax = 0; } +void +sbrelease(sb, so) + struct sockbuf *sb; + struct socket *so; +{ + + SOCKBUF_LOCK(sb); + sbrelease_locked(sb, so); + SOCKBUF_UNLOCK(sb); +} /* * Routines to add and remove * data from an mbuf queue. @@ -542,6 +590,8 @@ sblastrecordchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; + SOCKBUF_LOCK_ASSERT(sb); + while (m && m->m_nextpkt) m = m->m_nextpkt; @@ -561,6 +611,8 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line) struct mbuf *m = sb->sb_mb; struct mbuf *n; + SOCKBUF_LOCK_ASSERT(sb); + while (m && m->m_nextpkt) m = m->m_nextpkt; @@ -583,6 +635,7 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line) #endif /* SOCKBUF_DEBUG */ #define SBLINKRECORD(sb, m0) do { \ + SOCKBUF_LOCK_ASSERT(sb); \ if ((sb)->sb_lastrecord != NULL) \ (sb)->sb_lastrecord->m_nextpkt = (m0); \ else \ @@ -597,14 +650,17 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line) * discarded and mbufs are compacted where possible. */ void -sbappend(sb, m) +sbappend_locked(sb, m) struct sockbuf *sb; struct mbuf *m; { register struct mbuf *n; + SOCKBUF_LOCK_ASSERT(sb); + if (m == 0) return; + SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { @@ -612,7 +668,7 @@ sbappend(sb, m) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! */ + sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); @@ -625,7 +681,7 @@ sbappend(sb, m) if ((n = sb->sb_lastrecord) != NULL) { do { if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! */ + sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); @@ -642,13 +698,31 @@ sbappend(sb, m) } /* + * Append mbuf chain m to the last record in the + * socket buffer sb. The additional space associated + * the mbuf chain is recorded in sb. Empty mbufs are + * discarded and mbufs are compacted where possible. + */ +void +sbappend(sb, m) + struct sockbuf *sb; + struct mbuf *m; +{ + + SOCKBUF_LOCK(sb); + sbappend_locked(sb, m); + SOCKBUF_UNLOCK(sb); +} + +/* * This version of sbappend() should only be used when the caller * absolutely knows that there will never be more than one record * in the socket buffer, that is, a stream protocol (such as TCP). */ void -sbappendstream(struct sockbuf *sb, struct mbuf *m) +sbappendstream_locked(struct sockbuf *sb, struct mbuf *m) { + SOCKBUF_LOCK_ASSERT(sb); KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); @@ -661,6 +735,20 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m) SBLASTRECORDCHK(sb); } +/* + * This version of sbappend() should only be used when the caller + * absolutely knows that there will never be more than one record + * in the socket buffer, that is, a stream protocol (such as TCP). + */ +void +sbappendstream(struct sockbuf *sb, struct mbuf *m) +{ + + SOCKBUF_LOCK(sb); + sbappendstream_locked(sb, m); + SOCKBUF_UNLOCK(sb); +} + #ifdef SOCKBUF_DEBUG void sbcheck(sb) @@ -670,6 +758,8 @@ sbcheck(sb) struct mbuf *n = 0; u_long len = 0, mbcnt = 0; + SOCKBUF_LOCK_ASSERT(sb); + for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { @@ -692,12 +782,14 @@ sbcheck(sb) * begins a new record. */ void -sbappendrecord(sb, m0) +sbappendrecord_locked(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 == 0) return; m = sb->sb_mb; @@ -725,18 +817,35 @@ sbappendrecord(sb, m0) } /* + * As above, except the mbuf chain + * begins a new record. + */ +void +sbappendrecord(sb, m0) + register struct sockbuf *sb; + register struct mbuf *m0; +{ + + SOCKBUF_LOCK(sb); + sbappendrecord_locked(sb, m0); + SOCKBUF_UNLOCK(sb); +} + +/* * As above except that OOB data * is inserted at the beginning of the sockbuf, * but after any other OOB data. */ void -sbinsertoob(sb, m0) +sbinsertoob_locked(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; register struct mbuf **mp; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 == 0) return; for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { @@ -771,13 +880,29 @@ sbinsertoob(sb, m0) } /* + * As above except that OOB data + * is inserted at the beginning of the sockbuf, + * but after any other OOB data. + */ +void +sbinsertoob(sb, m0) + register struct sockbuf *sb; + register struct mbuf *m0; +{ + + SOCKBUF_LOCK(sb); + sbinsertoob_locked(sb, m0); + SOCKBUF_UNLOCK(sb); +} + +/* * Append address and data, and optionally, control (ancillary) data * to the receive queue of a socket. If present, * m0 must include a packet header with total length. * Returns 0 if no space in sockbuf or insufficient mbufs. */ int -sbappendaddr(sb, asa, m0, control) +sbappendaddr_locked(sb, asa, m0, control) struct sockbuf *sb; const struct sockaddr *asa; struct mbuf *m0, *control; @@ -785,11 +910,14 @@ sbappendaddr(sb, asa, m0, control) struct mbuf *m, *n, *nlast; int space = asa->sa_len; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 && (m0->m_flags & M_PKTHDR) == 0) - panic("sbappendaddr"); + panic("sbappendaddr_locked"); if (m0) space += m0->m_pkthdr.len; space += m_length(control, &n); + if (space > sbspace(sb)) return (0); #if MSIZE <= 256 @@ -819,17 +947,40 @@ sbappendaddr(sb, asa, m0, control) return (1); } +/* + * Append address and data, and optionally, control (ancillary) data + * to the receive queue of a socket. If present, + * m0 must include a packet header with total length. + * Returns 0 if no space in sockbuf or insufficient mbufs. + */ +int +sbappendaddr(sb, asa, m0, control) + struct sockbuf *sb; + const struct sockaddr *asa; + struct mbuf *m0, *control; +{ + int retval; + + SOCKBUF_LOCK(sb); + retval = sbappendaddr_locked(sb, asa, m0, control); + SOCKBUF_UNLOCK(sb); + return (retval); +} + int -sbappendcontrol(sb, m0, control) +sbappendcontrol_locked(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { struct mbuf *m, *n, *mlast; int space; + SOCKBUF_LOCK_ASSERT(sb); + if (control == 0) - panic("sbappendcontrol"); + panic("sbappendcontrol_locked"); space = m_length(control, &n) + m_length(m0, NULL); + if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ @@ -849,6 +1000,19 @@ sbappendcontrol(sb, m0, control) return (1); } +int +sbappendcontrol(sb, m0, control) + struct sockbuf *sb; + struct mbuf *control, *m0; +{ + int retval; + + SOCKBUF_LOCK(sb); + retval = sbappendcontrol_locked(sb, m0, control); + SOCKBUF_UNLOCK(sb); + return (retval); +} + /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n @@ -862,6 +1026,8 @@ sbcompress(sb, m, n) register int eor = 0; register struct mbuf *o; + SOCKBUF_LOCK_ASSERT(sb); + while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && @@ -914,12 +1080,14 @@ sbcompress(sb, m, n) * Check that all resources are reclaimed. */ void -sbflush(sb) +sbflush_locked(sb) register struct sockbuf *sb; { + SOCKBUF_LOCK_ASSERT(sb); + if (sb->sb_flags & SB_LOCK) - panic("sbflush: locked"); + panic("sbflush_locked: locked"); while (sb->sb_mbcnt) { /* * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty: @@ -927,23 +1095,35 @@ sbflush(sb) */ if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) break; - sbdrop(sb, (int)sb->sb_cc); + sbdrop_locked(sb, (int)sb->sb_cc); } if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) - panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); + panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); +} + +void +sbflush(sb) + register struct sockbuf *sb; +{ + + SOCKBUF_LOCK(sb); + sbflush_locked(sb); + SOCKBUF_UNLOCK(sb); } /* * Drop data from (the front of) a sockbuf. */ void -sbdrop(sb, len) +sbdrop_locked(sb, len) register struct sockbuf *sb; register int len; { register struct mbuf *m; struct mbuf *next; + SOCKBUF_LOCK_ASSERT(sb); + next = (m = sb->sb_mb) ? m->m_nextpkt : 0; while (len > 0) { if (m == 0) { @@ -990,15 +1170,31 @@ sbdrop(sb, len) } /* + * Drop data from (the front of) a sockbuf. + */ +void +sbdrop(sb, len) + register struct sockbuf *sb; + register int len; +{ + + SOCKBUF_LOCK(sb); + sbdrop_locked(sb, len); + SOCKBUF_UNLOCK(sb); +} + +/* * Drop a record off the front of a sockbuf * and move the next record to the front. */ void -sbdroprecord(sb) +sbdroprecord_locked(sb) register struct sockbuf *sb; { register struct mbuf *m; + SOCKBUF_LOCK_ASSERT(sb); + m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; @@ -1011,6 +1207,20 @@ sbdroprecord(sb) } /* + * Drop a record off the front of a sockbuf + * and move the next record to the front. + */ +void +sbdroprecord(sb) + register struct sockbuf *sb; +{ + + SOCKBUF_LOCK(sb); + sbdroprecord_locked(sb); + SOCKBUF_UNLOCK(sb); +} + +/* * Create a "control" mbuf containing the specified data * with the specified type for presentation on a socket buffer. */ diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index b1415d7..80699b0 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -348,9 +348,15 @@ sofree(so) SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_flags |= SB_NOINTR; (void)sblock(&so->so_snd, M_WAITOK); - socantsendmore(so); + /* + * socantsendmore_locked() drops the socket buffer mutex so that it + * can safely perform wakeups. Re-acquire the mutex before + * continuing. + */ + socantsendmore_locked(so); + SOCKBUF_LOCK(&so->so_snd); sbunlock(&so->so_snd); - sbrelease(&so->so_snd, so); + sbrelease_locked(&so->so_snd, so); SOCKBUF_UNLOCK(&so->so_snd); sorflush(so); sodealloc(so); @@ -1202,7 +1208,7 @@ dontblock: flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); - (void) sbdroprecord(&so->so_rcv); + (void) sbdroprecord_locked(&so->so_rcv); } } if ((flags & MSG_PEEK) == 0) { @@ -1271,23 +1277,41 @@ sorflush(so) struct protosw *pr = so->so_proto; struct sockbuf asb; + /* + * XXXRW: This is quite ugly. The existing code made a copy of the + * socket buffer, then zero'd the original to clear the buffer + * fields. However, with mutexes in the socket buffer, this causes + * problems. We only clear the zeroable bits of the original; + * however, we have to initialize and destroy the mutex in the copy + * so that dom_dispose() and sbrelease() can lock t as needed. + */ SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOINTR; (void) sblock(sb, M_WAITOK); - socantrcvmore(so); + /* + * socantrcvmore_locked() drops the socket buffer mutex so that it + * can safely perform wakeups. Re-acquire the mutex before + * continuing. + */ + socantrcvmore_locked(so); + SOCKBUF_LOCK(sb); sbunlock(sb); - asb = *sb; /* - * Invalidate/clear most of the sockbuf structure, but keep - * its selinfo structure valid. + * Invalidate/clear most of the sockbuf structure, but leave + * selinfo and mutex data unchanged. */ + bzero(&asb, offsetof(struct sockbuf, sb_startzero)); + bcopy(&sb->sb_startzero, &asb.sb_startzero, + sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); bzero(&sb->sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); SOCKBUF_UNLOCK(sb); + SOCKBUF_LOCK_INIT(&asb, "so_rcv"); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(asb.sb_mb); sbrelease(&asb, so); + SOCKBUF_LOCK_DESTROY(&asb); } #ifdef INET diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 0e4e99b..7f18ba8 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -197,9 +197,9 @@ soisdisconnected(so) SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_state |= SBS_CANTSENDMORE; + sbdrop_locked(&so->so_snd, so->so_snd.sb_cc); SOCKBUF_UNLOCK(&so->so_snd); wakeup(&so->so_timeo); - sbdrop(&so->so_snd, so->so_snd.sb_cc); sowwakeup(so); sorwakeup(so); } @@ -296,23 +296,48 @@ sonewconn(head, connstatus) * protocol when it detects that the peer will send no more data. * Data queued for reading in the socket may yet be read. */ +void +socantsendmore_locked(so) + struct socket *so; +{ + + SOCKBUF_LOCK_ASSERT(&so->so_snd); + + so->so_snd.sb_state |= SBS_CANTSENDMORE; + sowwakeup_locked(so); + mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); +} void socantsendmore(so) struct socket *so; { - so->so_snd.sb_state |= SBS_CANTSENDMORE; - sowwakeup(so); + SOCKBUF_LOCK(&so->so_snd); + socantsendmore_locked(so); + mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); } void -socantrcvmore(so) +socantrcvmore_locked(so) struct socket *so; { + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + so->so_rcv.sb_state |= SBS_CANTRCVMORE; - sorwakeup(so); + sorwakeup_locked(so); + mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); +} + +void +socantrcvmore(so) + struct socket *so; +{ + + SOCKBUF_LOCK(&so->so_rcv); + socantrcvmore_locked(so); + mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); } /* @@ -356,9 +381,16 @@ sb_lock(sb) } /* - * Wakeup processes waiting on a socket buffer. - * Do asynchronous notification via SIGIO - * if the socket has the SS_ASYNC flag set. + * Wakeup processes waiting on a socket buffer. Do asynchronous + * notification via SIGIO if the socket has the SS_ASYNC flag set. + * + * Called with the socket buffer lock held; will release the lock by the end + * of the function. This allows the caller to acquire the socket buffer lock + * while testing for the need for various sorts of wakeup and hold it through + * to the point where it's no longer required. We currently hold the lock + * through calls out to other subsystems (with the exception of kqueue), and + * then release it to avoid lock order issues. It's not clear that's + * correct. */ void sowakeup(so, sb) @@ -366,19 +398,23 @@ sowakeup(so, sb) register struct sockbuf *sb; { + SOCKBUF_LOCK_ASSERT(sb); + selwakeuppri(&sb->sb_sel, PSOCK); sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_cc); } + KNOTE(&sb->sb_sel.si_note, 0); + SOCKBUF_UNLOCK(sb); if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); if (sb->sb_flags & SB_UPCALL) (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); if (sb->sb_flags & SB_AIO) aio_swake(so, sb); - KNOTE(&sb->sb_sel.si_note, 0); + mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); } /* @@ -500,17 +536,29 @@ sbreserve(sb, cc, so, td) * Free mbufs held by a socket, and reserved mbuf space. */ void -sbrelease(sb, so) +sbrelease_locked(sb, so) struct sockbuf *sb; struct socket *so; { - sbflush(sb); + SOCKBUF_LOCK_ASSERT(sb); + + sbflush_locked(sb); (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY); sb->sb_mbmax = 0; } +void +sbrelease(sb, so) + struct sockbuf *sb; + struct socket *so; +{ + + SOCKBUF_LOCK(sb); + sbrelease_locked(sb, so); + SOCKBUF_UNLOCK(sb); +} /* * Routines to add and remove * data from an mbuf queue. @@ -542,6 +590,8 @@ sblastrecordchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; + SOCKBUF_LOCK_ASSERT(sb); + while (m && m->m_nextpkt) m = m->m_nextpkt; @@ -561,6 +611,8 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line) struct mbuf *m = sb->sb_mb; struct mbuf *n; + SOCKBUF_LOCK_ASSERT(sb); + while (m && m->m_nextpkt) m = m->m_nextpkt; @@ -583,6 +635,7 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line) #endif /* SOCKBUF_DEBUG */ #define SBLINKRECORD(sb, m0) do { \ + SOCKBUF_LOCK_ASSERT(sb); \ if ((sb)->sb_lastrecord != NULL) \ (sb)->sb_lastrecord->m_nextpkt = (m0); \ else \ @@ -597,14 +650,17 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line) * discarded and mbufs are compacted where possible. */ void -sbappend(sb, m) +sbappend_locked(sb, m) struct sockbuf *sb; struct mbuf *m; { register struct mbuf *n; + SOCKBUF_LOCK_ASSERT(sb); + if (m == 0) return; + SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { @@ -612,7 +668,7 @@ sbappend(sb, m) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! */ + sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); @@ -625,7 +681,7 @@ sbappend(sb, m) if ((n = sb->sb_lastrecord) != NULL) { do { if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! */ + sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); @@ -642,13 +698,31 @@ sbappend(sb, m) } /* + * Append mbuf chain m to the last record in the + * socket buffer sb. The additional space associated + * the mbuf chain is recorded in sb. Empty mbufs are + * discarded and mbufs are compacted where possible. + */ +void +sbappend(sb, m) + struct sockbuf *sb; + struct mbuf *m; +{ + + SOCKBUF_LOCK(sb); + sbappend_locked(sb, m); + SOCKBUF_UNLOCK(sb); +} + +/* * This version of sbappend() should only be used when the caller * absolutely knows that there will never be more than one record * in the socket buffer, that is, a stream protocol (such as TCP). */ void -sbappendstream(struct sockbuf *sb, struct mbuf *m) +sbappendstream_locked(struct sockbuf *sb, struct mbuf *m) { + SOCKBUF_LOCK_ASSERT(sb); KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); @@ -661,6 +735,20 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m) SBLASTRECORDCHK(sb); } +/* + * This version of sbappend() should only be used when the caller + * absolutely knows that there will never be more than one record + * in the socket buffer, that is, a stream protocol (such as TCP). + */ +void +sbappendstream(struct sockbuf *sb, struct mbuf *m) +{ + + SOCKBUF_LOCK(sb); + sbappendstream_locked(sb, m); + SOCKBUF_UNLOCK(sb); +} + #ifdef SOCKBUF_DEBUG void sbcheck(sb) @@ -670,6 +758,8 @@ sbcheck(sb) struct mbuf *n = 0; u_long len = 0, mbcnt = 0; + SOCKBUF_LOCK_ASSERT(sb); + for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { @@ -692,12 +782,14 @@ sbcheck(sb) * begins a new record. */ void -sbappendrecord(sb, m0) +sbappendrecord_locked(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 == 0) return; m = sb->sb_mb; @@ -725,18 +817,35 @@ sbappendrecord(sb, m0) } /* + * As above, except the mbuf chain + * begins a new record. + */ +void +sbappendrecord(sb, m0) + register struct sockbuf *sb; + register struct mbuf *m0; +{ + + SOCKBUF_LOCK(sb); + sbappendrecord_locked(sb, m0); + SOCKBUF_UNLOCK(sb); +} + +/* * As above except that OOB data * is inserted at the beginning of the sockbuf, * but after any other OOB data. */ void -sbinsertoob(sb, m0) +sbinsertoob_locked(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; register struct mbuf **mp; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 == 0) return; for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { @@ -771,13 +880,29 @@ sbinsertoob(sb, m0) } /* + * As above except that OOB data + * is inserted at the beginning of the sockbuf, + * but after any other OOB data. + */ +void +sbinsertoob(sb, m0) + register struct sockbuf *sb; + register struct mbuf *m0; +{ + + SOCKBUF_LOCK(sb); + sbinsertoob_locked(sb, m0); + SOCKBUF_UNLOCK(sb); +} + +/* * Append address and data, and optionally, control (ancillary) data * to the receive queue of a socket. If present, * m0 must include a packet header with total length. * Returns 0 if no space in sockbuf or insufficient mbufs. */ int -sbappendaddr(sb, asa, m0, control) +sbappendaddr_locked(sb, asa, m0, control) struct sockbuf *sb; const struct sockaddr *asa; struct mbuf *m0, *control; @@ -785,11 +910,14 @@ sbappendaddr(sb, asa, m0, control) struct mbuf *m, *n, *nlast; int space = asa->sa_len; + SOCKBUF_LOCK_ASSERT(sb); + if (m0 && (m0->m_flags & M_PKTHDR) == 0) - panic("sbappendaddr"); + panic("sbappendaddr_locked"); if (m0) space += m0->m_pkthdr.len; space += m_length(control, &n); + if (space > sbspace(sb)) return (0); #if MSIZE <= 256 @@ -819,17 +947,40 @@ sbappendaddr(sb, asa, m0, control) return (1); } +/* + * Append address and data, and optionally, control (ancillary) data + * to the receive queue of a socket. If present, + * m0 must include a packet header with total length. + * Returns 0 if no space in sockbuf or insufficient mbufs. + */ +int +sbappendaddr(sb, asa, m0, control) + struct sockbuf *sb; + const struct sockaddr *asa; + struct mbuf *m0, *control; +{ + int retval; + + SOCKBUF_LOCK(sb); + retval = sbappendaddr_locked(sb, asa, m0, control); + SOCKBUF_UNLOCK(sb); + return (retval); +} + int -sbappendcontrol(sb, m0, control) +sbappendcontrol_locked(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { struct mbuf *m, *n, *mlast; int space; + SOCKBUF_LOCK_ASSERT(sb); + if (control == 0) - panic("sbappendcontrol"); + panic("sbappendcontrol_locked"); space = m_length(control, &n) + m_length(m0, NULL); + if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ @@ -849,6 +1000,19 @@ sbappendcontrol(sb, m0, control) return (1); } +int +sbappendcontrol(sb, m0, control) + struct sockbuf *sb; + struct mbuf *control, *m0; +{ + int retval; + + SOCKBUF_LOCK(sb); + retval = sbappendcontrol_locked(sb, m0, control); + SOCKBUF_UNLOCK(sb); + return (retval); +} + /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n @@ -862,6 +1026,8 @@ sbcompress(sb, m, n) register int eor = 0; register struct mbuf *o; + SOCKBUF_LOCK_ASSERT(sb); + while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && @@ -914,12 +1080,14 @@ sbcompress(sb, m, n) * Check that all resources are reclaimed. */ void -sbflush(sb) +sbflush_locked(sb) register struct sockbuf *sb; { + SOCKBUF_LOCK_ASSERT(sb); + if (sb->sb_flags & SB_LOCK) - panic("sbflush: locked"); + panic("sbflush_locked: locked"); while (sb->sb_mbcnt) { /* * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty: @@ -927,23 +1095,35 @@ sbflush(sb) */ if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) break; - sbdrop(sb, (int)sb->sb_cc); + sbdrop_locked(sb, (int)sb->sb_cc); } if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) - panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); + panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); +} + +void +sbflush(sb) + register struct sockbuf *sb; +{ + + SOCKBUF_LOCK(sb); + sbflush_locked(sb); + SOCKBUF_UNLOCK(sb); } /* * Drop data from (the front of) a sockbuf. */ void -sbdrop(sb, len) +sbdrop_locked(sb, len) register struct sockbuf *sb; register int len; { register struct mbuf *m; struct mbuf *next; + SOCKBUF_LOCK_ASSERT(sb); + next = (m = sb->sb_mb) ? m->m_nextpkt : 0; while (len > 0) { if (m == 0) { @@ -990,15 +1170,31 @@ sbdrop(sb, len) } /* + * Drop data from (the front of) a sockbuf. + */ +void +sbdrop(sb, len) + register struct sockbuf *sb; + register int len; +{ + + SOCKBUF_LOCK(sb); + sbdrop_locked(sb, len); + SOCKBUF_UNLOCK(sb); +} + +/* * Drop a record off the front of a sockbuf * and move the next record to the front. */ void -sbdroprecord(sb) +sbdroprecord_locked(sb) register struct sockbuf *sb; { register struct mbuf *m; + SOCKBUF_LOCK_ASSERT(sb); + m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; @@ -1011,6 +1207,20 @@ sbdroprecord(sb) } /* + * Drop a record off the front of a sockbuf + * and move the next record to the front. + */ +void +sbdroprecord(sb) + register struct sockbuf *sb; +{ + + SOCKBUF_LOCK(sb); + sbdroprecord_locked(sb); + SOCKBUF_UNLOCK(sb); +} + +/* * Create a "control" mbuf containing the specified data * with the specified type for presentation on a socket buffer. */ diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index a5abcee..ea9a6a2 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -353,11 +353,14 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; - if (sbappendaddr(&so2->so_rcv, from, m, control)) { + SOCKBUF_LOCK(&so2->so_rcv); + if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { + SOCKBUF_UNLOCK(&so2->so_rcv); sorwakeup(so2); m = NULL; control = NULL; } else { + SOCKBUF_UNLOCK(&so2->so_rcv); error = ENOBUFS; } if (nam != NULL) @@ -389,16 +392,17 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, if (unp->unp_conn == NULL) panic("uipc_send connected but no connection?"); so2 = unp->unp_conn->unp_socket; + SOCKBUF_LOCK(&so2->so_rcv); /* * Send to paired receive port, and then reduce * send buffer hiwater marks to maintain backpressure. * Wake up readers. */ if (control != NULL) { - if (sbappendcontrol(&so2->so_rcv, m, control)) + if (sbappendcontrol_locked(&so2->so_rcv, m, control)) control = NULL; } else { - sbappend(&so2->so_rcv, m); + sbappend_locked(&so2->so_rcv, m); } so->so_snd.sb_mbmax -= so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; @@ -408,6 +412,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, newhiwat, RLIM_INFINITY); unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; + SOCKBUF_UNLOCK(&so2->so_rcv); sorwakeup(so2); m = NULL; break; diff --git a/sys/netkey/keysock.c b/sys/netkey/keysock.c index 233fb28..ef1b134 100644 --- a/sys/netkey/keysock.c +++ b/sys/netkey/keysock.c @@ -41,8 +41,10 @@ __FBSDID("$FreeBSD$"); #include <sys/domain.h> #include <sys/errno.h> #include <sys/kernel.h> +#include <sys/lock.h> #include <sys/malloc.h> #include <sys/mbuf.h> +#include <sys/mutex.h> #include <sys/protosw.h> #include <sys/signalvar.h> #include <sys/socket.h> diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 8173715..6ea4bb8 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -319,13 +319,14 @@ struct xsocket { ((sb)->sb_flags |= SB_LOCK), 0) /* release lock on sockbuf sb */ -#define sbunlock(sb) { \ +#define sbunlock(sb) do { \ + SOCKBUF_LOCK_ASSERT(sb); \ (sb)->sb_flags &= ~SB_LOCK; \ if ((sb)->sb_flags & SB_WANT) { \ (sb)->sb_flags &= ~SB_WANT; \ wakeup(&(sb)->sb_flags); \ } \ -} +} while (0) /* * soref()/sorele() ref-count the socket structure. Note that you must @@ -355,14 +356,37 @@ struct xsocket { SOCK_UNLOCK(so); \ } while(0) -#define sorwakeup(so) do { \ +/* + * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to + * avoid a non-atomic test-and-wakeup. However, sowakeup is + * responsible for releasing the lock if it is called. We unlock only + * if we don't call into sowakeup. If any code is introduced that + * directly invokes the underlying sowakeup() primitives, it must + * maintain the same semantics. + */ +#define sorwakeup_locked(so) do { \ + SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \ if (sb_notify(&(so)->so_rcv)) \ - sowakeup((so), &(so)->so_rcv); \ + sowakeup((so), &(so)->so_rcv); \ + else \ + SOCKBUF_UNLOCK(&(so)->so_rcv); \ } while (0) -#define sowwakeup(so) do { \ +#define sorwakeup(so) do { \ + SOCKBUF_LOCK(&(so)->so_rcv); \ + sorwakeup_locked(so); \ +} while (0) + +#define sowwakeup_locked(so) do { \ if (sb_notify(&(so)->so_snd)) \ sowakeup((so), &(so)->so_snd); \ + else \ + SOCKBUF_UNLOCK(&(so)->so_snd); \ +} while (0) + +#define sowwakeup(so) do { \ + SOCKBUF_LOCK(&(so)->so_snd); \ + sowwakeup_locked(so); \ } while (0) /* @@ -412,21 +436,33 @@ struct uio; int sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type); int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len); void sbappend(struct sockbuf *sb, struct mbuf *m); +void sbappend_locked(struct sockbuf *sb, struct mbuf *m); void sbappendstream(struct sockbuf *sb, struct mbuf *m); +void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m); int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); +int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control); int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control); +int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, + struct mbuf *control); void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); +void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0); void sbcheck(struct sockbuf *sb); void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * sbcreatecontrol(caddr_t p, int size, int type, int level); void sbdrop(struct sockbuf *sb, int len); +void sbdrop_locked(struct sockbuf *sb, int len); void sbdroprecord(struct sockbuf *sb); +void sbdroprecord_locked(struct sockbuf *sb); void sbflush(struct sockbuf *sb); +void sbflush_locked(struct sockbuf *sb); void sbinsertoob(struct sockbuf *sb, struct mbuf *m0); +void sbinsertoob_locked(struct sockbuf *sb, struct mbuf *m0); void sbrelease(struct sockbuf *sb, struct socket *so); +void sbrelease_locked(struct sockbuf *sb, struct socket *so); int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td); void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); @@ -438,7 +474,9 @@ struct socket *soalloc(int mflags); int socheckuid(struct socket *so, uid_t uid); int sobind(struct socket *so, struct sockaddr *nam, struct thread *td); void socantrcvmore(struct socket *so); +void socantrcvmore_locked(struct socket *so); void socantsendmore(struct socket *so); +void socantsendmore_locked(struct socket *so); int soclose(struct socket *so); int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td); int soconnect2(struct socket *so1, struct socket *so2); |