summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/kern/uipc_sockbuf.c264
-rw-r--r--sys/kern/uipc_socket.c38
-rw-r--r--sys/kern/uipc_socket2.c264
-rw-r--r--sys/kern/uipc_usrreq.c11
-rw-r--r--sys/netkey/keysock.c2
-rw-r--r--sys/sys/socketvar.h48
6 files changed, 558 insertions, 69 deletions
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 0e4e99b..7f18ba8 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -197,9 +197,9 @@ soisdisconnected(so)
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_state |= SBS_CANTSENDMORE;
+ sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
SOCKBUF_UNLOCK(&so->so_snd);
wakeup(&so->so_timeo);
- sbdrop(&so->so_snd, so->so_snd.sb_cc);
sowwakeup(so);
sorwakeup(so);
}
@@ -296,23 +296,48 @@ sonewconn(head, connstatus)
* protocol when it detects that the peer will send no more data.
* Data queued for reading in the socket may yet be read.
*/
+void
+socantsendmore_locked(so)
+ struct socket *so;
+{
+
+ SOCKBUF_LOCK_ASSERT(&so->so_snd);
+
+ so->so_snd.sb_state |= SBS_CANTSENDMORE;
+ sowwakeup_locked(so);
+ mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+}
void
socantsendmore(so)
struct socket *so;
{
- so->so_snd.sb_state |= SBS_CANTSENDMORE;
- sowwakeup(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ socantsendmore_locked(so);
+ mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
void
-socantrcvmore(so)
+socantrcvmore_locked(so)
struct socket *so;
{
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
- sorwakeup(so);
+ sorwakeup_locked(so);
+ mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+}
+
+void
+socantrcvmore(so)
+ struct socket *so;
+{
+
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
/*
@@ -356,9 +381,16 @@ sb_lock(sb)
}
/*
- * Wakeup processes waiting on a socket buffer.
- * Do asynchronous notification via SIGIO
- * if the socket has the SS_ASYNC flag set.
+ * Wakeup processes waiting on a socket buffer. Do asynchronous
+ * notification via SIGIO if the socket has the SS_ASYNC flag set.
+ *
+ * Called with the socket buffer lock held; will release the lock by the end
+ * of the function. This allows the caller to acquire the socket buffer lock
+ * while testing for the need for various sorts of wakeup and hold it through
+ * to the point where it's no longer required. We currently hold the lock
+ * through calls out to other subsystems (with the exception of kqueue), and
+ * then release it to avoid lock order issues. It's not clear that's
+ * correct.
*/
void
sowakeup(so, sb)
@@ -366,19 +398,23 @@ sowakeup(so, sb)
register struct sockbuf *sb;
{
+ SOCKBUF_LOCK_ASSERT(sb);
+
selwakeuppri(&sb->sb_sel, PSOCK);
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_cc);
}
+ KNOTE(&sb->sb_sel.si_note, 0);
+ SOCKBUF_UNLOCK(sb);
if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
pgsigio(&so->so_sigio, SIGIO, 0);
if (sb->sb_flags & SB_UPCALL)
(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
if (sb->sb_flags & SB_AIO)
aio_swake(so, sb);
- KNOTE(&sb->sb_sel.si_note, 0);
+ mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}
/*
@@ -500,17 +536,29 @@ sbreserve(sb, cc, so, td)
* Free mbufs held by a socket, and reserved mbuf space.
*/
void
-sbrelease(sb, so)
+sbrelease_locked(sb, so)
struct sockbuf *sb;
struct socket *so;
{
- sbflush(sb);
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ sbflush_locked(sb);
(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
RLIM_INFINITY);
sb->sb_mbmax = 0;
}
+void
+sbrelease(sb, so)
+ struct sockbuf *sb;
+ struct socket *so;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbrelease_locked(sb, so);
+ SOCKBUF_UNLOCK(sb);
+}
/*
* Routines to add and remove
* data from an mbuf queue.
@@ -542,6 +590,8 @@ sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
struct mbuf *m = sb->sb_mb;
+ SOCKBUF_LOCK_ASSERT(sb);
+
while (m && m->m_nextpkt)
m = m->m_nextpkt;
@@ -561,6 +611,8 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
struct mbuf *m = sb->sb_mb;
struct mbuf *n;
+ SOCKBUF_LOCK_ASSERT(sb);
+
while (m && m->m_nextpkt)
m = m->m_nextpkt;
@@ -583,6 +635,7 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
#endif /* SOCKBUF_DEBUG */
#define SBLINKRECORD(sb, m0) do { \
+ SOCKBUF_LOCK_ASSERT(sb); \
if ((sb)->sb_lastrecord != NULL) \
(sb)->sb_lastrecord->m_nextpkt = (m0); \
else \
@@ -597,14 +650,17 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
* discarded and mbufs are compacted where possible.
*/
void
-sbappend(sb, m)
+sbappend_locked(sb, m)
struct sockbuf *sb;
struct mbuf *m;
{
register struct mbuf *n;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (m == 0)
return;
+
SBLASTRECORDCHK(sb);
n = sb->sb_mb;
if (n) {
@@ -612,7 +668,7 @@ sbappend(sb, m)
n = n->m_nextpkt;
do {
if (n->m_flags & M_EOR) {
- sbappendrecord(sb, m); /* XXXXXX!!!! */
+ sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@@ -625,7 +681,7 @@ sbappend(sb, m)
if ((n = sb->sb_lastrecord) != NULL) {
do {
if (n->m_flags & M_EOR) {
- sbappendrecord(sb, m); /* XXXXXX!!!! */
+ sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@@ -642,13 +698,31 @@ sbappend(sb, m)
}
/*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb. The additional space associated
+ * with the mbuf chain is recorded in sb. Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+void
+sbappend(sb, m)
+ struct sockbuf *sb;
+ struct mbuf *m;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbappend_locked(sb, m);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* This version of sbappend() should only be used when the caller
* absolutely knows that there will never be more than one record
* in the socket buffer, that is, a stream protocol (such as TCP).
*/
void
-sbappendstream(struct sockbuf *sb, struct mbuf *m)
+sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
+ SOCKBUF_LOCK_ASSERT(sb);
KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
@@ -661,6 +735,20 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m)
SBLASTRECORDCHK(sb);
}
+/*
+ * This version of sbappend() should only be used when the caller
+ * absolutely knows that there will never be more than one record
+ * in the socket buffer, that is, a stream protocol (such as TCP).
+ */
+void
+sbappendstream(struct sockbuf *sb, struct mbuf *m)
+{
+
+ SOCKBUF_LOCK(sb);
+ sbappendstream_locked(sb, m);
+ SOCKBUF_UNLOCK(sb);
+}
+
#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
@@ -670,6 +758,8 @@ sbcheck(sb)
struct mbuf *n = 0;
u_long len = 0, mbcnt = 0;
+ SOCKBUF_LOCK_ASSERT(sb);
+
for (m = sb->sb_mb; m; m = n) {
n = m->m_nextpkt;
for (; m; m = m->m_next) {
@@ -692,12 +782,14 @@ sbcheck(sb)
* begins a new record.
*/
void
-sbappendrecord(sb, m0)
+sbappendrecord_locked(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
register struct mbuf *m;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (m0 == 0)
return;
m = sb->sb_mb;
@@ -725,18 +817,35 @@ sbappendrecord(sb, m0)
}
/*
+ * As above, except the mbuf chain
+ * begins a new record.
+ */
+void
+sbappendrecord(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbappendrecord_locked(sb, m0);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* As above except that OOB data
* is inserted at the beginning of the sockbuf,
* but after any other OOB data.
*/
void
-sbinsertoob(sb, m0)
+sbinsertoob_locked(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
register struct mbuf *m;
register struct mbuf **mp;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (m0 == 0)
return;
for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
@@ -771,13 +880,29 @@ sbinsertoob(sb, m0)
}
/*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+void
+sbinsertoob(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbinsertoob_locked(sb, m0);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* Append address and data, and optionally, control (ancillary) data
* to the receive queue of a socket. If present,
* m0 must include a packet header with total length.
* Returns 0 if no space in sockbuf or insufficient mbufs.
*/
int
-sbappendaddr(sb, asa, m0, control)
+sbappendaddr_locked(sb, asa, m0, control)
struct sockbuf *sb;
const struct sockaddr *asa;
struct mbuf *m0, *control;
@@ -785,11 +910,14 @@ sbappendaddr(sb, asa, m0, control)
struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
- panic("sbappendaddr");
+ panic("sbappendaddr_locked");
if (m0)
space += m0->m_pkthdr.len;
space += m_length(control, &n);
+
if (space > sbspace(sb))
return (0);
#if MSIZE <= 256
@@ -819,17 +947,40 @@ sbappendaddr(sb, asa, m0, control)
return (1);
}
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket. If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
+int
+sbappendaddr(sb, asa, m0, control)
+ struct sockbuf *sb;
+ const struct sockaddr *asa;
+ struct mbuf *m0, *control;
+{
+ int retval;
+
+ SOCKBUF_LOCK(sb);
+ retval = sbappendaddr_locked(sb, asa, m0, control);
+ SOCKBUF_UNLOCK(sb);
+ return (retval);
+}
+
int
-sbappendcontrol(sb, m0, control)
+sbappendcontrol_locked(sb, m0, control)
struct sockbuf *sb;
struct mbuf *control, *m0;
{
struct mbuf *m, *n, *mlast;
int space;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (control == 0)
- panic("sbappendcontrol");
+ panic("sbappendcontrol_locked");
space = m_length(control, &n) + m_length(m0, NULL);
+
if (space > sbspace(sb))
return (0);
n->m_next = m0; /* concatenate data to control */
@@ -849,6 +1000,19 @@ sbappendcontrol(sb, m0, control)
return (1);
}
+int
+sbappendcontrol(sb, m0, control)
+ struct sockbuf *sb;
+ struct mbuf *control, *m0;
+{
+ int retval;
+
+ SOCKBUF_LOCK(sb);
+ retval = sbappendcontrol_locked(sb, m0, control);
+ SOCKBUF_UNLOCK(sb);
+ return (retval);
+}
+
/*
* Compress mbuf chain m into the socket
* buffer sb following mbuf n. If n
@@ -862,6 +1026,8 @@ sbcompress(sb, m, n)
register int eor = 0;
register struct mbuf *o;
+ SOCKBUF_LOCK_ASSERT(sb);
+
while (m) {
eor |= m->m_flags & M_EOR;
if (m->m_len == 0 &&
@@ -914,12 +1080,14 @@ sbcompress(sb, m, n)
* Check that all resources are reclaimed.
*/
void
-sbflush(sb)
+sbflush_locked(sb)
register struct sockbuf *sb;
{
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (sb->sb_flags & SB_LOCK)
- panic("sbflush: locked");
+ panic("sbflush_locked: locked");
while (sb->sb_mbcnt) {
/*
* Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
@@ -927,23 +1095,35 @@ sbflush(sb)
*/
if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
break;
- sbdrop(sb, (int)sb->sb_cc);
+ sbdrop_locked(sb, (int)sb->sb_cc);
}
if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
- panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
+ panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
+}
+
+void
+sbflush(sb)
+ register struct sockbuf *sb;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbflush_locked(sb);
+ SOCKBUF_UNLOCK(sb);
}
/*
* Drop data from (the front of) a sockbuf.
*/
void
-sbdrop(sb, len)
+sbdrop_locked(sb, len)
register struct sockbuf *sb;
register int len;
{
register struct mbuf *m;
struct mbuf *next;
+ SOCKBUF_LOCK_ASSERT(sb);
+
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
while (len > 0) {
if (m == 0) {
@@ -990,15 +1170,31 @@ sbdrop(sb, len)
}
/*
+ * Drop data from (the front of) a sockbuf.
+ */
+void
+sbdrop(sb, len)
+ register struct sockbuf *sb;
+ register int len;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbdrop_locked(sb, len);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* Drop a record off the front of a sockbuf
* and move the next record to the front.
*/
void
-sbdroprecord(sb)
+sbdroprecord_locked(sb)
register struct sockbuf *sb;
{
register struct mbuf *m;
+ SOCKBUF_LOCK_ASSERT(sb);
+
m = sb->sb_mb;
if (m) {
sb->sb_mb = m->m_nextpkt;
@@ -1011,6 +1207,20 @@ sbdroprecord(sb)
}
/*
+ * Drop a record off the front of a sockbuf
+ * and move the next record to the front.
+ */
+void
+sbdroprecord(sb)
+ register struct sockbuf *sb;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbdroprecord_locked(sb);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* Create a "control" mbuf containing the specified data
* with the specified type for presentation on a socket buffer.
*/
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index b1415d7..80699b0 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -348,9 +348,15 @@ sofree(so)
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_flags |= SB_NOINTR;
(void)sblock(&so->so_snd, M_WAITOK);
- socantsendmore(so);
+ /*
+ * socantsendmore_locked() drops the socket buffer mutex so that it
+ * can safely perform wakeups. Re-acquire the mutex before
+ * continuing.
+ */
+ socantsendmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
sbunlock(&so->so_snd);
- sbrelease(&so->so_snd, so);
+ sbrelease_locked(&so->so_snd, so);
SOCKBUF_UNLOCK(&so->so_snd);
sorflush(so);
sodealloc(so);
@@ -1202,7 +1208,7 @@ dontblock:
flags |= MSG_TRUNC;
if ((flags & MSG_PEEK) == 0) {
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- (void) sbdroprecord(&so->so_rcv);
+ (void) sbdroprecord_locked(&so->so_rcv);
}
}
if ((flags & MSG_PEEK) == 0) {
@@ -1271,23 +1277,41 @@ sorflush(so)
struct protosw *pr = so->so_proto;
struct sockbuf asb;
+ /*
+ * XXXRW: This is quite ugly. The existing code made a copy of the
+ * socket buffer, then zero'd the original to clear the buffer
+ * fields. However, with mutexes in the socket buffer, this causes
+ * problems. We only clear the zeroable bits of the original;
+ * however, we have to initialize and destroy the mutex in the copy
+ * so that dom_dispose() and sbrelease() can lock it as needed.
+ */
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOINTR;
(void) sblock(sb, M_WAITOK);
- socantrcvmore(so);
+ /*
+ * socantrcvmore_locked() drops the socket buffer mutex so that it
+ * can safely perform wakeups. Re-acquire the mutex before
+ * continuing.
+ */
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(sb);
sbunlock(sb);
- asb = *sb;
/*
- * Invalidate/clear most of the sockbuf structure, but keep
- * its selinfo structure valid.
+ * Invalidate/clear most of the sockbuf structure, but leave
+ * selinfo and mutex data unchanged.
*/
+ bzero(&asb, offsetof(struct sockbuf, sb_startzero));
+ bcopy(&sb->sb_startzero, &asb.sb_startzero,
+ sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
bzero(&sb->sb_startzero,
sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
SOCKBUF_UNLOCK(sb);
+ SOCKBUF_LOCK_INIT(&asb, "so_rcv");
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
sbrelease(&asb, so);
+ SOCKBUF_LOCK_DESTROY(&asb);
}
#ifdef INET
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
index 0e4e99b..7f18ba8 100644
--- a/sys/kern/uipc_socket2.c
+++ b/sys/kern/uipc_socket2.c
@@ -197,9 +197,9 @@ soisdisconnected(so)
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_state |= SBS_CANTSENDMORE;
+ sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
SOCKBUF_UNLOCK(&so->so_snd);
wakeup(&so->so_timeo);
- sbdrop(&so->so_snd, so->so_snd.sb_cc);
sowwakeup(so);
sorwakeup(so);
}
@@ -296,23 +296,48 @@ sonewconn(head, connstatus)
* protocol when it detects that the peer will send no more data.
* Data queued for reading in the socket may yet be read.
*/
+void
+socantsendmore_locked(so)
+ struct socket *so;
+{
+
+ SOCKBUF_LOCK_ASSERT(&so->so_snd);
+
+ so->so_snd.sb_state |= SBS_CANTSENDMORE;
+ sowwakeup_locked(so);
+ mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+}
void
socantsendmore(so)
struct socket *so;
{
- so->so_snd.sb_state |= SBS_CANTSENDMORE;
- sowwakeup(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ socantsendmore_locked(so);
+ mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
void
-socantrcvmore(so)
+socantrcvmore_locked(so)
struct socket *so;
{
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
- sorwakeup(so);
+ sorwakeup_locked(so);
+ mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+}
+
+void
+socantrcvmore(so)
+ struct socket *so;
+{
+
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
/*
@@ -356,9 +381,16 @@ sb_lock(sb)
}
/*
- * Wakeup processes waiting on a socket buffer.
- * Do asynchronous notification via SIGIO
- * if the socket has the SS_ASYNC flag set.
+ * Wakeup processes waiting on a socket buffer. Do asynchronous
+ * notification via SIGIO if the socket has the SS_ASYNC flag set.
+ *
+ * Called with the socket buffer lock held; will release the lock by the end
+ * of the function. This allows the caller to acquire the socket buffer lock
+ * while testing for the need for various sorts of wakeup and hold it through
+ * to the point where it's no longer required. We currently hold the lock
+ * through calls out to other subsystems (with the exception of kqueue), and
+ * then release it to avoid lock order issues. It's not clear that's
+ * correct.
*/
void
sowakeup(so, sb)
@@ -366,19 +398,23 @@ sowakeup(so, sb)
register struct sockbuf *sb;
{
+ SOCKBUF_LOCK_ASSERT(sb);
+
selwakeuppri(&sb->sb_sel, PSOCK);
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_cc);
}
+ KNOTE(&sb->sb_sel.si_note, 0);
+ SOCKBUF_UNLOCK(sb);
if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
pgsigio(&so->so_sigio, SIGIO, 0);
if (sb->sb_flags & SB_UPCALL)
(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
if (sb->sb_flags & SB_AIO)
aio_swake(so, sb);
- KNOTE(&sb->sb_sel.si_note, 0);
+ mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}
/*
@@ -500,17 +536,29 @@ sbreserve(sb, cc, so, td)
* Free mbufs held by a socket, and reserved mbuf space.
*/
void
-sbrelease(sb, so)
+sbrelease_locked(sb, so)
struct sockbuf *sb;
struct socket *so;
{
- sbflush(sb);
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ sbflush_locked(sb);
(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
RLIM_INFINITY);
sb->sb_mbmax = 0;
}
+void
+sbrelease(sb, so)
+ struct sockbuf *sb;
+ struct socket *so;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbrelease_locked(sb, so);
+ SOCKBUF_UNLOCK(sb);
+}
/*
* Routines to add and remove
* data from an mbuf queue.
@@ -542,6 +590,8 @@ sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
struct mbuf *m = sb->sb_mb;
+ SOCKBUF_LOCK_ASSERT(sb);
+
while (m && m->m_nextpkt)
m = m->m_nextpkt;
@@ -561,6 +611,8 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
struct mbuf *m = sb->sb_mb;
struct mbuf *n;
+ SOCKBUF_LOCK_ASSERT(sb);
+
while (m && m->m_nextpkt)
m = m->m_nextpkt;
@@ -583,6 +635,7 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
#endif /* SOCKBUF_DEBUG */
#define SBLINKRECORD(sb, m0) do { \
+ SOCKBUF_LOCK_ASSERT(sb); \
if ((sb)->sb_lastrecord != NULL) \
(sb)->sb_lastrecord->m_nextpkt = (m0); \
else \
@@ -597,14 +650,17 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
* discarded and mbufs are compacted where possible.
*/
void
-sbappend(sb, m)
+sbappend_locked(sb, m)
struct sockbuf *sb;
struct mbuf *m;
{
register struct mbuf *n;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (m == 0)
return;
+
SBLASTRECORDCHK(sb);
n = sb->sb_mb;
if (n) {
@@ -612,7 +668,7 @@ sbappend(sb, m)
n = n->m_nextpkt;
do {
if (n->m_flags & M_EOR) {
- sbappendrecord(sb, m); /* XXXXXX!!!! */
+ sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@@ -625,7 +681,7 @@ sbappend(sb, m)
if ((n = sb->sb_lastrecord) != NULL) {
do {
if (n->m_flags & M_EOR) {
- sbappendrecord(sb, m); /* XXXXXX!!!! */
+ sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@@ -642,13 +698,31 @@ sbappend(sb, m)
}
/*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb. The additional space associated
+ * with the mbuf chain is recorded in sb. Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+void
+sbappend(sb, m)
+ struct sockbuf *sb;
+ struct mbuf *m;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbappend_locked(sb, m);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* This version of sbappend() should only be used when the caller
* absolutely knows that there will never be more than one record
* in the socket buffer, that is, a stream protocol (such as TCP).
*/
void
-sbappendstream(struct sockbuf *sb, struct mbuf *m)
+sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
+ SOCKBUF_LOCK_ASSERT(sb);
KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
@@ -661,6 +735,20 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m)
SBLASTRECORDCHK(sb);
}
+/*
+ * This version of sbappend() should only be used when the caller
+ * absolutely knows that there will never be more than one record
+ * in the socket buffer, that is, a stream protocol (such as TCP).
+ */
+void
+sbappendstream(struct sockbuf *sb, struct mbuf *m)
+{
+
+ SOCKBUF_LOCK(sb);
+ sbappendstream_locked(sb, m);
+ SOCKBUF_UNLOCK(sb);
+}
+
#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
@@ -670,6 +758,8 @@ sbcheck(sb)
struct mbuf *n = 0;
u_long len = 0, mbcnt = 0;
+ SOCKBUF_LOCK_ASSERT(sb);
+
for (m = sb->sb_mb; m; m = n) {
n = m->m_nextpkt;
for (; m; m = m->m_next) {
@@ -692,12 +782,14 @@ sbcheck(sb)
* begins a new record.
*/
void
-sbappendrecord(sb, m0)
+sbappendrecord_locked(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
register struct mbuf *m;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (m0 == 0)
return;
m = sb->sb_mb;
@@ -725,18 +817,35 @@ sbappendrecord(sb, m0)
}
/*
+ * As above, except the mbuf chain
+ * begins a new record.
+ */
+void
+sbappendrecord(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbappendrecord_locked(sb, m0);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* As above except that OOB data
* is inserted at the beginning of the sockbuf,
* but after any other OOB data.
*/
void
-sbinsertoob(sb, m0)
+sbinsertoob_locked(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
register struct mbuf *m;
register struct mbuf **mp;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (m0 == 0)
return;
for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
@@ -771,13 +880,29 @@ sbinsertoob(sb, m0)
}
/*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+void
+sbinsertoob(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbinsertoob_locked(sb, m0);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* Append address and data, and optionally, control (ancillary) data
* to the receive queue of a socket. If present,
* m0 must include a packet header with total length.
* Returns 0 if no space in sockbuf or insufficient mbufs.
*/
int
-sbappendaddr(sb, asa, m0, control)
+sbappendaddr_locked(sb, asa, m0, control)
struct sockbuf *sb;
const struct sockaddr *asa;
struct mbuf *m0, *control;
@@ -785,11 +910,14 @@ sbappendaddr(sb, asa, m0, control)
struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
- panic("sbappendaddr");
+ panic("sbappendaddr_locked");
if (m0)
space += m0->m_pkthdr.len;
space += m_length(control, &n);
+
if (space > sbspace(sb))
return (0);
#if MSIZE <= 256
@@ -819,17 +947,40 @@ sbappendaddr(sb, asa, m0, control)
return (1);
}
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket. If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
+int
+sbappendaddr(sb, asa, m0, control)
+ struct sockbuf *sb;
+ const struct sockaddr *asa;
+ struct mbuf *m0, *control;
+{
+ int retval;
+
+ SOCKBUF_LOCK(sb);
+ retval = sbappendaddr_locked(sb, asa, m0, control);
+ SOCKBUF_UNLOCK(sb);
+ return (retval);
+}
+
int
-sbappendcontrol(sb, m0, control)
+sbappendcontrol_locked(sb, m0, control)
struct sockbuf *sb;
struct mbuf *control, *m0;
{
struct mbuf *m, *n, *mlast;
int space;
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (control == 0)
- panic("sbappendcontrol");
+ panic("sbappendcontrol_locked");
space = m_length(control, &n) + m_length(m0, NULL);
+
if (space > sbspace(sb))
return (0);
n->m_next = m0; /* concatenate data to control */
@@ -849,6 +1000,19 @@ sbappendcontrol(sb, m0, control)
return (1);
}
+int
+sbappendcontrol(sb, m0, control)
+ struct sockbuf *sb;
+ struct mbuf *control, *m0;
+{
+ int retval;
+
+ SOCKBUF_LOCK(sb);
+ retval = sbappendcontrol_locked(sb, m0, control);
+ SOCKBUF_UNLOCK(sb);
+ return (retval);
+}
+
/*
* Compress mbuf chain m into the socket
* buffer sb following mbuf n. If n
@@ -862,6 +1026,8 @@ sbcompress(sb, m, n)
register int eor = 0;
register struct mbuf *o;
+ SOCKBUF_LOCK_ASSERT(sb);
+
while (m) {
eor |= m->m_flags & M_EOR;
if (m->m_len == 0 &&
@@ -914,12 +1080,14 @@ sbcompress(sb, m, n)
* Check that all resources are reclaimed.
*/
void
-sbflush(sb)
+sbflush_locked(sb)
register struct sockbuf *sb;
{
+ SOCKBUF_LOCK_ASSERT(sb);
+
if (sb->sb_flags & SB_LOCK)
- panic("sbflush: locked");
+ panic("sbflush_locked: locked");
while (sb->sb_mbcnt) {
/*
* Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
@@ -927,23 +1095,35 @@ sbflush(sb)
*/
if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
break;
- sbdrop(sb, (int)sb->sb_cc);
+ sbdrop_locked(sb, (int)sb->sb_cc);
}
if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
- panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
+ panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
+}
+
+void
+sbflush(sb)
+ register struct sockbuf *sb;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbflush_locked(sb);
+ SOCKBUF_UNLOCK(sb);
}
/*
* Drop data from (the front of) a sockbuf.
*/
void
-sbdrop(sb, len)
+sbdrop_locked(sb, len)
register struct sockbuf *sb;
register int len;
{
register struct mbuf *m;
struct mbuf *next;
+ SOCKBUF_LOCK_ASSERT(sb);
+
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
while (len > 0) {
if (m == 0) {
@@ -990,15 +1170,31 @@ sbdrop(sb, len)
}
/*
+ * Drop data from (the front of) a sockbuf.
+ */
+void
+sbdrop(sb, len)
+ register struct sockbuf *sb;
+ register int len;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbdrop_locked(sb, len);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* Drop a record off the front of a sockbuf
* and move the next record to the front.
*/
void
-sbdroprecord(sb)
+sbdroprecord_locked(sb)
register struct sockbuf *sb;
{
register struct mbuf *m;
+ SOCKBUF_LOCK_ASSERT(sb);
+
m = sb->sb_mb;
if (m) {
sb->sb_mb = m->m_nextpkt;
@@ -1011,6 +1207,20 @@ sbdroprecord(sb)
}
/*
+ * Drop a record off the front of a sockbuf
+ * and move the next record to the front.
+ */
+void
+sbdroprecord(sb)
+ register struct sockbuf *sb;
+{
+
+ SOCKBUF_LOCK(sb);
+ sbdroprecord_locked(sb);
+ SOCKBUF_UNLOCK(sb);
+}
+
+/*
* Create a "control" mbuf containing the specified data
* with the specified type for presentation on a socket buffer.
*/
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index a5abcee..ea9a6a2 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -353,11 +353,14 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
from = (struct sockaddr *)unp->unp_addr;
else
from = &sun_noname;
- if (sbappendaddr(&so2->so_rcv, from, m, control)) {
+ SOCKBUF_LOCK(&so2->so_rcv);
+ if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
+ SOCKBUF_UNLOCK(&so2->so_rcv);
sorwakeup(so2);
m = NULL;
control = NULL;
} else {
+ SOCKBUF_UNLOCK(&so2->so_rcv);
error = ENOBUFS;
}
if (nam != NULL)
@@ -389,16 +392,17 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
if (unp->unp_conn == NULL)
panic("uipc_send connected but no connection?");
so2 = unp->unp_conn->unp_socket;
+ SOCKBUF_LOCK(&so2->so_rcv);
/*
* Send to paired receive port, and then reduce
* send buffer hiwater marks to maintain backpressure.
* Wake up readers.
*/
if (control != NULL) {
- if (sbappendcontrol(&so2->so_rcv, m, control))
+ if (sbappendcontrol_locked(&so2->so_rcv, m, control))
control = NULL;
} else {
- sbappend(&so2->so_rcv, m);
+ sbappend_locked(&so2->so_rcv, m);
}
so->so_snd.sb_mbmax -=
so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
@@ -408,6 +412,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
(void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
newhiwat, RLIM_INFINITY);
unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
+ SOCKBUF_UNLOCK(&so2->so_rcv);
sorwakeup(so2);
m = NULL;
break;
diff --git a/sys/netkey/keysock.c b/sys/netkey/keysock.c
index 233fb28..ef1b134 100644
--- a/sys/netkey/keysock.c
+++ b/sys/netkey/keysock.c
@@ -41,8 +41,10 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/errno.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 8173715..6ea4bb8 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -319,13 +319,14 @@ struct xsocket {
((sb)->sb_flags |= SB_LOCK), 0)
/* release lock on sockbuf sb */
-#define sbunlock(sb) { \
+#define sbunlock(sb) do { \
+ SOCKBUF_LOCK_ASSERT(sb); \
(sb)->sb_flags &= ~SB_LOCK; \
if ((sb)->sb_flags & SB_WANT) { \
(sb)->sb_flags &= ~SB_WANT; \
wakeup(&(sb)->sb_flags); \
} \
-}
+} while (0)
/*
* soref()/sorele() ref-count the socket structure. Note that you must
@@ -355,14 +356,37 @@ struct xsocket {
SOCK_UNLOCK(so); \
} while(0)
-#define sorwakeup(so) do { \
+/*
+ * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to
+ * avoid a non-atomic test-and-wakeup. However, sowakeup is
+ * responsible for releasing the lock if it is called. We unlock only
+ * if we don't call into sowakeup. If any code is introduced that
+ * directly invokes the underlying sowakeup() primitives, it must
+ * maintain the same semantics.
+ */
+#define sorwakeup_locked(so) do { \
+ SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \
if (sb_notify(&(so)->so_rcv)) \
- sowakeup((so), &(so)->so_rcv); \
+ sowakeup((so), &(so)->so_rcv); \
+ else \
+ SOCKBUF_UNLOCK(&(so)->so_rcv); \
} while (0)
-#define sowwakeup(so) do { \
+#define sorwakeup(so) do { \
+ SOCKBUF_LOCK(&(so)->so_rcv); \
+ sorwakeup_locked(so); \
+} while (0)
+
+#define sowwakeup_locked(so) do { \
if (sb_notify(&(so)->so_snd)) \
sowakeup((so), &(so)->so_snd); \
+ else \
+ SOCKBUF_UNLOCK(&(so)->so_snd); \
+} while (0)
+
+#define sowwakeup(so) do { \
+ SOCKBUF_LOCK(&(so)->so_snd); \
+ sowwakeup_locked(so); \
} while (0)
/*
@@ -412,21 +436,33 @@ struct uio;
int sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type);
int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
void sbappend(struct sockbuf *sb, struct mbuf *m);
+void sbappend_locked(struct sockbuf *sb, struct mbuf *m);
void sbappendstream(struct sockbuf *sb, struct mbuf *m);
+void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m);
int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control);
+int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
+ struct mbuf *m0, struct mbuf *control);
int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control);
+int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
+ struct mbuf *control);
void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
+void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
void sbcheck(struct sockbuf *sb);
void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level);
void sbdrop(struct sockbuf *sb, int len);
+void sbdrop_locked(struct sockbuf *sb, int len);
void sbdroprecord(struct sockbuf *sb);
+void sbdroprecord_locked(struct sockbuf *sb);
void sbflush(struct sockbuf *sb);
+void sbflush_locked(struct sockbuf *sb);
void sbinsertoob(struct sockbuf *sb, struct mbuf *m0);
+void sbinsertoob_locked(struct sockbuf *sb, struct mbuf *m0);
void sbrelease(struct sockbuf *sb, struct socket *so);
+void sbrelease_locked(struct sockbuf *sb, struct socket *so);
int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
struct thread *td);
void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
@@ -438,7 +474,9 @@ struct socket *soalloc(int mflags);
int socheckuid(struct socket *so, uid_t uid);
int sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
void socantrcvmore(struct socket *so);
+void socantrcvmore_locked(struct socket *so);
void socantsendmore(struct socket *so);
+void socantsendmore_locked(struct socket *so);
int soclose(struct socket *so);
int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
int soconnect2(struct socket *so1, struct socket *so2);
OpenPOWER on IntegriCloud