summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorrwatson <rwatson@FreeBSD.org>2004-07-11 18:29:47 +0000
committerrwatson <rwatson@FreeBSD.org>2004-07-11 18:29:47 +0000
commitb6a69093250e8bef3f07b33e373f866f6e6ff82a (patch)
tree200b19fbe9f02c0d1bf42ac28e644d3ac0aead08 /sys
parent62f362bd8ff68238bbf44b8e713e7e82cbf477ac (diff)
downloadFreeBSD-src-b6a69093250e8bef3f07b33e373f866f6e6ff82a.zip
FreeBSD-src-b6a69093250e8bef3f07b33e373f866f6e6ff82a.tar.gz
Add additional annotations to soreceive(), documenting the effects of
locking on 'nextrecord' and concerns regarding potentially inconsistent or stale use of socket buffer or stack fields if they aren't carefully synchronized whenever the socket buffer mutex is released. Document that the high-level sblock() prevents races against other readers on the socket. Also document the 'type' logic as to how soreceive() guarantees that it will only return one of normal data or inline out-of-band data.
Diffstat (limited to 'sys')
-rw-r--r--sys/kern/uipc_socket.c36
1 files changed, 35 insertions, 1 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 35355d3..160c89e 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -838,7 +838,7 @@ out:
* data from a socket. For more complete comments, see soreceive(), from
* which this code originated.
*
- * XXXRW: Note that soreceive_rcvoob(), unlike the remainder of soreceve(),
+ * XXXRW: Note that soreceive_rcvoob(), unlike the remainder of soreiceve(),
* is unable to return an mbuf chain to the caller.
*/
static int
@@ -1007,6 +1007,21 @@ restart:
goto restart;
}
dontblock:
+ /*
+ * From this point onward, we maintain 'nextrecord' as a cache of the
+ * pointer to the next record in the socket buffer. We must keep the
+ * various socket buffer pointers and local stack versions of the
+ * pointers in sync, pushing out modifications before dropping the
+ * socket buffer mutex, and re-reading them when picking it up.
+ *
+ * Otherwise, we will race with the network stack appending new data
+ * or records onto the socket buffer by using inconsistent/stale
+ * versions of the field, possibly resulting in socket buffer
+ * corruption.
+ *
+ * By holding the high-level sblock(), we prevent simultaneous
+ * readers from pulling off the front of the socket buffer.
+ */
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (uio->uio_td)
uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
@@ -1031,6 +1046,13 @@ dontblock:
m->m_nextpkt = nextrecord;
}
}
+
+ /*
+ * Process one or more MT_CONTROL mbufs present before any data mbufs
+ * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
+ * just copy the data; if !MSG_PEEK, we call into the protocol to
+ * perform externalization.
+ */
while (m != NULL && m->m_type == MT_CONTROL && error == 0) {
if (flags & MSG_PEEK) {
if (controlp != NULL)
@@ -1085,9 +1107,21 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
+ /*
+ * Now continue to read any data mbufs off of the head of the socket
+ * buffer until the read request is satisfied. Note that 'type' is
+ * used to store the type of any mbuf reads that have happened so far
+ * such that soreceive() can stop reading if the type changes, which
+ * causes soreceive() to return only one of regular data and inline
+ * out-of-band data in a single socket receive operation.
+ */
moff = 0;
offset = 0;
while (m != NULL && uio->uio_resid > 0 && error == 0) {
+ /*
+ * If the type of mbuf has changed since the last mbuf
+ * examined ('type'), end the receive operation.
+ */
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (m->m_type == MT_OOBDATA) {
if (type != MT_OOBDATA)
OpenPOWER on IntegriCloud