summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/kern/uipc_socket.c36
1 files changed, 35 insertions, 1 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 35355d3..160c89e 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -838,7 +838,7 @@ out:
* data from a socket. For more complete comments, see soreceive(), from
* which this code originated.
*
- * XXXRW: Note that soreceive_rcvoob(), unlike the remainder of soreceve(),
+ * XXXRW: Note that soreceive_rcvoob(), unlike the remainder of soreiceve(),
* is unable to return an mbuf chain to the caller.
*/
static int
@@ -1007,6 +1007,21 @@ restart:
goto restart;
}
dontblock:
+ /*
+ * From this point onward, we maintain 'nextrecord' as a cache of the
+ * pointer to the next record in the socket buffer. We must keep the
+ * various socket buffer pointers and local stack versions of the
+ * pointers in sync, pushing out modifications before dropping the
+ * socket buffer mutex, and re-reading them when picking it up.
+ *
+ * Otherwise, we will race with the network stack appending new data
+ * or records onto the socket buffer by using inconsistent/stale
+ * versions of the field, possibly resulting in socket buffer
+ * corruption.
+ *
+ * By holding the high-level sblock(), we prevent simultaneous
+ * readers from pulling off the front of the socket buffer.
+ */
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (uio->uio_td)
uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
@@ -1031,6 +1046,13 @@ dontblock:
m->m_nextpkt = nextrecord;
}
}
+
+ /*
+ * Process one or more MT_CONTROL mbufs present before any data mbufs
+ * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
+ * just copy the data; if !MSG_PEEK, we call into the protocol to
+ * perform externalization.
+ */
while (m != NULL && m->m_type == MT_CONTROL && error == 0) {
if (flags & MSG_PEEK) {
if (controlp != NULL)
@@ -1085,9 +1107,21 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
+ /*
+ * Now continue to read any data mbufs off of the head of the socket
+ * buffer until the read request is satisfied. Note that 'type' is
+ * used to store the type of any mbuf reads that have happened so far
+ * such that soreceive() can stop reading if the type changes, which
+ * causes soreceive() to return only one of regular data and inline
+ * out-of-band data in a single socket receive operation.
+ */
moff = 0;
offset = 0;
while (m != NULL && uio->uio_resid > 0 && error == 0) {
+ /*
+ * If the type of mbuf has changed since the last mbuf
+ * examined ('type'), end the receive operation.
+ */
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (m->m_type == MT_OOBDATA) {
if (type != MT_OOBDATA)
OpenPOWER on IntegriCloud