diff options
Diffstat (limited to 'sys/kern/uipc_socket.c')
-rw-r--r-- | sys/kern/uipc_socket.c | 36 |
1 files changed, 35 insertions, 1 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 35355d3..160c89e 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -838,7 +838,7 @@ out: * data from a socket. For more complete comments, see soreceive(), from * which this code originated. * - * XXXRW: Note that soreceive_rcvoob(), unlike the remainder of soreceve(), + * XXXRW: Note that soreceive_rcvoob(), unlike the remainder of soreiceve(), * is unable to return an mbuf chain to the caller. */ static int @@ -1007,6 +1007,21 @@ restart: goto restart; } dontblock: + /* + * From this point onward, we maintain 'nextrecord' as a cache of the + * pointer to the next record in the socket buffer. We must keep the + * various socket buffer pointers and local stack versions of the + * pointers in sync, pushing out modifications before dropping the + * socket buffer mutex, and re-reading them when picking it up. + * + * Otherwise, we will race with the network stack appending new data + * or records onto the socket buffer by using inconsistent/stale + * versions of the field, possibly resulting in socket buffer + * corruption. + * + * By holding the high-level sblock(), we prevent simultaneous + * readers from pulling off the front of the socket buffer. + */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++; @@ -1031,6 +1046,13 @@ dontblock: m->m_nextpkt = nextrecord; } } + + /* + * Process one or more MT_CONTROL mbufs present before any data mbufs + * in the first mbuf chain on the socket buffer. If MSG_PEEK, we + * just copy the data; if !MSG_PEEK, we call into the protocol to + * perform externalization. + */ while (m != NULL && m->m_type == MT_CONTROL && error == 0) { if (flags & MSG_PEEK) { if (controlp != NULL) @@ -1085,9 +1107,21 @@ dontblock: SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); + /* + * Now continue to read any data mbufs off of the head of the socket + * buffer until the read request is satisfied. Note that 'type' is + * used to store the type of any mbuf reads that have happened so far + * such that soreceive() can stop reading if the type changes, which + * causes soreceive() to return only one of regular data and inline + * out-of-band data in a single socket receive operation. + */ moff = 0; offset = 0; while (m != NULL && uio->uio_resid > 0 && error == 0) { + /* + * If the type of mbuf has changed since the last mbuf + * examined ('type'), end the receive operation. + */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) |