diff options
author | rwatson <rwatson@FreeBSD.org> | 2004-06-02 04:15:39 +0000 |
---|---|---|
committer | rwatson <rwatson@FreeBSD.org> | 2004-06-02 04:15:39 +0000 |
commit | 576b26bafdee61ede4fafa63fd78e21930f5bc2e (patch) | |
tree | 6cd76268318837a9c462ad6283cb7d6cabe7d0e5 /sys/kern/uipc_socket.c | |
parent | a22e58fe44a2fe47575e6ddbeef0217aeebcae2f (diff) | |
download | FreeBSD-src-576b26bafdee61ede4fafa63fd78e21930f5bc2e.zip FreeBSD-src-576b26bafdee61ede4fafa63fd78e21930f5bc2e.tar.gz |
Integrate accept locking from rwatson_netperf, introducing a new
global mutex, accept_mtx, which serializes access to the following
fields across all sockets:
so_qlen so_incqlen so_qstate
so_comp so_incomp so_list
so_head
While providing only coarse granularity, this approach avoids lock
order issues between sockets by avoiding ownership of the fields
by a specific socket and its per-socket mutexes.
While here, rewrite soclose(), sofree(), soaccept(), and
sonewconn() to add assertions, close additional races and address
lock order concerns. In particular:
- Reorganize the optimistic concurrency behavior in accept1() to
always allocate a file descriptor with falloc() so that if we do
find a socket, we don't have to encounter the "Oh, there wasn't
a socket" race that can occur if falloc() sleeps in the current
code, which broke inbound accept() ordering, not to mention
requiring backing out socket state changes in a way that raced
with the protocol level. We may want to add a lockless read of
the queue state if polling of empty queues proves to be important
to optimize.
- In accept1(), soref() the socket while holding the accept lock
so that the socket cannot be free'd in a race with the protocol
layer. Likewise in netgraph equivalents of the accept1() code.
- In sonewconn(), loop waiting for the queue to be small enough to
insert our new socket once we've committed to inserting it, or
races can occur that cause the incomplete socket queue to
overfill. In the previous implementation, it was sufficient
to simply test once since calling soabort() didn't release
synchronization permitting another thread to insert a socket as
we discard a previous one.
- In soclose()/sofree()/et al, it is the responsibility of the
caller to remove a socket from the incomplete connection queue
before calling soabort(), which prevents soabort() from having
to walk into the accept socket to release the socket from its
queue, and avoids races when releasing the accept mutex to enter
soabort(), permitting soabort() to avoid lock ordering issues
with the caller.
- Generally cluster accept queue related operations together
throughout these functions in order to facilitate locking.
Annotate new locking in socketvar.h.
Diffstat (limited to 'sys/kern/uipc_socket.c')
-rw-r--r-- | sys/kern/uipc_socket.c | 71 |
1 files changed, 49 insertions, 22 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 706167e..efbfaf0 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -106,6 +106,9 @@ SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW, &so_zero_copy_send, 0, "Enable zero copy send"); #endif /* ZERO_COPY_SOCKETS */ +struct mtx accept_mtx; +MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF); + /* * Socket operation routines. @@ -266,11 +269,13 @@ solisten(so, backlog, td) splx(s); return (error); } + ACCEPT_LOCK(); if (TAILQ_EMPTY(&so->so_comp)) so->so_options |= SO_ACCEPTCONN; if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; + ACCEPT_UNLOCK(); splx(s); return (0); } @@ -286,25 +291,42 @@ sofree(so) if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0) return; - if (so->so_head != NULL) { - head = so->so_head; - if (so->so_qstate & SQ_INCOMP) { - TAILQ_REMOVE(&head->so_incomp, so, so_list); - head->so_incqlen--; - } else if (so->so_qstate & SQ_COMP) { - /* - * We must not decommission a socket that's - * on the accept(2) queue. If we do, then - * accept(2) may hang after select(2) indicated - * that the listening socket was ready. - */ + + ACCEPT_LOCK(); + head = so->so_head; + if (head != NULL) { + KASSERT((so->so_qstate & SQ_COMP) != 0 || + (so->so_qstate & SQ_INCOMP) != 0, + ("sofree: so_head != NULL, but neither SQ_COMP nor " + "SQ_INCOMP")); + KASSERT((so->so_qstate & SQ_COMP) == 0 || + (so->so_qstate & SQ_INCOMP) == 0, + ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP")); + /* + * accept(2) is responsible draining the completed + * connection queue and freeing those sockets, so + * we just return here if this socket is currently + * on the completed connection queue. Otherwise, + * accept(2) may hang after select(2) has indicating + * that a listening socket was ready. 
If it's an + * incomplete connection, we remove it from the queue + * and free it; otherwise, it won't be released until + * the listening socket is closed. + */ + if ((so->so_qstate & SQ_COMP) != 0) { + ACCEPT_UNLOCK(); return; - } else { - panic("sofree: not queued"); } + TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; so->so_qstate &= ~SQ_INCOMP; so->so_head = NULL; } + KASSERT((so->so_qstate & SQ_COMP) == 0 && + (so->so_qstate & SQ_INCOMP) == 0, + ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)", + so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); + ACCEPT_UNLOCK(); so->so_snd.sb_flags |= SB_NOINTR; (void)sblock(&so->so_snd, M_WAITOK); s = splimp(); @@ -334,22 +356,27 @@ soclose(so) funsetown(&so->so_sigio); if (so->so_options & SO_ACCEPTCONN) { - struct socket *sp, *sonext; - - sp = TAILQ_FIRST(&so->so_incomp); - for (; sp != NULL; sp = sonext) { - sonext = TAILQ_NEXT(sp, so_list); + struct socket *sp; + ACCEPT_LOCK(); + while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { + TAILQ_REMOVE(&so->so_incomp, sp, so_list); + so->so_incqlen--; + sp->so_qstate &= ~SQ_INCOMP; + sp->so_head = NULL; + ACCEPT_UNLOCK(); (void) soabort(sp); + ACCEPT_LOCK(); } - for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) { - sonext = TAILQ_NEXT(sp, so_list); - /* Dequeue from so_comp since sofree() won't do it */ + while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { TAILQ_REMOVE(&so->so_comp, sp, so_list); so->so_qlen--; sp->so_qstate &= ~SQ_COMP; sp->so_head = NULL; + ACCEPT_UNLOCK(); (void) soabort(sp); + ACCEPT_LOCK(); } + ACCEPT_UNLOCK(); } if (so->so_pcb == NULL) goto discard; |