diff options
author | rwatson <rwatson@FreeBSD.org> | 2004-10-18 22:19:43 +0000 |
---|---|---|
committer | rwatson <rwatson@FreeBSD.org> | 2004-10-18 22:19:43 +0000 |
commit | 4b81ce6dd2658abba782e835143f8008092c1c6f (patch) | |
tree | e18ee35e276065c19e56111431141fc111be57fb | |
parent | d2f67f65f7a477ab4e37f6a25e978bdd763f830f (diff) | |
download | FreeBSD-src-4b81ce6dd2658abba782e835143f8008092c1c6f.zip FreeBSD-src-4b81ce6dd2658abba782e835143f8008092c1c6f.tar.gz |
Push acquisition of the accept mutex out of sofree() into the caller
(sorele()/sotryfree()):
- This permits the caller to acquire the accept mutex before the socket
mutex, avoiding sofree() having to drop the socket mutex and re-order,
which could lead to races permitting more than one thread to enter
sofree() after a socket is ready to be free'd.
- This also covers clearing of the so_pcb weak socket reference from
the protocol to the socket, preventing races in clearing and
evaluation of the reference such that sofree() might be called more
than once on the same socket.
This appears to close a race I was able to easily trigger by repeatedly
opening and resetting TCP connections to a host, in which the
tcp_close() code called as a result of the RST raced with the close()
of the accepted socket in the user process resulting in simultaneous
attempts to de-allocate the same socket. The new locking increases
the overhead for operations that may potentially free the socket, so we
will want to revise the synchronization strategy here as we normalize
the reference counting model for sockets. The use of the accept mutex
in freeing of sockets that are not listen sockets is primarily
motivated by the potential need to remove the socket from the
incomplete connection queue on its parent (listen) socket, so cleaning
up the reference model here may allow us to substantially weaken the
synchronization requirements.
RELENG_5_3 candidate.
MFC after: 3 days
Reviewed by: dwhite
Discussed with: gnn, dwhite, green
Reported by: Marc UBM Bocklet <ubm at u-boot-man dot de>
Reported by: Vlad <marchenko at gmail dot com>
-rw-r--r-- | sys/kern/kern_descrip.c | 1 | ||||
-rw-r--r-- | sys/kern/uipc_socket.c | 7 | ||||
-rw-r--r-- | sys/kern/uipc_usrreq.c | 1 | ||||
-rw-r--r-- | sys/net/raw_cb.c | 1 | ||||
-rw-r--r-- | sys/net/raw_usrreq.c | 1 | ||||
-rw-r--r-- | sys/netatalk/ddp_pcb.c | 1 | ||||
-rw-r--r-- | sys/netatm/atm_socket.c | 1 | ||||
-rw-r--r-- | sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c | 1 | ||||
-rw-r--r-- | sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c | 2 | ||||
-rw-r--r-- | sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c | 1 | ||||
-rw-r--r-- | sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c | 1 | ||||
-rw-r--r-- | sys/netinet/in_pcb.c | 1 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 1 | ||||
-rw-r--r-- | sys/netinet/tcp_timewait.c | 1 | ||||
-rw-r--r-- | sys/netinet6/in6_pcb.c | 1 | ||||
-rw-r--r-- | sys/netipx/ipx_pcb.c | 1 | ||||
-rw-r--r-- | sys/netipx/ipx_usrreq.c | 1 | ||||
-rw-r--r-- | sys/netnatm/natm.c | 2 | ||||
-rw-r--r-- | sys/sys/socketvar.h | 12 |
19 files changed, 33 insertions, 5 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 769b8db..9a448b4 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -2063,6 +2063,7 @@ fputsock(struct socket *so) { NET_ASSERT_GIANT(); + ACCEPT_LOCK(); SOCK_LOCK(so); sorele(so); } diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 17ecd6a..1a74cff 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -227,6 +227,7 @@ socreate(dom, aso, type, proto, cred, td) SOCK_UNLOCK(so); error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); if (error) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state |= SS_NOFDREF; sorele(so); @@ -333,9 +334,8 @@ sofree(so) { struct socket *head; - SOCK_UNLOCK(so); - ACCEPT_LOCK(); - SOCK_LOCK(so); + ACCEPT_LOCK_ASSERT(); + SOCK_LOCK_ASSERT(so); if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0 || so->so_count != 0) { @@ -467,6 +467,7 @@ drop: error = error2; } discard: + ACCEPT_LOCK(); SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 07cdbc7..6b97f94 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -140,6 +140,7 @@ uipc_abort(struct socket *so) unp_drop(unp, ECONNABORTED); unp_detach(unp); UNP_UNLOCK_ASSERT(); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return (0); diff --git a/sys/net/raw_cb.c b/sys/net/raw_cb.c index 04b4516..b6ab208 100644 --- a/sys/net/raw_cb.c +++ b/sys/net/raw_cb.c @@ -98,6 +98,7 @@ raw_detach(rp) { struct socket *so = rp->rcb_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); diff --git a/sys/net/raw_usrreq.c b/sys/net/raw_usrreq.c index 7517743..f444b57 100644 --- a/sys/net/raw_usrreq.c +++ b/sys/net/raw_usrreq.c @@ -147,6 +147,7 @@ raw_uabort(struct socket *so) return EINVAL; raw_disconnect(rp); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return 0; diff --git a/sys/netatalk/ddp_pcb.c b/sys/netatalk/ddp_pcb.c index 8073d55..dd69850 100644 --- a/sys/netatalk/ddp_pcb.c +++ b/sys/netatalk/ddp_pcb.c @@ -282,6 +282,7 @@ at_pcbdetach(struct socket *so, struct ddpcb *ddp) DDP_LOCK_ASSERT(ddp); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netatm/atm_socket.c b/sys/netatm/atm_socket.c index 6a027e2..99dbaf6 100644 --- a/sys/netatm/atm_socket.c +++ b/sys/netatm/atm_socket.c @@ -173,6 +173,7 @@ atm_sock_detach(so) /* * Break links and free control blocks */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c index d4df5bd..7c347c3 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c @@ -1417,6 +1417,7 @@ ng_btsocket_hci_raw_detach(struct socket *so) bzero(pcb, sizeof(*pcb)); FREE(pcb, M_NETGRAPH_BTSOCKET_HCI_RAW); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c index f52bafa..cb91fd7 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c @@ -1804,6 +1804,7 @@ ng_btsocket_l2cap_rtclean(void *context, int pending) FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); @@ -2347,6 +2348,7 @@ ng_btsocket_l2cap_detach(struct socket *so) FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c index 8103f27..97e9247 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c @@ -1129,6 +1129,7 @@ ng_btsocket_l2cap_raw_detach(struct socket *so) bzero(pcb, sizeof(*pcb)); FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP_RAW); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c index a81afac..1ba7211 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c @@ -724,6 +724,7 @@ ng_btsocket_rfcomm_detach(struct socket *so) FREE(pcb, M_NETGRAPH_BTSOCKET_RFCOMM); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index ec25655..11e5b0d 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -687,6 +687,7 @@ in_pcbdetach(inp) inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 7619916..03610b3 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1680,6 +1680,7 @@ tcp_twstart(tp) } tcp_discardcb(tp); so = inp->inp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; tw->tw_cred = crhold(so->so_cred); diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 7619916..03610b3 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -1680,6 +1680,7 @@ tcp_twstart(tp) } tcp_discardcb(tp); so = inp->inp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; tw->tw_cred = crhold(so->so_cred); diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 9bcbeb8..e794103 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -436,6 +436,7 @@ in6_pcbdetach(inp) in_pcbremlists(inp); if (so) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netipx/ipx_pcb.c b/sys/netipx/ipx_pcb.c index 2be7e1d..b1e2cab 100644 --- a/sys/netipx/ipx_pcb.c +++ b/sys/netipx/ipx_pcb.c @@ -268,6 +268,7 @@ ipx_pcbdetach(ipxp) { struct socket *so = ipxp->ipxp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); diff --git a/sys/netipx/ipx_usrreq.c b/sys/netipx/ipx_usrreq.c index f07cecb..cff94bc 100644 --- a/sys/netipx/ipx_usrreq.c +++ b/sys/netipx/ipx_usrreq.c @@ -424,6 +424,7 @@ ipx_usr_abort(so) ipx_pcbdetach(ipxp); splx(s); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return (0); diff --git a/sys/netnatm/natm.c b/sys/netnatm/natm.c index 5613b6a..f999f32 100644 --- a/sys/netnatm/natm.c +++ b/sys/netnatm/natm.c @@ -135,6 +135,7 @@ natm_usr_detach(struct socket *so) * we turn on 'drain' *before* we sofree. */ npcb_free(npcb, NPCB_DESTROY); /* drain */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); @@ -464,6 +465,7 @@ struct proc *p; */ npcb_free(npcb, NPCB_DESTROY); /* drain */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index cc24814..391ed3f 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -158,6 +158,8 @@ struct socket { * until such time as it proves to be a good idea. */ extern struct mtx accept_mtx; +#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED) +#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED) #define ACCEPT_LOCK() mtx_lock(&accept_mtx) #define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx) @@ -344,21 +346,27 @@ struct xsocket { } while (0) #define sorele(so) do { \ + ACCEPT_LOCK_ASSERT(); \ SOCK_LOCK_ASSERT(so); \ if ((so)->so_count <= 0) \ panic("sorele"); \ if (--(so)->so_count == 0) \ sofree(so); \ - else \ + else { \ SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ } while (0) #define sotryfree(so) do { \ + ACCEPT_LOCK_ASSERT(); \ SOCK_LOCK_ASSERT(so); \ if ((so)->so_count == 0) \ sofree(so); \ - else \ + else { \ SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ } while(0) /* |