summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: rwatson <rwatson@FreeBSD.org> 2004-10-18 22:19:43 +0000
committer: rwatson <rwatson@FreeBSD.org> 2004-10-18 22:19:43 +0000
commit: 4b81ce6dd2658abba782e835143f8008092c1c6f (patch)
tree: e18ee35e276065c19e56111431141fc111be57fb
parent: d2f67f65f7a477ab4e37f6a25e978bdd763f830f (diff)
download: FreeBSD-src-4b81ce6dd2658abba782e835143f8008092c1c6f.zip
          FreeBSD-src-4b81ce6dd2658abba782e835143f8008092c1c6f.tar.gz
Push acquisition of the accept mutex out of sofree() into the caller
(sorele()/sotryfree()):

- This permits the caller to acquire the accept mutex before the socket
  mutex, avoiding sofree() having to drop the socket mutex and re-order,
  which could lead to races permitting more than one thread to enter
  sofree() after a socket is ready to be freed.

- This also covers clearing of the so_pcb weak socket reference from the
  protocol to the socket, preventing races in clearing and evaluation of
  the reference such that sofree() might be called more than once on the
  same socket.

This appears to close a race I was able to easily trigger by repeatedly
opening and resetting TCP connections to a host, in which the tcp_close()
code called as a result of the RST raced with the close() of the accepted
socket in the user process, resulting in simultaneous attempts to
de-allocate the same socket.

The new locking increases the overhead for operations that may
potentially free the socket, so we will want to revise the
synchronization strategy here as we normalize the reference counting
model for sockets. The use of the accept mutex in freeing of sockets
that are not listen sockets is primarily motivated by the potential need
to remove the socket from the incomplete connection queue on its parent
(listen) socket, so cleaning up the reference model here may allow us to
substantially weaken the synchronization requirements.

RELENG_5_3 candidate.

MFC after: 3 days
Reviewed by: dwhite
Discussed with: gnn, dwhite, green
Reported by: Marc UBM Bocklet <ubm at u-boot-man dot de>
Reported by: Vlad <marchenko at gmail dot com>
-rw-r--r--  sys/kern/kern_descrip.c  1
-rw-r--r--  sys/kern/uipc_socket.c  7
-rw-r--r--  sys/kern/uipc_usrreq.c  1
-rw-r--r--  sys/net/raw_cb.c  1
-rw-r--r--  sys/net/raw_usrreq.c  1
-rw-r--r--  sys/netatalk/ddp_pcb.c  1
-rw-r--r--  sys/netatm/atm_socket.c  1
-rw-r--r--  sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c  1
-rw-r--r--  sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c  2
-rw-r--r--  sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c  1
-rw-r--r--  sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c  1
-rw-r--r--  sys/netinet/in_pcb.c  1
-rw-r--r--  sys/netinet/tcp_subr.c  1
-rw-r--r--  sys/netinet/tcp_timewait.c  1
-rw-r--r--  sys/netinet6/in6_pcb.c  1
-rw-r--r--  sys/netipx/ipx_pcb.c  1
-rw-r--r--  sys/netipx/ipx_usrreq.c  1
-rw-r--r--  sys/netnatm/natm.c  2
-rw-r--r--  sys/sys/socketvar.h  12
19 files changed, 33 insertions, 5 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 769b8db..9a448b4 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -2063,6 +2063,7 @@ fputsock(struct socket *so)
{
NET_ASSERT_GIANT();
+ ACCEPT_LOCK();
SOCK_LOCK(so);
sorele(so);
}
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 17ecd6a..1a74cff 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -227,6 +227,7 @@ socreate(dom, aso, type, proto, cred, td)
SOCK_UNLOCK(so);
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
if (error) {
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state |= SS_NOFDREF;
sorele(so);
@@ -333,9 +334,8 @@ sofree(so)
{
struct socket *head;
- SOCK_UNLOCK(so);
- ACCEPT_LOCK();
- SOCK_LOCK(so);
+ ACCEPT_LOCK_ASSERT();
+ SOCK_LOCK_ASSERT(so);
if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0 ||
so->so_count != 0) {
@@ -467,6 +467,7 @@ drop:
error = error2;
}
discard:
+ ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 07cdbc7..6b97f94 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -140,6 +140,7 @@ uipc_abort(struct socket *so)
unp_drop(unp, ECONNABORTED);
unp_detach(unp);
UNP_UNLOCK_ASSERT();
+ ACCEPT_LOCK();
SOCK_LOCK(so);
sotryfree(so);
return (0);
diff --git a/sys/net/raw_cb.c b/sys/net/raw_cb.c
index 04b4516..b6ab208 100644
--- a/sys/net/raw_cb.c
+++ b/sys/net/raw_cb.c
@@ -98,6 +98,7 @@ raw_detach(rp)
{
struct socket *so = rp->rcb_socket;
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = 0;
sotryfree(so);
diff --git a/sys/net/raw_usrreq.c b/sys/net/raw_usrreq.c
index 7517743..f444b57 100644
--- a/sys/net/raw_usrreq.c
+++ b/sys/net/raw_usrreq.c
@@ -147,6 +147,7 @@ raw_uabort(struct socket *so)
return EINVAL;
raw_disconnect(rp);
soisdisconnected(so);
+ ACCEPT_LOCK();
SOCK_LOCK(so);
sotryfree(so);
return 0;
diff --git a/sys/netatalk/ddp_pcb.c b/sys/netatalk/ddp_pcb.c
index 8073d55..dd69850 100644
--- a/sys/netatalk/ddp_pcb.c
+++ b/sys/netatalk/ddp_pcb.c
@@ -282,6 +282,7 @@ at_pcbdetach(struct socket *so, struct ddpcb *ddp)
DDP_LOCK_ASSERT(ddp);
soisdisconnected(so);
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/netatm/atm_socket.c b/sys/netatm/atm_socket.c
index 6a027e2..99dbaf6 100644
--- a/sys/netatm/atm_socket.c
+++ b/sys/netatm/atm_socket.c
@@ -173,6 +173,7 @@ atm_sock_detach(so)
/*
* Break links and free control blocks
*/
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c
index d4df5bd..7c347c3 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c
@@ -1417,6 +1417,7 @@ ng_btsocket_hci_raw_detach(struct socket *so)
bzero(pcb, sizeof(*pcb));
FREE(pcb, M_NETGRAPH_BTSOCKET_HCI_RAW);
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
index f52bafa..cb91fd7 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
@@ -1804,6 +1804,7 @@ ng_btsocket_l2cap_rtclean(void *context, int pending)
FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP);
soisdisconnected(so);
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
@@ -2347,6 +2348,7 @@ ng_btsocket_l2cap_detach(struct socket *so)
FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP);
soisdisconnected(so);
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c
index 8103f27..97e9247 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c
@@ -1129,6 +1129,7 @@ ng_btsocket_l2cap_raw_detach(struct socket *so)
bzero(pcb, sizeof(*pcb));
FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP_RAW);
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
index a81afac..1ba7211 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
@@ -724,6 +724,7 @@ ng_btsocket_rfcomm_detach(struct socket *so)
FREE(pcb, M_NETGRAPH_BTSOCKET_RFCOMM);
soisdisconnected(so);
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index ec25655..11e5b0d 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -687,6 +687,7 @@ in_pcbdetach(inp)
inp->inp_gencnt = ++ipi->ipi_gencnt;
in_pcbremlists(inp);
if (so) {
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 7619916..03610b3 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1680,6 +1680,7 @@ tcp_twstart(tp)
}
tcp_discardcb(tp);
so = inp->inp_socket;
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
tw->tw_cred = crhold(so->so_cred);
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index 7619916..03610b3 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -1680,6 +1680,7 @@ tcp_twstart(tp)
}
tcp_discardcb(tp);
so = inp->inp_socket;
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
tw->tw_cred = crhold(so->so_cred);
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index 9bcbeb8..e794103 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -436,6 +436,7 @@ in6_pcbdetach(inp)
in_pcbremlists(inp);
if (so) {
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/netipx/ipx_pcb.c b/sys/netipx/ipx_pcb.c
index 2be7e1d..b1e2cab 100644
--- a/sys/netipx/ipx_pcb.c
+++ b/sys/netipx/ipx_pcb.c
@@ -268,6 +268,7 @@ ipx_pcbdetach(ipxp)
{
struct socket *so = ipxp->ipxp_socket;
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = 0;
sotryfree(so);
diff --git a/sys/netipx/ipx_usrreq.c b/sys/netipx/ipx_usrreq.c
index f07cecb..cff94bc 100644
--- a/sys/netipx/ipx_usrreq.c
+++ b/sys/netipx/ipx_usrreq.c
@@ -424,6 +424,7 @@ ipx_usr_abort(so)
ipx_pcbdetach(ipxp);
splx(s);
soisdisconnected(so);
+ ACCEPT_LOCK();
SOCK_LOCK(so);
sotryfree(so);
return (0);
diff --git a/sys/netnatm/natm.c b/sys/netnatm/natm.c
index 5613b6a..f999f32 100644
--- a/sys/netnatm/natm.c
+++ b/sys/netnatm/natm.c
@@ -135,6 +135,7 @@ natm_usr_detach(struct socket *so)
* we turn on 'drain' *before* we sofree.
*/
npcb_free(npcb, NPCB_DESTROY); /* drain */
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
@@ -464,6 +465,7 @@ struct proc *p;
*/
npcb_free(npcb, NPCB_DESTROY); /* drain */
+ ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index cc24814..391ed3f 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -158,6 +158,8 @@ struct socket {
* until such time as it proves to be a good idea.
*/
extern struct mtx accept_mtx;
+#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED)
+#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED)
#define ACCEPT_LOCK() mtx_lock(&accept_mtx)
#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx)
@@ -344,21 +346,27 @@ struct xsocket {
} while (0)
#define sorele(so) do { \
+ ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
if ((so)->so_count <= 0) \
panic("sorele"); \
if (--(so)->so_count == 0) \
sofree(so); \
- else \
+ else { \
SOCK_UNLOCK(so); \
+ ACCEPT_UNLOCK(); \
+ } \
} while (0)
#define sotryfree(so) do { \
+ ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
if ((so)->so_count == 0) \
sofree(so); \
- else \
+ else { \
SOCK_UNLOCK(so); \
+ ACCEPT_UNLOCK(); \
+ } \
} while(0)
/*
OpenPOWER on IntegriCloud