summaryrefslogtreecommitdiffstats
path: root/sys/kern/uipc_socket.c
diff options
context:
space:
mode:
author zec <zec@FreeBSD.org> 2009-05-05 10:56:12 +0000
committer zec <zec@FreeBSD.org> 2009-05-05 10:56:12 +0000
commit d78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd (patch)
tree 79a0bccccf2c92504cdf23ad15f7c1813bb3f926 /sys/kern/uipc_socket.c
parent 8e4ffe653f6c9ff6da3eed58566ef35e77d530d0 (diff)
downloadFreeBSD-src-d78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd.zip
FreeBSD-src-d78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd.tar.gz
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a dynamically changing thread-local one. The curvnet context should be set on entry to networking code via CURVNET_SET() macros, and reverted to previous state via CURVNET_RESTORE(). Recursions on curvnet are permitted, though strongly discouraged. This change should have no functional impact on nooptions VIMAGE kernel builds, where CURVNET_* macros expand to whitespace. The curthread->td_vnet (aka curvnet) variable's purpose is to be an indicator of the vnet context in which the current network-related operation takes place, in case we cannot deduce the current vnet context from any other source, such as by looking at mbuf's m->m_pkthdr.rcvif->if_vnet, socket's so->so_vnet etc. Moreover, so far curvnet has turned out to be an invaluable consistency checking aid: it helps to catch cases when sockets, ifnets or any other vnet-aware structures may have leaked from one vnet to another. The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros was a result of an empirical iterative process, with an aim to reduce recursions on CURVNET_SET() to a minimum, while still reducing the scope of CURVNET_SET() to networking only operations - the alternative would be calling CURVNET_SET() on each system call entry. In general, curvnet has to be set in three typical cases: when processing socket-related requests from userspace or from within the kernel; when processing inbound traffic flowing from device drivers to upper layers of the networking stack, and when executing timer-driven networking functions. This change also introduces a DDB subcommand to show the list of all vnet instances. Approved by: julian (mentor)
Diffstat (limited to 'sys/kern/uipc_socket.c')
-rw-r--r--sys/kern/uipc_socket.c44
1 files changed, 35 insertions, 9 deletions
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index a2f7c05..0fd3bee 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -264,7 +264,7 @@ SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
* soalloc() returns a socket with a ref count of 0.
*/
static struct socket *
-soalloc(void)
+soalloc(struct vnet *vnet)
{
struct socket *so;
@@ -286,7 +286,8 @@ soalloc(void)
so->so_gencnt = ++so_gencnt;
++numopensockets;
#ifdef VIMAGE
- so->so_vnet = curvnet;
+ ++vnet->sockcnt; /* locked with so_global_mtx */
+ so->so_vnet = vnet;
#endif
mtx_unlock(&so_global_mtx);
return (so);
@@ -307,6 +308,9 @@ sodealloc(struct socket *so)
mtx_lock(&so_global_mtx);
so->so_gencnt = ++so_gencnt;
--numopensockets; /* Could be below, but faster here. */
+#ifdef VIMAGE
+ --so->so_vnet->sockcnt;
+#endif
mtx_unlock(&so_global_mtx);
if (so->so_rcv.sb_hiwat)
(void)chgsbsize(so->so_cred->cr_uidinfo,
@@ -356,7 +360,7 @@ socreate(int dom, struct socket **aso, int type, int proto,
if (prp->pr_type != type)
return (EPROTOTYPE);
- so = soalloc();
+ so = soalloc(TD_TO_VNET(td));
if (so == NULL)
return (ENOBUFS);
@@ -382,7 +386,9 @@ socreate(int dom, struct socket **aso, int type, int proto,
* Auto-sizing of socket buffers is managed by the protocols and
* the appropriate flags must be set in the pru_attach function.
*/
+ CURVNET_SET(so->so_vnet);
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
+ CURVNET_RESTORE();
if (error) {
KASSERT(so->so_count == 1, ("socreate: so_count %d",
so->so_count));
@@ -424,7 +430,8 @@ sonewconn(struct socket *head, int connstatus)
if (over)
#endif
return (NULL);
- so = soalloc();
+ VNET_ASSERT(head->so_vnet);
+ so = soalloc(head->so_vnet);
if (so == NULL)
return (NULL);
if ((head->so_options & SO_ACCEPTFILTER) != 0)
@@ -496,8 +503,12 @@ sonewconn(struct socket *head, int connstatus)
int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+ int error;
- return ((*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td));
+ CURVNET_SET(so->so_vnet);
+ error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
+ CURVNET_RESTORE();
+ return error;
}
/*
@@ -645,6 +656,7 @@ soclose(struct socket *so)
KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
+ CURVNET_SET(so->so_vnet);
funsetown(&so->so_sigio);
if (so->so_state & SS_ISCONNECTED) {
if ((so->so_state & SS_ISDISCONNECTING) == 0) {
@@ -696,6 +708,7 @@ drop:
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
sorele(so);
+ CURVNET_RESTORE();
return (error);
}
@@ -771,7 +784,9 @@ soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
* biting us.
*/
so->so_error = 0;
+ CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
+ CURVNET_RESTORE();
}
return (error);
@@ -1287,9 +1302,13 @@ int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
+ int error;
- return (so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
- control, flags, td));
+ CURVNET_SET(so->so_vnet);
+ error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
+ control, flags, td);
+ CURVNET_RESTORE();
+ return (error);
}
/*
@@ -2037,6 +2056,7 @@ int
soshutdown(struct socket *so, int how)
{
struct protosw *pr = so->so_proto;
+ int error;
if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
return (EINVAL);
@@ -2045,8 +2065,12 @@ soshutdown(struct socket *so, int how)
}
if (how != SHUT_WR)
sorflush(so);
- if (how != SHUT_RD)
- return ((*pr->pr_usrreqs->pru_shutdown)(so));
+ if (how != SHUT_RD) {
+ CURVNET_SET(so->so_vnet);
+ error = (*pr->pr_usrreqs->pru_shutdown)(so);
+ CURVNET_RESTORE();
+ return (error);
+ }
return (0);
}
@@ -2070,6 +2094,7 @@ sorflush(struct socket *so)
* socket buffer. Don't let our acquire be interrupted by a signal
* despite any existing socket disposition on interruptable waiting.
*/
+ CURVNET_SET(so->so_vnet);
socantrcvmore(so);
(void) sblock(sb, SBL_WAIT | SBL_NOINTR);
@@ -2093,6 +2118,7 @@ sorflush(struct socket *so)
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
sbrelease_internal(&asb, so);
+ CURVNET_RESTORE();
}
/*
OpenPOWER on IntegriCloud