summaryrefslogtreecommitdiffstats
path: root/sys/kern
diff options
context:
space:
mode:
authorzec <zec@FreeBSD.org>2009-05-05 10:56:12 +0000
committerzec <zec@FreeBSD.org>2009-05-05 10:56:12 +0000
commitd78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd (patch)
tree79a0bccccf2c92504cdf23ad15f7c1813bb3f926 /sys/kern
parent8e4ffe653f6c9ff6da3eed58566ef35e77d530d0 (diff)
downloadFreeBSD-src-d78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd.zip
FreeBSD-src-d78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd.tar.gz
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a dynamically changing thread-local one. The curvnet context should be set on entry to networking code via CURVNET_SET() macros, and reverted to previous state via CURVNET_RESTORE(). Recursions on curvnet are permitted, though strongly discouraged. This change should have no functional impact on nooptions VIMAGE kernel builds, where CURVNET_* macros expand to whitespace. The curthread->td_vnet (aka curvnet) variable's purpose is to be an indicator of the vnet context in which the current network-related operation takes place, in case we cannot deduce the current vnet context from any other source, such as by looking at mbuf's m->m_pkthdr.rcvif->if_vnet, socket's so->so_vnet etc. Moreover, so far curvnet has turned out to be an invaluable consistency checking aid: it helps to catch cases when sockets, ifnets or any other vnet-aware structures may have leaked from one vnet to another. The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros was a result of an empirical iterative process, with an aim to reduce recursions on CURVNET_SET() to a minimum, while still reducing the scope of CURVNET_SET() to networking-only operations - the alternative would be calling CURVNET_SET() on each system call entry. In general, curvnet has to be set in three typical cases: when processing socket-related requests from userspace or from within the kernel; when processing inbound traffic flowing from device drivers to upper layers of the networking stack, and when executing timer-driven networking functions. This change also introduces a DDB subcommand to show the list of all vnet instances. Approved by: julian (mentor)
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/init_main.c4
-rw-r--r--sys/kern/kern_fork.c6
-rw-r--r--sys/kern/kern_linker.c9
-rw-r--r--sys/kern/kern_vimage.c58
-rw-r--r--sys/kern/subr_pcpu.c4
-rw-r--r--sys/kern/sys_socket.c10
-rw-r--r--sys/kern/uipc_socket.c44
-rw-r--r--sys/kern/uipc_syscalls.c17
-rw-r--r--sys/kern/uipc_usrreq.c5
9 files changed, 142 insertions, 15 deletions
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index a8abad8..96ebf36 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/cpuset.h>
+#include <sys/vimage.h>
#include <machine/cpu.h>
@@ -452,6 +453,9 @@ proc0_init(void *dummy __unused)
p->p_ucred->cr_uidinfo = uifind(0);
p->p_ucred->cr_ruidinfo = uifind(0);
p->p_ucred->cr_prison = NULL; /* Don't jail it. */
+#ifdef VIMAGE
+ p->p_ucred->cr_vnet = LIST_FIRST(&vnet_head);
+#endif
#ifdef AUDIT
audit_cred_kproc0(p->p_ucred);
#endif
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index e7e98b1..76695e7 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sdt.h>
#include <sys/sx.h>
#include <sys/signalvar.h>
+#include <sys/vimage.h>
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
@@ -523,6 +524,11 @@ again:
td2->td_sigmask = td->td_sigmask;
td2->td_flags = TDF_INMEM;
+#ifdef VIMAGE
+ td2->td_vnet = NULL;
+ td2->td_vnet_lpush = NULL;
+#endif
+
/*
* Duplicate sub-structures as needed.
* Increase reference counts on shared objects.
diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c
index 9350713..2237107 100644
--- a/sys/kern/kern_linker.c
+++ b/sys/kern/kern_linker.c
@@ -993,6 +993,12 @@ kern_kldload(struct thread *td, const char *file, int *fileid)
return (error);
/*
+ * It's possible that kldloaded module will attach a new ifnet,
+ * so vnet context must be set when this occurs.
+ */
+ CURVNET_SET(TD_TO_VNET(td));
+
+ /*
* If file does not contain a qualified name or any dot in it
* (kldname.ko, or kldname.ver.ko) treat it as an interface
* name.
@@ -1019,6 +1025,7 @@ kern_kldload(struct thread *td, const char *file, int *fileid)
*fileid = lf->id;
unlock:
KLD_UNLOCK();
+ CURVNET_RESTORE();
return (error);
}
@@ -1056,6 +1063,7 @@ kern_kldunload(struct thread *td, int fileid, int flags)
if ((error = priv_check(td, PRIV_KLD_UNLOAD)) != 0)
return (error);
+ CURVNET_SET(TD_TO_VNET(td));
KLD_LOCK();
lf = linker_find_file_by_id(fileid);
if (lf) {
@@ -1092,6 +1100,7 @@ kern_kldunload(struct thread *td, int fileid, int flags)
PMC_CALL_HOOK(td, PMC_FN_KLD_UNLOAD, (void *) &pkm);
#endif
KLD_UNLOCK();
+ CURVNET_RESTORE();
return (error);
}
diff --git a/sys/kern/kern_vimage.c b/sys/kern/kern_vimage.c
index 310e328..21d502e 100644
--- a/sys/kern/kern_vimage.c
+++ b/sys/kern/kern_vimage.c
@@ -31,6 +31,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_ddb.h"
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
@@ -38,6 +40,9 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/vimage.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
#ifndef VIMAGE_GLOBALS
@@ -51,8 +56,6 @@ static int vnet_mod_constructor(struct vnet_modlink *);
static int vnet_mod_destructor(struct vnet_modlink *);
#ifdef VIMAGE
-/* curvnet should be thread-local - this is only a temporary step. */
-struct vnet *curvnet;
struct vnet_list_head vnet_head;
#endif
@@ -183,7 +186,8 @@ vnet_mod_deregister_multi(const struct vnet_modinfo *vmi, void *iarg,
free(vml, M_VIMAGE);
}
-static int vnet_mod_constructor(struct vnet_modlink *vml)
+static int
+vnet_mod_constructor(struct vnet_modlink *vml)
{
const struct vnet_modinfo *vmi = vml->vml_modinfo;
@@ -303,7 +307,9 @@ vi_init(void *unused)
if (vnet == NULL)
panic("vi_alloc: malloc failed");
LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le);
+ vnet->vnet_magic_n = VNET_MAGIC_N;
+ /* We MUST clear curvnet in vi_init_done before going SMP. */
curvnet = LIST_FIRST(&vnet_head);
#endif
}
@@ -313,6 +319,10 @@ vi_init_done(void *unused)
{
struct vnet_modlink *vml_iter;
+#ifdef VIMAGE
+ curvnet = NULL;
+#endif
+
if (TAILQ_EMPTY(&vnet_modpending_head))
return;
@@ -327,5 +337,45 @@ vi_init_done(void *unused)
SYSINIT(vimage, SI_SUB_VIMAGE, SI_ORDER_FIRST, vi_init, NULL);
SYSINIT(vimage_done, SI_SUB_VIMAGE_DONE, SI_ORDER_FIRST, vi_init_done, NULL);
-
#endif /* !VIMAGE_GLOBALS */
+
+#ifdef VIMAGE
+#ifdef DDB
+static void
+db_vnet_ptr(void *arg)
+{
+
+ if (arg)
+ db_printf(" %p", arg);
+ else
+#if SIZE_MAX == UINT32_MAX /* 32-bit arch */
+ db_printf(" 0");
+#else /* 64-bit arch, most probably... */
+ db_printf(" 0");
+#endif
+}
+
+DB_SHOW_COMMAND(vnets, db_show_vnets)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+#if SIZE_MAX == UINT32_MAX /* 32-bit arch */
+ db_printf(" vnet ifs socks");
+ db_printf(" net inet inet6 ipsec netgraph\n");
+#else /* 64-bit arch, most probably... */
+ db_printf(" vnet ifs socks");
+ db_printf(" net inet inet6 ipsec netgraph\n");
+#endif
+ VNET_FOREACH(vnet_iter) {
+ db_printf("%p %3d %5d",
+ vnet_iter, vnet_iter->ifccnt, vnet_iter->sockcnt);
+ db_vnet_ptr(vnet_iter->mod_data[VNET_MOD_NET]);
+ db_vnet_ptr(vnet_iter->mod_data[VNET_MOD_INET]);
+ db_vnet_ptr(vnet_iter->mod_data[VNET_MOD_INET6]);
+ db_vnet_ptr(vnet_iter->mod_data[VNET_MOD_IPSEC]);
+ db_vnet_ptr(vnet_iter->mod_data[VNET_MOD_NETGRAPH]);
+ db_printf("\n");
+ }
+}
+#endif
+#endif /* VIMAGE */
diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c
index ea25aa7..74082c2 100644
--- a/sys/kern/subr_pcpu.c
+++ b/sys/kern/subr_pcpu.c
@@ -135,6 +135,10 @@ show_pcpu(struct pcpu *pc)
db_printf("none\n");
db_show_mdpcpu(pc);
+#ifdef VIMAGE
+ db_printf("curvnet = %p\n", pc->pc_curthread->td_vnet);
+#endif
+
#ifdef WITNESS
db_printf("spin locks held:\n");
witness_list_locks(&pc->pc_spinlocks);
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 813b1dd..61b0361 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/ucred.h>
+#include <sys/vimage.h>
#include <net/if.h>
#include <net/route.h>
@@ -74,16 +75,19 @@ soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
{
struct socket *so = fp->f_data;
-#ifdef MAC
int error;
+#ifdef MAC
SOCK_LOCK(so);
error = mac_socket_check_receive(active_cred, so);
SOCK_UNLOCK(so);
if (error)
return (error);
#endif
- return (soreceive(so, 0, uio, 0, 0, 0));
+ CURVNET_SET(so->so_vnet);
+ error = soreceive(so, 0, uio, 0, 0, 0);
+ CURVNET_RESTORE();
+ return (error);
}
/* ARGSUSED */
@@ -125,6 +129,7 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
struct socket *so = fp->f_data;
int error = 0;
+ CURVNET_SET(so->so_vnet);
switch (cmd) {
case FIONBIO:
SOCK_LOCK(so);
@@ -205,6 +210,7 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
(so, cmd, data, 0, td));
break;
}
+ CURVNET_RESTORE();
return (error);
}
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index a2f7c05..0fd3bee 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -264,7 +264,7 @@ SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
* soalloc() returns a socket with a ref count of 0.
*/
static struct socket *
-soalloc(void)
+soalloc(struct vnet *vnet)
{
struct socket *so;
@@ -286,7 +286,8 @@ soalloc(void)
so->so_gencnt = ++so_gencnt;
++numopensockets;
#ifdef VIMAGE
- so->so_vnet = curvnet;
+ ++vnet->sockcnt; /* locked with so_global_mtx */
+ so->so_vnet = vnet;
#endif
mtx_unlock(&so_global_mtx);
return (so);
@@ -307,6 +308,9 @@ sodealloc(struct socket *so)
mtx_lock(&so_global_mtx);
so->so_gencnt = ++so_gencnt;
--numopensockets; /* Could be below, but faster here. */
+#ifdef VIMAGE
+ --so->so_vnet->sockcnt;
+#endif
mtx_unlock(&so_global_mtx);
if (so->so_rcv.sb_hiwat)
(void)chgsbsize(so->so_cred->cr_uidinfo,
@@ -356,7 +360,7 @@ socreate(int dom, struct socket **aso, int type, int proto,
if (prp->pr_type != type)
return (EPROTOTYPE);
- so = soalloc();
+ so = soalloc(TD_TO_VNET(td));
if (so == NULL)
return (ENOBUFS);
@@ -382,7 +386,9 @@ socreate(int dom, struct socket **aso, int type, int proto,
* Auto-sizing of socket buffers is managed by the protocols and
* the appropriate flags must be set in the pru_attach function.
*/
+ CURVNET_SET(so->so_vnet);
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
+ CURVNET_RESTORE();
if (error) {
KASSERT(so->so_count == 1, ("socreate: so_count %d",
so->so_count));
@@ -424,7 +430,8 @@ sonewconn(struct socket *head, int connstatus)
if (over)
#endif
return (NULL);
- so = soalloc();
+ VNET_ASSERT(head->so_vnet);
+ so = soalloc(head->so_vnet);
if (so == NULL)
return (NULL);
if ((head->so_options & SO_ACCEPTFILTER) != 0)
@@ -496,8 +503,12 @@ sonewconn(struct socket *head, int connstatus)
int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+ int error;
- return ((*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td));
+ CURVNET_SET(so->so_vnet);
+ error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
+ CURVNET_RESTORE();
+ return error;
}
/*
@@ -645,6 +656,7 @@ soclose(struct socket *so)
KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
+ CURVNET_SET(so->so_vnet);
funsetown(&so->so_sigio);
if (so->so_state & SS_ISCONNECTED) {
if ((so->so_state & SS_ISDISCONNECTING) == 0) {
@@ -696,6 +708,7 @@ drop:
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
sorele(so);
+ CURVNET_RESTORE();
return (error);
}
@@ -771,7 +784,9 @@ soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
* biting us.
*/
so->so_error = 0;
+ CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
+ CURVNET_RESTORE();
}
return (error);
@@ -1287,9 +1302,13 @@ int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
+ int error;
- return (so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
- control, flags, td));
+ CURVNET_SET(so->so_vnet);
+ error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
+ control, flags, td);
+ CURVNET_RESTORE();
+ return (error);
}
/*
@@ -2037,6 +2056,7 @@ int
soshutdown(struct socket *so, int how)
{
struct protosw *pr = so->so_proto;
+ int error;
if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
return (EINVAL);
@@ -2045,8 +2065,12 @@ soshutdown(struct socket *so, int how)
}
if (how != SHUT_WR)
sorflush(so);
- if (how != SHUT_RD)
- return ((*pr->pr_usrreqs->pru_shutdown)(so));
+ if (how != SHUT_RD) {
+ CURVNET_SET(so->so_vnet);
+ error = (*pr->pr_usrreqs->pru_shutdown)(so);
+ CURVNET_RESTORE();
+ return (error);
+ }
return (0);
}
@@ -2070,6 +2094,7 @@ sorflush(struct socket *so)
* socket buffer. Don't let our acquire be interrupted by a signal
* despite any existing socket disposition on interruptable waiting.
*/
+ CURVNET_SET(so->so_vnet);
socantrcvmore(so);
(void) sblock(sb, SBL_WAIT | SBL_NOINTR);
@@ -2093,6 +2118,7 @@ sorflush(struct socket *so)
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
sbrelease_internal(&asb, so);
+ CURVNET_RESTORE();
}
/*
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 662b3abd..5689aeb 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/vnode.h>
+#include <sys/vimage.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
@@ -264,7 +265,9 @@ listen(td, uap)
if (error)
goto done;
#endif
+ CURVNET_SET(so->so_vnet);
error = solisten(so, uap->backlog, td);
+ CURVNET_RESTORE();
#ifdef MAC
done:
#endif
@@ -429,7 +432,9 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
tmp = fflag & FASYNC;
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
sa = 0;
+ CURVNET_SET(so->so_vnet);
error = soaccept(so, &sa);
+ CURVNET_RESTORE();
if (error) {
/*
* return a namelen of zero for older code which might
@@ -976,9 +981,11 @@ kern_recvit(td, s, mp, fromseg, controlp)
ktruio = cloneuio(&auio);
#endif
len = auio.uio_resid;
+ CURVNET_SET(so->so_vnet);
error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
(mp->msg_control || controlp) ? &control : (struct mbuf **)0,
&mp->msg_flags);
+ CURVNET_RESTORE();
if (error) {
if (auio.uio_resid != (int)len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
@@ -1322,7 +1329,9 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize)
error = getsock(td->td_proc->p_fd, s, &fp, NULL);
if (error == 0) {
so = fp->f_data;
+ CURVNET_SET(so->so_vnet);
error = sosetopt(so, &sopt);
+ CURVNET_RESTORE();
fdrop(fp, td);
}
return(error);
@@ -1400,7 +1409,9 @@ kern_getsockopt(td, s, level, name, val, valseg, valsize)
error = getsock(td->td_proc->p_fd, s, &fp, NULL);
if (error == 0) {
so = fp->f_data;
+ CURVNET_SET(so->so_vnet);
error = sogetopt(so, &sopt);
+ CURVNET_RESTORE();
*valsize = sopt.sopt_valsize;
fdrop(fp, td);
}
@@ -1463,7 +1474,9 @@ kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
return (error);
so = fp->f_data;
*sa = NULL;
+ CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
+ CURVNET_RESTORE();
if (error)
goto bad;
if (*sa == NULL)
@@ -1564,7 +1577,9 @@ kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
goto done;
}
*sa = NULL;
+ CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
+ CURVNET_RESTORE();
if (error)
goto bad;
if (*sa == NULL)
@@ -2176,9 +2191,11 @@ retry_space:
goto done;
}
SOCKBUF_UNLOCK(&so->so_snd);
+ CURVNET_SET(so->so_vnet);
/* Avoid error aliasing. */
err = (*so->so_proto->pr_usrreqs->pru_send)
(so, 0, m, NULL, NULL, td);
+ CURVNET_RESTORE();
if (err == 0) {
/*
* We need two counters to get the
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 6ddc837..2f33008 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$");
#include <sys/un.h>
#include <sys/unpcb.h>
#include <sys/vnode.h>
+#include <sys/vimage.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -1647,6 +1648,10 @@ static void
unp_init(void)
{
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+#endif
unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, 0);
if (unp_zone == NULL)
OpenPOWER on IntegriCloud