diff options
author | zec <zec@FreeBSD.org> | 2009-05-05 10:56:12 +0000 |
---|---|---|
committer | zec <zec@FreeBSD.org> | 2009-05-05 10:56:12 +0000 |
commit | d78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd (patch) | |
tree | 79a0bccccf2c92504cdf23ad15f7c1813bb3f926 /sys/net | |
parent | 8e4ffe653f6c9ff6da3eed58566ef35e77d530d0 (diff) | |
download | FreeBSD-src-d78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd.zip FreeBSD-src-d78a1b1a824c4f5eb8cb3583bb5265f73dcc24dd.tar.gz |
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The curvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, socket's so->so_vnet, etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, with an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typical cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/bpf.c | 4 | ||||
-rw-r--r-- | sys/net/if.c | 42 | ||||
-rw-r--r-- | sys/net/if_clone.c | 12 | ||||
-rw-r--r-- | sys/net/if_ethersubr.c | 12 | ||||
-rw-r--r-- | sys/net/if_var.h | 3 | ||||
-rw-r--r-- | sys/net/netisr.c | 4 | ||||
-rw-r--r-- | sys/net/rtsock.c | 9 |
7 files changed, 62 insertions, 24 deletions
diff --git a/sys/net/bpf.c b/sys/net/bpf.c index 5a28e69..d8ed7c2 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -873,11 +873,10 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) m->m_len -= hlen; m->m_data += hlen; /* XXX */ + CURVNET_SET(ifp->if_vnet); #ifdef MAC BPFD_LOCK(d); - CURVNET_SET(ifp->if_vnet); mac_bpfdesc_create_mbuf(d, m); - CURVNET_RESTORE(); if (mc != NULL) mac_bpfdesc_create_mbuf(d, mc); BPFD_UNLOCK(d); @@ -893,6 +892,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) else m_freem(mc); } + CURVNET_RESTORE(); return (error); } diff --git a/sys/net/if.c b/sys/net/if.c index 92bf8a6..a67f31b 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -53,6 +53,7 @@ #include <sys/kernel.h> #include <sys/lock.h> #include <sys/refcount.h> +#include <sys/module.h> #include <sys/rwlock.h> #include <sys/sockio.h> #include <sys/syslog.h> @@ -126,7 +127,6 @@ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void if_freemulti(struct ifmultiaddr *); -static void if_grow(void); static void if_init(void *); static void if_check(void *); static void if_route(struct ifnet *, int flag, int fam); @@ -202,7 +202,7 @@ MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); -static struct ifnet * +struct ifnet * ifnet_byindex_locked(u_short idx) { INIT_VNET_NET(curvnet); @@ -239,7 +239,7 @@ ifnet_byindex_ref(u_short idx) return (ifp); } -static void +void ifnet_setbyindex(u_short idx, struct ifnet *ifp) { INIT_VNET_NET(curvnet); @@ -445,7 +445,7 @@ vnet_net_iattach(const void *unused __unused) return (0); } -static void +void if_grow(void) { INIT_VNET_NET(curvnet); @@ -696,11 +696,13 @@ if_attach(struct ifnet *ifp) mac_ifnet_create(ifp); #endif - ifdev_setbyindex(ifp->if_index, make_dev(&net_cdevsw, - ifp->if_index, UID_ROOT, GID_WHEEL, 0600, "%s/%s", - 
net_cdevsw.d_name, ifp->if_xname)); - make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d", - net_cdevsw.d_name, ifp->if_index); + if (IS_DEFAULT_VNET(curvnet)) { + ifdev_setbyindex(ifp->if_index, make_dev(&net_cdevsw, + ifp->if_index, UID_ROOT, GID_WHEEL, 0600, "%s/%s", + net_cdevsw.d_name, ifp->if_xname)); + make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d", + net_cdevsw.d_name, ifp->if_index); + } ifq_attach(&ifp->if_snd, ifp); @@ -742,13 +744,17 @@ if_attach(struct ifnet *ifp) IFNET_WLOCK(); TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); +#ifdef VIMAGE + curvnet->ifccnt++; +#endif IFNET_WUNLOCK(); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); - devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); + if (IS_DEFAULT_VNET(curvnet)) + devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); @@ -895,6 +901,10 @@ if_detach(struct ifnet *ifp) found = 1; break; } +#ifdef VIMAGE + if (found) + curvnet->ifccnt--; +#endif IFNET_WUNLOCK(); if (!found) return; @@ -943,7 +953,8 @@ if_detach(struct ifnet *ifp) * Clean up all addresses. */ ifp->if_addr = NULL; - destroy_dev(ifdev_byindex(ifp->if_index)); + if (IS_DEFAULT_VNET(curvnet)) + destroy_dev(ifdev_byindex(ifp->if_index)); ifdev_setbyindex(ifp->if_index, NULL); /* We can now free link ifaddr. */ @@ -972,7 +983,8 @@ if_detach(struct ifnet *ifp) /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); - devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); + if (IS_DEFAULT_VNET(curvnet)) + devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if_delgroups(ifp); IF_AFDATA_LOCK(ifp); @@ -1701,8 +1713,10 @@ do_link_state_change(void *arg, int pending) (*lagg_linkstate_p)(ifp, link_state); } - devctl_notify("IFNET", ifp->if_xname, - (link_state == LINK_STATE_UP) ? 
"LINK_UP" : "LINK_DOWN", NULL); + if (IS_DEFAULT_VNET(curvnet)) + devctl_notify("IFNET", ifp->if_xname, + (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", + NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) diff --git a/sys/net/if_clone.c b/sys/net/if_clone.c index 8ffb758..a0140fc 100644 --- a/sys/net/if_clone.c +++ b/sys/net/if_clone.c @@ -39,6 +39,7 @@ #include <sys/systm.h> #include <sys/types.h> #include <sys/socket.h> +#include <sys/vimage.h> #include <net/if.h> #include <net/if_clone.h> @@ -49,6 +50,7 @@ #include <net/if_var.h> #include <net/radix.h> #include <net/route.h> +#include <net/vnet.h> static void if_clone_free(struct if_clone *ifc); static int if_clone_createif(struct if_clone *ifc, char *name, size_t len, @@ -203,15 +205,14 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp) { int err; - if (ifc->ifc_destroy == NULL) { - err = EOPNOTSUPP; - goto done; - } + if (ifc->ifc_destroy == NULL) + return(EOPNOTSUPP); IF_CLONE_LOCK(ifc); IFC_IFLIST_REMOVE(ifc, ifp); IF_CLONE_UNLOCK(ifc); + CURVNET_SET_QUIET(ifp->if_vnet); if_delgroup(ifp, ifc->ifc_name); err = (*ifc->ifc_destroy)(ifc, ifp); @@ -223,8 +224,7 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp) IFC_IFLIST_INSERT(ifc, ifp); IF_CLONE_UNLOCK(ifc); } - -done: + CURVNET_RESTORE(); return (err); } diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 38bf7d4..44f6b23 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -602,6 +602,8 @@ ether_input(struct ifnet *ifp, struct mbuf *m) } #endif + CURVNET_SET_QUIET(ifp->if_vnet); + if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; @@ -638,6 +640,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m) /* Allow monitor mode to claim this frame, after stats are updated. 
*/ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); + CURVNET_RESTORE(); return; } @@ -686,8 +689,10 @@ ether_input(struct ifnet *ifp, struct mbuf *m) ("%s: ng_ether_input_p is NULL", __func__)); m->m_flags &= ~M_PROMISC; (*ng_ether_input_p)(ifp, &m); - if (m == NULL) + if (m == NULL) { + CURVNET_RESTORE(); return; + } } /* @@ -698,8 +703,10 @@ ether_input(struct ifnet *ifp, struct mbuf *m) if (ifp->if_bridge != NULL) { m->m_flags &= ~M_PROMISC; BRIDGE_INPUT(ifp, m); - if (m == NULL) + if (m == NULL) { + CURVNET_RESTORE(); return; + } } #ifdef DEV_CARP @@ -735,6 +742,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m) random_harvest(m, 16, 3, 0, RANDOM_NET); ether_demux(ifp, m); + CURVNET_RESTORE(); } /* diff --git a/sys/net/if_var.h b/sys/net/if_var.h index e6a6a26..18084aa 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -731,7 +731,9 @@ struct ifindex_entry { * to call ifnet_byindex() instead if ifnet_byindex_ref(). */ struct ifnet *ifnet_byindex(u_short idx); +struct ifnet *ifnet_byindex_locked(u_short idx); struct ifnet *ifnet_byindex_ref(u_short idx); +void ifnet_setbyindex(u_short idx, struct ifnet *ifp); /* * Given the index, ifaddr_byindex() returns the one and only @@ -755,6 +757,7 @@ int if_allmulti(struct ifnet *, int); struct ifnet* if_alloc(u_char); void if_attach(struct ifnet *); void if_dead(struct ifnet *); +void if_grow(void); int if_delmulti(struct ifnet *, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_detach(struct ifnet *); diff --git a/sys/net/netisr.c b/sys/net/netisr.c index ed5466c..efbc183 100644 --- a/sys/net/netisr.c +++ b/sys/net/netisr.c @@ -43,6 +43,7 @@ #include <sys/resourcevar.h> #include <sys/sysctl.h> #include <sys/unistd.h> +#include <sys/vimage.h> #include <machine/atomic.h> #include <machine/cpu.h> #include <machine/stdarg.h> @@ -142,7 +143,10 @@ netisr_processqueue(struct netisr *ni) IF_DEQUEUE(ni->ni_queue, m); if (m == NULL) break; + VNET_ASSERT(m->m_pkthdr.rcvif != NULL); + 
CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); ni->ni_handler(m); + CURVNET_RESTORE(); } } diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 943d713..95faba7 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1206,6 +1206,7 @@ rt_ifannouncemsg(struct ifnet *ifp, int what) static void rt_dispatch(struct mbuf *m, const struct sockaddr *sa) { + INIT_VNET_NET(curvnet); struct m_tag *tag; /* @@ -1223,6 +1224,14 @@ rt_dispatch(struct mbuf *m, const struct sockaddr *sa) *(unsigned short *)(tag + 1) = sa->sa_family; m_tag_prepend(m, tag); } +#ifdef VIMAGE + if (V_loif) + m->m_pkthdr.rcvif = V_loif; + else { + m_freem(m); + return; + } +#endif netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */ } |