summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
author: bz <bz@FreeBSD.org> 2016-06-03 13:57:10 +0000
committer: bz <bz@FreeBSD.org> 2016-06-03 13:57:10 +0000
commit: 69cdb2137c13e36cbc467dd745ab15126fc22063 (patch)
tree: b9bad282d3f1a106a25f2d6b11e1232e2b57223b /sys
parent: 28ea44dac8d1daa2b1d50448cbbb48c2f51045fd (diff)
download: FreeBSD-src-69cdb2137c13e36cbc467dd745ab15126fc22063.zip
FreeBSD-src-69cdb2137c13e36cbc467dd745ab15126fc22063.tar.gz
Introduce a per-VNET flag to enable/disable netisr processing on that VNET.
Add accessor functions to toggle the state per VNET. The base system (vnet0) will always enable itself with the normal registration. We will share the registered protocol handlers in all VNETs minimising duplication and management. Upon disabling netisr processing for a VNET drain the netisr queue from packets for that VNET. Update netisr consumers to (de)register on a per-VNET start/teardown using VNET_SYS(UN)INIT functionality. The change should be transparent for non-VIMAGE kernels. Reviewed by: gnn (, hiren) Obtained from: projects/vnet MFC after: 2 weeks Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D6691
Diffstat (limited to 'sys')
-rw-r--r--sys/net/if_epair.c6
-rw-r--r--sys/net/if_ethersubr.c18
-rw-r--r--sys/net/netisr.c150
-rw-r--r--sys/net/netisr.h4
-rw-r--r--sys/net/rtsock.c28
-rw-r--r--sys/netinet/if_ether.c30
-rw-r--r--sys/netinet/ip_input.c14
-rw-r--r--sys/netinet6/ip6_input.c14
8 files changed, 251 insertions, 13 deletions
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
index 943776c..949a417 100644
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -959,6 +959,9 @@ vnet_epair_init(const void *unused __unused)
V_epair_cloner = if_clone_advanced(epairname, 0,
epair_clone_match, epair_clone_create, epair_clone_destroy);
+#ifdef VIMAGE
+ netisr_register_vnet(&epair_nh);
+#endif
}
VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_epair_init, NULL);
@@ -967,6 +970,9 @@ static void
vnet_epair_uninit(const void *unused __unused)
{
+#ifdef VIMAGE
+ netisr_unregister_vnet(&epair_nh);
+#endif
if_clone_detach(V_epair_cloner);
}
VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 9346aec..2542ab6 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -702,12 +702,16 @@ vnet_ether_init(__unused void *arg)
if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
printf("%s: WARNING: unable to register pfil link hook, "
"error %d\n", __func__, i);
+#ifdef VIMAGE
+ netisr_register_vnet(&ether_nh);
+#endif
}
VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
vnet_ether_init, NULL);
+#ifdef VIMAGE
static void
-vnet_ether_destroy(__unused void *arg)
+vnet_ether_pfil_destroy(__unused void *arg)
{
int i;
@@ -715,8 +719,18 @@ vnet_ether_destroy(__unused void *arg)
printf("%s: WARNING: unable to unregister pfil link hook, "
"error %d\n", __func__, i);
}
+VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
+ vnet_ether_pfil_destroy, NULL);
+
+static void
+vnet_ether_destroy(__unused void *arg)
+{
+
+ netisr_unregister_vnet(&ether_nh);
+}
VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
vnet_ether_destroy, NULL);
+#endif
@@ -740,7 +754,9 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
* so assert it is correct here.
*/
KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
+ CURVNET_SET_QUIET(ifp->if_vnet);
netisr_dispatch(NETISR_ETHER, m);
+ CURVNET_RESTORE();
m = mn;
}
}
diff --git a/sys/net/netisr.c b/sys/net/netisr.c
index 492a851..318e1a3 100644
--- a/sys/net/netisr.c
+++ b/sys/net/netisr.c
@@ -210,6 +210,23 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
*/
static struct netisr_proto netisr_proto[NETISR_MAXPROT];
+#ifdef VIMAGE
+/*
+ * The netisr_enable array describes a per-VNET flag for registered
+ * protocols on whether this netisr is active in this VNET or not.
+ * netisr_register() will automatically enable the netisr for the
+ * default VNET and all currently active instances.
+ * netisr_unregister() will disable all active VNETs, including vnet0.
+ * Individual network stack instances can be enabled/disabled by the
+ * netisr_(un)register_vnet() functions.
+ * With this we keep the one netisr_proto per protocol but add a
+ * mechanism to stop netisr processing for vnet teardown.
+ * Apart from that we expect a VNET to always be enabled.
+ */
+static VNET_DEFINE(u_int, netisr_enable[NETISR_MAXPROT]);
+#define V_netisr_enable VNET(netisr_enable)
+#endif
+
/*
* Per-CPU workstream data. See netisr_internal.h for more details.
*/
@@ -352,6 +369,7 @@ sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
void
netisr_register(const struct netisr_handler *nhp)
{
+ VNET_ITERATOR_DECL(vnet_iter);
struct netisr_work *npwp;
const char *name;
u_int i, proto;
@@ -420,6 +438,22 @@ netisr_register(const struct netisr_handler *nhp)
bzero(npwp, sizeof(*npwp));
npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
}
+
+#ifdef VIMAGE
+ /*
+ * Test that we are in vnet0 and have a curvnet set.
+ */
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
+ __func__, curvnet, vnet0));
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_netisr_enable[proto] = 1;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
NETISR_WUNLOCK();
}
@@ -584,6 +618,7 @@ netisr_drain_proto(struct netisr_work *npwp)
void
netisr_unregister(const struct netisr_handler *nhp)
{
+ VNET_ITERATOR_DECL(vnet_iter);
struct netisr_work *npwp;
#ifdef INVARIANTS
const char *name;
@@ -602,6 +637,16 @@ netisr_unregister(const struct netisr_handler *nhp)
("%s(%u): protocol not registered for %s", __func__, proto,
name));
+#ifdef VIMAGE
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_netisr_enable[proto] = 0;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
+
netisr_proto[proto].np_name = NULL;
netisr_proto[proto].np_handler = NULL;
netisr_proto[proto].np_m2flow = NULL;
@@ -616,6 +661,97 @@ netisr_unregister(const struct netisr_handler *nhp)
NETISR_WUNLOCK();
}
+#ifdef VIMAGE
+void
+netisr_register_vnet(const struct netisr_handler *nhp)
+{
+ u_int proto;
+
+ proto = nhp->nh_proto;
+
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+ NETISR_WLOCK();
+ KASSERT(netisr_proto[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ nhp->nh_name));
+
+ V_netisr_enable[proto] = 1;
+ NETISR_WUNLOCK();
+}
+
+static void
+netisr_drain_proto_vnet(struct vnet *vnet, u_int proto)
+{
+ struct netisr_workstream *nwsp;
+ struct netisr_work *npwp;
+ struct mbuf *m, *mp, *n, *ne;
+ u_int i;
+
+ KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__));
+ NETISR_LOCK_ASSERT();
+
+ CPU_FOREACH(i) {
+ nwsp = DPCPU_ID_PTR(i, nws);
+ if (nwsp->nws_intr_event == NULL)
+ continue;
+ npwp = &nwsp->nws_work[proto];
+ NWS_LOCK(nwsp);
+
+ /*
+ * Rather than dissecting and removing mbufs from the middle
+ * of the chain, we build a new chain if the packet stays and
+ * update the head and tail pointers at the end. All packets
+ * matching the given vnet are freed.
+ */
+ m = npwp->nw_head;
+ n = ne = NULL;
+ while (m != NULL) {
+ mp = m;
+ m = m->m_nextpkt;
+ mp->m_nextpkt = NULL;
+ if (mp->m_pkthdr.rcvif->if_vnet != vnet) {
+ if (n == NULL) {
+ n = ne = mp;
+ } else {
+ ne->m_nextpkt = mp;
+ ne = mp;
+ }
+ continue;
+ }
+ /* This is a packet in the selected vnet. Free it. */
+ npwp->nw_len--;
+ m_freem(mp);
+ }
+ npwp->nw_head = n;
+ npwp->nw_tail = ne;
+ NWS_UNLOCK(nwsp);
+ }
+}
+
+void
+netisr_unregister_vnet(const struct netisr_handler *nhp)
+{
+ u_int proto;
+
+ proto = nhp->nh_proto;
+
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+ NETISR_WLOCK();
+ KASSERT(netisr_proto[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ nhp->nh_name));
+
+ V_netisr_enable[proto] = 0;
+
+ netisr_drain_proto_vnet(curvnet, proto);
+ NETISR_WUNLOCK();
+}
+#endif
+
/*
* Compose the global and per-protocol policies on dispatch, and return the
* dispatch policy to use.
@@ -906,6 +1042,13 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(netisr_proto[proto].np_handler != NULL,
("%s: invalid proto %u", __func__, proto));
+#ifdef VIMAGE
+ if (V_netisr_enable[proto] == 0) {
+ m_freem(m);
+ return (ENOPROTOOPT);
+ }
+#endif
+
m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
source, m, &cpuid);
if (m != NULL) {
@@ -952,6 +1095,13 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
proto));
+#ifdef VIMAGE
+ if (V_netisr_enable[proto] == 0) {
+ m_freem(m);
+ return (ENOPROTOOPT);
+ }
+#endif
+
dispatch_policy = netisr_get_dispatch(npp);
if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
return (netisr_queue_src(proto, source, m));
diff --git a/sys/net/netisr.h b/sys/net/netisr.h
index 94a6cc4..63764a7 100644
--- a/sys/net/netisr.h
+++ b/sys/net/netisr.h
@@ -210,6 +210,10 @@ void netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp);
void netisr_register(const struct netisr_handler *nhp);
int netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
void netisr_unregister(const struct netisr_handler *nhp);
+#ifdef VIMAGE
+void netisr_register_vnet(const struct netisr_handler *nhp);
+void netisr_unregister_vnet(const struct netisr_handler *nhp);
+#endif
/*
* Process a packet destined for a protocol, and attempt direct dispatch.
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index c074603..91158b0 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -191,15 +191,33 @@ SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
"maximum routing socket dispatch queue length");
static void
-rts_init(void)
+vnet_rts_init(void)
{
int tmp;
- if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
- rtsock_nh.nh_qlimit = tmp;
- netisr_register(&rtsock_nh);
+ if (IS_DEFAULT_VNET(curvnet)) {
+ if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
+ rtsock_nh.nh_qlimit = tmp;
+ netisr_register(&rtsock_nh);
+ }
+#ifdef VIMAGE
+ else
+ netisr_register_vnet(&rtsock_nh);
+#endif
+}
+VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_rts_init, 0);
+
+#ifdef VIMAGE
+static void
+vnet_rts_uninit(void)
+{
+
+ netisr_unregister_vnet(&rtsock_nh);
}
-SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
+VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_rts_uninit, 0);
+#endif
static int
raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index 48fae92..1a23390 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -143,7 +143,6 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
} while (0)
-static void arp_init(void);
static void arpintr(struct mbuf *);
static void arptimer(void *);
#ifdef INET
@@ -1337,12 +1336,33 @@ arp_iflladdr(void *arg __unused, struct ifnet *ifp)
}
static void
-arp_init(void)
+vnet_arp_init(void)
{
- netisr_register(&arp_nh);
- if (IS_DEFAULT_VNET(curvnet))
+ if (IS_DEFAULT_VNET(curvnet)) {
+ netisr_register(&arp_nh);
iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ }
+#ifdef VIMAGE
+ else
+ netisr_register_vnet(&arp_nh);
+#endif
}
-SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
+VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND,
+ vnet_arp_init, 0);
+
+#ifdef VIMAGE
+/*
+ * We have to unregister ARP along with IP otherwise we risk doing INADDR_HASH
+ * lookups after destroying the hash. Ideally this would go on SI_ORDER_3.5.
+ */
+static void
+vnet_arp_destroy(__unused void *arg)
+{
+
+ netisr_unregister_vnet(&arp_nh);
+}
+VNET_SYSUNINIT(vnet_arp_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_arp_destroy, NULL);
+#endif
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 9afb8d6..f30913e 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -331,8 +331,15 @@ ip_init(void)
__func__);
/* Skip initialization of globals for non-default instances. */
- if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VNET(curvnet)) {
+ netisr_register_vnet(&ip_nh);
+#ifdef RSS
+ netisr_register_vnet(&ip_direct_nh);
+#endif
return;
+ }
+#endif
pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
@@ -366,6 +373,11 @@ ip_destroy(void *unused __unused)
{
int error;
+#ifdef RSS
+ netisr_unregister_vnet(&ip_direct_nh);
+#endif
+ netisr_unregister_vnet(&ip_nh);
+
if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
printf("%s: WARNING: unable to unregister pfil hook, "
"error %d\n", __func__, error);
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index d7fc9ee..a897d6c 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -217,8 +217,15 @@ ip6_init(void)
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
/* Skip global initialization stuff for non-default instances. */
- if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VNET(curvnet)) {
+ netisr_register_vnet(&ip6_nh);
+#ifdef RSS
+ netisr_register_vnet(&ip6_direct_nh);
+#endif
return;
+ }
+#endif
pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
@@ -310,6 +317,11 @@ ip6_destroy(void *unused __unused)
{
int error;
+#ifdef RSS
+ netisr_unregister_vnet(&ip6_direct_nh);
+#endif
+ netisr_unregister_vnet(&ip6_nh);
+
if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
printf("%s: WARNING: unable to unregister pfil hook, "
"error %d\n", __func__, error);
OpenPOWER on IntegriCloud