summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--share/man/man9/netisr.922
-rw-r--r--sys/net/if_epair.c6
-rw-r--r--sys/net/if_ethersubr.c18
-rw-r--r--sys/net/netisr.c150
-rw-r--r--sys/net/netisr.h4
-rw-r--r--sys/net/rtsock.c28
-rw-r--r--sys/netinet/if_ether.c30
-rw-r--r--sys/netinet/ip_input.c14
-rw-r--r--sys/netinet6/ip6_input.c14
9 files changed, 272 insertions, 14 deletions
diff --git a/share/man/man9/netisr.9 b/share/man/man9/netisr.9
index 63112be..ac648d1 100644
--- a/share/man/man9/netisr.9
+++ b/share/man/man9/netisr.9
@@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd January 11, 2015
+.Dd June 3, 2016
.Dt NETISR 9
.Os
.Sh NAME
@@ -61,6 +61,16 @@
.Fn netisr_get_cpucount "void"
.Ft u_int
.Fn netisr_get_cpuid "u_int cpunumber"
+.Pp
+With optional virtual network stack support enabled via the following kernel
+compile option:
+.Bd -ragged -offset indent
+.Cd "options VIMAGE"
+.Ed
+.Ft void
+.Fn netisr_register_vnet "const struct netisr_handler *nhp"
+.Ft void
+.Fn netisr_unregister_vnet "const struct netisr_handler *nhp"
.Sh DESCRIPTION
The
.Nm
@@ -80,6 +90,16 @@ and may also manage queue limits and statistics using the
and
.Fn netisr_setqlimit .
.Pp
+In VIMAGE kernels, each virtual network stack (vnet) other than the
+default base system network stack calls
+.Fn netisr_register_vnet
+and
+.Fn netisr_unregister_vnet
+to enable or disable packet processing by
+.Nm
+for each protocol.
+Disabling also purges any outstanding packets from the protocol queue.
+.Pp
.Nm
supports multi-processor execution of handlers, and relies on a combination
of source ordering and protocol-specific ordering and work-placement
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
index 943776c..949a417 100644
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -959,6 +959,9 @@ vnet_epair_init(const void *unused __unused)
V_epair_cloner = if_clone_advanced(epairname, 0,
epair_clone_match, epair_clone_create, epair_clone_destroy);
+#ifdef VIMAGE
+ netisr_register_vnet(&epair_nh);
+#endif
}
VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_epair_init, NULL);
@@ -967,6 +970,9 @@ static void
vnet_epair_uninit(const void *unused __unused)
{
+#ifdef VIMAGE
+ netisr_unregister_vnet(&epair_nh);
+#endif
if_clone_detach(V_epair_cloner);
}
VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 9346aec..2542ab6 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -702,12 +702,16 @@ vnet_ether_init(__unused void *arg)
if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
printf("%s: WARNING: unable to register pfil link hook, "
"error %d\n", __func__, i);
+#ifdef VIMAGE
+ netisr_register_vnet(&ether_nh);
+#endif
}
VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
vnet_ether_init, NULL);
+#ifdef VIMAGE
static void
-vnet_ether_destroy(__unused void *arg)
+vnet_ether_pfil_destroy(__unused void *arg)
{
int i;
@@ -715,8 +719,18 @@ vnet_ether_destroy(__unused void *arg)
printf("%s: WARNING: unable to unregister pfil link hook, "
"error %d\n", __func__, i);
}
+VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
+ vnet_ether_pfil_destroy, NULL);
+
+static void
+vnet_ether_destroy(__unused void *arg)
+{ /* Stop netisr processing for the ether_nh handler in the current vnet. */
+
+ netisr_unregister_vnet(&ether_nh);
+}
VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
vnet_ether_destroy, NULL);
+#endif
@@ -740,7 +754,9 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
* so assert it is correct here.
*/
KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
+ CURVNET_SET_QUIET(ifp->if_vnet);
netisr_dispatch(NETISR_ETHER, m);
+ CURVNET_RESTORE();
m = mn;
}
}
diff --git a/sys/net/netisr.c b/sys/net/netisr.c
index 492a851..318e1a3 100644
--- a/sys/net/netisr.c
+++ b/sys/net/netisr.c
@@ -210,6 +210,23 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
*/
static struct netisr_proto netisr_proto[NETISR_MAXPROT];
+#ifdef VIMAGE
+/*
+ * The netisr_enable array describes a per-VNET flag for registered
+ * protocols on whether this netisr is active in this VNET or not.
+ * netisr_register() will automatically enable the netisr for the
+ * default VNET and all currently active instances.
+ * netisr_unregister() will disable all active VNETs, including vnet0.
+ * Individual network stack instances can be enabled/disabled by the
+ * netisr_(un)register_vnet() functions.
+ * With this we keep the one netisr_proto per protocol but add a
+ * mechanism to stop netisr processing for vnet teardown.
+ * Apart from that we expect a VNET to always be enabled.
+ */
+static VNET_DEFINE(u_int, netisr_enable[NETISR_MAXPROT]);
+#define V_netisr_enable VNET(netisr_enable)
+#endif
+
/*
* Per-CPU workstream data. See netisr_internal.h for more details.
*/
@@ -352,6 +369,7 @@ sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
void
netisr_register(const struct netisr_handler *nhp)
{
+ VNET_ITERATOR_DECL(vnet_iter);
struct netisr_work *npwp;
const char *name;
u_int i, proto;
@@ -420,6 +438,22 @@ netisr_register(const struct netisr_handler *nhp)
bzero(npwp, sizeof(*npwp));
npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
}
+
+#ifdef VIMAGE
+ /*
+ * Test that we are in vnet0 and have a curvnet set.
+ */
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
+ __func__, curvnet, vnet0));
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_netisr_enable[proto] = 1;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
NETISR_WUNLOCK();
}
@@ -584,6 +618,7 @@ netisr_drain_proto(struct netisr_work *npwp)
void
netisr_unregister(const struct netisr_handler *nhp)
{
+ VNET_ITERATOR_DECL(vnet_iter);
struct netisr_work *npwp;
#ifdef INVARIANTS
const char *name;
@@ -602,6 +637,16 @@ netisr_unregister(const struct netisr_handler *nhp)
("%s(%u): protocol not registered for %s", __func__, proto,
name));
+#ifdef VIMAGE
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_netisr_enable[proto] = 0;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
+
netisr_proto[proto].np_name = NULL;
netisr_proto[proto].np_handler = NULL;
netisr_proto[proto].np_m2flow = NULL;
@@ -616,6 +661,97 @@ netisr_unregister(const struct netisr_handler *nhp)
NETISR_WUNLOCK();
}
+#ifdef VIMAGE
+void
+netisr_register_vnet(const struct netisr_handler *nhp)
+{ /* Enable netisr processing of nhp's protocol in the current vnet. */
+ u_int proto;
+
+ proto = nhp->nh_proto;
+
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+ NETISR_WLOCK();
+ KASSERT(netisr_proto[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ nhp->nh_name)); /* a NULL handler means the protocol is not registered */
+
+ V_netisr_enable[proto] = 1; /* per-vnet flag tested by the queue/dispatch paths */
+ NETISR_WUNLOCK();
+}
+
+static void
+netisr_drain_proto_vnet(struct vnet *vnet, u_int proto)
+{ /* Free all queued packets of protocol proto whose receive interface belongs to vnet. */
+ struct netisr_workstream *nwsp;
+ struct netisr_work *npwp;
+ struct mbuf *m, *mp, *n, *ne;
+ u_int i;
+
+ KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__));
+ NETISR_LOCK_ASSERT(); /* the visible caller, netisr_unregister_vnet(), holds NETISR_WLOCK */
+
+ CPU_FOREACH(i) { /* visit the per-CPU workstream of every CPU */
+ nwsp = DPCPU_ID_PTR(i, nws);
+ if (nwsp->nws_intr_event == NULL) /* NOTE(review): presumably no workstream was started on this CPU - confirm */
+ continue;
+ npwp = &nwsp->nws_work[proto];
+ NWS_LOCK(nwsp);
+
+ /*
+ * Rather than dissecting and removing mbufs from the middle
+ * of the chain, we build a new chain if the packet stays and
+ * update the head and tail pointers at the end. All packets
+ * matching the given vnet are freed.
+ */
+ m = npwp->nw_head;
+ n = ne = NULL; /* head and tail of the rebuilt chain */
+ while (m != NULL) {
+ mp = m;
+ m = m->m_nextpkt; /* advance before mp is unlinked or freed */
+ mp->m_nextpkt = NULL;
+ if (mp->m_pkthdr.rcvif->if_vnet != vnet) { /* packet of another vnet: keep it */
+ if (n == NULL) {
+ n = ne = mp;
+ } else {
+ ne->m_nextpkt = mp;
+ ne = mp;
+ }
+ continue;
+ }
+ /* This is a packet in the selected vnet. Free it. */
+ npwp->nw_len--; /* keep the queue length in step with the chain */
+ m_freem(mp);
+ }
+ npwp->nw_head = n; /* both are NULL when every packet was freed */
+ npwp->nw_tail = ne;
+ NWS_UNLOCK(nwsp);
+ }
+}
+
+void
+netisr_unregister_vnet(const struct netisr_handler *nhp)
+{ /* Disable netisr processing of nhp's protocol in the current vnet and purge its queued packets. */
+ u_int proto;
+
+ proto = nhp->nh_proto;
+
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+ NETISR_WLOCK();
+ KASSERT(netisr_proto[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ nhp->nh_name)); /* a NULL handler means the protocol is not registered */
+
+ V_netisr_enable[proto] = 0; /* queue/dispatch paths now free new packets and return ENOPROTOOPT */
+
+ netisr_drain_proto_vnet(curvnet, proto); /* free packets already queued for this vnet */
+ NETISR_WUNLOCK();
+}
+#endif
+
/*
* Compose the global and per-protocol policies on dispatch, and return the
* dispatch policy to use.
@@ -906,6 +1042,13 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(netisr_proto[proto].np_handler != NULL,
("%s: invalid proto %u", __func__, proto));
+#ifdef VIMAGE
+ if (V_netisr_enable[proto] == 0) {
+ m_freem(m);
+ return (ENOPROTOOPT);
+ }
+#endif
+
m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
source, m, &cpuid);
if (m != NULL) {
@@ -952,6 +1095,13 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
proto));
+#ifdef VIMAGE
+ if (V_netisr_enable[proto] == 0) {
+ m_freem(m);
+ return (ENOPROTOOPT);
+ }
+#endif
+
dispatch_policy = netisr_get_dispatch(npp);
if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
return (netisr_queue_src(proto, source, m));
diff --git a/sys/net/netisr.h b/sys/net/netisr.h
index 94a6cc4..63764a7 100644
--- a/sys/net/netisr.h
+++ b/sys/net/netisr.h
@@ -210,6 +210,10 @@ void netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp);
void netisr_register(const struct netisr_handler *nhp);
int netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
void netisr_unregister(const struct netisr_handler *nhp);
+#ifdef VIMAGE
+void netisr_register_vnet(const struct netisr_handler *nhp);
+void netisr_unregister_vnet(const struct netisr_handler *nhp);
+#endif
/*
* Process a packet destined for a protocol, and attempt direct dispatch.
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index c074603..91158b0 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -191,15 +191,33 @@ SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
"maximum routing socket dispatch queue length");
static void
-rts_init(void)
+vnet_rts_init(void)
{
int tmp;
- if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
- rtsock_nh.nh_qlimit = tmp;
- netisr_register(&rtsock_nh);
+ if (IS_DEFAULT_VNET(curvnet)) {
+ if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
+ rtsock_nh.nh_qlimit = tmp;
+ netisr_register(&rtsock_nh);
+ }
+#ifdef VIMAGE
+ else
+ netisr_register_vnet(&rtsock_nh);
+#endif
+}
+VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_rts_init, 0);
+
+#ifdef VIMAGE
+static void
+vnet_rts_uninit(void)
+{ /* Stop netisr processing for the rtsock_nh handler in the current vnet. */
+
+ netisr_unregister_vnet(&rtsock_nh);
}
-SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
+VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_rts_uninit, 0);
+#endif
static int
raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index 48fae92..1a23390 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -143,7 +143,6 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
} while (0)
-static void arp_init(void);
static void arpintr(struct mbuf *);
static void arptimer(void *);
#ifdef INET
@@ -1337,12 +1336,33 @@ arp_iflladdr(void *arg __unused, struct ifnet *ifp)
}
static void
-arp_init(void)
+vnet_arp_init(void)
{
- netisr_register(&arp_nh);
- if (IS_DEFAULT_VNET(curvnet))
+ if (IS_DEFAULT_VNET(curvnet)) {
+ netisr_register(&arp_nh);
iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ }
+#ifdef VIMAGE
+ else
+ netisr_register_vnet(&arp_nh);
+#endif
}
-SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
+VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND,
+ vnet_arp_init, 0);
+
+#ifdef VIMAGE
+/*
+ * We have to unregister ARP along with IP; otherwise we risk doing INADDR_HASH
+ * lookups after the hash has been destroyed. Ideally this would go on
+ * SI_ORDER_3.5.
+ */
+static void
+vnet_arp_destroy(__unused void *arg)
+{
+
+ netisr_unregister_vnet(&arp_nh); /* stops ARP netisr processing for the current vnet */
+}
+VNET_SYSUNINIT(vnet_arp_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_arp_destroy, NULL);
+#endif
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 9afb8d6..f30913e 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -331,8 +331,15 @@ ip_init(void)
__func__);
/* Skip initialization of globals for non-default instances. */
- if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VNET(curvnet)) {
+ netisr_register_vnet(&ip_nh);
+#ifdef RSS
+ netisr_register_vnet(&ip_direct_nh);
+#endif
return;
+ }
+#endif
pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
@@ -366,6 +373,11 @@ ip_destroy(void *unused __unused)
{
int error;
+#ifdef RSS
+ netisr_unregister_vnet(&ip_direct_nh);
+#endif
+ netisr_unregister_vnet(&ip_nh);
+
if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
printf("%s: WARNING: unable to unregister pfil hook, "
"error %d\n", __func__, error);
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index d7fc9ee..a897d6c 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -217,8 +217,15 @@ ip6_init(void)
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
/* Skip global initialization stuff for non-default instances. */
- if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VNET(curvnet)) {
+ netisr_register_vnet(&ip6_nh);
+#ifdef RSS
+ netisr_register_vnet(&ip6_direct_nh);
+#endif
return;
+ }
+#endif
pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
@@ -310,6 +317,11 @@ ip6_destroy(void *unused __unused)
{
int error;
+#ifdef RSS
+ netisr_unregister_vnet(&ip6_direct_nh);
+#endif
+ netisr_unregister_vnet(&ip6_nh);
+
if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
printf("%s: WARNING: unable to unregister pfil hook, "
"error %d\n", __func__, error);
OpenPOWER on IntegriCloud