diff options
author | bz <bz@FreeBSD.org> | 2016-06-21 13:48:49 +0000 |
---|---|---|
committer | bz <bz@FreeBSD.org> | 2016-06-21 13:48:49 +0000 |
commit | 7a1c0b1ad10703084b50a5b307bbd60603471e1c (patch) | |
tree | f2fb0a7a66c6cdafb62dc189a1c8a15c38cdf22e /sys/net | |
parent | 63f808402a81b996671f13068a235f1e5407c58b (diff) | |
download | FreeBSD-src-7a1c0b1ad10703084b50a5b307bbd60603471e1c.zip FreeBSD-src-7a1c0b1ad10703084b50a5b307bbd60603471e1c.tar.gz |
Get closer to a VIMAGE network stack teardown from top to bottom rather
than removing the network interfaces first. This change is rather large
and convoluted as the ordering requirements cannot be separated.
Move the pfil(9) framework to SI_SUB_PROTO_PFIL, move Firewalls and
related modules to their own SI_SUB_PROTO_FIREWALL.
Move initialization of "physical" interfaces to SI_SUB_DRIVERS,
move virtual (cloned) interfaces to SI_SUB_PSEUDO.
Move Multicast to SI_SUB_PROTO_MC.
Re-work parts of multicast initialisation and teardown, not taking the
huge amount of memory into account if used as a module yet.
For interface teardown we try to do as many of them as we can on
SI_SUB_INIT_IF, but for some this makes no sense, e.g., when tunnelling
over a higher layer protocol such as IP. In that case the interface
has to go along with (or before) the higher layer protocol being shut down.
Kernel hhooks need to go last on teardown as they may be used at various
higher layers and we cannot remove them before we cleaned up the higher
layers.
For interface teardown there are multiple paths:
(a) a cloned interface is destroyed (inside a VIMAGE or in the base system),
(b) any interface is moved from a virtual network stack to a different
network stack ("vmove"), or (c) a virtual network stack is being shut down.
All code paths go through if_detach_internal() where we, depending on the
vmove flag or the vnet state, make a decision on how much to shut down;
in case we are destroying a VNET the individual protocol layers will
cleanup their own parts thus we cannot do so again for each interface as
we end up with, e.g., double-frees, destroying locks twice or acquiring
already destroyed locks.
When calling into protocol cleanups we equally have to tell them
whether they need to detach upper layer protocols ("ulp") or not
(e.g., in6_ifdetach()).
Provide or enhance helper functions to do proper cleanup at a protocol
rather than at an interface level.
Approved by: re (hrs)
Obtained from: projects/vnet
Reviewed by: gnn, jhb
Sponsored by: The FreeBSD Foundation
MFC after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D6747
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/if.c | 106 | ||||
-rw-r--r-- | sys/net/if_bridge.c | 2 | ||||
-rw-r--r-- | sys/net/if_disc.c | 4 | ||||
-rw-r--r-- | sys/net/if_edsc.c | 2 | ||||
-rw-r--r-- | sys/net/if_enc.c | 37 | ||||
-rw-r--r-- | sys/net/if_epair.c | 6 | ||||
-rw-r--r-- | sys/net/if_lagg.c | 2 | ||||
-rw-r--r-- | sys/net/if_loop.c | 4 | ||||
-rw-r--r-- | sys/net/if_vlan.c | 2 | ||||
-rw-r--r-- | sys/net/pfil.c | 11 | ||||
-rw-r--r-- | sys/net/route.c | 2 | ||||
-rw-r--r-- | sys/net/vnet.c | 3 | ||||
-rw-r--r-- | sys/net/vnet.h | 8 |
13 files changed, 144 insertions, 45 deletions
diff --git a/sys/net/if.c b/sys/net/if.c index 4d475d9..5bad9e2 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -914,6 +914,16 @@ if_detach(struct ifnet *ifp) CURVNET_RESTORE(); } +/* + * The vmove flag, if set, indicates that we are called from a callpath + * that is moving an interface to a different vnet instance. + * + * The shutdown flag, if set, indicates that we are called in the + * process of shutting down a vnet instance. Currently only the + * vnet_if_return SYSUNINIT function sets it. Note: we can be called + * on a vnet instance shutdown without this flag being set, e.g., when + * the cloned interfaces are destoyed as first thing of teardown. + */ static int if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { @@ -921,8 +931,10 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) int i; struct domain *dp; struct ifnet *iter; - int found = 0; + int found = 0, shutdown; + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; IFNET_WLOCK(); TAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { @@ -930,10 +942,6 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) found = 1; break; } -#ifdef VIMAGE - if (found) - curvnet->vnet_ifcnt--; -#endif IFNET_WUNLOCK(); if (!found) { /* @@ -951,19 +959,58 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) #endif } - /* Check if this is a cloned interface or not. */ + /* + * At this point we know the interface still was on the ifnet list + * and we removed it so we are in a stable state. + */ +#ifdef VIMAGE + curvnet->vnet_ifcnt--; +#endif + + /* + * In any case (destroy or vmove) detach us from the groups + * and remove/wait for pending events on the taskq. + * XXX-BZ in theory an interface could still enqueue a taskq change? + */ + if_delgroups(ifp); + + taskqueue_drain(taskqueue_swi, &ifp->if_linktask); + + /* + * Check if this is a cloned interface or not. 
Must do even if + * shutting down as a if_vmove_reclaim() would move the ifp and + * the if_clone_addgroup() will have a corrupted string overwise + * from a gibberish pointer. + */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); + if_down(ifp); + /* - * Remove/wait for pending events. + * On VNET shutdown abort here as the stack teardown will do all + * the work top-down for us. + */ + if (shutdown) { + /* + * In case of a vmove we are done here without error. + * If we would signal an error it would lead to the same + * abort as if we did not find the ifnet anymore. + * if_detach() calls us in void context and does not care + * about an early abort notification, so life is splendid :) + */ + goto finish_vnet_shutdown; + } + + /* + * At this point we are not tearing down a VNET and are either + * going to destroy or vmove the interface and have to cleanup + * accordingly. */ - taskqueue_drain(taskqueue_swi, &ifp->if_linktask); /* * Remove routes and flush queues. */ - if_down(ifp); #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); @@ -1018,8 +1065,8 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) } rt_flushifroutes(ifp); - if_delgroups(ifp); +finish_vnet_shutdown: /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the @@ -1048,7 +1095,7 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ -void +static void if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; @@ -1115,6 +1162,7 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; + int shutdown; /* Try to find the prison within our visibility. 
*/ sx_slock(&allprison_lock); @@ -1135,12 +1183,22 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); - CURVNET_RESTORE(); if (difp != NULL) { + CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } + /* Make sure the VNET is stable. */ + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + prison_free(pr); + return (EBUSY); + } + CURVNET_RESTORE(); + /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); @@ -1157,6 +1215,7 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid) struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; + int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); @@ -1184,6 +1243,15 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid) return (EEXIST); } + /* Make sure the VNET is stable. */ + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + prison_free(pr); + return (EBUSY); + } + /* Get interface back from child jail/vnet. */ if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); @@ -2642,8 +2710,22 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) struct ifreq *ifr; int error; int oif_flags; +#ifdef VIMAGE + int shutdown; +#endif CURVNET_SET(so->so_vnet); +#ifdef VIMAGE + /* Make sure the VNET is stable. */ + shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET && + so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 
1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + return (EBUSY); + } +#endif + + switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 4fe5e67..384ef89 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -541,7 +541,7 @@ vnet_bridge_uninit(const void *unused __unused) V_bridge_cloner = NULL; BRIDGE_LIST_LOCK_DESTROY(); } -VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_bridge_uninit, NULL); static int diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c index d7ea59e..3f7472c 100644 --- a/sys/net/if_disc.c +++ b/sys/net/if_disc.c @@ -137,7 +137,7 @@ vnet_disc_init(const void *unused __unused) V_disc_cloner = if_clone_simple(discname, disc_clone_create, disc_clone_destroy, 0); } -VNET_SYSINIT(vnet_disc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_disc_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_disc_init, NULL); static void @@ -146,7 +146,7 @@ vnet_disc_uninit(const void *unused __unused) if_clone_detach(V_disc_cloner); } -VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_disc_uninit, NULL); static int diff --git a/sys/net/if_edsc.c b/sys/net/if_edsc.c index a58cc44..26c2109 100644 --- a/sys/net/if_edsc.c +++ b/sys/net/if_edsc.c @@ -336,7 +336,7 @@ vnet_edsc_uninit(const void *unused __unused) */ if_clone_detach(V_edsc_cloner); } -VNET_SYSUNINIT(vnet_edsc_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_edsc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_edsc_uninit, NULL); /* diff --git a/sys/net/if_enc.c b/sys/net/if_enc.c index e3bf5b8..8e176e7 100644 --- a/sys/net/if_enc.c +++ b/sys/net/if_enc.c @@ -136,7 +136,6 @@ enc_clone_destroy(struct ifnet *ifp) sc = ifp->if_softc; KASSERT(sc == V_enc_sc, ("sc != ifp->if_softc")); - enc_remove_hhooks(sc); bpfdetach(ifp); 
if_detach(ifp); if_free(ifp); @@ -170,10 +169,6 @@ enc_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_softc = sc; if_attach(ifp); bpfattach(ifp, DLT_ENC, sizeof(struct enchdr)); - if (enc_add_hhooks(sc) != 0) { - enc_clone_destroy(ifp); - return (ENXIO); - } return (0); } @@ -369,18 +364,44 @@ vnet_enc_init(const void *unused __unused) V_enc_cloner = if_clone_simple(encname, enc_clone_create, enc_clone_destroy, 1); } -VNET_SYSINIT(vnet_enc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_enc_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_enc_init, NULL); static void +vnet_enc_init_proto(void *unused __unused) +{ + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); + + if (enc_add_hhooks(V_enc_sc) != 0) + enc_clone_destroy(V_enc_sc->sc_ifp); +} +VNET_SYSINIT(vnet_enc_init_proto, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_enc_init_proto, NULL); + +static void vnet_enc_uninit(const void *unused __unused) { + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); if_clone_detach(V_enc_cloner); } -VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_enc_uninit, NULL); +/* + * The hhook consumer needs to go before ip[6]_destroy are called on + * SI_ORDER_THIRD. 
+ */ +static void +vnet_enc_uninit_hhook(const void *unused __unused) +{ + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); + + enc_remove_hhooks(V_enc_sc); +} +VNET_SYSUNINIT(vnet_enc_uninit_hhook, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, + vnet_enc_uninit_hhook, NULL); + static int enc_modevent(module_t mod, int type, void *data) { @@ -401,4 +422,4 @@ static moduledata_t enc_mod = { 0 }; -DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); +DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c index 949a417..86e8379 100644 --- a/sys/net/if_epair.c +++ b/sys/net/if_epair.c @@ -963,7 +963,7 @@ vnet_epair_init(const void *unused __unused) netisr_register_vnet(&epair_nh); #endif } -VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_epair_init, NULL); static void @@ -975,7 +975,7 @@ vnet_epair_uninit(const void *unused __unused) #endif if_clone_detach(V_epair_cloner); } -VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_epair_uninit, NULL); static int @@ -1012,5 +1012,5 @@ static moduledata_t epair_mod = { 0 }; -DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); MODULE_VERSION(if_epair, 1); diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 8b81abf..16f872a 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -271,7 +271,7 @@ vnet_lagg_uninit(const void *unused __unused) if_clone_detach(V_lagg_cloner); LAGG_LIST_LOCK_DESTROY(); } -VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_lagg_uninit, NULL); static int diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c index 1623732..4d7de10 100644 --- 
a/sys/net/if_loop.c +++ b/sys/net/if_loop.c @@ -156,7 +156,7 @@ vnet_loif_init(const void *unused __unused) 1); #endif } -VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_loif_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_loif_init, NULL); #ifdef VIMAGE @@ -167,7 +167,7 @@ vnet_loif_uninit(const void *unused __unused) if_clone_detach(V_lo_cloner); V_loif = NULL; } -VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_loif_uninit, NULL); #endif diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 415be01..73470dc 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -823,7 +823,7 @@ vnet_vlan_uninit(const void *unused __unused) if_clone_detach(V_vlan_cloner); } -VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, +VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_vlan_uninit, NULL); #endif diff --git a/sys/net/pfil.c b/sys/net/pfil.c index 248d183..625bcb8 100644 --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -383,17 +383,14 @@ vnet_pfil_uninit(const void *unused __unused) PFIL_LOCK_DESTROY_REAL(&V_pfil_lock); } -/* Define startup order. */ -#define PFIL_SYSINIT_ORDER SI_SUB_PROTO_BEGIN -#define PFIL_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */ -#define PFIL_VNET_ORDER (PFIL_MODEVENT_ORDER + 2) /* Later still. */ - /* * Starting up. * * VNET_SYSINIT is called for each existing vnet and each new vnet. + * Make sure the pfil bits are first before any possible subsystem which + * might piggyback on the SI_SUB_PROTO_PFIL. */ -VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, +VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, vnet_pfil_init, NULL); /* @@ -401,5 +398,5 @@ VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, * * VNET_SYSUNINIT is called for each exiting vnet as it exits. 
*/ -VNET_SYSUNINIT(vnet_pfil_uninit, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, +VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, vnet_pfil_uninit, NULL); diff --git a/sys/net/route.c b/sys/net/route.c index f2f8897..d7b3b96 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -334,7 +334,7 @@ vnet_route_uninit(const void *unused __unused) free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } -VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, +VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, vnet_route_uninit, 0); #endif diff --git a/sys/net/vnet.c b/sys/net/vnet.c index afb836a..b2cad63 100644 --- a/sys/net/vnet.c +++ b/sys/net/vnet.c @@ -331,8 +331,7 @@ vnet_init_done(void *unused __unused) curvnet = NULL; } - -SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done, +SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_init_done, NULL); /* diff --git a/sys/net/vnet.h b/sys/net/vnet.h index cc23f87..4985d4b 100644 --- a/sys/net/vnet.h +++ b/sys/net/vnet.h @@ -111,8 +111,8 @@ vnet_##name##_init(const void *unused) \ { \ VNET_PCPUSTAT_ALLOC(name, M_WAITOK); \ } \ -VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_PROTO_IFATTACHDOMAIN, \ - SI_ORDER_ANY, vnet_ ## name ## _init, NULL) +VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_INIT_IF, \ + SI_ORDER_FIRST, vnet_ ## name ## _init, NULL) #define VNET_PCPUSTAT_SYSUNINIT(name) \ static void \ @@ -120,8 +120,8 @@ vnet_##name##_uninit(const void *unused) \ { \ VNET_PCPUSTAT_FREE(name); \ } \ -VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_PROTO_IFATTACHDOMAIN, \ - SI_ORDER_ANY, vnet_ ## name ## _uninit, NULL) +VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_INIT_IF, \ + SI_ORDER_FIRST, vnet_ ## name ## _uninit, NULL) #ifdef SYSCTL_OID #define SYSCTL_VNET_PCPUSTAT(parent, nbr, name, type, array, desc) \ |