diff options
author | hrs <hrs@FreeBSD.org> | 2015-09-12 20:36:39 +0000 |
---|---|---|
committer | hrs <hrs@FreeBSD.org> | 2015-09-12 20:36:39 +0000 |
commit | 8bd36880a406e727629e7659159c6c7fd7798ef6 (patch) | |
tree | 0cfbaefd9ede06268578e5130a7758b721dbf251 /sys/net | |
parent | 8cb3d8b81705f6463338c64cc79dc1271df692c7 (diff) | |
download | FreeBSD-src-8bd36880a406e727629e7659159c6c7fd7798ef6.zip FreeBSD-src-8bd36880a406e727629e7659159c6c7fd7798ef6.tar.gz |
MFC 272159,272161,272386,272446,272547,272548,273210:
- Make lagg protos an enum.
- When reconfiguring the protocol on a lagg, first set it to LAGG_PROTO_NONE,
then drop the lock, run the attach routines, and then set it to the specific
proto. This removes tons of WITNESS warnings.
- Make lagg protocol attach handlers unable to fail; they now allocate
memory with M_WAITOK.
- Virtualize lagg(4) cloner. This change fixes a panic when tearing down
if_lagg(4) interfaces which were cloned in a vnet jail.
Sysctl nodes which are dynamically generated for each cloned interface
(net.link.lagg.N.*) have been removed, and use_flowid and flowid_shift
ifconfig(8) parameters have been added instead. Flags and per-interface
statistics counters are displayed in "ifconfig -v".
- Move option handling out of SIOC[SG]LAGG into the new SIOC[SG]LAGGOPTS for
backward compatibility with old ifconfig(8).
- Move L2 addr configuration for the primary port to a taskqueue. This fixes
a LOR (lock order reversal) of the softc rmlock in iflladdr_event handlers.
- Call if_delmulti_ifma() after LACP_UNLOCK(). This fixes another LOR.
- Fix a panic in lacp_transit_expire().
- Fix a panic in lagg_input() upon shutting down a port.
- Use printb() for boolean flags in ro_opts and actor_state for LACP.
- Fix lladdr configuration which could prevent LACP mode from working.
- Fix LORs when a laggport interface has an IPv6 LLA.
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/ieee8023ad_lacp.c | 91 | ||||
-rw-r--r-- | sys/net/ieee8023ad_lacp.h | 6 | ||||
-rw-r--r-- | sys/net/if_lagg.c | 462 | ||||
-rw-r--r-- | sys/net/if_lagg.h | 47 |
4 files changed, 329 insertions, 277 deletions
diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c index e0fd776..3a0de01 100644 --- a/sys/net/ieee8023ad_lacp.c +++ b/sys/net/ieee8023ad_lacp.c @@ -187,15 +187,15 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *, static void lacp_dprintf(const struct lacp_port *, const char *, ...) __attribute__((__format__(__printf__, 2, 3))); -static int lacp_debug = 0; +static VNET_DEFINE(int, lacp_debug); +#define V_lacp_debug VNET(lacp_debug) SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad"); -SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN, - &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)"); -TUNABLE_INT("net.link.lagg.lacp.debug", &lacp_debug); +SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET, + &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)"); -#define LACP_DPRINTF(a) if (lacp_debug & 0x01) { lacp_dprintf a ; } -#define LACP_TRACE(a) if (lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } -#define LACP_TPRINTF(a) if (lacp_debug & 0x04) { lacp_dprintf a ; } +#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; } +#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } +#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; } /* * partner administration variables. 
@@ -298,7 +298,7 @@ lacp_pdu_input(struct lacp_port *lp, struct mbuf *m) goto bad; } - if (lacp_debug > 0) { + if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu receive\n"); lacp_dump_lacpdu(du); } @@ -383,7 +383,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp) sizeof(du->ldu_collector)); du->ldu_collector.lci_maxdelay = 0; - if (lacp_debug > 0) { + if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu transmit\n"); lacp_dump_lacpdu(du); } @@ -495,12 +495,14 @@ lacp_tick(void *arg) if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) continue; + CURVNET_SET(lp->lp_ifp->if_vnet); lacp_run_timers(lp); lacp_select(lp); lacp_sm_mux(lp); lacp_sm_tx(lp); lacp_sm_ptx_tx_schedule(lp); + CURVNET_RESTORE(); } callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); } @@ -577,12 +579,13 @@ lacp_port_destroy(struct lagg_port *lgp) lacp_disable_distributing(lp); lacp_unselect(lp); + LIST_REMOVE(lp, lp_next); + LACP_UNLOCK(lsc); + /* The address may have already been removed by if_purgemaddrs() */ if (!lgp->lp_detaching) if_delmulti_ifma(lp->lp_ifma); - LIST_REMOVE(lp, lp_next); - LACP_UNLOCK(lsc); free(lp, M_DEVBUF); } @@ -743,58 +746,19 @@ lacp_transit_expire(void *vp) LACP_LOCK_ASSERT(lsc); + CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet); LACP_TRACE(NULL); + CURVNET_RESTORE(); lsc->lsc_suppress_distributing = FALSE; } -static void -lacp_attach_sysctl(struct lacp_softc *lsc, struct sysctl_oid *p_oid) -{ - struct lagg_softc *sc = lsc->lsc_softc; - - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(p_oid), OID_AUTO, - "lacp_strict_mode", - CTLFLAG_RW, - &lsc->lsc_strict_mode, - lsc->lsc_strict_mode, - "Enable LACP strict mode"); -} - -static void -lacp_attach_sysctl_debug(struct lacp_softc *lsc, struct sysctl_oid *p_oid) -{ - struct lagg_softc *sc = lsc->lsc_softc; - struct sysctl_oid *oid; - - /* Create a child of the parent lagg interface */ - oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(p_oid), - OID_AUTO, "debug", CTLFLAG_RD, NULL, "DEBUG"); - - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(oid), 
OID_AUTO, - "rx_test", - CTLFLAG_RW, - &lsc->lsc_debug.lsc_rx_test, - lsc->lsc_debug.lsc_rx_test, - "Bitmap of if_dunit entries to drop RX frames for"); - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "tx_test", - CTLFLAG_RW, - &lsc->lsc_debug.lsc_tx_test, - lsc->lsc_debug.lsc_tx_test, - "Bitmap of if_dunit entries to drop TX frames for"); -} - -int +void lacp_attach(struct lagg_softc *sc) { struct lacp_softc *lsc; - struct sysctl_oid *oid; - lsc = malloc(sizeof(struct lacp_softc), - M_DEVBUF, M_NOWAIT|M_ZERO); - if (lsc == NULL) - return (ENOMEM); + lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_psc = (caddr_t)lsc; lsc->lsc_softc = sc; @@ -806,35 +770,24 @@ lacp_attach(struct lagg_softc *sc) TAILQ_INIT(&lsc->lsc_aggregators); LIST_INIT(&lsc->lsc_ports); - /* Create a child of the parent lagg interface */ - oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(sc->sc_oid), - OID_AUTO, "lacp", CTLFLAG_RD, NULL, "LACP"); - - /* Attach sysctl nodes */ - lacp_attach_sysctl(lsc, oid); - lacp_attach_sysctl_debug(lsc, oid); - callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0); callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0); /* if the lagg is already up then do the same */ if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) lacp_init(sc); - - return (0); } int -lacp_detach(struct lagg_softc *sc) +lacp_detach(void *psc) { - struct lacp_softc *lsc = LACP_SOFTC(sc); + struct lacp_softc *lsc = (struct lacp_softc *)psc; KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators), ("aggregators still active")); KASSERT(lsc->lsc_active_aggregator == NULL, ("aggregator still attached")); - sc->sc_psc = NULL; callout_drain(&lsc->lsc_transit_callout); callout_drain(&lsc->lsc_callout); @@ -883,7 +836,7 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m) return (NULL); } - if (sc->use_flowid && + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) hash = m->m_pkthdr.flowid >> sc->flowid_shift; else @@ -1425,7 
+1378,7 @@ lacp_sm_mux(struct lacp_port *lp) enum lacp_selected selected = lp->lp_selected; struct lacp_aggregator *la; - if (lacp_debug > 1) + if (V_lacp_debug > 1) lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, " "p_sync= 0x%x, p_collecting= 0x%x\n", __func__, lp->lp_mux_state, selected, p_sync, p_collecting); diff --git a/sys/net/ieee8023ad_lacp.h b/sys/net/ieee8023ad_lacp.h index ca5f76e..6dbdb95 100644 --- a/sys/net/ieee8023ad_lacp.h +++ b/sys/net/ieee8023ad_lacp.h @@ -75,6 +75,7 @@ "\007DEFAULTED" \ "\010EXPIRED" +#ifdef _KERNEL /* * IEEE802.3 slow protocols * @@ -282,8 +283,8 @@ struct lacp_softc { struct mbuf *lacp_input(struct lagg_port *, struct mbuf *); struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *); -int lacp_attach(struct lagg_softc *); -int lacp_detach(struct lagg_softc *); +void lacp_attach(struct lagg_softc *); +int lacp_detach(void *); void lacp_init(struct lagg_softc *); void lacp_stop(struct lagg_softc *); int lacp_port_create(struct lagg_port *); @@ -336,3 +337,4 @@ lacp_isdistributing(struct lagg_port *lgp) #define LACP_LAGIDSTR_MAX \ (1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1) #define LACP_STATESTR_MAX (255) /* XXX */ +#endif /* _KERNEL */ diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 45d31a7..d62f148 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <net/if_types.h> #include <net/if_var.h> #include <net/bpf.h> +#include <net/vnet.h> #if defined(INET) || defined(INET6) #include <netinet/in.h> @@ -81,13 +82,21 @@ static struct { {0, NULL} }; -SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */ -static struct mtx lagg_list_mtx; +VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ +#define V_lagg_list VNET(lagg_list) +static VNET_DEFINE(struct mtx, lagg_list_mtx); +#define V_lagg_list_mtx VNET(lagg_list_mtx) +#define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ + "if_lagg list", NULL, MTX_DEF) 
+#define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx) +#define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx) +#define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx) eventhandler_tag lagg_detach_cookie = NULL; static int lagg_clone_create(struct if_clone *, int, caddr_t); static void lagg_clone_destroy(struct ifnet *); -static struct if_clone *lagg_cloner; +static VNET_DEFINE(struct if_clone *, lagg_cloner); +#define V_lagg_cloner VNET(lagg_cloner) static const char laggname[] = "lagg"; static void lagg_lladdr(struct lagg_softc *, uint8_t *); @@ -122,24 +131,23 @@ static void lagg_media_status(struct ifnet *, struct ifmediareq *); static struct lagg_port *lagg_link_active(struct lagg_softc *, struct lagg_port *); static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *); -static int lagg_sysctl_active(SYSCTL_HANDLER_ARGS); /* Simple round robin */ -static int lagg_rr_attach(struct lagg_softc *); +static void lagg_rr_attach(struct lagg_softc *); static int lagg_rr_detach(struct lagg_softc *); static int lagg_rr_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Active failover */ -static int lagg_fail_attach(struct lagg_softc *); +static void lagg_fail_attach(struct lagg_softc *); static int lagg_fail_detach(struct lagg_softc *); static int lagg_fail_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Loadbalancing */ -static int lagg_lb_attach(struct lagg_softc *); +static void lagg_lb_attach(struct lagg_softc *); static int lagg_lb_detach(struct lagg_softc *); static int lagg_lb_port_create(struct lagg_port *); static void lagg_lb_port_destroy(struct lagg_port *); @@ -149,7 +157,7 @@ static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *, static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *); /* 802.3ad LACP */ -static int 
lagg_lacp_attach(struct lagg_softc *); +static void lagg_lacp_attach(struct lagg_softc *); static int lagg_lacp_detach(struct lagg_softc *); static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, @@ -159,9 +167,9 @@ static void lagg_lacp_lladdr(struct lagg_softc *); static void lagg_callout(void *); /* lagg protocol table */ -static const struct { - int ti_proto; - int (*ti_attach)(struct lagg_softc *); +static const struct lagg_proto { + lagg_proto ti_proto; + void (*ti_attach)(struct lagg_softc *); } lagg_protos[] = { { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach }, { LAGG_PROTO_FAILOVER, lagg_fail_attach }, @@ -175,31 +183,55 @@ SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); -static int lagg_failover_rx_all = 0; /* Allow input on any failover links */ -SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW, - &lagg_failover_rx_all, 0, +/* Allow input on any failover links */ +static VNET_DEFINE(int, lagg_failover_rx_all); +#define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) +SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(lagg_failover_rx_all), 0, "Accept input from any interface in a failover lagg"); -static int def_use_flowid = 1; /* Default value for using flowid */ -TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid); -SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW, - &def_use_flowid, 0, + +/* Default value for using M_FLOWID */ +static VNET_DEFINE(int, def_use_flowid) = 1; +#define V_def_use_flowid VNET(def_use_flowid) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, + &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); -static int def_flowid_shift = 16; /* Default value for using flow shift */ -TUNABLE_INT("net.link.lagg.default_flowid_shift", &def_flowid_shift); 
-SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RW, - &def_flowid_shift, 0, + +/* Default value for using M_FLOWID */ +static VNET_DEFINE(int, def_flowid_shift) = 16; +#define V_def_flowid_shift VNET(def_flowid_shift) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, + &VNET_NAME(def_flowid_shift), 0, "Default setting for flowid shift for load sharing"); +static void +vnet_lagg_init(const void *unused __unused) +{ + + LAGG_LIST_LOCK_INIT(); + SLIST_INIT(&V_lagg_list); + V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create, + lagg_clone_destroy, 0); +} +VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_lagg_init, NULL); + +static void +vnet_lagg_uninit(const void *unused __unused) +{ + + if_clone_detach(V_lagg_cloner); + LAGG_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_lagg_uninit, NULL); + static int lagg_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF); - SLIST_INIT(&lagg_list); - lagg_cloner = if_clone_simple(laggname, lagg_clone_create, - lagg_clone_destroy, 0); lagg_input_p = lagg_input; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( @@ -209,10 +241,8 @@ lagg_modevent(module_t mod, int type, void *data) case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); - if_clone_detach(lagg_cloner); lagg_input_p = NULL; lagg_linkstate_p = NULL; - mtx_destroy(&lagg_list_mtx); break; default: return (EOPNOTSUPP); @@ -278,10 +308,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct lagg_softc *sc; struct ifnet *ifp; - int i, error = 0; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ - struct sysctl_oid *oid; - char num[14]; /* sufficient for 32 bits */ + int i; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); 
@@ -295,29 +323,10 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) sc->sc_ibytes = counter_u64_alloc(M_WAITOK); sc->sc_obytes = counter_u64_alloc(M_WAITOK); - sysctl_ctx_init(&sc->ctx); - snprintf(num, sizeof(num), "%u", unit); - sc->use_flowid = def_use_flowid; - sc->flowid_shift = def_flowid_shift; - sc->sc_oid = oid = SYSCTL_ADD_NODE(&sc->ctx, - &SYSCTL_NODE_CHILDREN(_net_link, lagg), - OID_AUTO, num, CTLFLAG_RD, NULL, ""); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "use_flowid", CTLFLAG_RW, &sc->use_flowid, - sc->use_flowid, "Use flow id for load sharing"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "flowid_shift", CTLFLAG_RW, &sc->flowid_shift, - sc->flowid_shift, - "Shift flowid bits to prevent multiqueue collisions"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "count", CTLFLAG_RD, &sc->sc_count, sc->sc_count, - "Total number of ports"); - SYSCTL_ADD_PROC(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "active", CTLTYPE_INT|CTLFLAG_RD, sc, 0, lagg_sysctl_active, - "I", "Total number of active ports"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "flapping", CTLFLAG_RD, &sc->sc_flapping, - sc->sc_flapping, "Total number of port change events"); + if (V_def_use_flowid) + sc->sc_opts |= LAGG_OPT_USE_FLOWID; + sc->flowid_shift = V_def_flowid_shift; + /* Hash all layers by default */ sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; @@ -325,11 +334,7 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) { sc->sc_proto = lagg_protos[i].ti_proto; - if ((error = lagg_protos[i].ti_attach(sc)) != 0) { - if_free(ifp); - free(sc, M_DEVBUF); - return (error); - } + lagg_protos[i].ti_attach(sc); break; } } @@ -371,9 +376,9 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); /* Insert 
into the global list of laggs */ - mtx_lock(&lagg_list_mtx); - SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries); - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_LOCK(); + SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries); + LAGG_LIST_UNLOCK(); callout_reset(&sc->sc_callout, hz, lagg_callout, sc); @@ -400,10 +405,9 @@ lagg_clone_destroy(struct ifnet *ifp) /* Unhook the aggregation protocol */ if (sc->sc_detach != NULL) (*sc->sc_detach)(sc); + else + LAGG_WUNLOCK(sc); - LAGG_WUNLOCK(sc); - - sysctl_ctx_free(&sc->ctx); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free(ifp); @@ -417,9 +421,9 @@ lagg_clone_destroy(struct ifnet *ifp) counter_u64_free(sc->sc_ibytes); counter_u64_free(sc->sc_obytes); - mtx_lock(&lagg_list_mtx); - SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries); - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_LOCK(); + SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); + LAGG_LIST_UNLOCK(); taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task); LAGG_LOCK_DESTROY(sc); @@ -431,15 +435,28 @@ static void lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr) { struct ifnet *ifp = sc->sc_ifp; + struct lagg_port lp; if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) return; + LAGG_WLOCK_ASSERT(sc); + /* + * Set the link layer address on the lagg interface. + * sc_lladdr() notifies the MAC change to + * the aggregation protocol. iflladdr_event handler which + * may trigger gratuitous ARPs for INET will be handled in + * a taskqueue. 
+ */ bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN); - /* Let the protocol know the MAC has changed */ if (sc->sc_lladdr != NULL) (*sc->sc_lladdr)(sc); - EVENTHANDLER_INVOKE(iflladdr_event, ifp); + + bzero(&lp, sizeof(lp)); + lp.lp_ifp = sc->sc_ifp; + lp.lp_softc = sc; + + lagg_port_lladdr(&lp, lladdr); } static void @@ -487,11 +504,13 @@ lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr) struct ifnet *ifp = lp->lp_ifp; struct lagg_llq *llq; int pending = 0; + int primary; LAGG_WLOCK_ASSERT(sc); - if (lp->lp_detaching || - memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) + primary = (sc->sc_primary->lp_ifp == ifp) ? 1 : 0; + if (primary == 0 && (lp->lp_detaching || + memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)) return; /* Check to make sure its not already queued to be changed */ @@ -510,6 +529,7 @@ lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr) /* Update the lladdr even if pending, it may have changed */ llq->llq_ifp = ifp; + llq->llq_primary = primary; bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN); if (!pending) @@ -542,14 +562,20 @@ lagg_port_setlladdr(void *arg, int pending) for (llq = head; llq != NULL; llq = head) { ifp = llq->llq_ifp; - /* Set the link layer address */ CURVNET_SET(ifp->if_vnet); - error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN); + if (llq->llq_primary == 0) { + /* + * Set the link layer address on the laggport interface. + * if_setlladdr() triggers gratuitous ARPs for INET. 
+ */ + error = if_setlladdr(ifp, llq->llq_lladdr, + ETHER_ADDR_LEN); + if (error) + printf("%s: setlladdr failed on %s\n", __func__, + ifp->if_xname); + } else + EVENTHANDLER_INVOKE(iflladdr_event, ifp); CURVNET_RESTORE(); - if (error) - printf("%s: setlladdr failed on %s\n", __func__, - ifp->if_xname); - head = SLIST_NEXT(llq, llq_entries); free(llq, M_DEVBUF); } @@ -581,34 +607,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) if (ifp->if_type != IFT_ETHER) return (EPROTONOSUPPORT); -#ifdef INET6 - /* - * The member interface should not have inet6 address because - * two interfaces with a valid link-local scope zone must not be - * merged in any form. This restriction is needed to - * prevent violation of link-local scope zone. Attempts to - * add a member interface which has inet6 addresses triggers - * removal of all inet6 addresses on the member interface. - */ - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { - if (in6ifa_llaonifp(lp->lp_ifp)) { - in6_ifdetach(lp->lp_ifp); - if_printf(sc->sc_ifp, - "IPv6 addresses on %s have been removed " - "before adding it as a member to prevent " - "IPv6 address scope violation.\n", - lp->lp_ifp->if_xname); - } - } - if (in6ifa_llaonifp(ifp)) { - in6_ifdetach(ifp); - if_printf(sc->sc_ifp, - "IPv6 addresses on %s have been removed " - "before adding it as a member to prevent " - "IPv6 address scope violation.\n", - ifp->if_xname); - } -#endif /* Allow the first Ethernet member to define the MTU */ if (SLIST_EMPTY(&sc->sc_ports)) sc->sc_ifp->if_mtu = ifp->if_mtu; @@ -623,10 +621,10 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) return (ENOMEM); /* Check if port is a stacked lagg */ - mtx_lock(&lagg_list_mtx); - SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) { + LAGG_LIST_LOCK(); + SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) { if (ifp == sc_ptr->sc_ifp) { - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (EINVAL); /* XXX disable stacking for the moment, its untested 
*/ @@ -634,14 +632,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) lp->lp_flags |= LAGG_PORT_STACK; if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (E2BIG); } #endif } } - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); /* Change the interface type */ lp->lp_iftype = ifp->if_type; @@ -991,10 +989,12 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_reqall *ra = (struct lagg_reqall *)data; + struct lagg_reqopts *ro = (struct lagg_reqopts *)data; struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf; struct lagg_reqflags *rf = (struct lagg_reqflags *)data; struct ifreq *ifr = (struct ifreq *)data; struct lagg_port *lp; + const struct lagg_proto *proto = NULL; struct ifnet *tpif; struct thread *td = curthread; char *buf, *outbuf; @@ -1042,50 +1042,136 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = priv_check(td, PRIV_NET_LAGG); if (error) break; - if (ra->ra_proto >= LAGG_PROTO_MAX) { + for (proto = lagg_protos; proto->ti_proto != LAGG_PROTO_NONE; + proto++) { + if (proto->ti_proto == ra->ra_proto) { + if (sc->sc_ifflags & IFF_DEBUG) + printf("%s: using proto %u\n", + sc->sc_ifname, proto->ti_proto); + break; + } + } + if (proto->ti_proto == LAGG_PROTO_NONE) { error = EPROTONOSUPPORT; break; } + /* Set to LAGG_PROTO_NONE during the attach. 
*/ LAGG_WLOCK(sc); if (sc->sc_proto != LAGG_PROTO_NONE) { - /* Reset protocol first in case detach unlocks */ sc->sc_proto = LAGG_PROTO_NONE; - error = sc->sc_detach(sc); - sc->sc_detach = NULL; - sc->sc_start = NULL; - sc->sc_input = NULL; - sc->sc_port_create = NULL; - sc->sc_port_destroy = NULL; - sc->sc_linkstate = NULL; - sc->sc_init = NULL; - sc->sc_stop = NULL; - sc->sc_lladdr = NULL; - sc->sc_req = NULL; - sc->sc_portreq = NULL; - } else if (sc->sc_input != NULL) { - /* Still detaching */ - error = EBUSY; + if (sc->sc_detach != NULL) + sc->sc_detach(sc); + else + LAGG_WUNLOCK(sc); } - if (error != 0) { - LAGG_WUNLOCK(sc); + proto->ti_attach(sc); + LAGG_WLOCK(sc); + sc->sc_proto = proto->ti_proto; + LAGG_WUNLOCK(sc); + break; + case SIOCGLAGGOPTS: + ro->ro_opts = sc->sc_opts; + if (sc->sc_proto == LAGG_PROTO_LACP) { + struct lacp_softc *lsc; + + lsc = (struct lacp_softc *)sc->sc_psc; + if (lsc->lsc_debug.lsc_tx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_TXTEST; + if (lsc->lsc_debug.lsc_rx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_RXTEST; + if (lsc->lsc_strict_mode != 0) + ro->ro_opts |= LAGG_OPT_LACP_STRICT; + + ro->ro_active = sc->sc_active; + } else { + ro->ro_active = 0; + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) + ro->ro_active += LAGG_PORTACTIVE(lp); + } + ro->ro_flapping = sc->sc_flapping; + ro->ro_flowid_shift = sc->flowid_shift; + break; + case SIOCSLAGGOPTS: + error = priv_check(td, PRIV_NET_LAGG); + if (error) + break; + if (ro->ro_opts == 0) + break; + /* + * Set options. LACP options are stored in sc->sc_psc, + * not in sc_opts. 
+ */ + int valid, lacp; + + switch (ro->ro_opts) { + case LAGG_OPT_USE_FLOWID: + case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_FLOWIDSHIFT: + valid = 1; + lacp = 0; + break; + case LAGG_OPT_LACP_TXTEST: + case -LAGG_OPT_LACP_TXTEST: + case LAGG_OPT_LACP_RXTEST: + case -LAGG_OPT_LACP_RXTEST: + case LAGG_OPT_LACP_STRICT: + case -LAGG_OPT_LACP_STRICT: + valid = lacp = 1; + break; + default: + valid = lacp = 0; break; } - for (int i = 0; i < (sizeof(lagg_protos) / - sizeof(lagg_protos[0])); i++) { - if (lagg_protos[i].ti_proto == ra->ra_proto) { - if (sc->sc_ifflags & IFF_DEBUG) - printf("%s: using proto %u\n", - sc->sc_ifname, - lagg_protos[i].ti_proto); - sc->sc_proto = lagg_protos[i].ti_proto; - if (sc->sc_proto != LAGG_PROTO_NONE) - error = lagg_protos[i].ti_attach(sc); - LAGG_WUNLOCK(sc); - return (error); + + LAGG_WLOCK(sc); + if (valid == 0 || + (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) { + /* Invalid combination of options specified. */ + error = EINVAL; + LAGG_WUNLOCK(sc); + break; /* Return from SIOCSLAGGOPTS. */ + } + /* + * Store new options into sc->sc_opts except for + * FLOWIDSHIFT and LACP options. 
+ */ + if (lacp == 0) { + if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT) + sc->flowid_shift = ro->ro_flowid_shift; + else if (ro->ro_opts > 0) + sc->sc_opts |= ro->ro_opts; + else + sc->sc_opts &= ~ro->ro_opts; + } else { + struct lacp_softc *lsc; + + lsc = (struct lacp_softc *)sc->sc_psc; + + switch (ro->ro_opts) { + case LAGG_OPT_LACP_TXTEST: + lsc->lsc_debug.lsc_tx_test = 1; + break; + case -LAGG_OPT_LACP_TXTEST: + lsc->lsc_debug.lsc_tx_test = 0; + break; + case LAGG_OPT_LACP_RXTEST: + lsc->lsc_debug.lsc_rx_test = 1; + break; + case -LAGG_OPT_LACP_RXTEST: + lsc->lsc_debug.lsc_rx_test = 0; + break; + case LAGG_OPT_LACP_STRICT: + lsc->lsc_strict_mode = 1; + break; + case -LAGG_OPT_LACP_STRICT: + lsc->lsc_strict_mode = 0; + break; } } + proto->ti_attach(sc); + LAGG_WLOCK(sc); + sc->sc_proto = proto->ti_proto; LAGG_WUNLOCK(sc); - error = EPROTONOSUPPORT; break; case SIOCGLAGGFLAGS: rf->rf_flags = sc->sc_flags; @@ -1130,6 +1216,26 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EINVAL; break; } +#ifdef INET6 + /* + * A laggport interface should not have inet6 address + * because two interfaces with a valid link-local + * scope zone must not be merged in any form. This + * restriction is needed to prevent violation of + * link-local scope zone. Attempts to add a laggport + * interface which has inet6 addresses triggers + * removal of all inet6 addresses on the member + * interface. + */ + if (in6ifa_llaonifp(tpif)) { + in6_ifdetach(tpif); + if_printf(sc->sc_ifp, + "IPv6 addresses on %s have been removed " + "before adding it as a member to prevent " + "IPv6 address scope violation.\n", + tpif->if_xname); + } +#endif LAGG_WLOCK(sc); error = lagg_port_create(sc, tpif); LAGG_WUNLOCK(sc); @@ -1381,7 +1487,7 @@ lagg_input(struct ifnet *ifp, struct mbuf *m) ETHER_BPF_MTAP(scifp, m); - m = (*sc->sc_input)(sc, lp, m); + m = (lp->lp_detaching == 0) ? 
(*sc->sc_input)(sc, lp, m) : NULL; if (m != NULL) { counter_u64_add(sc->sc_ipackets, 1); @@ -1544,27 +1650,6 @@ lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf) return (mtod(m, char *) + off); } -static int -lagg_sysctl_active(SYSCTL_HANDLER_ARGS) -{ - struct lagg_softc *sc = (struct lagg_softc *)arg1; - struct lagg_port *lp; - int error; - - /* LACP tracks active links automatically, the others do not */ - if (sc->sc_proto != LAGG_PROTO_LACP) { - sc->sc_active = 0; - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) - sc->sc_active += LAGG_PORTACTIVE(lp); - } - - error = sysctl_handle_int(oidp, &sc->sc_active, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - - return (0); -} - uint32_t lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key) { @@ -1677,18 +1762,16 @@ lagg_enqueue(struct ifnet *ifp, struct mbuf *m) /* * Simple round robin aggregation */ - -static int +static void lagg_rr_attach(struct lagg_softc *sc) { sc->sc_detach = lagg_rr_detach; sc->sc_start = lagg_rr_start; sc->sc_input = lagg_rr_input; + sc->sc_detach = NULL; sc->sc_port_create = NULL; sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX; sc->sc_seq = 0; - - return (0); } static int @@ -1736,8 +1819,7 @@ lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) /* * Active failover */ - -static int +static void lagg_fail_attach(struct lagg_softc *sc) { sc->sc_detach = lagg_fail_detach; @@ -1745,8 +1827,7 @@ lagg_fail_attach(struct lagg_softc *sc) sc->sc_input = lagg_fail_input; sc->sc_port_create = NULL; sc->sc_port_destroy = NULL; - - return (0); + sc->sc_detach = NULL; } static int @@ -1776,7 +1857,7 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) struct ifnet *ifp = sc->sc_ifp; struct lagg_port *tmp_tp; - if (lp == sc->sc_primary || lagg_failover_rx_all) { + if (lp == sc->sc_primary || V_lagg_failover_rx_all) { m->m_pkthdr.rcvif = ifp; return (m); } @@ -1800,16 +1881,13 @@ lagg_fail_input(struct lagg_softc *sc, 
struct lagg_port *lp, struct mbuf *m) /* * Loadbalancing */ - -static int +static void lagg_lb_attach(struct lagg_softc *sc) { struct lagg_port *lp; struct lagg_lb *lb; - if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb), - M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) - return (ENOMEM); + lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_detach = lagg_lb_detach; sc->sc_start = lagg_lb_start; @@ -1823,14 +1901,13 @@ lagg_lb_attach(struct lagg_softc *sc) SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lagg_lb_port_create(lp); - - return (0); } static int lagg_lb_detach(struct lagg_softc *sc) { struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; + LAGG_WUNLOCK(sc); if (lb != NULL) free(lb, M_DEVBUF); return (0); @@ -1879,7 +1956,7 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) struct lagg_port *lp = NULL; uint32_t p = 0; - if (sc->use_flowid && + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) p = m->m_pkthdr.flowid >> sc->flowid_shift; else @@ -1914,12 +1991,10 @@ lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) /* * 802.3ad LACP */ - -static int +static void lagg_lacp_attach(struct lagg_softc *sc) { struct lagg_port *lp; - int error; sc->sc_detach = lagg_lacp_detach; sc->sc_port_create = lacp_port_create; @@ -1933,31 +2008,28 @@ lagg_lacp_attach(struct lagg_softc *sc) sc->sc_req = lacp_req; sc->sc_portreq = lacp_portreq; - error = lacp_attach(sc); - if (error) - return (error); + lacp_attach(sc); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_create(lp); - - return (error); } static int lagg_lacp_detach(struct lagg_softc *sc) { struct lagg_port *lp; - int error; + void *psc; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_destroy(lp); - /* unlocking is safe here */ + psc = sc->sc_psc; + sc->sc_psc = NULL; LAGG_WUNLOCK(sc); - error = lacp_detach(sc); - LAGG_WLOCK(sc); - return (error); + lacp_detach(psc); + + return (0); } static void diff --git a/sys/net/if_lagg.h 
b/sys/net/if_lagg.h index ff1ae2f..34f5f6e 100644 --- a/sys/net/if_lagg.h +++ b/sys/net/if_lagg.h @@ -47,17 +47,19 @@ "\05DISTRIBUTING\06DISABLED" /* Supported lagg PROTOs */ -#define LAGG_PROTO_NONE 0 /* no lagg protocol defined */ -#define LAGG_PROTO_ROUNDROBIN 1 /* simple round robin */ -#define LAGG_PROTO_FAILOVER 2 /* active failover */ -#define LAGG_PROTO_LOADBALANCE 3 /* loadbalance */ -#define LAGG_PROTO_LACP 4 /* 802.3ad lacp */ -#define LAGG_PROTO_ETHERCHANNEL 5 /* Cisco FEC */ -#define LAGG_PROTO_MAX 6 +typedef enum { + LAGG_PROTO_NONE = 0, /* no lagg protocol defined */ + LAGG_PROTO_ROUNDROBIN, /* simple round robin */ + LAGG_PROTO_FAILOVER, /* active failover */ + LAGG_PROTO_LOADBALANCE, /* loadbalance */ + LAGG_PROTO_LACP, /* 802.3ad lacp */ + LAGG_PROTO_ETHERCHANNEL,/* Cisco FEC */ + LAGG_PROTO_MAX, +} lagg_proto; struct lagg_protos { const char *lpr_name; - int lpr_proto; + lagg_proto lpr_proto; }; #define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER @@ -134,6 +136,30 @@ struct lagg_reqflags { #define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags) #define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags) +struct lagg_reqopts { + char ro_ifname[IFNAMSIZ]; /* name of the lagg */ + + int ro_opts; /* Option bitmap */ +#define LAGG_OPT_NONE 0x00 +#define LAGG_OPT_USE_FLOWID 0x01 /* use M_FLOWID */ +/* Pseudo flags which are used in ro_opts but not stored into sc_opts. 
*/ +#define LAGG_OPT_FLOWIDSHIFT 0x02 /* Set flowid */ +#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */ +#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ +#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ +#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */ + u_int ro_count; /* number of ports */ + u_int ro_active; /* active port count */ + u_int ro_flapping; /* number of flapping */ + int ro_flowid_shift; /* shift the flowid */ +}; + +#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts) +#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts) + +#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \ + "\006LACP_TXTEST\007LACP_RXTEST" + #ifdef _KERNEL #include <sys/counter.h> @@ -183,6 +209,7 @@ struct lagg_mc { struct lagg_llq { struct ifnet *llq_ifp; uint8_t llq_lladdr[ETHER_ADDR_LEN]; + uint8_t llq_primary; SLIST_ENTRY(lagg_llq) llq_entries; }; @@ -229,9 +256,7 @@ struct lagg_softc { eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; struct callout sc_callout; - struct sysctl_ctx_list ctx; /* sysctl variables */ - struct sysctl_oid *sc_oid; /* sysctl tree oid */ - int use_flowid; /* enable use of flowid */ + u_int sc_opts; int flowid_shift; /* set flowid shift*/ }; |