diff options
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r-- | drivers/net/bonding/bond_3ad.c | 123 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 3 | ||||
-rw-r--r-- | drivers/net/bonding/bond_ipv6.c | 7 | ||||
-rw-r--r-- | drivers/net/bonding/bond_main.c | 316 | ||||
-rw-r--r-- | drivers/net/bonding/bond_sysfs.c | 92 | ||||
-rw-r--r-- | drivers/net/bonding/bonding.h | 35 |
6 files changed, 334 insertions, 242 deletions
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index c3fa31c..d69e683 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -446,6 +446,48 @@ static u16 __ad_timer_to_ticks(u16 timer_type, u16 par) ///////////////////////////////////////////////////////////////////////////////// /** + * __choose_matched - update a port's matched variable from a received lacpdu + * @lacpdu: the lacpdu we've received + * @port: the port we're looking at + * + * Update the value of the matched variable, using parameter values from a + * newly received lacpdu. Parameter values for the partner carried in the + * received PDU are compared with the corresponding operational parameter + * values for the actor. Matched is set to TRUE if all of these parameters + * match and the PDU parameter partner_state.aggregation has the same value as + * actor_oper_port_state.aggregation and lacp will actively maintain the link + * in the aggregation. Matched is also set to TRUE if the value of + * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates + * an individual link and lacp will actively maintain the link. Otherwise, + * matched is set to FALSE. LACP is considered to be actively maintaining the + * link if either the PDU's actor_state.lacp_activity variable is TRUE or both + * the actor's actor_oper_port_state.lacp_activity and the PDU's + * partner_state.lacp_activity variables are TRUE. + * + * Note: the AD_PORT_MATCHED "variable" is not specified by 802.3ad; it is + * used here to implement the language from 802.3ad 43.4.9 that requires + * recordPDU to "match" the LACPDU parameters to the stored values. + */ +static void __choose_matched(struct lacpdu *lacpdu, struct port *port) +{ + // check if all parameters are alike + if (((ntohs(lacpdu->partner_port) == port->actor_port_number) && + (ntohs(lacpdu->partner_port_priority) == port->actor_port_priority) && + !MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) && + (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) && + (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) && + ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) || + // or this is individual link(aggregation == FALSE) + ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0) + ) { + // update the state machine Matched variable + port->sm_vars |= AD_PORT_MATCHED; + } else { + port->sm_vars &= ~AD_PORT_MATCHED; + } +} + +/** * __record_pdu - record parameters from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at @@ -459,6 +501,7 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port) if (lacpdu && port) { struct port_params *partner = &port->partner_oper; + __choose_matched(lacpdu, port); // record the new parameter values for the partner operational partner->port_number = ntohs(lacpdu->actor_port); partner->port_priority = ntohs(lacpdu->actor_port_priority); @@ -518,12 +561,12 @@ static void __update_selected(struct lacpdu *lacpdu, struct port *port) const struct port_params *partner = &port->partner_oper; // check if any parameter is different - if (ntohs(lacpdu->actor_port) != partner->port_number - || ntohs(lacpdu->actor_port_priority) != partner->port_priority - || MAC_ADDRESS_COMPARE(&lacpdu->actor_system, &partner->system) - || ntohs(lacpdu->actor_system_priority) != partner->system_priority - || ntohs(lacpdu->actor_key) != partner->key - || (lacpdu->actor_state & AD_STATE_AGGREGATION) != (partner->port_state & AD_STATE_AGGREGATION)) { + if (ntohs(lacpdu->actor_port) != partner->port_number || + ntohs(lacpdu->actor_port_priority) != partner->port_priority || + MAC_ADDRESS_COMPARE(&lacpdu->actor_system, &partner->system) || + ntohs(lacpdu->actor_system_priority) != partner->system_priority || + ntohs(lacpdu->actor_key) != partner->key || + (lacpdu->actor_state & AD_STATE_AGGREGATION) != (partner->port_state & AD_STATE_AGGREGATION)) { // update the state machine Selected variable port->sm_vars &= ~AD_PORT_SELECTED; } @@ -549,12 +592,12 @@ static void __update_default_selected(struct port *port) const struct port_params *oper = &port->partner_oper; // check if any parameter is different - if (admin->port_number != oper->port_number - || admin->port_priority != oper->port_priority - || MAC_ADDRESS_COMPARE(&admin->system, &oper->system) - || admin->system_priority != oper->system_priority - || admin->key != oper->key - || (admin->port_state & AD_STATE_AGGREGATION) + if (admin->port_number != oper->port_number || + admin->port_priority != oper->port_priority || + MAC_ADDRESS_COMPARE(&admin->system, &oper->system) || + admin->system_priority != oper->system_priority || + admin->key != oper->key || + (admin->port_state & AD_STATE_AGGREGATION) != (oper->port_state & AD_STATE_AGGREGATION)) { // update the state machine Selected variable port->sm_vars &= ~AD_PORT_SELECTED; @@ -563,47 +606,6 @@ static void __update_default_selected(struct port *port) } /** - * __choose_matched - update a port's matched variable from a received lacpdu - * @lacpdu: the lacpdu we've received - * @port: the port we're looking at - * - * Update the value of the matched variable, using parameter values from a - * newly received lacpdu. Parameter values for the partner carried in the - * received PDU are compared with the corresponding operational parameter - * values for the actor. Matched is set to TRUE if all of these parameters - * match and the PDU parameter partner_state.aggregation has the same value as - * actor_oper_port_state.aggregation and lacp will actively maintain the link - * in the aggregation. Matched is also set to TRUE if the value of - * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates - * an individual link and lacp will actively maintain the link. Otherwise, - * matched is set to FALSE. LACP is considered to be actively maintaining the - * link if either the PDU's actor_state.lacp_activity variable is TRUE or both - * the actor's actor_oper_port_state.lacp_activity and the PDU's - * partner_state.lacp_activity variables are TRUE. - */ -static void __choose_matched(struct lacpdu *lacpdu, struct port *port) -{ - // validate lacpdu and port - if (lacpdu && port) { - // check if all parameters are alike - if (((ntohs(lacpdu->partner_port) == port->actor_port_number) && - (ntohs(lacpdu->partner_port_priority) == port->actor_port_priority) && - !MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) && - (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) && - (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) && - ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) || - // or this is individual link(aggregation == FALSE) - ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0) - ) { - // update the state machine Matched variable - port->sm_vars |= AD_PORT_MATCHED; - } else { - port->sm_vars &= ~AD_PORT_MATCHED; - } - } -} - -/** * __update_ntt - update a port's ntt variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at @@ -1134,7 +1136,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) __update_selected(lacpdu, port); __update_ntt(lacpdu, port); __record_pdu(lacpdu, port); - __choose_matched(lacpdu, port); port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)); port->actor_oper_port_state &= ~AD_STATE_EXPIRED; // verify that if the aggregator is enabled, the port is enabled too. @@ -1956,7 +1957,7 @@ void bond_3ad_unbind_slave(struct slave *slave) struct port *port, *prev_port, *temp_port; struct aggregator *aggregator, *new_aggregator, *temp_aggregator; int select_new_active_agg = 0; - + // find the aggregator related to this slave aggregator = &(SLAVE_AD_INFO(slave).aggregator); @@ -2024,7 +2025,7 @@ void bond_3ad_unbind_slave(struct slave *slave) // clear the aggregator ad_clear_agg(aggregator); - + if (select_new_active_agg) { ad_agg_selection_logic(__get_first_agg(port)); } @@ -2075,7 +2076,7 @@ void bond_3ad_unbind_slave(struct slave *slave) } } } - port->slave=NULL; + port->slave=NULL; } /** @@ -2301,7 +2302,7 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) } /* - * set link state for bonding master: if we have an active + * set link state for bonding master: if we have an active * aggregator, we're up, if not, we're down. Presumes that we cannot * have an active aggregator if there are no slaves with link up. * @@ -2395,7 +2396,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) goto out; } - slave_agg_no = bond->xmit_hash_policy(skb, dev, slaves_in_agg); + slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg); bond_for_each_slave(bond, slave, i) { struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; @@ -2445,9 +2446,6 @@ int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct pac struct slave *slave = NULL; int ret = NET_RX_DROP; - if (dev_net(dev) != &init_net) - goto out; - if (!(dev->flags & IFF_MASTER)) goto out; @@ -2468,4 +2466,3 @@ out: return ret; } - diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 9b5936f..0d30d1e 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -355,9 +355,6 @@ static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; - if (dev_net(bond_dev) != &init_net) - goto out; - while (bond_dev->priv_flags & IFF_802_1Q_VLAN) bond_dev = vlan_dev_real_dev(bond_dev); diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c index 83921ab..b72e1dc 100644 --- a/drivers/net/bonding/bond_ipv6.c +++ b/drivers/net/bonding/bond_ipv6.c @@ -25,6 +25,7 @@ #include <net/ipv6.h> #include <net/ndisc.h> #include <net/addrconf.h> +#include <net/netns/generic.h> #include "bonding.h" /* @@ -152,11 +153,9 @@ static int bond_inet6addr_event(struct notifier_block *this, struct net_device *vlan_dev, *event_dev = ifa->idev->dev; struct bonding *bond; struct vlan_entry *vlan; + struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id); - if (dev_net(event_dev) != &init_net) - return NOTIFY_DONE; - - list_for_each_entry(bond, &bond_dev_list, bond_list) { + list_for_each_entry(bond, &bn->dev_list, bond_list) { if (bond->dev == event_dev) { switch (event) { case NETDEV_UP: diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 40fb5ee..af9b9c4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -75,6 +75,7 @@ #include <linux/jiffies.h> #include <net/route.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -94,6 +95,7 @@ static int downdelay; static int use_carrier = 1; static char *mode; static char *primary; +static char *primary_reselect; static char *lacp_rate; static char *ad_select; static char *xmit_hash_policy; @@ -126,6 +128,14 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " "6 for balance-alb"); module_param(primary, charp, 0); MODULE_PARM_DESC(primary, "Primary network device to use"); +module_param(primary_reselect, charp, 0); +MODULE_PARM_DESC(primary_reselect, "Reselect primary slave " + "once it comes up; " + "0 for always (default), " + "1 for only if speed of primary is " + "better, " + "2 for only on active slave " + "failure"); module_param(lacp_rate, charp, 0); MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " "(slow/fast)"); @@ -148,11 +158,7 @@ MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the static const char * const version = DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; -LIST_HEAD(bond_dev_list); - -#ifdef CONFIG_PROC_FS -static struct proc_dir_entry *bond_proc_dir; -#endif +int bond_net_id __read_mostly; static __be32 arp_target[BOND_MAX_ARP_TARGETS]; static int arp_ip_count; @@ -200,6 +206,13 @@ const struct bond_parm_tbl fail_over_mac_tbl[] = { { NULL, -1}, }; +const struct bond_parm_tbl pri_reselect_tbl[] = { +{ "always", BOND_PRI_RESELECT_ALWAYS}, +{ "better", BOND_PRI_RESELECT_BETTER}, +{ "failure", BOND_PRI_RESELECT_FAILURE}, +{ NULL, -1}, +}; + struct bond_parm_tbl ad_select_tbl[] = { { "stable", BOND_AD_STABLE}, { "bandwidth", BOND_AD_BANDWIDTH}, @@ -211,7 +224,7 @@ struct bond_parm_tbl ad_select_tbl[] = { static void bond_send_gratuitous_arp(struct bonding *bond); static int bond_init(struct net_device *bond_dev); -static void bond_deinit(struct net_device *bond_dev); +static void bond_uninit(struct net_device *bond_dev); /*---------------------------- General routines -----------------------------*/ @@ -1070,6 +1083,25 @@ out: } +static bool bond_should_change_active(struct bonding *bond) +{ + struct slave *prim = bond->primary_slave; + struct slave *curr = bond->curr_active_slave; + + if (!prim || !curr || curr->link != BOND_LINK_UP) + return true; + if (bond->force_primary) { + bond->force_primary = false; + return true; + } + if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER && + (prim->speed < curr->speed || + (prim->speed == curr->speed && prim->duplex <= curr->duplex))) + return false; + if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE) + return false; + return true; +} /** * find_best_interface - select the best available slave to be the active one @@ -1084,7 +1116,7 @@ static struct slave *bond_find_best_slave(struct bonding *bond) int mintime = bond->params.updelay; int i; - new_active = old_active = bond->curr_active_slave; + new_active = bond->curr_active_slave; if (!new_active) { /* there were no active slaves left */ if (bond->slave_cnt > 0) /* found one slave */ @@ -1094,7 +1126,8 @@ static struct slave *bond_find_best_slave(struct bonding *bond) } if ((bond->primary_slave) && - bond->primary_slave->link == BOND_LINK_UP) { + bond->primary_slave->link == BOND_LINK_UP && + bond_should_change_active(bond)) { new_active = bond->primary_slave; } @@ -1678,8 +1711,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { /* if there is a primary slave, remember it */ - if (strcmp(bond->params.primary, new_slave->dev->name) == 0) + if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { bond->primary_slave = new_slave; + bond->force_primary = true; + } } write_lock_bh(&bond->curr_slave_lock); @@ -1817,8 +1852,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) } if (!bond->params.fail_over_mac) { - if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) - && bond->slave_cnt > 1) + if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) && + bond->slave_cnt > 1) pr_warning(DRV_NAME ": %s: Warning: the permanent HWaddr of %s - " "%pM - is still in use by %s. " @@ -1965,25 +2000,6 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) } /* -* Destroy a bonding device. -* Must be under rtnl_lock when this function is called. -*/ -static void bond_uninit(struct net_device *bond_dev) -{ - struct bonding *bond = netdev_priv(bond_dev); - - bond_deinit(bond_dev); - bond_destroy_sysfs_entry(bond); - - if (bond->wq) - destroy_workqueue(bond->wq); - - netif_addr_lock_bh(bond_dev); - bond_mc_list_destroy(bond); - netif_addr_unlock_bh(bond_dev); -} - -/* * First release a slave and than destroy the bond if no more slaves are left. * Must be under rtnl_lock when this function is called. */ @@ -2567,7 +2583,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) fl.fl4_dst = targets[i]; fl.fl4_tos = RTO_ONLINK; - rv = ip_route_output_key(&init_net, &rt, &fl); + rv = ip_route_output_key(dev_net(bond->dev), &rt, &fl); if (rv) { if (net_ratelimit()) { pr_warning(DRV_NAME @@ -2675,9 +2691,6 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack unsigned char *arp_ptr; __be32 sip, tip; - if (dev_net(dev) != &init_net) - goto out; - if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) goto out; @@ -3201,11 +3214,14 @@ static void bond_info_show_master(struct seq_file *seq) } if (USES_PRIMARY(bond->params.mode)) { - seq_printf(seq, "Primary Slave: %s\n", + seq_printf(seq, "Primary Slave: %s", (bond->primary_slave) ? bond->primary_slave->dev->name : "None"); + if (bond->primary_slave) + seq_printf(seq, " (primary_reselect %s)", + pri_reselect_tbl[bond->params.primary_reselect].modename); - seq_printf(seq, "Currently Active Slave: %s\n", + seq_printf(seq, "\nCurrently Active Slave: %s\n", (curr) ? curr->dev->name : "None"); } @@ -3334,13 +3350,14 @@ static const struct file_operations bond_info_fops = { .release = seq_release, }; -static int bond_create_proc_entry(struct bonding *bond) +static void bond_create_proc_entry(struct bonding *bond) { struct net_device *bond_dev = bond->dev; + struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); - if (bond_proc_dir) { + if (bn->proc_dir) { bond->proc_entry = proc_create_data(bond_dev->name, - S_IRUGO, bond_proc_dir, + S_IRUGO, bn->proc_dir, &bond_info_fops, bond); if (bond->proc_entry == NULL) pr_warning(DRV_NAME @@ -3349,14 +3366,15 @@ static int bond_create_proc_entry(struct bonding *bond) else memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); } - - return 0; } static void bond_remove_proc_entry(struct bonding *bond) { - if (bond_proc_dir && bond->proc_entry) { - remove_proc_entry(bond->proc_file_name, bond_proc_dir); + struct net_device *bond_dev = bond->dev; + struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); + + if (bn->proc_dir && bond->proc_entry) { + remove_proc_entry(bond->proc_file_name, bn->proc_dir); memset(bond->proc_file_name, 0, IFNAMSIZ); bond->proc_entry = NULL; } @@ -3365,11 +3383,11 @@ static void bond_remove_proc_entry(struct bonding *bond) /* Create the bonding directory under /proc/net, if doesn't exist yet. * Caller must hold rtnl_lock. */ -static void bond_create_proc_dir(void) +static void bond_create_proc_dir(struct bond_net *bn) { - if (!bond_proc_dir) { - bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net); - if (!bond_proc_dir) + if (!bn->proc_dir) { + bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net); + if (!bn->proc_dir) pr_warning(DRV_NAME ": Warning: cannot create /proc/net/%s\n", DRV_NAME); @@ -3379,17 +3397,17 @@ static void bond_create_proc_dir(void) /* Destroy the bonding directory under /proc/net, if empty. * Caller must hold rtnl_lock. */ -static void bond_destroy_proc_dir(void) +static void bond_destroy_proc_dir(struct bond_net *bn) { - if (bond_proc_dir) { - remove_proc_entry(DRV_NAME, init_net.proc_net); - bond_proc_dir = NULL; + if (bn->proc_dir) { + remove_proc_entry(DRV_NAME, bn->net->proc_net); + bn->proc_dir = NULL; } } #else /* !CONFIG_PROC_FS */ -static int bond_create_proc_entry(struct bonding *bond) +static void bond_create_proc_entry(struct bonding *bond) { } @@ -3397,11 +3415,11 @@ static void bond_remove_proc_entry(struct bonding *bond) { } -static void bond_create_proc_dir(void) +static void bond_create_proc_dir(struct bond_net *bn) { } -static void bond_destroy_proc_dir(void) +static void bond_destroy_proc_dir(struct bond_net *bn) { } @@ -3418,9 +3436,6 @@ static int bond_event_changename(struct bonding *bond) bond_remove_proc_entry(bond); bond_create_proc_entry(bond); - bond_destroy_sysfs_entry(bond); - bond_create_sysfs_entry(bond); - return NOTIFY_DONE; } @@ -3432,9 +3447,6 @@ static int bond_master_netdev_event(unsigned long event, switch (event) { case NETDEV_CHANGENAME: return bond_event_changename(event_bond); - case NETDEV_UNREGISTER: - bond_release_all(event_bond->dev); - break; default: break; } @@ -3526,9 +3538,6 @@ static int bond_netdev_event(struct notifier_block *this, { struct net_device *event_dev = (struct net_device *)ptr; - if (dev_net(event_dev) != &init_net) - return NOTIFY_DONE; - pr_debug("event_dev: %s, event: %lx\n", (event_dev ? event_dev->name : "None"), event); @@ -3561,13 +3570,11 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, { struct in_ifaddr *ifa = ptr; struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; + struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id); struct bonding *bond; struct vlan_entry *vlan; - if (dev_net(ifa->ifa_dev->dev) != &init_net) - return NOTIFY_DONE; - - list_for_each_entry(bond, &bond_dev_list, bond_list) { + list_for_each_entry(bond, &bn->dev_list, bond_list) { if (bond->dev == event_dev) { switch (event) { case NETDEV_UP: @@ -3657,8 +3664,7 @@ void bond_unregister_arp(struct bonding *bond) * Hash for the output device based upon layer 2 and layer 3 data. If * the packet is not IP mimic bond_xmit_hash_policy_l2() */ -static int bond_xmit_hash_policy_l23(struct sk_buff *skb, - struct net_device *bond_dev, int count) +static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) { struct ethhdr *data = (struct ethhdr *)skb->data; struct iphdr *iph = ip_hdr(skb); @@ -3676,8 +3682,7 @@ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is * altogether not IP, mimic bond_xmit_hash_policy_l2() */ -static int bond_xmit_hash_policy_l34(struct sk_buff *skb, - struct net_device *bond_dev, int count) +static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) { struct ethhdr *data = (struct ethhdr *)skb->data; struct iphdr *iph = ip_hdr(skb); @@ -3701,8 +3706,7 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb, /* * Hash for the output device based upon layer 2 data */ -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, - struct net_device *bond_dev, int count) +static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) { struct ethhdr *data = (struct ethhdr *)skb->data; @@ -3939,7 +3943,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd if (!capable(CAP_NET_ADMIN)) return -EPERM; - slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave); + slave_dev = dev_get_by_name(dev_net(bond_dev), ifr->ifr_slave); pr_debug("slave_dev=%p: \n", slave_dev); @@ -4295,7 +4299,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) if (!BOND_IS_OK(bond)) goto out; - slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt); + slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt); bond_for_each_slave(bond, slave, i) { slave_no--; @@ -4576,37 +4580,29 @@ static void bond_work_cancel_all(struct bonding *bond) cancel_delayed_work(&bond->ad_work); } -/* De-initialize device specific data. - * Caller must hold rtnl_lock. - */ -static void bond_deinit(struct net_device *bond_dev) +/* +* Destroy a bonding device. +* Must be under rtnl_lock when this function is called. +*/ +static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + /* Release the bonded slaves */ + bond_release_all(bond_dev); + list_del(&bond->bond_list); bond_work_cancel_all(bond); bond_remove_proc_entry(bond); -} - -/* Unregister and free all bond devices. - * Caller must hold rtnl_lock. - */ -static void bond_free_all(void) -{ - struct bonding *bond, *nxt; - list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { - struct net_device *bond_dev = bond->dev; - - bond_work_cancel_all(bond); - /* Release the bonded slaves */ - bond_release_all(bond_dev); - unregister_netdevice(bond_dev); - } + if (bond->wq) + destroy_workqueue(bond->wq); - bond_destroy_proc_dir(); + netif_addr_lock_bh(bond_dev); + bond_mc_list_destroy(bond); + netif_addr_unlock_bh(bond_dev); } /*------------------------- Module initialization ---------------------------*/ @@ -4646,7 +4642,7 @@ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) static int bond_check_params(struct bond_params *params) { - int arp_validate_value, fail_over_mac_value; + int arp_validate_value, fail_over_mac_value, primary_reselect_value; /* * Convert string parameters. @@ -4665,7 +4661,8 @@ static int bond_check_params(struct bond_params *params) if ((bond_mode != BOND_MODE_XOR) && (bond_mode != BOND_MODE_8023AD)) { pr_info(DRV_NAME - ": xor_mode param is irrelevant in mode %s\n", + ": xmit_hash_policy param is irrelevant in" + " mode %s\n", bond_mode_name(bond_mode)); } else { xmit_hashtype = bond_parse_parm(xmit_hash_policy, @@ -4945,6 +4942,20 @@ static int bond_check_params(struct bond_params *params) primary = NULL; } + if (primary && primary_reselect) { + primary_reselect_value = bond_parse_parm(primary_reselect, + pri_reselect_tbl); + if (primary_reselect_value == -1) { + pr_err(DRV_NAME + ": Error: Invalid primary_reselect \"%s\"\n", + primary_reselect == + NULL ? "NULL" : primary_reselect); + return -EINVAL; + } + } else { + primary_reselect_value = BOND_PRI_RESELECT_ALWAYS; + } + if (fail_over_mac) { fail_over_mac_value = bond_parse_parm(fail_over_mac, fail_over_mac_tbl); @@ -4976,6 +4987,7 @@ static int bond_check_params(struct bond_params *params) params->use_carrier = use_carrier; params->lacp_fast = lacp_fast; params->primary[0] = 0; + params->primary_reselect = primary_reselect_value; params->fail_over_mac = fail_over_mac_value; if (primary) { @@ -5012,6 +5024,7 @@ static void bond_set_lockdep_class(struct net_device *dev) static int bond_init(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); pr_debug("Begin bond_init for %s\n", bond_dev->name); @@ -5024,30 +5037,41 @@ static int bond_init(struct net_device *bond_dev) netif_carrier_off(bond_dev); bond_create_proc_entry(bond); - list_add_tail(&bond->bond_list, &bond_dev_list); + list_add_tail(&bond->bond_list, &bn->dev_list); + bond_prepare_sysfs_group(bond); + return 0; +} + +static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } return 0; } +static struct rtnl_link_ops bond_link_ops __read_mostly = { + .kind = "bond", + .priv_size = sizeof(struct bonding), + .setup = bond_setup, + .validate = bond_validate, +}; + /* Create a new bond based on the specified name and bonding parameters. * If name is NULL, obtain a suitable "bond%d" name for us. * Caller must NOT hold rtnl_lock; we need to release it here before we * set up our sysfs entries. */ -int bond_create(const char *name) +int bond_create(struct net *net, const char *name) { struct net_device *bond_dev; int res; rtnl_lock(); - /* Check to see if the bond already exists. */ - /* FIXME: pass netns from caller */ - if (name && __dev_get_by_name(&init_net, name)) { - pr_err(DRV_NAME ": cannot add bond %s; already exists\n", - name); - res = -EEXIST; - goto out_rtnl; - } bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "", bond_setup); @@ -5055,9 +5079,12 @@ int bond_create(const char *name) pr_err(DRV_NAME ": %s: eek! can't alloc netdev!\n", name); res = -ENOMEM; - goto out_rtnl; + goto out; } + dev_net_set(bond_dev, net); + bond_dev->rtnl_link_ops = &bond_link_ops; + if (!name) { res = dev_alloc_name(bond_dev, "bond%d"); if (res < 0) @@ -5065,27 +5092,41 @@ int bond_create(const char *name) } res = register_netdevice(bond_dev); - if (res < 0) - goto out_bond; - - res = bond_create_sysfs_entry(netdev_priv(bond_dev)); - if (res < 0) - goto out_unreg; +out: rtnl_unlock(); - return 0; - -out_unreg: - unregister_netdevice(bond_dev); -out_bond: - bond_deinit(bond_dev); + return res; out_netdev: free_netdev(bond_dev); -out_rtnl: - rtnl_unlock(); - return res; + goto out; +} + +static int bond_net_init(struct net *net) +{ + struct bond_net *bn = net_generic(net, bond_net_id); + + bn->net = net; + INIT_LIST_HEAD(&bn->dev_list); + + bond_create_proc_dir(bn); + + return 0; } +static void bond_net_exit(struct net *net) +{ + struct bond_net *bn = net_generic(net, bond_net_id); + + bond_destroy_proc_dir(bn); +} + +static struct pernet_operations bond_net_ops = { + .init = bond_net_init, + .exit = bond_net_exit, + .id = &bond_net_id, + .size = sizeof(struct bond_net), +}; + static int __init bonding_init(void) { int i; @@ -5097,10 +5138,16 @@ static int __init bonding_init(void) if (res) goto out; - bond_create_proc_dir(); + res = register_pernet_subsys(&bond_net_ops); + if (res) + goto out; + + res = rtnl_link_register(&bond_link_ops); + if (res) + goto err_link; for (i = 0; i < max_bonds; i++) { - res = bond_create(NULL); + res = bond_create(&init_net, NULL); if (res) goto err; } @@ -5112,14 +5159,13 @@ static int __init bonding_init(void) register_netdevice_notifier(&bond_netdev_notifier); register_inetaddr_notifier(&bond_inetaddr_notifier); bond_register_ipv6_notifier(); - - goto out; -err: - rtnl_lock(); - bond_free_all(); - rtnl_unlock(); out: return res; +err: + rtnl_link_unregister(&bond_link_ops); +err_link: + unregister_pernet_subsys(&bond_net_ops); + goto out; } @@ -5131,9 +5177,8 @@ static void __exit bonding_exit(void) bond_destroy_sysfs(); - rtnl_lock(); - bond_free_all(); - rtnl_unlock(); + rtnl_link_unregister(&bond_link_ops); + unregister_pernet_subsys(&bond_net_ops); } module_init(bonding_init); @@ -5142,3 +5187,4 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); +MODULE_ALIAS_RTNL_LINK("bond"); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 8762a27..4e00b4f 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -36,6 +36,8 @@ #include <linux/rtnetlink.h> #include <linux/etherdevice.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <linux/nsproxy.h> #include "bonding.h" @@ -48,12 +50,14 @@ */ static ssize_t bonding_show_bonds(struct class *cls, char *buf) { + struct net *net = current->nsproxy->net_ns; + struct bond_net *bn = net_generic(net, bond_net_id); int res = 0; struct bonding *bond; rtnl_lock(); - list_for_each_entry(bond, &bond_dev_list, bond_list) { + list_for_each_entry(bond, &bn->dev_list, bond_list) { if (res > (PAGE_SIZE - IFNAMSIZ)) { /* not enough space for another interface name */ if ((PAGE_SIZE - res) > 10) @@ -70,11 +74,12 @@ static ssize_t bonding_show_bonds(struct class *cls, char *buf) return res; } -static struct net_device *bond_get_by_name(const char *ifname) +static struct net_device *bond_get_by_name(struct net *net, const char *ifname) { + struct bond_net *bn = net_generic(net, bond_net_id); struct bonding *bond; - list_for_each_entry(bond, &bond_dev_list, bond_list) { + list_for_each_entry(bond, &bn->dev_list, bond_list) { if (strncmp(bond->dev->name, ifname, IFNAMSIZ) == 0) return bond->dev; } @@ -92,6 +97,7 @@ static struct net_device *bond_get_by_name(const char *ifname) static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t count) { + struct net *net = current->nsproxy->net_ns; char command[IFNAMSIZ + 1] = {0, }; char *ifname; int rv, res = count; @@ -105,7 +111,7 @@ static ssize_t bonding_store_bonds(struct class *cls, if (command[0] == '+') { pr_info(DRV_NAME ": %s is being created...\n", ifname); - rv = bond_create(ifname); + rv = bond_create(net, ifname); if (rv) { pr_info(DRV_NAME ": Bond creation failed.\n"); res = rv; @@ -114,7 +120,7 @@ static ssize_t bonding_store_bonds(struct class *cls, struct net_device *bond_dev; rtnl_lock(); - bond_dev = bond_get_by_name(ifname); + bond_dev = bond_get_by_name(net, ifname); if (bond_dev) { pr_info(DRV_NAME ": %s is being deleted...\n", ifname); @@ -239,8 +245,7 @@ static ssize_t bonding_store_slaves(struct device *d, /* Got a slave name in ifname. Is it already in the list? */ found = 0; - /* FIXME: get netns from sysfs object */ - dev = __dev_get_by_name(&init_net, ifname); + dev = __dev_get_by_name(dev_net(bond->dev), ifname); if (!dev) { pr_info(DRV_NAME ": %s: Interface %s does not exist!\n", @@ -1214,6 +1219,58 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR, bonding_show_primary, bonding_store_primary); /* + * Show and set the primary_reselect flag. + */ +static ssize_t bonding_show_primary_reselect(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + return sprintf(buf, "%s %d\n", + pri_reselect_tbl[bond->params.primary_reselect].modename, + bond->params.primary_reselect); +} + +static ssize_t bonding_store_primary_reselect(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int new_value, ret = count; + struct bonding *bond = to_bond(d); + + if (!rtnl_trylock()) + return restart_syscall(); + + new_value = bond_parse_parm(buf, pri_reselect_tbl); + if (new_value < 0) { + pr_err(DRV_NAME + ": %s: Ignoring invalid primary_reselect value %.*s.\n", + bond->dev->name, + (int) strlen(buf) - 1, buf); + ret = -EINVAL; + goto out; + } + + bond->params.primary_reselect = new_value; + pr_info(DRV_NAME ": %s: setting primary_reselect to %s (%d).\n", + bond->dev->name, pri_reselect_tbl[new_value].modename, + new_value); + + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + bond_select_active_slave(bond); + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); +out: + rtnl_unlock(); + return ret; +} +static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR, + bonding_show_primary_reselect, + bonding_store_primary_reselect); + +/* * Show and set the use_carrier flag. */ static ssize_t bonding_show_carrier(struct device *d, @@ -1502,6 +1559,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_num_unsol_na.attr, &dev_attr_miimon.attr, &dev_attr_primary.attr, + &dev_attr_primary_reselect.attr, &dev_attr_use_carrier.attr, &dev_attr_active_slave.attr, &dev_attr_mii_status.attr, @@ -1564,24 +1622,8 @@ void bond_destroy_sysfs(void) * Initialize sysfs for each bond. This sets up and registers * the 'bondctl' directory for each individual bond under /sys/class/net. */ -int bond_create_sysfs_entry(struct bonding *bond) +void bond_prepare_sysfs_group(struct bonding *bond) { - struct net_device *dev = bond->dev; - int err; - - err = sysfs_create_group(&(dev->dev.kobj), &bonding_group); - if (err) - pr_emerg("eek! didn't create group!\n"); - - return err; -} -/* - * Remove sysfs entries for each bond. - */ -void bond_destroy_sysfs_entry(struct bonding *bond) -{ - struct net_device *dev = bond->dev; - - sysfs_remove_group(&(dev->dev.kobj), &bonding_group); + bond->dev->sysfs_groups[0] = &bonding_group; } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 6824771..558ec13 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -23,15 +23,13 @@ #include "bond_3ad.h" #include "bond_alb.h" -#define DRV_VERSION "3.5.0" -#define DRV_RELDATE "November 4, 2008" +#define DRV_VERSION "3.6.0" +#define DRV_RELDATE "September 26, 2009" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" #define BOND_MAX_ARP_TARGETS 16 -extern struct list_head bond_dev_list; - #define IS_UP(dev) \ ((((dev)->flags & IFF_UP) == IFF_UP) && \ netif_running(dev) && \ @@ -131,6 +129,7 @@ struct bond_params { int lacp_fast; int ad_select; char primary[IFNAMSIZ]; + int primary_reselect; __be32 arp_targets[BOND_MAX_ARP_TARGETS]; }; @@ -190,6 +189,7 @@ struct bonding { struct slave *curr_active_slave; struct slave *current_arp_slave; struct slave *primary_slave; + bool force_primary; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ rwlock_t lock; rwlock_t curr_slave_lock; @@ -204,7 +204,7 @@ struct bonding { #endif /* CONFIG_PROC_FS */ struct list_head bond_list; struct dev_mc_list *mc_list; - int (*xmit_hash_policy)(struct sk_buff *, struct net_device *, int); + int (*xmit_hash_policy)(struct sk_buff *, int); __be32 master_ip; u16 flags; u16 rr_tx_counter; @@ -254,10 +254,14 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) static inline bool bond_is_lb(const struct bonding *bond) { - return bond->params.mode == BOND_MODE_TLB - || bond->params.mode == BOND_MODE_ALB; + return (bond->params.mode == BOND_MODE_TLB || + bond->params.mode == BOND_MODE_ALB); } +#define BOND_PRI_RESELECT_ALWAYS 0 +#define BOND_PRI_RESELECT_BETTER 1 +#define BOND_PRI_RESELECT_FAILURE 2 + #define BOND_FOM_NONE 0 #define BOND_FOM_ACTIVE 1 #define BOND_FOM_FOLLOW 2 @@ -321,12 +325,11 @@ static inline void bond_unset_master_alb_flags(struct bonding *bond) struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); -int bond_create(const char *name); +int bond_create(struct net *net, const char *name); int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev); int bond_create_sysfs(void); void bond_destroy_sysfs(void); -void bond_destroy_sysfs_entry(struct bonding *bond); -int bond_create_sysfs_entry(struct bonding *bond); +void bond_prepare_sysfs_group(struct bonding *bond); int bond_create_slave_symlinks(struct net_device *master, struct net_device *slave); void bond_destroy_slave_symlinks(struct net_device *master, struct net_device *slave); int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); @@ -341,13 +344,22 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active); void bond_register_arp(struct bonding *); void bond_unregister_arp(struct bonding *); +struct bond_net { + struct net * net; /* Associated network namespace */ + struct list_head dev_list; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry * proc_dir; +#endif +}; + /* exported from bond_main.c */ -extern struct list_head bond_dev_list; +extern int bond_net_id; extern const struct bond_parm_tbl bond_lacp_tbl[]; extern const struct bond_parm_tbl bond_mode_tbl[]; extern const struct bond_parm_tbl xmit_hashtype_tbl[]; extern const struct bond_parm_tbl arp_validate_tbl[]; extern const struct bond_parm_tbl fail_over_mac_tbl[]; +extern const struct bond_parm_tbl pri_reselect_tbl[]; extern struct bond_parm_tbl ad_select_tbl[]; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -370,4 +382,3 @@ static inline void bond_unregister_ipv6_notifier(void) #endif #endif /* _LINUX_BONDING_H */ - |