summaryrefslogtreecommitdiffstats
path: root/drivers/net/bonding/bond_main.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
commitaecdc33e111b2c447b622e287c6003726daa1426 (patch)
tree3e7657eae4b785e1a1fb5dfb225dbae0b2f0cfc6 /drivers/net/bonding/bond_main.c
parenta20acf99f75e49271381d65db097c9763060a1e8 (diff)
parenta3a6cab5ea10cca64d036851fe0d932448f2fe4f (diff)
downloadop-kernel-dev-aecdc33e111b2c447b622e287c6003726daa1426.zip
op-kernel-dev-aecdc33e111b2c447b622e287c6003726daa1426.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: 1) GRE now works over ipv6, from Dmitry Kozlov. 2) Make SCTP more network namespace aware, from Eric Biederman. 3) TEAM driver now works with non-ethernet devices, from Jiri Pirko. 4) Make openvswitch network namespace aware, from Pravin B Shelar. 5) IPV6 NAT implementation, from Patrick McHardy. 6) Server side support for TCP Fast Open, from Jerry Chu and others. 7) Packet BPF filter supports MOD and XOR, from Eric Dumazet and Daniel Borkmann. 8) Increase the loopback default MTU to 64K, from Eric Dumazet. 9) Use a per-task rather than per-socket page fragment allocator for outgoing networking traffic. This benefits processes that have very many mostly idle sockets, which is quite common. From Eric Dumazet. 10) Use up to 32K for page fragment allocations, with fallbacks to smaller sizes when higher order page allocations fail. Benefits are a) fewer segments for driver to process b) fewer calls to page allocator c) less waste of space. From Eric Dumazet. 11) Allow GRO to be used on GRE tunnels, from Eric Dumazet. 12) VXLAN device driver, one way to handle VLAN issues such as the limitation of 4096 VLAN IDs yet still have some level of isolation. From Stephen Hemminger. 13) As usual there is a large boatload of driver changes, with the scale perhaps tilted towards the wireless side this time around. Fix up various fairly trivial conflicts, mostly caused by the user namespace changes. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1012 commits) hyperv: Add buffer for extended info after the RNDIS response message. 
hyperv: Report actual status in receive completion packet hyperv: Remove extra allocated space for recv_pkt_list elements hyperv: Fix page buffer handling in rndis_filter_send_request() hyperv: Fix the missing return value in rndis_filter_set_packet_filter() hyperv: Fix the max_xfer_size in RNDIS initialization vxlan: put UDP socket in correct namespace vxlan: Depend on CONFIG_INET sfc: Fix the reported priorities of different filter types sfc: Remove EFX_FILTER_FLAG_RX_OVERRIDE_IP sfc: Fix loopback self-test with separate_tx_channels=1 sfc: Fix MCDI structure field lookup sfc: Add parentheses around use of bitfield macro arguments sfc: Fix null function pointer in efx_sriov_channel_type vxlan: virtual extensible lan igmp: export symbol ip_mc_leave_group netlink: add attributes to fdb interface tg3: unconditionally select HWMON support when tg3 is enabled. Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT" gre: fix sparse warning ...
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r--drivers/net/bonding/bond_main.c140
1 files changed, 93 insertions, 47 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d688a8a..7858c58 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1120,10 +1120,10 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
- netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
+ call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
if (should_notify_peers)
- netdev_bonding_change(bond->dev,
- NETDEV_NOTIFY_PEERS);
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+ bond->dev);
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
@@ -1558,8 +1558,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_dev->name,
bond_dev->type, slave_dev->type);
- res = netdev_bonding_change(bond_dev,
- NETDEV_PRE_TYPE_CHANGE);
+ res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
+ bond_dev);
res = notifier_to_errno(res);
if (res) {
pr_err("%s: refused to change device type\n",
@@ -1579,8 +1579,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}
- netdev_bonding_change(bond_dev,
- NETDEV_POST_TYPE_CHANGE);
+ call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
+ bond_dev);
}
} else if (bond_dev->type != slave_dev->type) {
pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n",
@@ -1941,7 +1941,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
}
block_netpoll_tx();
- netdev_bonding_change(bond_dev, NETDEV_RELEASE);
+ call_netdevice_notifiers(NETDEV_RELEASE, bond_dev);
write_lock_bh(&bond->lock);
slave = bond_get_slave_by_dev(bond, slave_dev);
@@ -2584,7 +2584,7 @@ re_arm:
read_unlock(&bond->lock);
return;
}
- netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
rtnl_unlock();
}
}
@@ -2811,12 +2811,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
arp_work.work);
struct slave *slave, *oldcurrent;
int do_failover = 0;
- int delta_in_ticks;
+ int delta_in_ticks, extra_ticks;
int i;
read_lock(&bond->lock);
delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
+ extra_ticks = delta_in_ticks / 2;
if (bond->slave_cnt == 0)
goto re_arm;
@@ -2839,10 +2840,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
if (slave->link != BOND_LINK_UP) {
if (time_in_range(jiffies,
trans_start - delta_in_ticks,
- trans_start + delta_in_ticks) &&
+ trans_start + delta_in_ticks + extra_ticks) &&
time_in_range(jiffies,
slave->dev->last_rx - delta_in_ticks,
- slave->dev->last_rx + delta_in_ticks)) {
+ slave->dev->last_rx + delta_in_ticks + extra_ticks)) {
slave->link = BOND_LINK_UP;
bond_set_active_slave(slave);
@@ -2872,10 +2873,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
*/
if (!time_in_range(jiffies,
trans_start - delta_in_ticks,
- trans_start + 2 * delta_in_ticks) ||
+ trans_start + 2 * delta_in_ticks + extra_ticks) ||
!time_in_range(jiffies,
slave->dev->last_rx - delta_in_ticks,
- slave->dev->last_rx + 2 * delta_in_ticks)) {
+ slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) {
slave->link = BOND_LINK_DOWN;
bond_set_backup_slave(slave);
@@ -2933,6 +2934,14 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
struct slave *slave;
int i, commit = 0;
unsigned long trans_start;
+ int extra_ticks;
+
+ /* All the time comparisons below need some extra time. Otherwise, on
+ * fast networks the ARP probe/reply may arrive within the same jiffy
+ * as it was sent. Then, the next time the ARP monitor is run, one
+ * arp_interval will already have passed in the comparisons.
+ */
+ extra_ticks = delta_in_ticks / 2;
bond_for_each_slave(bond, slave, i) {
slave->new_link = BOND_LINK_NOCHANGE;
@@ -2940,7 +2949,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
if (slave->link != BOND_LINK_UP) {
if (time_in_range(jiffies,
slave_last_rx(bond, slave) - delta_in_ticks,
- slave_last_rx(bond, slave) + delta_in_ticks)) {
+ slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) {
slave->new_link = BOND_LINK_UP;
commit++;
@@ -2956,7 +2965,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
*/
if (time_in_range(jiffies,
slave->jiffies - delta_in_ticks,
- slave->jiffies + 2 * delta_in_ticks))
+ slave->jiffies + 2 * delta_in_ticks + extra_ticks))
continue;
/*
@@ -2976,7 +2985,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
!bond->current_arp_slave &&
!time_in_range(jiffies,
slave_last_rx(bond, slave) - delta_in_ticks,
- slave_last_rx(bond, slave) + 3 * delta_in_ticks)) {
+ slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) {
slave->new_link = BOND_LINK_DOWN;
commit++;
@@ -2992,10 +3001,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
if (bond_is_active_slave(slave) &&
(!time_in_range(jiffies,
trans_start - delta_in_ticks,
- trans_start + 2 * delta_in_ticks) ||
+ trans_start + 2 * delta_in_ticks + extra_ticks) ||
!time_in_range(jiffies,
slave_last_rx(bond, slave) - delta_in_ticks,
- slave_last_rx(bond, slave) + 2 * delta_in_ticks))) {
+ slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) {
slave->new_link = BOND_LINK_DOWN;
commit++;
@@ -3027,7 +3036,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
if ((!bond->curr_active_slave &&
time_in_range(jiffies,
trans_start - delta_in_ticks,
- trans_start + delta_in_ticks)) ||
+ trans_start + delta_in_ticks + delta_in_ticks / 2)) ||
bond->curr_active_slave != slave) {
slave->link = BOND_LINK_UP;
if (bond->current_arp_slave) {
@@ -3203,7 +3212,7 @@ re_arm:
read_unlock(&bond->lock);
return;
}
- netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
rtnl_unlock();
}
}
@@ -3352,56 +3361,93 @@ static struct notifier_block bond_netdev_notifier = {
/*---------------------------- Hashing Policies -----------------------------*/
/*
+ * Hash for the output device based upon layer 2 data
+ */
+static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
+{
+ struct ethhdr *data = (struct ethhdr *)skb->data;
+
+ if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
+ return (data->h_dest[5] ^ data->h_source[5]) % count;
+
+ return 0;
+}
+
+/*
* Hash for the output device based upon layer 2 and layer 3 data. If
- * the packet is not IP mimic bond_xmit_hash_policy_l2()
+ * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
*/
static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
{
struct ethhdr *data = (struct ethhdr *)skb->data;
- struct iphdr *iph = ip_hdr(skb);
-
- if (skb->protocol == htons(ETH_P_IP)) {
+ struct iphdr *iph;
+ struct ipv6hdr *ipv6h;
+ u32 v6hash;
+ __be32 *s, *d;
+
+ if (skb->protocol == htons(ETH_P_IP) &&
+ skb_network_header_len(skb) >= sizeof(*iph)) {
+ iph = ip_hdr(skb);
return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
(data->h_dest[5] ^ data->h_source[5])) % count;
+ } else if (skb->protocol == htons(ETH_P_IPV6) &&
+ skb_network_header_len(skb) >= sizeof(*ipv6h)) {
+ ipv6h = ipv6_hdr(skb);
+ s = &ipv6h->saddr.s6_addr32[0];
+ d = &ipv6h->daddr.s6_addr32[0];
+ v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
+ v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
+ return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
}
- return (data->h_dest[5] ^ data->h_source[5]) % count;
+ return bond_xmit_hash_policy_l2(skb, count);
}
/*
* Hash for the output device based upon layer 3 and layer 4 data. If
* the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
- * altogether not IP, mimic bond_xmit_hash_policy_l2()
+ * altogether not IP, fall back on bond_xmit_hash_policy_l2()
*/
static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
{
- struct ethhdr *data = (struct ethhdr *)skb->data;
- struct iphdr *iph = ip_hdr(skb);
- __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
- int layer4_xor = 0;
-
- if (skb->protocol == htons(ETH_P_IP)) {
+ u32 layer4_xor = 0;
+ struct iphdr *iph;
+ struct ipv6hdr *ipv6h;
+ __be32 *s, *d;
+ __be16 *layer4hdr;
+
+ if (skb->protocol == htons(ETH_P_IP) &&
+ skb_network_header_len(skb) >= sizeof(*iph)) {
+ iph = ip_hdr(skb);
if (!ip_is_fragment(iph) &&
(iph->protocol == IPPROTO_TCP ||
- iph->protocol == IPPROTO_UDP)) {
- layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1)));
+ iph->protocol == IPPROTO_UDP) &&
+ (skb_headlen(skb) - skb_network_offset(skb) >=
+ iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) {
+ layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
+ layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
}
return (layer4_xor ^
((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
-
+ } else if (skb->protocol == htons(ETH_P_IPV6) &&
+ skb_network_header_len(skb) >= sizeof(*ipv6h)) {
+ ipv6h = ipv6_hdr(skb);
+ if ((ipv6h->nexthdr == IPPROTO_TCP ||
+ ipv6h->nexthdr == IPPROTO_UDP) &&
+ (skb_headlen(skb) - skb_network_offset(skb) >=
+ sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) {
+ layer4hdr = (__be16 *)(ipv6h + 1);
+ layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
+ }
+ s = &ipv6h->saddr.s6_addr32[0];
+ d = &ipv6h->daddr.s6_addr32[0];
+ layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
+ layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
+ (layer4_xor >> 8);
+ return layer4_xor % count;
}
- return (data->h_dest[5] ^ data->h_source[5]) % count;
-}
-
-/*
- * Hash for the output device based upon layer 2 data
- */
-static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
-{
- struct ethhdr *data = (struct ethhdr *)skb->data;
-
- return (data->h_dest[5] ^ data->h_source[5]) % count;
+ return bond_xmit_hash_policy_l2(skb, count);
}
/*-------------------------- Device entry points ----------------------------*/
OpenPOWER on IntegriCloud