diff options
author | David S. Miller <davem@davemloft.net> | 2014-01-27 13:12:50 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-01-27 13:12:50 -0800 |
commit | 66dd1c077a3f3c130d1b3f0abad364f56a3ed493 (patch) | |
tree | 097d2f21d367ae28eaa40fc5d1ee9519f8932687 | |
parent | bb9fbe2ddd6d6111c5fe5987fa1e71da7d2ab854 (diff) | |
parent | f2ebd477f141bc09b10fb8deb612a4d9b8999bba (diff) | |
download | op-kernel-dev-66dd1c077a3f3c130d1b3f0abad364f56a3ed493.zip op-kernel-dev-66dd1c077a3f3c130d1b3f0abad364f56a3ed493.tar.gz |
Merge branch 'bonding'
Veaceslav Falico says:
====================
bonding: fix locking in bond_ab_arp_prob
After the latest patches, on every call of bond_ab_arp_probe() without an
active slave I see the following warning:
[ 7.912314] RTNL: assertion failed at net/core/dev.c (4494)
...
[ 7.922495] [<ffffffff817acc6f>] dump_stack+0x51/0x72
[ 7.923714] [<ffffffff8168795e>] netdev_master_upper_dev_get+0x6e/0x70
[ 7.924940] [<ffffffff816a2a66>] rtnl_link_fill+0x116/0x260
[ 7.926143] [<ffffffff817acc6f>] ? dump_stack+0x51/0x72
[ 7.927333] [<ffffffff816a350c>] rtnl_fill_ifinfo+0x95c/0xb90
[ 7.928529] [<ffffffff8167af2b>] ? __kmalloc_reserve+0x3b/0xa0
[ 7.929681] [<ffffffff8167bfcf>] ? __alloc_skb+0x9f/0x1e0
[ 7.930827] [<ffffffff816a3b64>] rtmsg_ifinfo+0x84/0x100
[ 7.931960] [<ffffffffa00bca07>] bond_ab_arp_probe+0x1a7/0x370 [bonding]
[ 7.933133] [<ffffffffa00bcd78>] bond_activebackup_arp_mon+0x1a8/0x2f0 [bonding]
...
It happens because in bond_ab_arp_probe() we change the flags of a slave
without holding the RTNL lock.
To fix this - remove the useless curr_active_lock, RCUify it and lock RTNL
while changing the slave's flags. Also, remove bond_ab_arp_probe() from
under any locks in bond_ab_arp_mon().
====================
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/bonding/bond_main.c | 67 |
1 files changed, 39 insertions, 28 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a7db819..dd75615 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2599,45 +2599,51 @@ do_failover: /* * Send ARP probes for active-backup mode ARP monitor. - * - * Called with rcu_read_lock hold. */ -static void bond_ab_arp_probe(struct bonding *bond) +static bool bond_ab_arp_probe(struct bonding *bond) { struct slave *slave, *before = NULL, *new_slave = NULL, - *curr_arp_slave = rcu_dereference(bond->current_arp_slave); + *curr_arp_slave, *curr_active_slave; struct list_head *iter; bool found = false; - read_lock(&bond->curr_slave_lock); + rcu_read_lock(); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); + curr_active_slave = rcu_dereference(bond->curr_active_slave); - if (curr_arp_slave && bond->curr_active_slave) + if (curr_arp_slave && curr_active_slave) pr_info("PROBE: c_arp %s && cas %s BAD\n", curr_arp_slave->dev->name, - bond->curr_active_slave->dev->name); + curr_active_slave->dev->name); - if (bond->curr_active_slave) { - bond_arp_send_all(bond, bond->curr_active_slave); - read_unlock(&bond->curr_slave_lock); - return; + if (curr_active_slave) { + bond_arp_send_all(bond, curr_active_slave); + rcu_read_unlock(); + return true; } - - read_unlock(&bond->curr_slave_lock); + rcu_read_unlock(); /* if we don't have a curr_active_slave, search for the next available * backup slave from the current_arp_slave and make it the candidate * for becoming the curr_active_slave */ + if (!rtnl_trylock()) + return false; + /* curr_arp_slave might have gone away */ + curr_arp_slave = ACCESS_ONCE(bond->current_arp_slave); + if (!curr_arp_slave) { - curr_arp_slave = bond_first_slave_rcu(bond); - if (!curr_arp_slave) - return; + curr_arp_slave = bond_first_slave(bond); + if (!curr_arp_slave) { + rtnl_unlock(); + return true; + } } bond_set_slave_inactive_flags(curr_arp_slave); - bond_for_each_slave_rcu(bond, slave, iter) { + bond_for_each_slave(bond, slave, iter) { if (!found && !before && IS_UP(slave->dev)) before = slave; @@ -2667,21 +2673,26 @@ static void bond_ab_arp_probe(struct bonding *bond) if (!new_slave && before) new_slave = before; - if (!new_slave) - return; + if (!new_slave) { + rtnl_unlock(); + return true; + } new_slave->link = BOND_LINK_BACK; bond_set_slave_active_flags(new_slave); bond_arp_send_all(bond, new_slave); new_slave->jiffies = jiffies; rcu_assign_pointer(bond->current_arp_slave, new_slave); + rtnl_unlock(); + + return true; } static void bond_activebackup_arp_mon(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, arp_work.work); - bool should_notify_peers = false; + bool should_notify_peers = false, should_commit = false; int delta_in_ticks; delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); @@ -2690,12 +2701,11 @@ static void bond_activebackup_arp_mon(struct work_struct *work) goto re_arm; rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); + should_commit = bond_ab_arp_inspect(bond); + rcu_read_unlock(); - if (bond_ab_arp_inspect(bond)) { - rcu_read_unlock(); - + if (should_commit) { /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { delta_in_ticks = 1; @@ -2704,13 +2714,14 @@ static void bond_activebackup_arp_mon(struct work_struct *work) } bond_ab_arp_commit(bond); - rtnl_unlock(); - rcu_read_lock(); } - bond_ab_arp_probe(bond); - rcu_read_unlock(); + if (!bond_ab_arp_probe(bond)) { + /* rtnl locking failed, re-arm */ + delta_in_ticks = 1; + should_notify_peers = false; + } re_arm: if (bond->params.arp_interval) |