diff options
author | David S. Miller <davem@davemloft.net> | 2017-02-06 22:53:14 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-02-06 22:53:14 -0500 |
commit | 152bff377653047c2a69c226435e2c3fd316b592 (patch) | |
tree | 9582a827ce818aba63b5bb5f0f7a3d1e9f992cf9 | |
parent | 63dfef75ed75364901d7caa52c6420cec3e73519 (diff) | |
parent | 83a718d6294964fd1b227fa5f1ad001bc1fe7656 (diff) | |
download | op-kernel-dev-152bff377653047c2a69c226435e2c3fd316b592.zip op-kernel-dev-152bff377653047c2a69c226435e2c3fd316b592.tar.gz |
Merge branch 'bridge-improve-cache-utilization'
Nikolay Aleksandrov says:
====================
bridge: improve cache utilization
This is the first set which begins to deal with the bad bridge cache
access patterns. The first patch rearranges the bridge and port structs
a little so the frequently (and closely) accessed members are in the same
cache line. The second patch then moves the garbage collection to a
workqueue trying to improve system responsiveness under load (many fdbs)
and more importantly removes the need to check if the matched entry is
expired in __br_fdb_get which was a major source of false-sharing.
The third patch is a preparation for the final one which
If properly configured, i.e. ports bound to CPUs (thus updating "updated"
locally) then the bridge's HitM goes from 100% to 0%, but even without
binding we get a win because previously every lookup that iterated over
the hash chain caused false-sharing due to the first cache line being
used for both mac/vid and used/updated fields.
Some results from tests I've run:
(note that these were run in good conditions for the baseline, everything
ran on a single NUMA node and there were only 3 fdbs)
1. baseline
100% Load HitM on the fdbs (between everyone who has done lookups and hit
one of the 3 hash chains of the communicating
src/dst fdbs)
Overall 5.06% Load HitM for the bridge, first place in the list
2. patched & ports bound to CPUs
0% Local load HitM, bridge is not even in the c2c report list
Also there's 3% consistent improvement in netperf tests.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/bridge/br_device.c | 1 | ||||
-rw-r--r-- | net/bridge/br_fdb.c | 34 | ||||
-rw-r--r-- | net/bridge/br_if.c | 2 | ||||
-rw-r--r-- | net/bridge/br_input.c | 3 | ||||
-rw-r--r-- | net/bridge/br_ioctl.c | 2 | ||||
-rw-r--r-- | net/bridge/br_netlink.c | 2 | ||||
-rw-r--r-- | net/bridge/br_private.h | 57 | ||||
-rw-r--r-- | net/bridge/br_stp.c | 2 | ||||
-rw-r--r-- | net/bridge/br_stp_if.c | 4 | ||||
-rw-r--r-- | net/bridge/br_stp_timer.c | 2 | ||||
-rw-r--r-- | net/bridge/br_sysfs_br.c | 2 |
11 files changed, 59 insertions, 52 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 5ba0b55..d208ee9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -411,4 +411,5 @@ void br_dev_setup(struct net_device *dev) br_netfilter_rtable_init(br); br_stp_timer_init(br); br_multicast_init(br); + INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e4a4176..5028691 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -154,7 +154,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) if (f->added_by_external_learn) fdb_del_external_learn(f); - hlist_del_rcu(&f->hlist); + hlist_del_init_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); } @@ -290,34 +290,43 @@ out: spin_unlock_bh(&br->hash_lock); } -void br_fdb_cleanup(unsigned long _data) +void br_fdb_cleanup(struct work_struct *work) { - struct net_bridge *br = (struct net_bridge *)_data; + struct net_bridge *br = container_of(work, struct net_bridge, + gc_work.work); unsigned long delay = hold_time(br); - unsigned long next_timer = jiffies + br->ageing_time; + unsigned long work_delay = delay; + unsigned long now = jiffies; int i; - spin_lock(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; struct hlist_node *n; + if (!br->hash[i].first) + continue; + + spin_lock_bh(&br->hash_lock); hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; + if (f->is_static) continue; if (f->added_by_external_learn) continue; this_timer = f->updated + delay; - if (time_before_eq(this_timer, jiffies)) + if (time_after(this_timer, now)) + work_delay = min(work_delay, this_timer - now); + else fdb_delete(br, f); - else if (time_before(this_timer, next_timer)) - next_timer = this_timer; } + spin_unlock_bh(&br->hash_lock); + cond_resched(); } - spin_unlock(&br->hash_lock); - mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); + /* Cleanup minimum 10 milliseconds apart */ + work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); + mod_delayed_work(system_long_wq, &br->gc_work, work_delay); } /* Completely flush all dynamic entries in forwarding database.*/ @@ -382,8 +391,6 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, &br->hash[br_mac_hash(addr, vid)], hlist) { if (ether_addr_equal(fdb->addr.addr, addr) && fdb->vlan_id == vid) { - if (unlikely(has_expired(br, fdb))) - break; return fdb; } } @@ -590,7 +597,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->dst = source; fdb_modified = true; } - fdb->updated = jiffies; + if (jiffies != fdb->updated) + fdb->updated = jiffies; if (unlikely(added_by_user)) fdb->added_by_user = 1; if (unlikely(fdb_modified)) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ed0dd33..8ac1770 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -313,7 +313,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) br_vlan_flush(br); br_multicast_dev_del(br); - del_timer_sync(&br->gc_timer); + cancel_delayed_work_sync(&br->gc_work); br_sysfs_delbr(br->dev); unregister_netdevice_queue(br->dev, head); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index fba38d8..220943f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -198,7 +198,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (dst->is_local) return br_pass_frame_up(skb); - dst->used = jiffies; + if (jiffies != dst->used) + dst->used = jiffies; br_forward(dst->dst, skb, local_rcv, false); } else { if (!mcast_hit) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index da8157c..7970f85 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -149,7 +149,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) b.hello_timer_value = br_timer_value(&br->hello_timer); b.tcn_timer_value = br_timer_value(&br->tcn_timer); b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); - b.gc_timer_value = br_timer_value(&br->gc_timer); + b.gc_timer_value = br_timer_value(&br->gc_work.timer); rcu_read_unlock(); if (copy_to_user((void __user *)args[1], &b, sizeof(b))) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fc5d885..1cbdc5b9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1250,7 +1250,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = br_timer_value(&br->gc_timer); + clockval = br_timer_value(&br->gc_work.timer); if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 40177df..1cbbf63 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -160,19 +160,21 @@ struct net_bridge_vlan_group { u16 pvid; }; -struct net_bridge_fdb_entry -{ +struct net_bridge_fdb_entry { struct hlist_node hlist; struct net_bridge_port *dst; - unsigned long updated; - unsigned long used; mac_addr addr; __u16 vlan_id; unsigned char is_local:1, is_static:1, added_by_user:1, added_by_external_learn:1; + + /* write-heavy members should not affect lookups */ + unsigned long updated ____cacheline_aligned_in_smp; + unsigned long used; + struct rcu_head rcu; }; @@ -212,12 +214,16 @@ struct net_bridge_mdb_htable u32 ver; }; -struct net_bridge_port -{ +struct net_bridge_port { struct net_bridge *br; struct net_device *dev; struct list_head list; + unsigned long flags; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_bridge_vlan_group __rcu *vlgrp; +#endif + /* STP */ u8 priority; u8 state; @@ -238,8 +244,6 @@ struct net_bridge_port struct kobject kobj; struct rcu_head rcu; - unsigned long flags; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_own_query ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) @@ -259,9 +263,6 @@ struct net_bridge_port #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif -#ifdef CONFIG_BRIDGE_VLAN_FILTERING - struct net_bridge_vlan_group __rcu *vlgrp; -#endif #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif @@ -283,14 +284,21 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * rtnl_dereference(dev->rx_handler_data) : NULL; } -struct net_bridge -{ +struct net_bridge { spinlock_t lock; + spinlock_t hash_lock; struct list_head port_list; struct net_device *dev; - struct pcpu_sw_netstats __percpu *stats; - spinlock_t hash_lock; + /* These fields are accessed on each packet */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_enabled; + u8 vlan_stats_enabled; + __be16 vlan_proto; + u16 default_pvid; + struct net_bridge_vlan_group __rcu *vlgrp; +#endif + struct hlist_head hash[BR_HASH_SIZE]; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) union { @@ -308,6 +316,9 @@ struct net_bridge bridge_id designated_root; bridge_id bridge_id; u32 root_path_cost; + unsigned char topology_change; + unsigned char topology_change_detected; + u16 root_port; unsigned long max_age; unsigned long hello_time; unsigned long forward_delay; @@ -319,7 +330,6 @@ struct net_bridge u8 group_addr[ETH_ALEN]; bool group_addr_set; - u16 root_port; enum { BR_NO_STP, /* no spanning tree */ @@ -327,9 +337,6 @@ struct net_bridge BR_USER_STP, /* new RSTP in userspace */ } stp_enabled; - unsigned char topology_change; - unsigned char topology_change_detected; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING unsigned char multicast_router; @@ -374,21 +381,13 @@ struct net_bridge struct timer_list hello_timer; struct timer_list tcn_timer; struct timer_list topology_change_timer; - struct timer_list gc_timer; + struct delayed_work gc_work; struct kobject *ifobj; u32 auto_cnt; #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif - -#ifdef CONFIG_BRIDGE_VLAN_FILTERING - struct net_bridge_vlan_group __rcu *vlgrp; - u8 vlan_enabled; - u8 vlan_stats_enabled; - __be16 vlan_proto; - u16 default_pvid; -#endif }; struct br_input_skb_cb { @@ -505,7 +504,7 @@ void br_fdb_find_delete_local(struct net_bridge *br, const unsigned char *addr, u16 vid); void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); -void br_fdb_cleanup(unsigned long arg); +void br_fdb_cleanup(struct work_struct *work); void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, u16 vid, int do_all); struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 71fd1a4..8f56c2d 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -602,7 +602,7 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time) br->ageing_time = t; spin_unlock_bh(&br->lock); - mod_timer(&br->gc_timer, jiffies); + mod_delayed_work(system_long_wq, &br->gc_work, 0); return 0; } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 6c1e214..08341d2 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -57,7 +57,7 @@ void br_stp_enable_bridge(struct net_bridge *br) spin_lock_bh(&br->lock); if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, jiffies + br->hello_time); - mod_timer(&br->gc_timer, jiffies + HZ/10); + mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10); br_config_bpdu_generation(br); @@ -88,7 +88,7 @@ void br_stp_disable_bridge(struct net_bridge *br) del_timer_sync(&br->hello_timer); del_timer_sync(&br->topology_change_timer); del_timer_sync(&br->tcn_timer); - del_timer_sync(&br->gc_timer); + cancel_delayed_work_sync(&br->gc_work); } /* called under bridge lock */ diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7ddb38e..c98b3e5 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -153,8 +153,6 @@ void br_stp_timer_init(struct net_bridge *br) setup_timer(&br->topology_change_timer, br_topology_change_timer_expired, (unsigned long) br); - - setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br); } void br_stp_port_timer_init(struct net_bridge_port *p) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index a181482..0f40349 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -263,7 +263,7 @@ static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); + return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer)); } static DEVICE_ATTR_RO(gc_timer); |