From e67f88dd12f610da98ca838822f2c9b4e7c6100e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Apr 2011 22:56:07 +0000 Subject: net: dont hold rtnl mutex during netlink dump callbacks Four years ago, Patrick made a change to hold rtnl mutex during netlink dump callbacks. I believe it was a wrong move. This slows down concurrent dumps, making good old /proc/net/ files faster than rtnetlink in some situations. This occurred to me because one "ip link show dev ..." was _very_ slow on a workload adding/removing network devices in the background. All dump callbacks are able to use RCU locking now, so this patch does roughly a revert of commits : 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks This lets writers fight for the rtnl mutex while readers go full speed. It also takes care of phonet : phonet_route_get() is now called from an RCU read-side section. I renamed it to phonet_route_get_rcu() Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Remi Denis-Courmont Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d7c4bb4..29633125 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_idx = cb->args[1]; + rcu_read_lock(); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1023,6 +1024,7 @@ cont: } } out: + rcu_read_unlock(); cb->args[1] = idx; cb->args[0] = h; @@ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) int min_len; int family; int type; - int err; type = nlh->nlmsg_type; if (type > RTM_MAX) @@ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (dumpit == NULL) return -EOPNOTSUPP; - __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); - rtnl_lock(); - return err; + return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); } memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); @@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net) { struct sock *sk; sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, - rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); + rtnetlink_rcv, NULL, THIS_MODULE); if (!sk) return -ENOMEM; net->rtnl = sk; -- cgit v1.1 From 1c5cae815d19ffe02bdfda1260949ef2b1806171 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 30 Apr 2011 01:21:32 +0000 Subject: net: call dev_alloc_name from register_netdevice Force dev_alloc_name() to be called from register_netdevice() by dev_get_valid_name(). 
That allows removing multiple explicit dev_alloc_name() calls. The possibility to call dev_alloc_name in advance remains. This also fixes the veth creation regression caused by 84c49d8c3e4abefb0a41a77b25aa37ebe8d6b743 Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 29633125..5a160f4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1572,12 +1572,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, dev->rtnl_link_state = RTNL_LINK_INITIALIZING; dev->real_num_tx_queues = real_num_queues; - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto err_free; - } - if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); if (tb[IFLA_ADDRESS]) @@ -1597,8 +1591,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, return dev; -err_free: - free_netdev(dev); err: return ERR_PTR(err); } -- cgit v1.1 From 226bd3411471af42f7edbdfaf73f2d54ebb62a66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 May 2011 23:17:57 +0000 Subject: net: use batched device unregister in veth and macvlan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit veth devices don't use the batched device unregisters yet. Since veth are a pair of devices, it makes sense to use a batch of two unregisters; this roughly divides dismantle time by two. Fix this by changing dellink() callers to always provide a non-NULL head. (Idea from Michał Mirosław) This patch also handles the macvlan case: We now dismantle all macvlans on top of a lower dev at once. Reported-by: Alex Bligh Signed-off-by: Eric Dumazet Cc: Michał Mirosław Cc: Jesse Gross Cc: Paul E. McKenney Cc: Ben Greear Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5a160f4..d2ba259 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1501,6 +1501,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; int err; + LIST_HEAD(list_kill); err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) @@ -1524,7 +1525,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!ops) return -EOPNOTSUPP; - ops->dellink(dev, NULL); + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + list_del(&list_kill); return 0; } -- cgit v1.1