From 6cb6a27c45cec9184302c2e350b3593c64bc7f6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 2 Apr 2011 22:48:47 -0700 Subject: net: Call netdev_features_change() from netdev_update_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue FEAT_CHANGE notification when features are changed by netdev_update_features(). This will allow changes made by extra constraints on e.g. MTU change to be properly propagated like changes via ethtool. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3da9fb0..02f5637 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5236,7 +5236,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); -void netdev_update_features(struct net_device *dev) +int __netdev_update_features(struct net_device *dev) { u32 features; int err = 0; @@ -5250,7 +5250,7 @@ void netdev_update_features(struct net_device *dev) features = netdev_fix_features(dev, features); if (dev->features == features) - return; + return 0; netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); @@ -5258,12 +5258,23 @@ void netdev_update_features(struct net_device *dev) if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); - if (!err) - dev->features = features; - else if (err < 0) + if (unlikely(err < 0)) { netdev_err(dev, "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", err, features, dev->features); + return -1; + } + + if (!err) + dev->features = features; + + return 1; +} + +void netdev_update_features(struct net_device *dev) +{ + if (__netdev_update_features(dev)) + netdev_features_change(dev); } EXPORT_SYMBOL(netdev_update_features); @@ -5430,7 +5441,7 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; - netdev_update_features(dev); + __netdev_update_features(dev); /* * Default initial state at registry is that the -- cgit v1.1 From c6e1a0d12ca7b4f22c58e55a16beacfb7d3d8462 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Apr 2011 22:30:30 -0700 Subject: net: Allow no-cache copy from user on transmit This patch uses __copy_from_user_nocache on transmit to bypass data cache for a performance improvement. skb_add_data_nocache and skb_copy_to_page_nocache can be called by sendmsg functions to use this feature, initial support is in tcp_sendmsg. This functionality is configurable per device using ethtool. Presumably, this feature would only be useful when the driver does not touch the data. The feature is turned on by default if a device indicates that it does some form of checksum offload; it is off by default for devices that do no checksum offload or indicate no checksum is necessary. For the former case copy-checksum is probably done anyway, in the latter case the device is likely loopback in which case the no cache copy is probably not beneficial. This patch was tested using 200 instances of netperf TCP_RR with 1400 byte request and one byte reply. Platform is 16 core AMD x86. No-cache copy disabled: 672703 tps, 97.13% utilization 50/90/99% latency:244.31 484.205 1028.41 No-cache copy enabled: 702113 tps, 96.16% utilization, 50/90/99% latency 238.56 467.56 956.955 Using 14000 byte request and response sizes demonstrate the effects more dramatically: No-cache copy disabled: 79571 tps, 34.34 %utlization 50/90/95% latency 1584.46 2319.59 5001.76 No-cache copy enabled: 83856 tps, 34.81% utilization 50/90/95% latency 2508.42 2622.62 2735.88 Note especially the effect on latency tail (95th percentile). This seems to provide a nice performance improvement and is consistent in the tests I ran. Presumably, this would provide the greatest benfits in the presence of an application workload stressing the cache and a lot of transmit data happening. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 02f5637..5d0b4f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5425,6 +5425,14 @@ int register_netdevice(struct net_device *dev) dev->features &= ~NETIF_F_GSO; } + /* Turn on no cache copy if HW is doing checksum */ + dev->hw_features |= NETIF_F_NOCACHE_COPY; + if ((dev->features & NETIF_F_ALL_CSUM) && + !(dev->features & NETIF_F_NO_CSUM)) { + dev->wanted_features |= NETIF_F_NOCACHE_COPY; + dev->features |= NETIF_F_NOCACHE_COPY; + } + /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. @@ -6182,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) } } + /* If device can't no cache copy, don't do for all */ + if (!(one & NETIF_F_NOCACHE_COPY)) + all &= ~NETIF_F_NOCACHE_COPY; + one |= NETIF_F_ALL_CSUM; one |= all & NETIF_F_ONE_FOR_ALL; -- cgit v1.1 From bcc6d47903612c3861201cc3a866fb604f26b8b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Apr 2011 19:48:33 +0000 Subject: net: vlan: make non-hw-accel rx path similar to hw-accel Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is enabled, skb is untagged by NIC, vlan_tci is set and the skb gets into vlan code in __netif_receive_skb - vlan_hwaccel_do_receive. For non-rx-vlan-hw-accel however, tagged skb goes thru whole __netif_receive_skb, it's untagged in ptype_base hander and reinjected This incosistency is fixed by this patch. Vlan untagging happens early in __netif_receive_skb so the rest of code (ptype_all handlers, rx_handlers) see the skb like it was untagged by hw. Signed-off-by: Jiri Pirko v1->v2: remove "inline" from vlan_core.c functions Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 95897ff..d1aebf7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3130,6 +3130,12 @@ another_round: __this_cpu_inc(softnet_data.processed); + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3177,7 +3183,7 @@ ncls: ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_hwaccel_do_receive(&skb)) { + if (vlan_do_receive(&skb)) { ret = __netif_receive_skb(skb); goto out; } else if (unlikely(!skb)) -- cgit v1.1 From 872674858fe236b746317741013c830bb70775c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 12 Apr 2011 09:56:38 +0000 Subject: net: add RTNL_ASSERT in __netdev_update_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d1aebf7..f523eee 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5247,6 +5247,8 @@ int __netdev_update_features(struct net_device *dev) u32 features; int err = 0; + ASSERT_RTNL(); + features = netdev_get_wanted_features(dev); if (dev->netdev_ops->ndo_fix_features) -- cgit v1.1 From b71d1d426d263b0b6cb5760322efebbfc89d4463 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Apr 2011 04:53:02 +0000 Subject: inet: constify ip headers and in6_addr Add const qualifiers to structs iphdr, ipv6hdr and in6_addr pointers where possible, to make code intention more obvious. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3871bf6..379c993 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2502,8 +2502,8 @@ static inline void ____napi_schedule(struct softnet_data *sd, __u32 __skb_get_rxhash(struct sk_buff *skb) { int nhoff, hash = 0, poff; - struct ipv6hdr *ip6; - struct iphdr *ip; + const struct ipv6hdr *ip6; + const struct iphdr *ip; u8 ip_proto; u32 addr1, addr2, ihl; union { @@ -2518,7 +2518,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; - ip = (struct iphdr *) (skb->data + nhoff); + ip = (const struct iphdr *) (skb->data + nhoff); if (ip->frag_off & htons(IP_MF | IP_OFFSET)) ip_proto = 0; else @@ -2531,7 +2531,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; - ip6 = (struct ipv6hdr *) (skb->data + nhoff); + ip6 = (const struct ipv6hdr *) (skb->data + nhoff); ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; -- cgit v1.1 From 22d5969fb450afd3a4aff606360f7d52c5a3a628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Thu, 21 Apr 2011 12:42:15 +0000 Subject: net: make WARN_ON in dev_disable_lro() useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 379c993..541f22a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1315,7 +1315,8 @@ void dev_disable_lro(struct net_device *dev) return; __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); - WARN_ON(dev->features & NETIF_F_LRO); + if (unlikely(dev->features & NETIF_F_LRO)) + netdev_WARN(dev, "failed to disable LRO!\n"); } EXPORT_SYMBOL(dev_disable_lro); -- cgit v1.1 From 3aba891dde3842d89ad022237b99c1ed308040b0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 19 Apr 2011 03:48:16 +0000 Subject: bonding: move processing of recv handlers into handle_frame() Since now when bonding uses rx_handler, all traffic going into bond device goes thru bond_handle_frame. So there's no need to go back into bonding code later via ptype handlers. This patch converts original ptype handlers into "bonding receive probes". These functions are called from bond_handle_frame and they are registered per-mode. Note that vlan packets are also handled because they are always untagged thanks to vlan_untag() Note that this also allows arpmon for eth-bond-bridge-vlan topology. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 541f22a..3bbb4c2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3077,25 +3077,6 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -static void vlan_on_bond_hook(struct sk_buff *skb) -{ - /* - * Make sure ARP frames received on VLAN interfaces stacked on - * bonding interfaces still make their way to any base bonding - * device that may have registered for a specific ptype. - */ - if (skb->dev->priv_flags & IFF_802_1Q_VLAN && - vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING && - skb->protocol == htons(ETH_P_ARP)) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - - if (!skb2) - return; - skb2->dev = vlan_dev_real_dev(skb->dev); - netif_rx(skb2); - } -} - static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3191,8 +3172,6 @@ ncls: goto out; } - vlan_on_bond_hook(skb); - /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; -- cgit v1.1 From 1742f183fc218798dab6fcf0ded25b6608fc0a48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 22 Apr 2011 06:31:16 +0000 Subject: net: fix netdev_increment_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify and fix netdev_increment_features() to conform to what is stated in netdevice.h comments about NETIF_F_ONE_FOR_ALL. Include FCoE segmentation and VLAN-challedged flags in computation. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3bbb4c2..7db99b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6164,33 +6164,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, */ u32 netdev_increment_features(u32 all, u32 one, u32 mask) { - /* If device needs checksumming, downgrade to it. */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); - else if (mask & NETIF_F_ALL_CSUM) { - /* If one device supports v4/v6 checksumming, set for all. */ - if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && - !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - } - - /* If one device supports hw checksumming, set for all. */ - if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= NETIF_F_HW_CSUM; - } - } + if (mask & NETIF_F_GEN_CSUM) + mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_VLAN_CHALLENGED; - /* If device can't no cache copy, don't do for all */ - if (!(one & NETIF_F_NOCACHE_COPY)) - all &= ~NETIF_F_NOCACHE_COPY; + all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all &= one | ~NETIF_F_ALL_FOR_ALL; - one |= NETIF_F_ALL_CSUM; + /* If device needs checksumming, downgrade to it. */ + if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM)) + all &= ~NETIF_F_NO_CSUM; - one |= all & NETIF_F_ONE_FOR_ALL; - all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; - all |= one & mask & NETIF_F_ONE_FOR_ALL; + /* If one device supports hw checksumming, set for all. */ + if (all & NETIF_F_GEN_CSUM) + all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); return all; } -- cgit v1.1 From 8ae6daca85c8bbd6a32c382db5e2a2a989f8bed2 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 27 Apr 2011 18:32:38 +0000 Subject: ethtool: Call ethtool's get/set_settings callbacks with cleaned data This makes sure that when a driver calls the ethtool's get/set_settings() callback of another driver, the data passed to it is clean. This guarantees that speed_hi will be zeroed correctly if the called callback doesn't explicitely set it: we are sure we don't get a corrupted speed from the underlying driver. We also take care of setting the cmd field appropriately (ETHTOOL_GSET/SSET). This applies to dev_ethtool_get_settings(), which now makes sure it sets up that ethtool command parameter correctly before passing it to drivers. This also means that whoever calls dev_ethtool_get_settings() does not have to clean the ethtool command parameter. This function also becomes an exported symbol instead of an inline. All drivers visible to make allyesconfig under x86_64 have been updated. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- net/core/dev.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7db99b5..e95dc30 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4496,6 +4496,30 @@ void dev_set_rx_mode(struct net_device *dev) } /** + * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() + * @dev: device + * @cmd: memory area for ethtool_ops::get_settings() result + * + * The cmd arg is initialized properly (cleared and + * ethtool_cmd::cmd field set to ETHTOOL_GSET). + * + * Return device's ethtool_ops::get_settings() result value or + * -EOPNOTSUPP when device doesn't expose + * ethtool_ops::get_settings() operation. + */ +int dev_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(dev_ethtool_get_settings); + +/** * dev_get_flags - get flags reported to userspace * @dev: device * -- cgit v1.1 From 1c5cae815d19ffe02bdfda1260949ef2b1806171 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 30 Apr 2011 01:21:32 +0000 Subject: net: call dev_alloc_name from register_netdevice Force dev_alloc_name() to be called from register_netdevice() by dev_get_valid_name(). That allows to remove multiple explicit dev_alloc_name() calls. The possibility to call dev_alloc_name in advance remains. This also fixes veth creation regresion caused by 84c49d8c3e4abefb0a41a77b25aa37ebe8d6b743 Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index e95dc30..3b79bad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -948,7 +948,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) +static int dev_get_valid_name(struct net_device *dev, const char *name) { struct net *net; @@ -958,7 +958,7 @@ static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt if (!dev_valid_name(name)) return -EINVAL; - if (fmt && strchr(name, '%')) + if (strchr(name, '%')) return dev_alloc_name(dev, name); else if (__dev_get_by_name(net, name)) return -EEXIST; @@ -995,7 +995,7 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - err = dev_get_valid_name(dev, newname, 1); + err = dev_get_valid_name(dev, newname); if (err < 0) return err; @@ -5420,8 +5420,8 @@ int register_netdevice(struct net_device *dev) } } - ret = dev_get_valid_name(dev, dev->name, 0); - if (ret) + ret = dev_get_valid_name(dev, dev->name); + if (ret < 0) goto err_uninit; dev->ifindex = dev_new_index(net); @@ -5562,19 +5562,7 @@ int register_netdev(struct net_device *dev) int err; rtnl_lock(); - - /* - * If the name is a format string the caller wants us to do a - * name allocation. - */ - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto out; - } - err = register_netdevice(dev); -out: rtnl_unlock(); return err; } @@ -6056,7 +6044,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(dev, pat, 1)) + if (dev_get_valid_name(dev, pat) < 0) goto out; } -- cgit v1.1 From afe12cc86b0ba545a01ad8716539ab07ab6e9e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 7 May 2011 03:22:17 +0000 Subject: net: introduce netdev_change_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be needed by bonding and other drivers changing vlan_features after ndo_init callback. As a bonus, this includes kernel-doc for netdev_update_features(). Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 75898a3..ea23353 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5289,6 +5289,14 @@ int __netdev_update_features(struct net_device *dev) return 1; } +/** + * netdev_update_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications if it + * has changed. Should be called after driver or hardware dependent + * conditions might have changed that influence the features. + */ void netdev_update_features(struct net_device *dev) { if (__netdev_update_features(dev)) @@ -5297,6 +5305,23 @@ void netdev_update_features(struct net_device *dev) EXPORT_SYMBOL(netdev_update_features); /** + * netdev_change_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications even + * if they have not changed. Should be called instead of + * netdev_update_features() if also dev->vlan_features might + * have changed to allow the changes to be propagated to stacked + * VLAN devices. + */ +void netdev_change_features(struct net_device *dev) +{ + __netdev_update_features(dev); + netdev_features_change(dev); +} +EXPORT_SYMBOL(netdev_change_features); + +/** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from * @dev: the device to transfer operstate to -- cgit v1.1 From 0696c3a8acd3b7c3186dd231d65d97e05a75189f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?= Date: Thu, 12 May 2011 15:46:56 +0000 Subject: net:set valid name before calling ndo_init() In commit 1c5cae815d19 (net: call dev_alloc_name from register_netdevice), a bug of bonding was involved, see example 1 and 2. In register_netdevice(), the name of net_device is not valid until dev_get_valid_name() is called. But dev->netdev_ops->ndo_init(that is bond_init) is called before dev_get_valid_name(), and it uses the invalid name of net_device. I think register_netdevice() should make sure that the name of net_device is valid before calling ndo_init(). example 1: modprobe bonding ls /proc/net/bonding/bond%d ps -eLf root 3398 2 3398 0 1 21:34 ? 00:00:00 [bond%d] example 2: modprobe bonding max_bonds=3 [ 170.100292] bonding: Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011) [ 170.101090] bonding: Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details. [ 170.102469] ------------[ cut here ]------------ [ 170.103150] WARNING: at /home/pwp/net-next-2.6/fs/proc/generic.c:586 proc_register+0x126/0x157() [ 170.104075] Hardware name: VirtualBox [ 170.105065] proc_dir_entry 'bonding/bond%d' already registered [ 170.105613] Modules linked in: bonding(+) sunrpc ipv6 uinput microcode ppdev parport_pc parport joydev e1000 pcspkr i2c_piix4 i2c_core [last unloaded: bonding] [ 170.108397] Pid: 3457, comm: modprobe Not tainted 2.6.39-rc2+ #14 [ 170.108935] Call Trace: [ 170.109382] [] warn_slowpath_common+0x6a/0x7f [ 170.109911] [] ? proc_register+0x126/0x157 [ 170.110329] [] warn_slowpath_fmt+0x2b/0x2f [ 170.110846] [] proc_register+0x126/0x157 [ 170.111870] [] proc_create_data+0x82/0x98 [ 170.112335] [] bond_create_proc_entry+0x3f/0x73 [bonding] [ 170.112905] [] bond_init+0x77/0xa5 [bonding] [ 170.113319] [] register_netdevice+0x8c/0x1d3 [ 170.113848] [] bond_create+0x6c/0x90 [bonding] [ 170.114322] [] bonding_init+0x763/0x7b1 [bonding] [ 170.114879] [] do_one_initcall+0x76/0x122 [ 170.115317] [] ? 0xf94f3fff [ 170.115799] [] sys_init_module+0x1286/0x140d [ 170.116879] [] sysenter_do_call+0x12/0x28 [ 170.117404] ---[ end trace 64e4fac3ae5fff1a ]--- [ 170.117924] bond%d: Warning: failed to register to debugfs [ 170.128728] ------------[ cut here ]------------ [ 170.129360] WARNING: at /home/pwp/net-next-2.6/fs/proc/generic.c:586 proc_register+0x126/0x157() [ 170.130323] Hardware name: VirtualBox [ 170.130797] proc_dir_entry 'bonding/bond%d' already registered [ 170.131315] Modules linked in: bonding(+) sunrpc ipv6 uinput microcode ppdev parport_pc parport joydev e1000 pcspkr i2c_piix4 i2c_core [last unloaded: bonding] [ 170.133731] Pid: 3457, comm: modprobe Tainted: G W 2.6.39-rc2+ #14 [ 170.134308] Call Trace: [ 170.134743] [] warn_slowpath_common+0x6a/0x7f [ 170.135305] [] ? proc_register+0x126/0x157 [ 170.135820] [] warn_slowpath_fmt+0x2b/0x2f [ 170.137168] [] proc_register+0x126/0x157 [ 170.137700] [] proc_create_data+0x82/0x98 [ 170.138174] [] bond_create_proc_entry+0x3f/0x73 [bonding] [ 170.138745] [] bond_init+0x77/0xa5 [bonding] [ 170.139278] [] register_netdevice+0x8c/0x1d3 [ 170.139828] [] bond_create+0x6c/0x90 [bonding] [ 170.140361] [] bonding_init+0x763/0x7b1 [bonding] [ 170.140927] [] do_one_initcall+0x76/0x122 [ 170.141494] [] ? 0xf94f3fff [ 170.141975] [] sys_init_module+0x1286/0x140d [ 170.142463] [] sysenter_do_call+0x12/0x28 [ 170.142974] ---[ end trace 64e4fac3ae5fff1b ]--- [ 170.144949] bond%d: Warning: failed to register to debugfs Signed-off-by: Weiping Pan Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ea23353..3ed09f8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5437,6 +5437,10 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; + ret = dev_get_valid_name(dev, dev->name); + if (ret < 0) + goto out; + /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5447,10 +5451,6 @@ int register_netdevice(struct net_device *dev) } } - ret = dev_get_valid_name(dev, dev->name); - if (ret < 0) - goto err_uninit; - dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; -- cgit v1.1 From 372b2312010bece1e36f577d6c99a6193ec54cbd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 May 2011 13:56:59 -0400 Subject: net: use hlist_del_rcu() in dev_change_name() Using plain hlist_del() in dev_change_name() is wrong since a concurrent reader can crash trying to dereference LIST_POISON1. Bug introduced in commit 72c9528bab94 (net: Introduce dev_get_by_name_rcu()) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b624fe4..30a4078 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1007,7 +1007,7 @@ rollback: } write_lock_bh(&dev_base_lock); - hlist_del(&dev->name_hlist); + hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); synchronize_rcu(); -- cgit v1.1 From 604ae14ffb6d75d6eef4757859226b758d6bf9e3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 16 May 2011 10:37:39 +0000 Subject: net: Change netdev_fix_features messages loglevel Cool, how about we make 'Features changed' debug as well? This way userspace can't fill up the log just by tweaking tun features with an ioctl. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 30a4078..acd7423 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5258,7 +5258,7 @@ void netdev_update_features(struct net_device *dev) if (dev->features == features) return; - netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", + netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); if (dev->netdev_ops->ndo_set_features) -- cgit v1.1 From 449f4544267e73d5db372971da63634707c32299 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 12:24:16 +0000 Subject: macvlan: remove one synchronize_rcu() call When one macvlan device is dismantled, we can avoid one synchronize_rcu() call done after deletion from hash list, since caller will perform a synchronize_net() call after its ndo_stop() call. Add a new netdev->dismantle field to signal this dismantle intent. Reduces RTNL hold time. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Ben Greear Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 155de20..d945379 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5126,7 +5126,7 @@ static void rollback_registered_many(struct list_head *head) list_del(&dev->unreg_list); continue; } - + dev->dismantle = true; BUG_ON(dev->reg_state != NETREG_REGISTERED); } -- cgit v1.1