From 8bc8aecdc5e26cfda12dbd6867af4aa67836da6a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 21 Mar 2011 20:01:00 +0100 Subject: mac80211: initialize sta->last_rx in sta_info_alloc This field is used to determine the inactivity time. When in AP mode, hostapd uses it for kicking out inactive clients after a while. Without this patch, hostapd immediately deauthenticates a new client if it checks the inactivity time before the client sends its first data frame. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 5a11078..d0311a3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -243,6 +243,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; + sta->last_rx = jiffies; ewma_init(&sta->avg_signal, 1024, 8); -- cgit v1.1 From 27660515a21bf913e3208ded3f27abd0529fae0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 18 Mar 2011 16:56:34 +0000 Subject: net: implement dev_disable_lro() hw_features compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement compatibility with new hw_features for dev_disable_lro(). This is a transition path - dev_disable_lro() should be later integrated into netdev_fix_features() after all drivers are converted. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 19 +++++++++++-------- net/core/ethtool.c | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0b88eba..f453370 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1353,14 +1353,17 @@ EXPORT_SYMBOL(dev_close); */ void dev_disable_lro(struct net_device *dev) { - if (dev->ethtool_ops && dev->ethtool_ops->get_flags && - dev->ethtool_ops->set_flags) { - u32 flags = dev->ethtool_ops->get_flags(dev); - if (flags & ETH_FLAG_LRO) { - flags &= ~ETH_FLAG_LRO; - dev->ethtool_ops->set_flags(dev, flags); - } - } + u32 flags; + + if (dev->ethtool_ops && dev->ethtool_ops->get_flags) + flags = dev->ethtool_ops->get_flags(dev); + else + flags = ethtool_op_get_flags(dev); + + if (!(flags & ETH_FLAG_LRO)) + return; + + __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); WARN_ON(dev->features & NETIF_F_LRO); } EXPORT_SYMBOL(dev_disable_lro); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a1086fb..24bd574 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -513,7 +513,7 @@ static int ethtool_set_one_feature(struct net_device *dev, } } -static int __ethtool_set_flags(struct net_device *dev, u32 data) +int __ethtool_set_flags(struct net_device *dev, u32 data) { u32 changed; -- cgit v1.1 From 74cb3c108bc0f599a4eb40980db8580cfba725c9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 Mar 2011 12:13:46 +0000 Subject: ipv4: match prefsrc when deleting routes fib_table_delete forgets to match the routes by prefsrc. Callers can specify known IP in fc_prefsrc and we should remove the exact route. This is needed for cases when same local or broadcast addresses are used in different subnets and the routes differ only in prefsrc. All callers that do not provide fc_prefsrc will ignore the route prefsrc as before and will delete the first occurence. That is how the ip route del default magic works. Current callers are: - ip_rt_ioctl where rtentry_to_fib_config provides fc_prefsrc only when the provided device name matches IP label with colon. - inet_rtm_delroute where RTA_PREFSRC is optional too - fib_magic which deals with routes when deleting addresses and where the fc_prefsrc is always set with the primary IP for the concerned IFA. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3d28a35..ac87a49 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1665,6 +1665,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && (cfg->fc_scope == RT_SCOPE_NOWHERE || fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_prefsrc || + fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && fib_nh_match(cfg, fi) == 0) { -- cgit v1.1 From e6abbaa2725a43cf5d26c4c2a5dc6c0f6029ea19 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 Mar 2011 12:13:49 +0000 Subject: ipv4: fix route deletion for IPs on many subnets Alex Sidorenko reported for problems with local routes left after IP addresses are deleted. It happens when same IPs are used in more than one subnet for the device. Fix fib_del_ifaddr to restrict the checks for duplicate local and broadcast addresses only to the IFAs that use our primary IFA or another primary IFA with same address. And we expect the prefsrc to be matched when the routes are deleted because it is possible they to differ only by prefsrc. This patch prevents local and broadcast routes to be leaked until their primary IP is deleted finally from the box. As the secondary address promotion needs to delete the routes for all secondaries that used the old primary IFA, add option to ignore these secondaries from the checks and to assume they are already deleted, so that we can safely delete the route while these IFAs are still on the device list. Reported-by: Alex Sidorenko Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 101 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a373a25..02c3ba6 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -722,12 +722,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) } } -static void fib_del_ifaddr(struct in_ifaddr *ifa) +/* Delete primary or secondary address. + * Optionally, on secondary address promotion consider the addresses + * from subnet iprim as deleted, even if they are in device list. + * In this case the secondary ifa can be in device list. + */ +void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) { struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; struct in_ifaddr *ifa1; - struct in_ifaddr *prim = ifa; + struct in_ifaddr *prim = ifa, *prim1 = NULL; __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; __be32 any = ifa->ifa_address & ifa->ifa_mask; #define LOCAL_OK 1 @@ -735,17 +740,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) #define BRD0_OK 4 #define BRD1_OK 8 unsigned ok = 0; + int subnet = 0; /* Primary network */ + int gone = 1; /* Address is missing */ + int same_prefsrc = 0; /* Another primary with same IP */ - if (!(ifa->ifa_flags & IFA_F_SECONDARY)) - fib_magic(RTM_DELROUTE, - dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, - any, ifa->ifa_prefixlen, prim); - else { + if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (prim == NULL) { printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); return; } + if (iprim && iprim != prim) { + printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n"); + return; + } + } else if (!ipv4_is_zeronet(any) && + (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) { + fib_magic(RTM_DELROUTE, + dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, + any, ifa->ifa_prefixlen, prim); + subnet = 1; } /* Deletion is more complicated than add. @@ -755,6 +769,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) */ for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + if (ifa1 == ifa) { + /* promotion, keep the IP */ + gone = 0; + continue; + } + /* Ignore IFAs from our subnet */ + if (iprim && ifa1->ifa_mask == iprim->ifa_mask && + inet_ifa_match(ifa1->ifa_address, iprim)) + continue; + + /* Ignore ifa1 if it uses different primary IP (prefsrc) */ + if (ifa1->ifa_flags & IFA_F_SECONDARY) { + /* Another address from our subnet? */ + if (ifa1->ifa_mask == prim->ifa_mask && + inet_ifa_match(ifa1->ifa_address, prim)) + prim1 = prim; + else { + /* We reached the secondaries, so + * same_prefsrc should be determined. + */ + if (!same_prefsrc) + continue; + /* Search new prim1 if ifa1 is not + * using the current prim1 + */ + if (!prim1 || + ifa1->ifa_mask != prim1->ifa_mask || + !inet_ifa_match(ifa1->ifa_address, prim1)) + prim1 = inet_ifa_byprefix(in_dev, + ifa1->ifa_address, + ifa1->ifa_mask); + if (!prim1) + continue; + if (prim1->ifa_local != prim->ifa_local) + continue; + } + } else { + if (prim->ifa_local != ifa1->ifa_local) + continue; + prim1 = ifa1; + if (prim != prim1) + same_prefsrc = 1; + } if (ifa->ifa_local == ifa1->ifa_local) ok |= LOCAL_OK; if (ifa->ifa_broadcast == ifa1->ifa_broadcast) @@ -763,19 +820,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) ok |= BRD1_OK; if (any == ifa1->ifa_broadcast) ok |= BRD0_OK; + /* primary has network specific broadcasts */ + if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) { + __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask; + __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask; + + if (!ipv4_is_zeronet(any1)) { + if (ifa->ifa_broadcast == brd1 || + ifa->ifa_broadcast == any1) + ok |= BRD_OK; + if (brd == brd1 || brd == any1) + ok |= BRD1_OK; + if (any == brd1 || any == any1) + ok |= BRD0_OK; + } + } } if (!(ok & BRD_OK)) fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); - if (!(ok & BRD1_OK)) - fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); - if (!(ok & BRD0_OK)) - fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); + if (subnet && ifa->ifa_prefixlen < 31) { + if (!(ok & BRD1_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); + if (!(ok & BRD0_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); + } if (!(ok & LOCAL_OK)) { fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); /* Check, that this local address finally disappeared. */ - if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { + if (gone && + inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { /* And the last, but not the least thing. * We must flush stray FIB entries. * @@ -896,7 +971,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: - fib_del_ifaddr(ifa); + fib_del_ifaddr(ifa, NULL); fib_update_nh_saddrs(dev); if (ifa->ifa_dev->ifa_list == NULL) { /* Last address was deleted from this interface. -- cgit v1.1 From 2d230e2b2c3111cf4a11619f60dcd158ae84e3ab Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 Mar 2011 12:13:52 +0000 Subject: ipv4: remove the routes on secondary promotion The secondary address promotion relies on fib_sync_down_addr to remove all routes created for the secondary addresses when the old primary address is deleted. It does not happen for cases when the primary address is also in another subnet. Fix that by deleting local and broadcast routes for all secondaries while they are on device list and by faking that all addresses from this subnet are to be deleted. It relies on fib_del_ifaddr being able to ignore the IPs from the concerned subnet while checking for duplication. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6d85800d..2523001 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -345,6 +345,17 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } } + /* On promotion all secondaries from subnet are changing + * the primary IP, we must remove all their routes silently + * and later to add them back with new prefsrc. Do this + * while all addresses are on the device list. + */ + for (ifa = promote; ifa; ifa = ifa->ifa_next) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa)) + fib_del_ifaddr(ifa, ifa1); + } + /* 2. Unlink it */ *ifap = ifa1->ifa_next; -- cgit v1.1 From 04024b937a6e9b7d4320b5853557cea3930d528c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 Mar 2011 12:13:54 +0000 Subject: ipv4: optimize route adding on secondary promotion Optimize the calling of fib_add_ifaddr for all secondary addresses after the promoted one to start from their place, not from the new place of the promoted secondary. It will save some CPU cycles because we are sure the promoted secondary was first for the subnet and all next secondaries do not change their place. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2523001..d5a4553 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -375,6 +375,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { + struct in_ifaddr *next_sec = promote->ifa_next; if (prev_prom) { prev_prom->ifa_next = promote->ifa_next; @@ -386,7 +387,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); - for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { + for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { if (ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) continue; -- cgit v1.1 From 9c7a4f9ce651383c73dfdff3d7e21d5f9572c4ec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2011 19:17:36 -0700 Subject: ipv6: ip6_route_output does not modify sk parameter, so make it const This avoids explicit cast to avoid 'discards qualifiers' compiler warning in a netfilter patch that i've been working on. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6814c87..843406f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -854,7 +854,7 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, +struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { int flags = 0; -- cgit v1.1 From a7bff75b087e7a355838a32efe61707cfa73c194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 22 Mar 2011 11:40:32 +0000 Subject: bridge: Fix possibly wrong MLD queries' ethernet source address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ipv6_dev_get_saddr() is currently called with an uninitialized destination address. Although in tests it usually seemed to nevertheless always fetch the right source address, there seems to be a possible race condition. Therefore this commit changes this, first setting the destination address and only after that fetching the source address. Reported-by: Jan Beulich Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 030a002..f61eb2e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -445,9 +445,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->payload_len = htons(8 + sizeof(*mldq)); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr); - ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); -- cgit v1.1 From 67d4120a1793138bc9f4a6eb61d0fc5298ed97e0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 14 Mar 2011 10:57:03 +0000 Subject: tcp: avoid cwnd moderation in undo In the current undo logic, cwnd is moderated after it was restored to the value prior entering fast-recovery. It was moderated first in tcp_try_undo_recovery then again in tcp_complete_cwr. Since the undo indicates recovery was false, these moderations are not necessary. If the undo is triggered when most of the outstanding data have been acknowledged, the (restored) cwnd is falsely pulled down to a small value. This patch removes these cwnd moderations if cwnd is undone a) during fast-recovery b) by receiving DSACKs past fast-recovery Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index da782e7..e72af7e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const int undo) +static void tcp_undo_cwr(struct sock *sk, const int undo_ssthresh) { struct tcp_sock *tp = tcp_sk(sk); @@ -2671,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); - if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { + if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } } else { tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } - tcp_moderate_cwnd(tp); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -2822,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_stamp = tcp_time_stamp; + /* Do not moderate cwnd if it's already undone in cwr or recovery */ + if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { + tp->snd_cwnd = tp->snd_ssthresh; + tp->snd_cwnd_stamp = tcp_time_stamp; + } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } -- cgit v1.1 From f6152737a95bd6c22f0c664b20831aefd48085a8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Mar 2011 19:37:11 -0700 Subject: tcp: Make undo_ssthresh arg to tcp_undo_cwr() a bool. Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e72af7e..bef9f04 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const int undo_ssthresh) +static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) { struct tcp_sock *tp = tcp_sk(sk); @@ -2698,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(sk, 1); + tcp_undo_cwr(sk, true); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else @@ -2725,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwr(sk, 1); + tcp_undo_cwr(sk, true); tp->undo_marker = 0; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); } @@ -2778,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); DBGUNDO(sk, "Hoe"); - tcp_undo_cwr(sk, 0); + tcp_undo_cwr(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); /* So... Do not make Hoe's retransmit yet. @@ -2807,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk) DBGUNDO(sk, "partial loss"); tp->lost_out = 0; - tcp_undo_cwr(sk, 1); + tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; @@ -3496,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) if (flag & FLAG_ECE) tcp_ratehalving_spur_to_response(sk); else - tcp_undo_cwr(sk, 1); + tcp_undo_cwr(sk, true); } /* F-RTO spurious RTO detection algorithm (RFC4138) -- cgit v1.1 From 406b6f974dae76a5b795d5c251d11c979a4e509b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Mar 2011 21:56:23 -0700 Subject: ipv4: Fallback to FIB local table in __ip_dev_find(). In commit 9435eb1cf0b76b323019cebf8d16762a50a12a19 ("ipv4: Implement __ip_dev_find using new interface address hash.") we reimplemented __ip_dev_find() so that it doesn't have to do a full FIB table lookup. Instead, it consults a hash table of addresses configured to interfaces. This works identically to the old code in all except one case, and that is for loopback subnets. The old code would match the loopback device for any IP address that falls within a subnet configured to the loopback device. Handle this corner case by doing the FIB lookup. We could implement this via inet_addr_onlink() but: 1) Someone could configure many addresses to loopback and inet_addr_onlink() is a simple list traversal. 2) We know the old code works. Reported-by: Julian Anastasov Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d5a4553..5345b0b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -64,6 +64,8 @@ #include #include +#include "fib_lookup.h" + static struct ipv4_devconf ipv4_devconf = { .data = { [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, @@ -151,6 +153,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) break; } } + if (!result) { + struct flowi4 fl4 = { .daddr = addr }; + struct fib_result res = { 0 }; + struct fib_table *local; + + /* Fallback to FIB local table so that communication + * over loopback subnets work. + */ + local = fib_get_table(net, RT_TABLE_LOCAL); + if (local && + !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && + res.type == RTN_LOCAL) + result = FIB_RES_DEV(res); + } if (result && devref) dev_hold(result); rcu_read_unlock(); -- cgit v1.1 From eb49a97363f020c1d7eef8bcd93865726b1fa11d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Mar 2011 12:18:15 -0700 Subject: ipv4: fix ip_rt_update_pmtu() commit 2c8cec5c10bc (Cache learned PMTU information in inetpeer) added an extra inet_putpeer() call in ip_rt_update_pmtu(). This results in various problems, since we can free one inetpeer, while it is still in use. Ref: http://www.spinics.net/lists/netdev/msg159121.html Reported-by: Alexander Beregalov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 870b518..34921b0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1593,8 +1593,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) rt->rt_peer_genid = rt_peer_genid(); } check_peer_pmtu(dst, peer); - - inet_putpeer(peer); } } -- cgit v1.1 From fcd13f42c9d6ab7b1024b9b7125a2e8db3cc00b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 24 Mar 2011 07:01:24 +0000 Subject: ipv4: fix fib metrics Alessandro Suardi reported that we could not change route metrics : ip ro change default .... advmss 1400 This regression came with commit 9c150e82ac50 (Allocate fib metrics dynamically). fib_metrics is no longer an array, but a pointer to an array. Reported-by: Alessandro Suardi Signed-off-by: Eric Dumazet Tested-by: Alessandro Suardi Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 622ac4c..75b9fb5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -251,7 +251,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && memcmp(nfi->fib_metrics, fi->fib_metrics, - sizeof(fi->fib_metrics)) == 0 && + sizeof(u32) * RTAX_MAX) == 0 && ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) return fi; -- cgit v1.1 From 436c3b66ec9824a633724ae42de1c416af4f2063 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Mar 2011 17:42:21 -0700 Subject: ipv4: Invalidate nexthop cache nh_saddr more correctly. Any operation that: 1) Brings up an interface 2) Adds an IP address to an interface 3) Deletes an IP address from an interface can potentially invalidate the nh_saddr value, requiring it to be recomputed. Perform the recomputation lazily using a generation ID. Reported-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 11 +++++++---- net/ipv4/fib_semantics.c | 32 +++++++++++--------------------- net/ipv4/route.c | 6 ++++-- 3 files changed, 22 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 02c3ba6..f116ce8f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -228,7 +228,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (res.type != RTN_LOCAL || !accept_local) goto e_inval; } - *spec_dst = FIB_RES_PREFSRC(res); + *spec_dst = FIB_RES_PREFSRC(net, res); fib_combine_itag(itag, &res); dev_match = false; @@ -258,7 +258,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, ret = 0; if (fib_lookup(net, &fl4, &res) == 0) { if (res.type == RTN_UNICAST) { - *spec_dst = FIB_RES_PREFSRC(res); + *spec_dst = FIB_RES_PREFSRC(net, res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } } @@ -960,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct net_device *dev = ifa->ifa_dev->dev; + struct net *net = dev_net(dev); switch (event) { case NETDEV_UP: @@ -967,12 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif - fib_update_nh_saddrs(dev); + atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); - fib_update_nh_saddrs(dev); + atomic_inc(&net->ipv4.dev_addr_genid); if (ifa->ifa_dev->ifa_list == NULL) { /* Last address was deleted from this interface. * Disable IP. @@ -990,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo { struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2, -1); @@ -1007,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif + atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 75b9fb5..2d4bebc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -695,6 +695,16 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, fib_info_hash_free(old_laddrhash, bytes); } +__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) +{ + nh->nh_saddr = inet_select_addr(nh->nh_dev, + nh->nh_gw, + nh->nh_cfg_scope); + nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); + + return nh->nh_saddr; +} + struct fib_info *fib_create_info(struct fib_config *cfg) { int err; @@ -855,9 +865,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) change_nexthops(fi) { nexthop_nh->nh_cfg_scope = cfg->fc_scope; - nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, - nexthop_nh->nh_gw, - nexthop_nh->nh_cfg_scope); + fib_info_update_nh_saddr(net, nexthop_nh); } endfor_nexthops(fi) link_it: @@ -1128,24 +1136,6 @@ out: return; } -void fib_update_nh_saddrs(struct net_device *dev) -{ - struct hlist_head *head; - struct hlist_node *node; - struct fib_nh *nh; - unsigned int hash; - - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; - hlist_for_each_entry(nh, node, head, nh_hash) { - if (nh->nh_dev != dev) - continue; - nh->nh_saddr = inet_select_addr(nh->nh_dev, - nh->nh_gw, - nh->nh_cfg_scope); - } -} - #ifdef CONFIG_IP_ROUTE_MULTIPATH /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 34921b0..4b0c811 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1718,7 +1718,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) - src = FIB_RES_PREFSRC(res); + src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); @@ -2615,7 +2615,7 @@ static struct rtable *ip_route_output_slow(struct net *net, fib_select_default(&res); if (!fl4.saddr) - fl4.saddr = FIB_RES_PREFSRC(res); + fl4.saddr = FIB_RES_PREFSRC(net, res); dev_out = FIB_RES_DEV(res); fl4.flowi4_oif = dev_out->ifindex; @@ -3219,6 +3219,8 @@ static __net_init int rt_genid_init(struct net *net) { get_random_bytes(&net->ipv4.rt_genid, sizeof(net->ipv4.rt_genid)); + get_random_bytes(&net->ipv4.dev_addr_genid, + sizeof(net->ipv4.dev_addr_genid)); return 0; } -- cgit v1.1 From 37e826c513883099c298317bad1b3b677b2905fb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Mar 2011 18:06:47 -0700 Subject: ipv4: Fix nexthop caching wrt. scoping. Move the scope value out of the fib alias entries and into fib_info, so that we always use the correct scope when recomputing the nexthop cached source address. Reported-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_lookup.h | 3 +-- net/ipv4/fib_semantics.c | 15 ++++++++------- net/ipv4/fib_trie.c | 12 ++++-------- 3 files changed, 13 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 4ec3238..af0f14a 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -10,7 +10,6 @@ struct fib_alias { struct fib_info *fa_info; u8 fa_tos; u8 fa_type; - u8 fa_scope; u8 fa_state; struct rcu_head rcu; }; @@ -29,7 +28,7 @@ extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, __be32 dst, + u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2d4bebc..641a5a2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -222,7 +222,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) unsigned int mask = (fib_info_hash_size - 1); unsigned int val = fi->fib_nhs; - val ^= fi->fib_protocol; + val ^= (fi->fib_protocol << 8) | fi->fib_scope; val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; for_nexthops(fi) { @@ -248,6 +248,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) if (fi->fib_nhs != nfi->fib_nhs) continue; if (nfi->fib_protocol == fi->fib_protocol && + nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && memcmp(nfi->fib_metrics, fi->fib_metrics, @@ -328,7 +329,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, goto errout; err = fib_dump_info(skb, info->pid, seq, event, tb_id, - fa->fa_type, fa->fa_scope, key, dst_len, + fa->fa_type, key, dst_len, fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ @@ -699,7 +700,7 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) { nh->nh_saddr = inet_select_addr(nh->nh_dev, nh->nh_gw, - nh->nh_cfg_scope); + nh->nh_parent->fib_scope); nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); return nh->nh_saddr; @@ -763,6 +764,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; + fi->fib_scope = cfg->fc_scope; fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; fi->fib_prefsrc = cfg->fc_prefsrc; @@ -864,7 +866,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } change_nexthops(fi) { - nexthop_nh->nh_cfg_scope = cfg->fc_scope; fib_info_update_nh_saddr(net, nexthop_nh); } endfor_nexthops(fi) @@ -914,7 +915,7 @@ failure: } int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, + u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; @@ -936,7 +937,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; - rtm->rtm_scope = scope; + rtm->rtm_scope = fi->fib_scope; rtm->rtm_protocol = fi->fib_protocol; if (rtm->rtm_dst_len) @@ -1092,7 +1093,7 @@ void fib_select_default(struct fib_result *res) list_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; - if (fa->fa_scope != res->scope || + if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ac87a49..90a3ff6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1245,7 +1245,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (fa->fa_info->fib_priority != fi->fib_priority) break; if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) { fa_match = fa; break; @@ -1271,7 +1270,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_tos = fa->fa_tos; new_fa->fa_info = fi; new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; state = fa->fa_state; new_fa->fa_state = state & ~FA_S_ACCESSED; @@ -1308,7 +1306,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_info = fi; new_fa->fa_tos = tos; new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. @@ -1362,7 +1359,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; - if (fa->fa_scope < flp->flowi4_scope) + if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; @@ -1388,7 +1385,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, res->prefixlen = plen; res->nh_sel = nhsel; res->type = fa->fa_type; - res->scope = fa->fa_scope; + res->scope = fa->fa_info->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &li->falh; @@ -1664,7 +1661,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && (cfg->fc_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == cfg->fc_scope) && + fa->fa_info->fib_scope == cfg->fc_scope) && (!cfg->fc_prefsrc || fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || @@ -1863,7 +1860,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, RTM_NEWROUTE, tb->tb_id, fa->fa_type, - fa->fa_scope, xkey, plen, fa->fa_tos, @@ -2384,7 +2380,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_indent(seq, iter->depth+1); seq_printf(seq, " /%d %s %s", li->plen, rtn_scope(buf1, sizeof(buf1), - fa->fa_scope), + fa->fa_info->fib_scope), rtn_type(buf2, sizeof(buf2), fa->fa_type)); if (fa->fa_tos) -- cgit v1.1