From bafa6d9d89072c1a18853afe9ee5de05c491c13a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 7 Sep 2012 00:45:29 +0000 Subject: ipv4/route: arg delay is useless in rt_cache_flush() Since route cache deletion (89aef8921bfbac22f), delay is no more used. Remove it. Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/arp.c | 2 +- net/ipv4/devinet.c | 6 +++--- net/ipv4/fib_frontend.c | 20 ++++++++++---------- net/ipv4/fib_rules.c | 2 +- net/ipv4/fib_trie.c | 6 +++--- net/ipv4/route.c | 19 +++---------------- 6 files changed, 21 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 77e87af..4780045 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; default: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 44bf82e..9b55b6f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1503,7 +1503,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) - rt_cache_flush(net, 0); + rt_cache_flush(net); } return ret; @@ -1537,7 +1537,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, dev_disable_lro(idev->dev); } rtnl_unlock(); - rt_cache_flush(net, 0); + rt_cache_flush(net); } } @@ -1554,7 +1554,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(net, 0); + rt_cache_flush(net); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c43ae3f..8e2b475 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -148,7 +148,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(net, -1); + rt_cache_flush(net); } /* @@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force, int delay) +static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev), delay); + rt_cache_flush(dev_net(dev)); arp_ifdown(dev); } @@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, 1, 0); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); } break; } @@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2, -1); + fib_disable_ip(dev, 2); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1062,14 +1062,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0, 0); + fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; case NETDEV_UNREGISTER_BATCH: break; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index a83d74e..274309d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(ops->fro_net, -1); + rt_cache_flush(ops->fro_net); } static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 57bd978..d1b9359 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1708,7 +1708,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 82cf2a7..f6436d3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -461,11 +461,7 @@ static void rt_cache_invalidate(struct net *net) atomic_add(shuffle + 1U, &net->ipv4.rt_genid); } -/* - * delay < 0 : invalidate cache (fast : entries will be deleted later) - * delay >= 0 : invalidate & flush cache (can be long) - */ -void rt_cache_flush(struct net *net, int delay) +void rt_cache_flush(struct net *net) { rt_cache_invalidate(net); } @@ -2345,7 +2341,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(dev_net(in_dev->dev), 0); + rt_cache_flush(dev_net(in_dev->dev)); } #ifdef CONFIG_SYSCTL @@ -2354,16 +2350,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, size_t *lenp, loff_t *ppos) { if (write) { - int flush_delay; - ctl_table ctl; - struct net *net; - - memcpy(&ctl, __ctl, sizeof(ctl)); - ctl.data = &flush_delay; - proc_dointvec(&ctl, write, buffer, lenp, ppos); - - net = (struct net *)__ctl->extra1; - rt_cache_flush(net, flush_delay); + rt_cache_flush((struct net *)__ctl->extra1); return 0; } -- cgit v1.1 From 2885da72966fcb89f48d554339d347fb02b5ea78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Sep 2012 22:27:11 +0200 Subject: net: rt_cache_flush() cleanup We dont use jhash anymore since route cache removal, so we can get rid of get_random_bytes() calls for rt_genid changes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6436d3..be27cfa9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -447,23 +447,9 @@ static inline bool rt_is_expired(const struct rtable *rth) return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perturbation of rt_genid by a small quantity [1..256] - * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() - * many times (2^24) without giving recent rt_genid. - * Jenkins hash is strong enough that litle changes of rt_genid are OK. - */ -static void rt_cache_invalidate(struct net *net) -{ - unsigned char shuffle; - - get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &net->ipv4.rt_genid); -} - void rt_cache_flush(struct net *net) { - rt_cache_invalidate(net); + atomic_inc(&net->ipv4.rt_genid); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -2520,8 +2506,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - get_random_bytes(&net->ipv4.rt_genid, - sizeof(net->ipv4.rt_genid)); + atomic_set(&net->ipv4.rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; -- cgit v1.1 From b42664f898c976247f7f609b8bb9c94d7475ca10 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:44 +0000 Subject: netns: move net->ipv4.rt_genid to net->rt_genid This commit prepares the use of rt_genid by both IPv4 and IPv6. Initialization is left in IPv4 part. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/route.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index be27cfa9..fd9af60 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -202,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline int rt_genid(struct net *net) -{ - return atomic_read(&net->ipv4.rt_genid); -} - #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { @@ -449,7 +444,7 @@ static inline bool rt_is_expired(const struct rtable *rth) void rt_cache_flush(struct net *net) { - atomic_inc(&net->ipv4.rt_genid); + rt_genid_bump(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -2506,7 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - atomic_set(&net->ipv4.rt_genid, 0); + atomic_set(&net->rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; -- cgit v1.1 From ee8372dd1989287c5eedb69d44bac43f69e496f1 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:45 +0000 Subject: xfrm: invalidate dst on policy insertion/deletion When a policy is inserted or deleted, all dst should be recalculated. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5a2aa17..ab2ce7d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -585,6 +585,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_pol_hold(policy); net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); + rt_genid_bump(net); if (delpol) __xfrm_policy_unlink(delpol, dir); policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); -- cgit v1.1 From 6f3118b571b8a4c06c7985dc3172c3526cb86253 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:46 +0000 Subject: ipv6: use net->rt_genid to check dst validity IPv6 dst should take care of rt_genid too. When a xfrm policy is inserted or deleted, all dst should be invalidated. To force the validation, dst entries should be created with ->obsolete set to DST_OBSOLETE_FORCE_CHK. This was already the case for all functions calling ip6_dst_alloc(), except for ip6_rt_copy(). As a consequence, we can remove the specific code in inet6_connection_sock. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/inet6_connection_sock.c | 23 +---------------------- net/ipv6/route.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0251a60..c4f9341 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -175,33 +175,12 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *saddr) { __ip6_dst_store(sk, dst, daddr, saddr); - -#ifdef CONFIG_XFRM - { - struct rt6_info *rt = (struct rt6_info *)dst; - rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); - } -#endif } static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { - struct dst_entry *dst; - - dst = __sk_dst_check(sk, cookie); - -#ifdef CONFIG_XFRM - if (dst) { - struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { - __sk_dst_reset(sk); - dst = NULL; - } - } -#endif - - return dst; + return __sk_dst_check(sk, cookie); } static struct dst_entry *inet6_csk_route_socket(struct sock *sk, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8e80fd2..fb29e22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -281,13 +281,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 0, DST_OBSOLETE_NONE, flags); + 0, DST_OBSOLETE_FORCE_CHK, flags); if (rt) { struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); + rt->rt6i_genid = rt_genid(net); } return rt; } @@ -1031,6 +1032,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; + /* All IPV6 dsts are created with ->obsolete set to the value + * DST_OBSOLETE_FORCE_CHK which forces validation calls down + * into this function always. + */ + if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) + return NULL; + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { if (rt->rt6i_peer_genid != rt6_peer_genid()) { if (!rt6_has_peer(rt)) @@ -1397,8 +1405,6 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->dst.obsolete = -1; - if (cfg->fc_flags & RTF_EXPIRES) rt6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); @@ -2080,7 +2086,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; - rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) -- cgit v1.1 From 2c20cbd7e3aa6e9dddc07975d3f3a89fe1f69c00 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:47 +0000 Subject: ipv6: use DST_* macro to set obselete field Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fb29e22..854e401 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -226,7 +226,7 @@ static struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -ENETUNREACH, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, @@ -246,7 +246,7 @@ static struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EACCES, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, @@ -261,7 +261,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, .output = dst_discard, -- cgit v1.1 From 864745d291b5ba80ea0bd0edcbe67273de368836 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 13 Sep 2012 11:41:26 +0000 Subject: xfrm_user: return error pointer instead of NULL When dump_one_state() returns an error, e.g. because of a too small buffer to dump the whole xfrm state, xfrm_state_netlink() returns NULL instead of an error pointer. But its callers expect an error pointer and therefore continue to operate on a NULL skbuff. This could lead to a privilege escalation (execution of user code in kernel context) if the attacker has CAP_NET_ADMIN and is able to map address 0. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e4..dac08e2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -878,6 +878,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) @@ -888,9 +889,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_state(x, 0, &info)) { + err = dump_one_state(x, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; -- cgit v1.1 From c25463722509fef0ed630b271576a8c9a70236f3 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 14 Sep 2012 09:58:32 +0000 Subject: xfrm_user: return error pointer instead of NULL #2 When dump_one_policy() returns an error, e.g. because of a too small buffer to dump the whole xfrm policy, xfrm_policy_netlink() returns NULL instead of an error pointer. But its caller expects an error pointer and therefore continues to operate on a NULL skbuff. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dac08e2..d12b625 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1548,6 +1548,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) @@ -1558,9 +1559,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_policy(xp, dir, 0, &info) < 0) { + err = dump_one_policy(xp, dir, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; -- cgit v1.1 From 0e698bf6624c469cd4f3f391247b142963ca9c4e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 15 Sep 2012 22:44:16 +0000 Subject: net: fix memory leak on oom with zerocopy If orphan flags fails, we don't free the skb on receive, which leaks the skb memory. Return value was also wrong: netif_receive_skb is supposed to return NET_RX_DROP, not ENOMEM. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d7fe32c..ac7609d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3322,7 +3322,7 @@ ncls: if (pt_prev) { if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) - ret = -ENOMEM; + goto drop; else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { -- cgit v1.1 From 1d57f19539c074105791da6384a8ad674bba8037 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Sep 2012 12:51:39 +0000 Subject: tcp: fix regression in urgent data handling Stephan Springl found that commit 1402d366019fed "tcp: introduce tcp_try_coalesce" introduced a regression for rlogin It turns out problem comes from TCP urgent data handling and a change in behavior in input path. rlogin sends two one-byte packets with URG ptr set, and when next data frame is coalesced, we lack sk_data_ready() calls to wakeup consumer. Signed-off-by: Eric Dumazet Reported-by: Stephan Springl Cc: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6e38c6c..d377f48 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4661,7 +4661,7 @@ queue_and_out: if (eaten > 0) kfree_skb_partial(skb, fragstolen); - else if (!sock_flag(sk, SOCK_DEAD)) + if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, 0); return; } @@ -5556,8 +5556,7 @@ no_ack: #endif if (eaten) kfree_skb_partial(skb, fragstolen); - else - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk, 0); return 0; } } -- cgit v1.1 From 433a19548061bb5457b6ab77ed7ea58ca6e43ddb Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 17 Sep 2012 22:40:10 +0000 Subject: xfrm: fix a read lock imbalance in make_blackhole if xfrm_policy_get_afinfo returns 0, it has already released the read lock, xfrm_policy_put_afinfo should not be called again. Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ab2ce7d..387848e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1764,7 +1764,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, if (!afinfo) { dst_release(dst_orig); - ret = ERR_PTR(-EINVAL); + return ERR_PTR(-EINVAL); } else { ret = afinfo->blackhole_route(net, dst_orig); } -- cgit v1.1 From 3d1cbdd6aefff711bcf389fdabc4af9bc22e8201 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 29 Aug 2012 10:02:08 +0200 Subject: Bluetooth: mgmt: Fix enabling SSP while powered off When new BT USB adapter is plugged in it's configured while still being powered off (HCI_AUTO_OFF flag is set), thus Set SSP will only set dev_flags but won't write changes to controller. As a result remote devices won't use Secure Simple Pairing with our device due to SSP Host Support flag disabled in extended features and may also reject SSP attempt from our side (with possible fallback to legacy pairing). This patch ensures HCI Write Simple Pairing Mode is sent when Set Powered is called to power on controller and clear HCI_AUTO_OFF flag. Signed-off-by: Andrzej Kaczmarek Cc: stable@vger.kernel.org Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ad6613d..f943bbf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2875,6 +2875,12 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) if (scan) hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + u8 ssp = 1; + + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); + } + update_class(hdev); update_name(hdev, hdev->dev_name); update_eir(hdev); -- cgit v1.1 From 562fcc246ebe31ade6e1be08585673b9b2785498 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 29 Aug 2012 10:02:09 +0200 Subject: Bluetooth: mgmt: Fix enabling LE while powered off When new BT USB adapter is plugged in it's configured while still being powered off (HCI_AUTO_OFF flag is set), thus Set LE will only set dev_flags but won't write changes to controller. As a result it's not possible to start device discovery session on LE controller as it uses interleaved discovery which requires LE Supported Host flag in extended features. This patch ensures HCI Write LE Host Supported is sent when Set Powered is called to power on controller and clear HCI_AUTO_OFF flag. Signed-off-by: Andrzej Kaczmarek Cc: stable@vger.kernel.org Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f943bbf..eba022d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2881,6 +2881,16 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); } + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 1; + cp.simul = !!(hdev->features[6] & LMP_SIMUL_LE_BR); + + hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + update_class(hdev); update_name(hdev, hdev->dev_name); update_eir(hdev); -- cgit v1.1 From aad3d0e343900a4c2c5dbc73f76550aa64a0ac1b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 6 Sep 2012 15:05:42 +0300 Subject: Bluetooth: Fix freeing uninitialized delayed works When releasing L2CAP socket which is in BT_CONFIG state l2cap_chan_close invokes l2cap_send_disconn_req which cancel delayed works which are only set in BT_CONNECTED state with l2cap_ertm_init. Add state check before cancelling those works. ... [ 9668.574372] [21085] l2cap_sock_release: sock cd065200, sk f073e800 [ 9668.574399] [21085] l2cap_sock_shutdown: sock cd065200, sk f073e800 [ 9668.574411] [21085] l2cap_chan_close: chan f073ec00 state BT_CONFIG sk f073e800 [ 9668.574421] [21085] l2cap_send_disconn_req: chan f073ec00 conn ecc16600 [ 9668.574441] INFO: trying to register non-static key. [ 9668.574443] the code is fine but needs lockdep annotation. [ 9668.574446] turning off the locking correctness validator. [ 9668.574450] Pid: 21085, comm: obex-client Tainted: G O 3.5.0+ #57 [ 9668.574452] Call Trace: [ 9668.574463] [] __lock_acquire+0x12e3/0x1700 [ 9668.574468] [] ? trace_hardirqs_on+0xb/0x10 [ 9668.574476] [] ? printk+0x4d/0x4f [ 9668.574479] [] lock_acquire+0x88/0x130 [ 9668.574487] [] ? try_to_del_timer_sync+0x60/0x60 [ 9668.574491] [] del_timer_sync+0x50/0xc0 [ 9668.574495] [] ? try_to_del_timer_sync+0x60/0x60 [ 9668.574515] [] l2cap_send_disconn_req+0xe3/0x160 [bluetooth] ... Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4ea1710..38c00f1 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1008,7 +1008,7 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c if (!conn) return; - if (chan->mode == L2CAP_MODE_ERTM) { + if (chan->mode == L2CAP_MODE_ERTM && chan->state == BT_CONNECTED) { __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); -- cgit v1.1 From 78c04c0bf52360dc2f7185e99c8e9aa05d73ae5a Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 14 Sep 2012 16:34:46 -0300 Subject: Bluetooth: Fix not removing power_off delayed work For example, when a usb reset is received (I could reproduce it running something very similar to this[1] in a loop) it could be that the device is unregistered while the power_off delayed work is still scheduled to run. Backtrace: WARNING: at lib/debugobjects.c:261 debug_print_object+0x7c/0x8d() Hardware name: To Be Filled By O.E.M. ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x26 Modules linked in: nouveau mxm_wmi btusb wmi bluetooth ttm coretemp drm_kms_helper Pid: 2114, comm: usb-reset Not tainted 3.5.0bt-next #2 Call Trace: [] ? free_obj_work+0x57/0x91 [] warn_slowpath_common+0x7e/0x97 [] warn_slowpath_fmt+0x41/0x43 [] debug_print_object+0x7c/0x8d [] ? __queue_work+0x259/0x259 [] ? debug_check_no_obj_freed+0x6f/0x1b5 [] debug_check_no_obj_freed+0x98/0x1b5 [] ? bt_host_release+0x10/0x1e [bluetooth] [] kfree+0x90/0xe6 [] bt_host_release+0x10/0x1e [bluetooth] [] device_release+0x4a/0x7e [] kobject_release+0x11d/0x154 [] kobject_put+0x4a/0x4f [] put_device+0x12/0x14 [] hci_free_dev+0x22/0x26 [bluetooth] [] btusb_disconnect+0x96/0x9f [btusb] [] usb_unbind_interface+0x57/0x106 [] __device_release_driver+0x83/0xd6 [] device_release_driver+0x20/0x2d [] usb_driver_release_interface+0x44/0x7b [] usb_forced_unbind_intf+0x45/0x4e [] usb_reset_device+0xa6/0x12e [] usbdev_do_ioctl+0x319/0xe20 [] ? avc_has_perm_flags+0xc9/0x12e [] ? avc_has_perm_flags+0x25/0x12e [] ? do_page_fault+0x31e/0x3a1 [] usbdev_ioctl+0x9/0xd [] vfs_ioctl+0x21/0x34 [] do_vfs_ioctl+0x408/0x44b [] ? file_has_perm+0x76/0x81 [] sys_ioctl+0x51/0x76 [] system_call_fastpath+0x16/0x1b [1] http://cpansearch.perl.org/src/DPAVLIN/Biblio-RFID-0.03/examples/usbreset.c Signed-off-by: Vinicius Costa Gomes Cc: stable@vger.kernel.org Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4de5db..0b997c8f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -734,6 +734,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_work_sync(&hdev->le_scan); + cancel_delayed_work(&hdev->power_off); + hci_req_cancel(hdev, ENODEV); hci_req_lock(hdev); -- cgit v1.1 From a85d0d7f3460b1a123b78e7f7e39bf72c37dfb78 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 14 Sep 2012 15:36:57 -0700 Subject: cfg80211: fix possible circular lock on reg_regdb_search() When call_crda() is called we kick off a witch hunt search for the same regulatory domain on our internal regulatory database and that work gets kicked off on a workqueue, this is done while the cfg80211_mutex is held. If that workqueue kicks off it will first lock reg_regdb_search_mutex and later cfg80211_mutex but to ensure two CPUs will not contend against cfg80211_mutex the right thing to do is to have the reg_regdb_search() wait until the cfg80211_mutex is let go. The lockdep report is pasted below. cfg80211: Calling CRDA to update world regulatory domain ====================================================== [ INFO: possible circular locking dependency detected ] 3.3.8 #3 Tainted: G O ------------------------------------------------------- kworker/0:1/235 is trying to acquire lock: (cfg80211_mutex){+.+...}, at: [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] but task is already holding lock: (reg_regdb_search_mutex){+.+...}, at: [<81646828>] set_regdom+0x710/0x808 [cfg80211] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (reg_regdb_search_mutex){+.+...}: [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<81645778>] is_world_regdom+0x9f8/0xc74 [cfg80211] -> #1 (reg_mutex#2){+.+...}: [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<8164539c>] is_world_regdom+0x61c/0xc74 [cfg80211] -> #0 (cfg80211_mutex){+.+...}: [<800a77b8>] __lock_acquire+0x10d4/0x17bc [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] other info that might help us debug this: Chain exists of: cfg80211_mutex --> reg_mutex#2 --> reg_regdb_search_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(reg_regdb_search_mutex); lock(reg_mutex#2); lock(reg_regdb_search_mutex); lock(cfg80211_mutex); *** DEADLOCK *** 3 locks held by kworker/0:1/235: #0: (events){.+.+..}, at: [<80089a00>] process_one_work+0x230/0x460 #1: (reg_regdb_work){+.+...}, at: [<80089a00>] process_one_work+0x230/0x460 #2: (reg_regdb_search_mutex){+.+...}, at: [<81646828>] set_regdom+0x710/0x808 [cfg80211] stack backtrace: Call Trace: [<80290fd4>] dump_stack+0x8/0x34 [<80291bc4>] print_circular_bug+0x2ac/0x2d8 [<800a77b8>] __lock_acquire+0x10d4/0x17bc [<800a8384>] lock_acquire+0x60/0x88 [<802950a8>] mutex_lock_nested+0x54/0x31c [<816468a4>] set_regdom+0x78c/0x808 [cfg80211] Reported-by: Felix Fietkau Tested-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2ded3c7..72d170c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -350,6 +350,9 @@ static void reg_regdb_search(struct work_struct *work) struct reg_regdb_search_request *request; const struct ieee80211_regdomain *curdom, *regdom; int i, r; + bool set_reg = false; + + mutex_lock(&cfg80211_mutex); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -365,9 +368,7 @@ static void reg_regdb_search(struct work_struct *work) r = reg_copy_regd(®dom, curdom); if (r) break; - mutex_lock(&cfg80211_mutex); - set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + set_reg = true; break; } } @@ -375,6 +376,11 @@ static void reg_regdb_search(struct work_struct *work) kfree(request); } mutex_unlock(®_regdb_search_mutex); + + if (set_reg) + set_regdom(regdom); + + mutex_unlock(&cfg80211_mutex); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); -- cgit v1.1 From dbd6b11e15a2f96030da17dbeda943a8a98ee990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 14 Sep 2012 00:40:54 +0000 Subject: batman-adv: make batadv_test_bit() return 0 or 1 only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some architectures test_bit() can return other values than 0 or 1: With a generic x86 OpenWrt image in a kvm setup (batadv_)test_bit() frequently returns -1 for me, leading to batadv_iv_ogm_update_seqnos() wrongly signaling a protected seqno window. This patch tries to fix this issue by making batadv_test_bit() return 0 or 1 only. Signed-off-by: Linus Lüssing Acked-by: Sven Eckelmann Signed-off-by: Antonio Quartulli Signed-off-by: David S. Miller --- net/batman-adv/bitarray.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index a081ce1..cebaae7 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -20,8 +20,8 @@ #ifndef _NET_BATMAN_ADV_BITARRAY_H_ #define _NET_BATMAN_ADV_BITARRAY_H_ -/* returns true if the corresponding bit in the given seq_bits indicates true - * and curr_seqno is within range of last_seqno +/* Returns 1 if the corresponding bit in the given seq_bits indicates true + * and curr_seqno is within range of last_seqno. Otherwise returns 0. */ static inline int batadv_test_bit(const unsigned long *seq_bits, uint32_t last_seqno, uint32_t curr_seqno) @@ -32,7 +32,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) return 0; else - return test_bit(diff, seq_bits); + return test_bit(diff, seq_bits) != 0; } /* turn corresponding bit on, so we can remember that we got the packet */ -- cgit v1.1 From 15c041759bfcd9ab0a4e43f1c16e2644977d0467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Fri, 14 Sep 2012 04:59:52 +0000 Subject: tcp: flush DMA queue before sk_wait_data if rcv_wnd is zero If recv() syscall is called for a TCP socket so that - IOAT DMA is used - MSG_WAITALL flag is used - requested length is bigger than sk_rcvbuf - enough data has already arrived to bring rcv_wnd to zero then when tcp_recvmsg() gets to calling sk_wait_data(), receive window can be still zero while sk_async_wait_queue exhausts enough space to keep it zero. As this queue isn't cleaned until the tcp_service_net_dma() call, sk_wait_data() cannot receive any data and blocks forever. If zero receive window and non-empty sk_async_wait_queue is detected before calling sk_wait_data(), process the queue first. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2109ff4..bf9a8ab 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1762,8 +1762,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if (tp->ucopy.dma_chan) { + if (tp->rcv_wnd == 0 && + !skb_queue_empty(&sk->sk_async_wait_queue)) { + tcp_service_net_dma(sk, true); + tcp_cleanup_rbuf(sk, copied); + } else + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + } #endif if (copied >= target) { /* Do not sleep, just process backlog. */ -- cgit v1.1 From 71261956973ba9e0637848a5adb4a5819b4bae83 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sat, 15 Sep 2012 00:41:35 +0000 Subject: pkt_sched: fix virtual-start-time update in QFQ If the old timestamps of a class, say cl, are stale when the class becomes active, then QFQ may assign to cl a much higher start time than the maximum value allowed. This may happen when QFQ assigns to the start time of cl the finish time of a group whose classes are characterized by a higher value of the ratio max_class_pkt/weight_of_the_class with respect to that of cl. Inserting a class with a too high start time into the bucket list corrupts the data structure and may eventually lead to crashes. This patch limits the maximum start time assigned to a class. Signed-off-by: Paolo Valente Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index e4723d3..211a212 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -865,7 +865,10 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) if (mask) { struct qfq_group *next = qfq_ffs(q, mask); if (qfq_gt(roundedF, next->F)) { - cl->S = next->F; + if (qfq_gt(limit, next->F)) + cl->S = next->F; + else /* preserve timestamp correctness */ + cl->S = limit; return; } } -- cgit v1.1 From 8ea853fd0b721f14eacff1a5b364fe3e60d2dd82 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 18 Sep 2012 16:53:21 +0000 Subject: net/core: fix comment in skb_try_coalesce It should be the skb which is not cloned Signed-off-by: Li RongQing Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fe00d12..e33ebae 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3502,7 +3502,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (!skb_cloned(from)) skb_shinfo(from)->nr_frags = 0; - /* if the skb is cloned this does nothing since we set nr_frags to 0 */ + /* if the skb is not cloned this does nothing + * since we set nr_frags to 0. + */ for (i = 0; i < skb_shinfo(from)->nr_frags; i++) skb_frag_ref(from, i); -- cgit v1.1 From bc26ccd8fc756749de95606d28314efd0ce5aec3 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 19 Sep 2012 09:40:00 +0000 Subject: tcp: restore rcv_wscale in a repair mode (v2) rcv_wscale is a symetric parameter with snd_wscale. Both this parameters are set on a connection handshake. Without this value a remote window size can not be interpreted correctly, because a value from a packet should be shifted on rcv_wscale. And one more thing is that wscale_ok should be set too. This patch doesn't break a backward compatibility. If someone uses it in a old scheme, a rcv window will be restored with the same bug (rcv_wscale = 0). v2: Save backward compatibility on big-endian system. Before the first two bytes were snd_wscale and the second two bytes were rcv_wscale. Now snd_wscale is opt_val & 0xFFFF and rcv_wscale >> 16. This approach is independent on byte ordering. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy CC: Pavel Emelyanov Signed-off-by: Andrew Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bf9a8ab..5f64193 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2331,10 +2331,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp, tp->rx_opt.mss_clamp = opt.opt_val; break; case TCPOPT_WINDOW: - if (opt.opt_val > 14) - return -EFBIG; + { + u16 snd_wscale = opt.opt_val & 0xFFFF; + u16 rcv_wscale = opt.opt_val >> 16; + + if (snd_wscale > 14 || rcv_wscale > 14) + return -EFBIG; - tp->rx_opt.snd_wscale = opt.opt_val; + tp->rx_opt.snd_wscale = snd_wscale; + tp->rx_opt.rcv_wscale = rcv_wscale; + tp->rx_opt.wscale_ok = 1; + } break; case TCPOPT_SACK_PERM: if (opt.opt_val != 0) -- cgit v1.1 From 4c87308bdea31a7b4828a51f6156e6f721a1fcc9 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:38 +0000 Subject: xfrm_user: fix info leak in copy_to_user_auth() copy_to_user_auth() fails to initialize the remainder of alg_name and therefore discloses up to 54 bytes of heap memory via netlink to userland. Use strncpy() instead of strcpy() to fill the trailing bytes of alg_name with null bytes. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d12b625..40dd50d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -742,7 +742,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) return -EMSGSIZE; algo = nla_data(nla); - strcpy(algo->alg_name, auth->alg_name); + strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); algo->alg_key_len = auth->alg_key_len; -- cgit v1.1 From f778a636713a435d3a922c60b1622a91136560c1 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:39 +0000 Subject: xfrm_user: fix info leak in copy_to_user_state() The memory reserved to dump the xfrm state includes the padding bytes of struct xfrm_usersa_info added by the compiler for alignment (7 for amd64, 3 for i386). Add an explicit memset(0) before filling the buffer to avoid the info leak. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 40dd50d..d585459 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -689,6 +689,7 @@ out: static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { + memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); -- cgit v1.1 From 7b789836f434c87168eab067cfbed1ec4783dffd Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:40 +0000 Subject: xfrm_user: fix info leak in copy_to_user_policy() The memory reserved to dump the xfrm policy includes multiple padding bytes added by the compiler for alignment (padding bytes in struct xfrm_selector and struct xfrm_userpolicy_info). Add an explicit memset(0) before filling the buffer to avoid the heap info leak. Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d585459..84dd85c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1320,6 +1320,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { + memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); -- cgit v1.1 From 1f86840f897717f86d523a13e99a447e6a5d2fa5 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:41 +0000 Subject: xfrm_user: fix info leak in copy_to_user_tmpl() The memory used for the template copy is a local stack variable. As struct xfrm_user_tmpl contains multiple holes added by the compiler for alignment, not initializing the memory will lead to leaking stack bytes to userland. Add an explicit memset(0) to avoid the info leak. Initial version of the patch by Brad Spengler. Cc: Brad Spengler Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 84dd85c..8024b3d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1425,6 +1425,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); -- cgit v1.1 From ecd7918745234e423dd87fcc0c077da557909720 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 20 Sep 2012 10:01:49 +0000 Subject: xfrm_user: ensure user supplied esn replay window is valid The current code fails to ensure that the netlink message actually contains as many bytes as the header indicates. If a user creates a new state or updates an existing one but does not supply the bytes for the whole ESN replay window, the kernel copies random heap bytes into the replay bitmap, the ones happen to follow the XFRMA_REPLAY_ESN_VAL netlink attribute. This leads to following issues: 1. The replay window has random bits set confusing the replay handling code later on. 2. A malicious user could use this flaw to leak up to ~3.5kB of heap memory when she has access to the XFRM netlink interface (requires CAP_NET_ADMIN). Known users of the ESN replay window are strongSwan and Steffen's iproute2 patch (). The latter uses the interface with a bitmap supplied while the former does not. strongSwan is therefore prone to run into issue 1. To fix both issues without breaking existing userland allow using the XFRMA_REPLAY_ESN_VAL netlink attribute with either an empty bitmap or a fully specified one. For the former case we initialize the in-kernel bitmap with zero, for the latter we copy the user supplied bitmap. For state updates the full bitmap must be supplied. To prevent overflows in the bitmap length calculation the maximum size of bmp_len is limited to 128 by this patch -- resulting in a maximum replay window of 4096 packets. This should be sufficient for all real life scenarios (RFC 4303 recommends a default replay window size of 64). Cc: Steffen Klassert Cc: Martin Willi Cc: Ben Hutchings Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8024b3d..5927065 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + struct xfrm_replay_state_esn *rs; - if ((p->flags & XFRM_STATE_ESN) && !rt) - return -EINVAL; + if (p->flags & XFRM_STATE_ESN) { + if (!rt) + return -EINVAL; + + rs = nla_data(rt); + + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; + + if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; + } if (!rt) return 0; @@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; + int ulen; if (!replay_esn || !rp) return 0; up = nla_data(rp); + ulen = xfrm_replay_state_esn_len(up); - if (xfrm_replay_state_esn_len(replay_esn) != - xfrm_replay_state_esn_len(up)) + if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; return 0; @@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; + int klen, ulen; if (!rta) return 0; up = nla_data(rta); + klen = xfrm_replay_state_esn_len(up); + ulen = nla_len(rta) >= klen ? klen : sizeof(*up); - p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + p = kzalloc(klen, GFP_KERNEL); if (!p) return -ENOMEM; - pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + pp = kzalloc(klen, GFP_KERNEL); if (!pp) { kfree(p); return -ENOMEM; } + memcpy(p, up, ulen); + memcpy(pp, up, ulen); + *replay_esn = p; *preplay_esn = pp; -- cgit v1.1 From e3ac104d41a97b42316915020ba228c505447d21 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Sep 2012 11:33:43 +0000 Subject: xfrm_user: don't copy esn replay window twice for new states The ESN replay window was already fully initialized in xfrm_alloc_replay_state_esn(). No need to copy it again. Cc: Steffen Klassert Signed-off-by: Mathias Krause Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5927065..289f4bf 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -461,10 +461,11 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, + int update_esn) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; - struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; + struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; @@ -574,7 +575,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; /* override default values from above */ - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 0); return x; @@ -1848,7 +1849,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; spin_lock_bh(&x->lock); - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 1); spin_unlock_bh(&x->lock); c.event = nlh->nlmsg_type; -- cgit v1.1 From c0d680e577ff171e7b37dbdb1b1bf5451e851f04 Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Wed, 19 Sep 2012 15:49:00 +0000 Subject: net: do not disable sg for packets requiring no checksum A change in a series of VLAN-related changes appears to have inadvertently disabled the use of the scatter gather feature of network cards for transmission of non-IP ethernet protocols like ATA over Ethernet (AoE). Below is a reference to the commit that introduces a "harmonize_features" function that turns off scatter gather when the NIC does not support hardware checksumming for the ethernet protocol of an sk buff. commit f01a5236bd4b140198fbcc550f085e8361fd73fa Author: Jesse Gross Date: Sun Jan 9 06:23:31 2011 +0000 net offloading: Generalize netif_get_vlan_features(). The can_checksum_protocol function is not equipped to consider a protocol that does not require checksumming. Calling it for a protocol that requires no checksum is inappropriate. The patch below has harmonize_features call can_checksum_protocol when the protocol needs a checksum, so that the network layer is not forced to perform unnecessary skb linearization on the transmission of AoE packets. Unnecessary linearization results in decreased performance and increased memory pressure, as reported here: http://www.spinics.net/lists/linux-mm/msg15184.html The problem has probably not been widely experienced yet, because only recently has the kernel.org-distributed aoe driver acquired the ability to use payloads of over a page in size, with the patchset recently included in the mm tree: https://lkml.org/lkml/2012/8/28/140 The coraid.com-distributed aoe driver already could use payloads of greater than a page in size, but its users generally do not use the newest kernels. Signed-off-by: Ed Cashin Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ac7609d..89e33a5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2134,7 +2134,8 @@ static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { - if (!can_checksum_protocol(features, protocol)) { + if (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { -- cgit v1.1 From f950c0ecc78f745e490d615280e031de4dbb1306 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 20 Sep 2012 18:29:56 +0000 Subject: ipv6: fix return value check in fib6_add() In case of error, the function fib6_add_1() returns ERR_PTR() or NULL pointer. The ERR_PTR() case check is missing in fib6_add(). dpatch engine is used to generated this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 13690d6..286acfc 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -819,6 +819,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) offsetof(struct rt6_info, rt6i_src), allow_create, replace_required); + if (IS_ERR(sn)) { + err = PTR_ERR(sn); + sn = NULL; + } if (!sn) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node -- cgit v1.1 From bf5b30b8a4416de04f1ac1196281ddb318669464 Mon Sep 17 00:00:00 2001 From: Zhao Hongjiang Date: Thu, 20 Sep 2012 22:37:25 +0000 Subject: net: change return values from -EACCES to -EPERM Change return value from -EACCES to -EPERM when the permission check fails. Signed-off-by: Zhao Hongjiang Signed-off-by: David S. Miller --- net/bluetooth/bnep/sock.c | 4 ++-- net/bluetooth/cmtp/sock.c | 4 ++-- net/bluetooth/hci_sock.c | 16 ++++++++-------- net/bluetooth/hidp/sock.c | 4 ++-- net/ipv4/devinet.c | 4 ++-- net/netrom/af_netrom.c | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 5e5f5b4..1eaacf1 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -58,7 +58,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case BNEPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -84,7 +84,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case BNEPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 311668d..32dc83d 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -72,7 +72,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case CMTPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -97,7 +97,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case CMTPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 19fdac7..d5ace1e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -490,7 +490,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return -EPERM; @@ -510,12 +510,12 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_sock_blacklist_add(hdev, (void __user *) arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_sock_blacklist_del(hdev, (void __user *) arg); default: @@ -546,22 +546,22 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCIDEVUP: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_open(arg); case HCIDEVDOWN: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_close(arg); case HCIDEVRESET: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_reset(arg); case HCIDEVRESTAT: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_reset_stat(arg); case HCISETSCAN: @@ -573,7 +573,7 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCISETACLMTU: case HCISETSCOMTU: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_cmd(cmd, argp); case HCIINQUIRY: diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 18b3f68..b24fb3b 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -56,7 +56,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case HIDPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -91,7 +91,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case HIDPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9b55b6f..e12fad7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -725,7 +725,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; case SIOCSIFFLAGS: - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; break; @@ -733,7 +733,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; ret = -EINVAL; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1b9024e..7261eb8 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -601,7 +601,7 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); release_sock(sk); - return -EACCES; + return -EPERM; } nr->user_addr = addr->fsa_digipeater[0]; nr->source_addr = addr->fsa_ax25.sax25_call; -- cgit v1.1 From 5ce765a540f34d1e2005e1210f49f67fdf11e997 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 21 Sep 2012 17:59:58 -0500 Subject: libceph: only kunmap kmapped pages In write_partial_msg_pages(), pages need to be kmapped in order to perform a CRC-32c calculation on them. As an artifact of the way this code used to be structured, the kunmap() call was separated from the kmap() call and both were done conditionally. But the conditions under which the kmap() and kunmap() calls were made differed, so there was a chance a kunmap() call would be done on a page that had not been mapped. The symptom of this was tripping a BUG() in kunmap_high() when pkmap_count[nr] became 0. Reported-by: Bryan K. Wright Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 24c5eea..159aa8b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1073,16 +1073,13 @@ static int write_partial_msg_pages(struct ceph_connection *con) BUG_ON(kaddr == NULL); base = kaddr + con->out_msg_pos.page_pos + bio_offset; crc = crc32c(crc, base, len); + kunmap(page); msg->footer.data_crc = cpu_to_le32(crc); con->out_msg_pos.did_page_crc = true; } ret = ceph_tcp_sendpage(con->sock, page, con->out_msg_pos.page_pos + bio_offset, len, 1); - - if (do_datacrc) - kunmap(page); - if (ret <= 0) goto out; -- cgit v1.1 From ab43ed8b7490cb387782423ecf74aeee7237e591 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 22 Sep 2012 00:08:29 +0000 Subject: ipv4: raw: fix icmp_filter() icmp_filter() should not modify its input, or else its caller would need to recompute ip_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff0f071..d23c657 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,18 +131,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; + struct icmphdr _hdr; + const struct icmphdr *hdr; - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ -- cgit v1.1 From 40a3eb33e307616567f4b81792f405a7f3f0abee Mon Sep 17 00:00:00 2001 From: Def Date: Thu, 20 Sep 2012 14:56:13 +0200 Subject: batman-adv: Fix change mac address of soft iface. Into function interface_set_mac_addr, the function tt_local_add was invoked before updating dev->dev_addr. The new MAC address was not tagged as NoPurge. Signed-off-by: Def --- net/batman-adv/soft-interface.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 109ea2a..21c5357 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -100,18 +100,21 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); struct sockaddr *addr = p; + uint8_t old_addr[ETH_ALEN]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + memcpy(old_addr, dev->dev_addr, ETH_ALEN); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) { - batadv_tt_local_remove(bat_priv, dev->dev_addr, + batadv_tt_local_remove(bat_priv, old_addr, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); } - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); dev->addr_assign_type &= ~NET_ADDR_RANDOM; return 0; } -- cgit v1.1 From 7caf69fb9c5017df01945a1861c042f6aa08edeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 18 Sep 2012 03:01:08 +0200 Subject: batman-adv: Fix symmetry check / route flapping in multi interface setups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If receiving an OGM from a neighbor other than the currently selected and if it has the same TQ then we are supposed to switch if this neighbor provides a more symmetric link than the currently selected one. However this symmetry check currently is broken if the interface of the neighbor we received the OGM from and the one of the currently selected neighbor differ: We are currently trying to determine the symmetry of the link towards the selected router via the link we received the OGM from instead of just checking via the link towards the currently selected router. This leads to way more route switches than necessary and can lead to permanent route flapping in many common multi interface setups. This patch fixes this issue by using the right interface for this symmetry check. Signed-off-by: Linus Lüssing --- net/batman-adv/bat_iv_ogm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index e877af8..469daab 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -642,7 +642,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; struct hlist_node *node; - uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; + int if_num; + uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -727,17 +728,17 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (router && (neigh_node->tq_avg == router->tq_avg)) { orig_node_tmp = router->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_orig = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = router->if_incoming->if_num; + sum_orig = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); orig_node_tmp = neigh_node->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_neigh = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = neigh_node->if_incoming->if_num; + sum_neigh = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - if (bcast_own_sum_orig >= bcast_own_sum_neigh) + if (sum_orig >= sum_neigh) goto update_tt; } -- cgit v1.1 From 3e10986d1d698140747fcfc2761ec9cb64c1d582 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Sep 2012 07:00:11 +0000 Subject: net: guard tcp_set_keepalive() to tcp sockets Its possible to use RAW sockets to get a crash in tcp_set_keepalive() / sk_reset_timer() Fix is to make sure socket is a SOCK_STREAM one. Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 3057920..a6000fb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -691,7 +691,8 @@ set_rcvbuf: case SO_KEEPALIVE: #ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) tcp_set_keepalive(sk, valbool); #endif sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); -- cgit v1.1 From 1b05c4b50edbddbdde715c4a7350629819f6655e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 07:03:40 +0000 Subject: ipv6: raw: fix icmpv6_filter() icmpv6_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Also, if icmpv6 header cannot be found, do not deliver the packet, as we do in IPv4. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/raw.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ef0579d..4a5f78b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -107,21 +107,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *icmph; - struct raw6_sock *rp = raw6_sk(sk); - - if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &rp->filter.data[0]; - int bit_nr; + struct icmp6hdr *_hdr; + const struct icmp6hdr *hdr; - icmph = (struct icmp6hdr *) skb->data; - bit_nr = icmph->icmp6_type; + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (hdr) { + const __u32 *data = &raw6_sk(sk)->filter.data[0]; + unsigned int type = hdr->icmp6_type; - return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; + return (data[type >> 5] & (1U << (type & 31))) != 0; } - return 0; + return 1; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -- cgit v1.1 From 96af69ea2a83d292238bdba20e4508ee967cf8cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 22:01:28 +0200 Subject: ipv6: mip6: fix mip6_mh_filter() mip6_mh_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mip6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 5b087c3..0f9bdc5 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -86,28 +86,30 @@ static int mip6_mh_len(int type) static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { - struct ip6_mh *mh; + struct ip6_mh _hdr; + const struct ip6_mh *mh; - if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || - !pskb_may_pull(skb, (skb_transport_offset(skb) + - ((skb_transport_header(skb)[1] + 1) << 3)))) + mh = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!mh) return -1; - mh = (struct ip6_mh *)skb_transport_header(skb); + if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) + return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); - mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + + skb_network_header_len(skb)); return -1; } -- cgit v1.1 From 82e6bfe2fbc4d48852114c4f979137cd5bf1d1a8 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 21 Sep 2012 22:26:52 +0000 Subject: netfilter: xt_limit: have r->cost != 0 case work Commit v2.6.19-rc1~1272^2~41 tells us that r->cost != 0 can happen when a running state is saved to userspace and then reinstated from there. Make sure that private xt_limit area is initialized with correct values. Otherwise, random matchings due to use of uninitialized memory. Signed-off-by: Jan Engelhardt Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_limit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 5c22ce8..a4c1e45 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -117,11 +117,11 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* For SMP, we only want to use one set of state. */ r->master = priv; + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ if (r->cost == 0) { - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - priv->prev = jiffies; - priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } -- cgit v1.1 From 7f8436a1269eaaf2d0b1054a325eddf4e14cb80d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 24 Sep 2012 18:29:01 +0000 Subject: l2tp: fix return value check In case of error, the function genlmsg_put() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd92..6f93635 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -80,8 +80,8 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (!hdr) { + ret = -EMSGSIZE; goto err_out; } @@ -250,8 +250,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) || nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || @@ -617,8 +617,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) || -- cgit v1.1 From bc9259a8bae9e814fc1f775a1b3effa13e6ad330 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 27 Sep 2012 04:11:00 +0000 Subject: inetpeer: fix token initialization When jiffies wraps around (for example, 5 minutes after the boot, see INITIAL_JIFFIES) and peer has just been created, now - peer->rate_last can be < XRLIM_BURST_FACTOR * timeout, so token is not set to the maximum value, thus some icmp packets can be unexpectedly dropped. Fix this case by initializing last_rate to 60 seconds in the past. Signed-off-by: Nicolas Dichtel Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e1e0a4e..c7527f6 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -510,7 +510,10 @@ relookup: secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; - p->rate_last = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ -- cgit v1.1