From 8e662164abb4a8fde701a46e1431980f9e325742 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Nov 2015 13:49:59 +0100 Subject: netfilter: nfnetlink_queue: avoid harmless uninitialized variable warnings Several ARM default configurations give us warnings on recent compilers about potentially uninitialized variables in the nfnetlink code in two functions: net/netfilter/nfnetlink_queue.c: In function 'nfqnl_build_packet_message': net/netfilter/nfnetlink_queue.c:519:19: warning: 'nfnl_ct' may be used uninitialized in this function [-Wmaybe-uninitialized] if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) Moving the rcu_dereference(nfnl_ct_hook) call outside of the conditional code avoids the warning without forcing us to preinitialize the variable. Signed-off-by: Arnd Bergmann Fixes: a4b4766c3ceb ("netfilter: nfnetlink_queue: rename related to nfqueue attaching conntrack info") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7d81d28..3e24054 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -365,8 +365,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, break; } + nfnl_ct = rcu_dereference(nfnl_ct_hook); + if (queue->flags & NFQA_CFG_F_CONNTRACK) { - nfnl_ct = rcu_dereference(nfnl_ct_hook); if (nfnl_ct != NULL) { ct = nfnl_ct->get_ct(entskb, &ctinfo); if (ct != NULL) @@ -1064,9 +1065,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, if (entry == NULL) return -ENOENT; + /* rcu lock already held from nfnl->call_rcu. */ + nfnl_ct = rcu_dereference(nfnl_ct_hook); + if (nfqa[NFQA_CT]) { - /* rcu lock already held from nfnl->call_rcu. */ - nfnl_ct = rcu_dereference(nfnl_ct_hook); if (nfnl_ct != NULL) ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo); } -- cgit v1.1 From 1f7dd3e5a6e4f093017fff12232572ee1aa4639b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Dec 2015 10:18:21 -0500 Subject: cgroup: fix handling of multi-destination migration from subtree_control enabling Consider the following v2 hierarchy. P0 (+memory) --- P1 (-memory) --- A \- B P0 has memory enabled in its subtree_control while P1 doesn't. If both A and B contain processes, they would belong to the memory css of P1. Now if memory is enabled on P1's subtree_control, memory csses should be created on both A and B and A's processes should be moved to the former and B's processes to the latter. IOW, enabling controllers can cause atomic migrations into different csses. The core cgroup migration logic has been updated accordingly but the controller migration methods haven't and still assume that all tasks migrate to a single target css; furthermore, the methods were fed the css in which subtree_control was updated, which is the parent of the target csses. The pids controller depends on the migration methods to move charges, and this made the controller attribute charges to the wrong csses, often triggering the following warning by driving a counter negative. WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40() Modules linked in: CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29 ... 
ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000 ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00 ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8 Call Trace: [] dump_stack+0x4e/0x82 [] warn_slowpath_common+0x82/0xc0 [] warn_slowpath_null+0x1a/0x20 [] pids_cancel.constprop.6+0x31/0x40 [] pids_can_attach+0x6d/0xf0 [] cgroup_taskset_migrate+0x6c/0x330 [] cgroup_migrate+0xf5/0x190 [] cgroup_attach_task+0x176/0x200 [] __cgroup_procs_write+0x2ad/0x460 [] cgroup_procs_write+0x14/0x20 [] cgroup_file_write+0x35/0x1c0 [] kernfs_fop_write+0x141/0x190 [] __vfs_write+0x28/0xe0 [] vfs_write+0xac/0x1a0 [] SyS_write+0x49/0xb0 [] entry_SYSCALL_64_fastpath+0x12/0x76 This patch fixes the bug by removing the @css parameter from the three migration methods, ->can_attach(), ->cancel_attach() and ->attach(), and updating the cgroup_taskset iteration helpers to also return the destination css in addition to the task being migrated. All controllers are updated accordingly. * Controllers which don't care whether there are one or multiple target csses can be converted trivially. cpu, io, freezer, perf, netclassid and netprio fall in this category. * cpuset's current implementation assumes that there's a single source and destination and thus doesn't support the v2 hierarchy anyway. The only change made by this patchset is how that single destination css is obtained. * The memory migration path already doesn't do anything on v2. How the single destination css is obtained is updated, and the prep stage of mem_cgroup_can_attach() is reordered to accommodate the change. * pids is the only controller which was affected by this bug. It now correctly handles multi-destination migrations and no longer causes counter underflow from incorrect accounting. 
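In sketch form, the new contract looks like this (illustrative only; example_attach() and apply_state() are placeholder names, and the real conversions are visible in the netclassid and netprio diffs below). Instead of receiving one css for the whole taskset, an ->attach() implementation now looks up the destination css per task:

    static void example_attach(struct cgroup_taskset *tset)
    {
            struct task_struct *p;
            struct cgroup_subsys_state *css;

            /* each iteration yields the task *and* the css it migrates to;
             * tasks in the same taskset may land in different csses
             */
            cgroup_taskset_for_each(p, css, tset) {
                    apply_state(css, p);
            }
    }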
Signed-off-by: Tejun Heo Reported-and-tested-by: Daniel Wagner Cc: Aleksa Sarai --- net/core/netclassid_cgroup.c | 11 ++++++----- net/core/netprio_cgroup.c | 9 +++++---- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 6441f47..81cb3c7 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -67,14 +67,15 @@ static int update_classid(const void *v, struct file *file, unsigned n) return 0; } -static void cgrp_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void cgrp_attach(struct cgroup_taskset *tset) { - struct cgroup_cls_state *cs = css_cls_state(css); - void *v = (void *)(unsigned long)cs->classid; struct task_struct *p; + struct cgroup_subsys_state *css; + + cgroup_taskset_for_each(p, css, tset) { + struct cgroup_cls_state *cs = css_cls_state(css); + void *v = (void *)(unsigned long)cs->classid; - cgroup_taskset_for_each(p, tset) { task_lock(p); iterate_fd(p->files, 0, update_classid, v); task_unlock(p); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index cbd0a19..40fd09f 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -218,13 +218,14 @@ static int update_netprio(const void *v, struct file *file, unsigned n) return 0; } -static void net_prio_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void net_prio_attach(struct cgroup_taskset *tset) { struct task_struct *p; - void *v = (void *)(unsigned long)css->cgroup->id; + struct cgroup_subsys_state *css; + + cgroup_taskset_for_each(p, css, tset) { + void *v = (void *)(unsigned long)css->cgroup->id; - cgroup_taskset_for_each(p, tset) { task_lock(p); iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); -- cgit v1.1 From 6a61d4dbf4f54b5683e0f1e58d873cecca7cb977 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 3 Dec 2015 17:21:50 +0100 Subject: gre6: allow to update all parameters via rtnl Parameters were updated only if the kernel was unable to find the tunnel with the new parameters, i.e. only if core parameters were updated (keys, addr, link, type). Now it's possible to update ttl, hoplimit, flowinfo and flags. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3c7b931..e5ea177 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1571,13 +1571,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], return -EEXIST; } else { t = nt; - - ip6gre_tunnel_unlink(ign, t); - ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); - ip6gre_tunnel_link(ign, t); - netdev_state_change(dev); } + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); return 0; } -- cgit v1.1 From 9a1ec4612c9bfc94d4185e3459055a37a685e575 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 4 Dec 2015 14:15:08 +0100 Subject: ipv6: keep existing flags when setting IFA_F_OPTIMISTIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 64236f3f3d74 ("ipv6: introduce IFA_F_STABLE_PRIVACY flag") failed to update the setting of the IFA_F_OPTIMISTIC flag, causing the IFA_F_STABLE_PRIVACY flag to be lost if IFA_F_OPTIMISTIC is set. 
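The one-character nature of the bug is easiest to see in isolation. A minimal sketch in plain C, with made-up flag values standing in for the IFA_F_* bits:

    /* F_STABLE_PRIVACY and F_OPTIMISTIC are hypothetical stand-ins */
    #define F_STABLE_PRIVACY 0x01u
    #define F_OPTIMISTIC     0x02u

    static unsigned int buggy(unsigned int flags) { flags = F_OPTIMISTIC;  return flags; }
    static unsigned int fixed(unsigned int flags) { flags |= F_OPTIMISTIC; return flags; }

    /* buggy(F_STABLE_PRIVACY) == F_OPTIMISTIC                    (stable-privacy bit lost)
     * fixed(F_STABLE_PRIVACY) == F_STABLE_PRIVACY | F_OPTIMISTIC (both bits preserved)
     */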
Cc: Erik Kline Cc: Fernando Gont Cc: Lorenzo Colitti Cc: YOSHIFUJI Hideaki/吉藤英明 Cc: Hannes Frederic Sowa Fixes: 64236f3f3d74 ("ipv6: introduce IFA_F_STABLE_PRIVACY flag") Signed-off-by: Bjørn Mork Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 61f26851..a57d3d1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2455,7 +2455,7 @@ ok: #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && !net->ipv6.devconf_all->forwarding && sllao) - addr_flags = IFA_F_OPTIMISTIC; + addr_flags |= IFA_F_OPTIMISTIC; #endif /* Do not allow to create too much of autoconfigured -- cgit v1.1 From cb5e173ed7c03a0d4630ce68a95a186cce3cc872 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 4 Dec 2015 15:14:03 -0200 Subject: sctp: use the same clock as if sock source timestamps were on SCTP echoes a cookie on INIT ACK chunks that contains a timestamp, for detecting stale cookies. This cookie is echoed back to the server by the client and then that timestamp is checked. Thing is, if the listening socket is using packet timestamping, the cookie is encoded with the ktime_get() value and checked against ktime_get_real(), as done by __net_timestamp(). The fix is to have sctp also use ktime_get_real(), so we can compare bananas with bananas later, no matter if packet timestamping was enabled or not. Fixes: 52db882f3fc2 ("net: sctp: migrate cookie life from timeval to ktime") Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 763e06a..5d6a03f 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1652,7 +1652,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Set an expiration time for the cookie. */ cookie->c.expiration = ktime_add(asoc->cookie_life, - ktime_get()); + ktime_get_real()); /* Copy the peer's init packet. */ memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr, @@ -1780,7 +1780,7 @@ no_hmac: if (sock_flag(ep->base.sk, SOCK_TIMESTAMP)) kt = skb_get_ktime(skb); else - kt = ktime_get(); + kt = ktime_get_real(); if (!asoc && ktime_before(bear_cookie->expiration, kt)) { /* -- cgit v1.1 From 01ce63c90170283a9855d1db4fe81934dddce648 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 4 Dec 2015 15:14:04 -0200 Subject: sctp: update the netstamp_needed counter when copying sockets Dmitry Vyukov reported that SCTP was triggering a WARN on socket destroy related to disabling sock timestamp. When SCTP accepts an association or peels one off, it copies sock flags but forgot to call net_enable_timestamp() if a packet timestamping flag was copied, leading to extra calls to net_disable_timestamp() whenever such clones were closed. The fix is to call net_enable_timestamp() whenever we copy a sock with that flag on, like tcp does. Reported-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/core/sock.c | 2 -- net/sctp/socket.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index e31dfce..d01c8f4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -433,8 +433,6 @@ static bool sock_needs_netstamp(const struct sock *sk) } } -#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) - static void sock_disable_timestamp(struct sock *sk, unsigned long flags) { if (sk->sk_flags & flags) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 03c82560..4c9282b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7199,6 +7199,9 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->mc_ttl = 1; newinet->mc_index = 0; newinet->mc_list = NULL; + + if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) + net_enable_timestamp(); } static inline void sctp_copy_descendant(struct sock *sk_to, -- cgit v1.1 From 50a5ffb1ef535e3c6989711c51b5d61b543a3b45 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 4 Dec 2015 15:14:05 -0200 Subject: sctp: also copy sk_tsflags when copying the socket As we are keeping timestamps on when copying the socket, we also have to copy sk_tsflags. This is needed since b9f40e21ef42 ("net-timestamp: move timestamp flags out of sk_flags"). Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4c9282b..1a32ecd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7167,6 +7167,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; newsk->sk_flags = sk->sk_flags; + newsk->sk_tsflags = sk->sk_tsflags; newsk->sk_no_check_tx = sk->sk_no_check_tx; newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; -- cgit v1.1 From 69b5777f2e5779bb987d4a25a33401d5ac257c14 Mon Sep 17 00:00:00 2001 From: lucien Date: Sat, 5 Dec 2015 15:15:17 +0800 Subject: sctp: hold the chunks only after the chunk is enqueued in outq When a msg is sent, sctp will hold the chunks of this msg and then try to enqueue them. But if the chunks are not enqueued in sctp_outq_tail() because of the invalid state, sctp_cmd_interpreter() may still return success to sctp_sendmsg() after calling sctp_outq_flush(), these chunks will become orphans and will leak. So we fix them by moving sctp_chunk_hold() to sctp_outq_tail(), where we are sure that the chunk is going to get queued. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- net/sctp/outqueue.c | 1 + net/sctp/socket.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 7e8f0a1..0b3d818 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -324,6 +324,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk"); + sctp_chunk_hold(chunk); sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1a32ecd..bd57300 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1952,8 +1952,6 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { - sctp_chunk_hold(chunk); - /* Do accounting for the write space. */ sctp_set_owner_w(chunk); -- cgit v1.1 From 8b570dc9f7b634e853866ce40097c0342ac5bb81 Mon Sep 17 00:00:00 2001 From: lucien Date: Sat, 5 Dec 2015 15:19:27 +0800 Subject: sctp: only drop the reference on the datamsg after sending a msg If the chunks are enqueued successfully but sctp_cmd_interpreter() return err to sctp_sendmsg() (mainly because of no mem), the chunks will get re-queued, but we are dropping the reference and freeing them. The fix is to just drop the reference on the datamsg just as it had succeeded, as: - if the chunks weren't queued, this is enough to get them freed. - if they were queued, they will get freed when they finally get out or discarded. Signed-off-by: Xin Long Marcelo Ricardo Leitner Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index bd57300..9b6cc6d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1964,15 +1964,13 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) * breaks. */ err = sctp_primitive_SEND(net, asoc, datamsg); + sctp_datamsg_put(datamsg); /* Did the lower layer accept the chunk? */ - if (err) { - sctp_datamsg_free(datamsg); + if (err) goto out_free; - } pr_debug("%s: we sent primitively\n", __func__); - sctp_datamsg_put(datamsg); err = msg_len; if (unlikely(wait_connect)) { -- cgit v1.1 From 8a0d19c5ed417c78d03f4e0fa7215e58c40896d8 Mon Sep 17 00:00:00 2001 From: lucien Date: Sat, 5 Dec 2015 15:35:36 +0800 Subject: sctp: start t5 timer only when peer rwnd is 0 and local state is SHUTDOWN_PENDING when A sends a data to B, then A close() and enter into SHUTDOWN_PENDING state, if B neither claim his rwnd is 0 nor send SACK for this data, A will keep retransmitting this data until t5 timeout, Max.Retrans times can't work anymore, which is bad. if B's rwnd is not 0, it should send abort after Max.Retrans times, only when B's rwnd == 0 and A's retransmitting beyonds Max.Retrans times, A will start t5 timer, which is also commit f8d960524328 ("sctp: Enforce retransmission limit during shutdown") means, but it lacks the condition peer rwnd == 0. so fix it by adding a bit (zero_window_announced) in peer to record if the last rwnd is 0. If it was, zero_window_announced will be set. and use this bit to decide if start t5 timer when local.state is SHUTDOWN_PENDING. 
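The resulting rule can be captured in a small standalone model (illustrative only; the struct and helper below are simplified stand-ins, not the kernel types):

    /* decide what to do once overall_error_count exceeds max_retrans */
    enum sctp_like_state { OTHER, SHUTDOWN_PENDING };

    struct assoc_model {
            enum sctp_like_state state;
            int peer_zero_window_announced;  /* latched when the last SACK carried a_rwnd == 0 */
    };

    /* Nonzero: keep probing under the T5 shutdown guard timer.
     * Zero: give up and abort, enforcing Max.Retrans as usual.
     */
    static int t5_takes_over(const struct assoc_model *a)
    {
            return a->peer_zero_window_announced &&
                   a->state == SHUTDOWN_PENDING;
    }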
Fixes: commit f8d960524328 ("sctp: Enforce retransmission limit during shutdown") Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 1 + net/sctp/sm_statefuns.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 0b3d818..c0380cf 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1252,6 +1252,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) */ sack_a_rwnd = ntohl(sack->a_rwnd); + asoc->peer.zero_window_announced = !sack_a_rwnd; outstanding = q->outstanding_bytes; if (outstanding < sack_a_rwnd) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 6f46aa1..cd34a4a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5412,7 +5412,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS); if (asoc->overall_error_count >= asoc->max_retrans) { - if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { + if (asoc->peer.zero_window_announced && + asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { /* * We are here likely because the receiver had its rwnd * closed for a while and we have not been able to -- cgit v1.1 From 437bb0e645d9286a97508d24dd9a4a7a7fa86b93 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 2 Sep 2015 20:09:54 +0200 Subject: batman-adv: fix speedy join for DAT cache replies DAT Cache replies are answered on behalf of other clients which are not connected to the answering originator. Therefore, we shouldn't add these clients to the answering originators TT table through speed join to avoid bogus entries. Reported-by: Alessandro Bolletta Signed-off-by: Simon Wunderlich Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/routing.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 8d990b0..3207667 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -836,6 +836,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, u8 *orig_addr; struct batadv_orig_node *orig_node = NULL; int check, hdr_size = sizeof(*unicast_packet); + enum batadv_subtype subtype; bool is4addr; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -863,10 +864,20 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { if (is4addr) { - batadv_dat_inc_counter(bat_priv, - unicast_4addr_packet->subtype); - orig_addr = unicast_4addr_packet->src; - orig_node = batadv_orig_hash_find(bat_priv, orig_addr); + subtype = unicast_4addr_packet->subtype; + batadv_dat_inc_counter(bat_priv, subtype); + + /* Only payload data should be considered for speedy + * join. For example, DAT also uses unicast 4addr + * types, but those packets should not be considered + * for speedy join, since the clients do not actually + * reside at the sending originator. 
+ */ if (subtype == BATADV_P_DATA) { orig_addr = unicast_4addr_packet->src; orig_node = batadv_orig_hash_find(bat_priv, orig_addr); } } if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, -- cgit v1.1 From a6cb390940b622eb0893519a54989222234f7c4b Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 2 Sep 2015 20:09:55 +0200 Subject: batman-adv: avoid keeping false temporary entry In the case when a temporary entry is added first and a proper tt entry is added after that, the temporary tt entry is kept in the orig list. However the temporary flag is removed at this point, and therefore the purge function can not find this temporary entry anymore. Therefore, remove the previous temp entry before adding the new proper one. This case can happen if a client behind a given originator moves before the TT announcement is sent out. Other than that, this case can also be created by bogus or malicious payload frames for VLANs which are not existent on the sending originator. Reported-by: Alessandro Bolletta Signed-off-by: Simon Wunderlich Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4228b10..a3fc903 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1427,9 +1427,15 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, } /* if the client was temporary added before receiving the first - * OGM announcing it, we have to clear the TEMP flag + * OGM announcing it, we have to clear the TEMP flag. Also, + * remove the previous temporary orig node and re-add it + * if required. If the orig entry changed, the new one which + * is a non-temporary entry is preferred. */ - common->flags &= ~BATADV_TT_CLIENT_TEMP; + if (common->flags & BATADV_TT_CLIENT_TEMP) { + batadv_tt_global_del_orig_list(tt_global_entry); + common->flags &= ~BATADV_TT_CLIENT_TEMP; + } /* the change can carry possible "attribute" flags like the * TT_CLIENT_WIFI, therefore they have to be copied in the -- cgit v1.1 From 4c71895830dd66fb9d3331ab27481a777d2a9202 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 6 Aug 2015 10:38:54 +0200 Subject: batman-adv: fix erroneous client entry duplicate detection The translation table implementation, namely batadv_compare_tt(), is used to compare two client entries and decide if they are holding the same information. Each client entry is identified by its mac address and its VLAN id (VID). Consequently, batadv_compare_tt() has to not only compare the mac addresses but also the VIDs. Without this fix, adding a new client entry that possesses the same mac address as another client but operates on a different VID will fail because both client entries will be considered identical. 
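A stripped-down illustration of why a MAC-only comparison is not enough (standalone C with made-up names, not the batman-adv code):

    #include <string.h>

    struct client { unsigned char mac[6]; unsigned short vid; };

    /* MAC-only comparison: two clients on different VLANs wrongly match. */
    static int same_client_buggy(const struct client *a, const struct client *b)
    {
            return memcmp(a->mac, b->mac, sizeof(a->mac)) == 0;
    }

    /* Fixed comparison: the identity is the (MAC, VID) pair. */
    static int same_client(const struct client *a, const struct client *b)
    {
            return a->vid == b->vid &&
                   memcmp(a->mac, b->mac, sizeof(a->mac)) == 0;
    }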
Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a3fc903..76f19ba 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -68,13 +68,15 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, unsigned short vid, const char *message, bool roaming); -/* returns 1 if they are the same mac addr */ +/* returns 1 if they are the same mac addr and vid */ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_tt_common_entry, hash_entry); + const struct batadv_tt_common_entry *tt1 = data1; + const struct batadv_tt_common_entry *tt2 = data2; - return batadv_compare_eth(data1, data2); + return (tt1->vid == tt2->vid) && batadv_compare_eth(data1, data2); } /** -- cgit v1.1 From b7fe3d4f4a65bc675e737d88071300ea9c4bcddd Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 3 Nov 2015 10:05:44 +0100 Subject: batman-adv: Fix invalid stack access in batadv_dat_select_candidates batadv_dat_select_candidates provides an u32 to batadv_hash_dat but it needs a batadv_dat_entry with at least ip and vid filled in. Fixes: 3e26722bc9f2 ("batman-adv: make the Distributed ARP Table vlan aware") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/distributed-arp-table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 83bc1aa..a49c705 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -566,6 +566,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) int select; batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; struct batadv_dat_candidate *res; + struct batadv_dat_entry dat; if (!bat_priv->orig_hash) return NULL; @@ -575,7 +576,9 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) if (!res) return NULL; - ip_key = (batadv_dat_addr_t)batadv_hash_dat(&ip_dst, + dat.ip = ip_dst; + dat.vid = 0; + ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, BATADV_DAT_ADDR_MAX); batadv_dbg(BATADV_DBG_DAT, bat_priv, -- cgit v1.1 From 756b9b37cfb2e3dc76b2e43a8c097402ac736e07 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Dec 2015 12:52:23 -0800 Subject: SUNRPC: Fix callback channel The NFSv4.1 callback channel is currently broken because the receive message will keep shrinking because the backchannel receive buffer size never gets reset. The easiest solution to this problem is instead of changing the receive buffer, to rather adjust the copied request. 
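The adjustment boils down to splitting the copied byte count between the head iovec and the page data. A hedged sketch of that arithmetic (simplified helper, not the actual sunrpc code; it also omits the final clamping done in the real function):

    #include <stddef.h>

    static void split_copied_len(size_t copied, size_t head_capacity,
                                 size_t *head_len, size_t *page_len)
    {
            if (copied <= head_capacity) {
                    *head_len = copied;                  /* everything fits in the head */
                    *page_len = 0;
            } else {
                    *head_len = head_capacity;
                    *page_len = copied - head_capacity;  /* remainder sits in pages */
            }
    }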
Fixes: 38b7631fbe42 ("nfs4: limit callback decoding to received bytes") Cc: Benjamin Coddington Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 8 -------- net/sunrpc/svc.c | 12 ++++++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 95f82d8..229956b 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -353,20 +353,12 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) { struct rpc_xprt *xprt = req->rq_xprt; struct svc_serv *bc_serv = xprt->bc_serv; - struct xdr_buf *rq_rcv_buf = &req->rq_rcv_buf; spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); xprt_dec_alloc_count(xprt, 1); spin_unlock(&xprt->bc_pa_lock); - if (copied <= rq_rcv_buf->head[0].iov_len) { - rq_rcv_buf->head[0].iov_len = copied; - rq_rcv_buf->page_len = 0; - } else { - rq_rcv_buf->page_len = copied - rq_rcv_buf->head[0].iov_len; - } - req->rq_private_buf.len = copied; set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 7fccf96..cc98528 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1363,7 +1363,19 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg)); memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res)); + + /* Adjust the argument buffer length */ rqstp->rq_arg.len = req->rq_private_buf.len; + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; + rqstp->rq_arg.page_len = 0; + } else if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len + + rqstp->rq_arg.page_len) + rqstp->rq_arg.page_len = rqstp->rq_arg.len - + rqstp->rq_arg.head[0].iov_len; + else + rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len + + rqstp->rq_arg.page_len; /* reset result send buffer "put" position */ resv->iov_len = 0; -- cgit v1.1 From fe82b3300ec9c0dc4ba871f9a58b265aadf4e186 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Mon, 7 Dec 2015 12:53:15 +0000 Subject: mpls: fix sending of local encapped packets Locally generated IPv4 and (probably) IPv6 packets are dropped because skb->protocol isn't set. We could write wrappers to lwtunnel_output for IPv4 and IPv6 that set the protocol accordingly and then call lwtunnel_output, but mpls_output relies on the AF-specific type of dst anyway to get the via address. Therefore, make use of dst->dst_ops->family in mpls_output to determine the type of nexthop and thus protocol of the packet instead of checking skb->protocol. Fixes: 61adedf3e3f1 ("route: move lwtunnel state to dst_entry") Reported-by: Sam Russell Signed-off-by: Robert Shearman Signed-off-by: David S. 
Miller --- net/mpls/mpls_iptunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 67591ae..64afd3d 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -54,10 +54,10 @@ int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) unsigned int ttl; /* Obtain the ttl */ - if (skb->protocol == htons(ETH_P_IP)) { + if (dst->ops->family == AF_INET) { ttl = ip_hdr(skb)->ttl; rt = (struct rtable *)dst; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (dst->ops->family == AF_INET6) { ttl = ipv6_hdr(skb)->hop_limit; rt6 = (struct rt6_info *)dst; } else { -- cgit v1.1 From 69ce6487dcd364245a3d26322fc8f4ffd1e8d947 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Dec 2015 08:25:21 -0800 Subject: ipv6: sctp: fix lockdep splat in sctp_v6_get_dst() While cooking the sctp np->opt rcu fixes, I forgot to move one rcu_read_unlock() after the added rcu_dereference() in sctp_v6_get_dst() This gave lockdep warnings reported by Dave Jones. Fixes: c836a8ba9386 ("ipv6: sctp: add rcu protection around np->opt") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index acb45b8..d28c0b4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -323,14 +323,13 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, } } } - rcu_read_unlock(); - if (baddr) { fl6->saddr = baddr->v6.sin6_addr; fl6->fl6_sport = baddr->v6.sin6_port; final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); dst = ip6_dst_lookup_flow(sk, fl6, final_p); } + rcu_read_unlock(); out: if (!IS_ERR_OR_NULL(dst)) { -- cgit v1.1 From bd5eb35f16a9c55afcf5eb1c920cbbaf09747369 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Dec 2015 08:53:17 -0800 Subject: xfrm: take care of request sockets TCP SYNACK messages might now be attached to request sockets. XFRM needs to get back to a listener socket. Adds new helpers that might be used elsewhere : sk_to_full_sk() and sk_const_to_full_sk() Note: We also need to add RCU protection for xfrm lookups, now TCP/DCCP have lockless listener processing. This will be addressed in separate patches. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 09bfcba..18276f0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2198,6 +2198,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, xdst = NULL; route = NULL; + sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); @@ -2477,6 +2478,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } pol = NULL; + sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (IS_ERR(pol)) { -- cgit v1.1 From 639e077b43d9c54ffb1e1b54a2de54597ceae1d8 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 7 Dec 2015 12:13:26 +0200 Subject: netfilter: nfnetlink_queue: Unregister pernet subsys in case of init failure Commit 3bfe049807c2403 ("netfilter: nfnetlink_{log,queue}: Register pernet in first place") reorganised the initialisation order of the pernet_subsys to avoid "use-before-initialised" condition. However, in doing so the cleanup logic in nfnetlink_queue got botched in that the pernet_subsys wasn't cleaned in case nfnetlink_subsys_register failed. This patch adds the necessary cleanup routine call. Fixes: 3bfe049807c2403 ("netfilter: nfnetlink_{log,queue}: Register pernet in first place") Signed-off-by: Nikolay Borisov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3e24054..861c661 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1419,6 +1419,7 @@ static int __init nfnetlink_queue_init(void) cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); + unregister_pernet_subsys(&nfnl_queue_net_ops); out: return status; } -- cgit v1.1 From b7bb110008607a915298bf0f47d25886ecb94477 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 10 Dec 2015 10:37:51 +0100 Subject: rfkill: copy the name into the rfkill struct Some users of rfkill, like NFC and cfg80211, use a dynamic name when allocating rfkill, in those cases dev_name(). Therefore, the pointer passed to rfkill_alloc() might not be valid forever, I specifically found the case that the rfkill name was quite obviously an invalid pointer (or at least garbage) when the wiphy had been renamed. Fix this by making a copy of the rfkill name in rfkill_alloc(). 
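The allocation pattern used by the fix is the usual flexible-array-member idiom, shown here in a generic user-space form (illustrative only, not the rfkill code):

    #include <stdlib.h>
    #include <string.h>

    struct item {
            int type;
            char name[];            /* storage allocated together with the struct */
    };

    static struct item *item_alloc(const char *name, int type)
    {
            struct item *it = calloc(1, sizeof(*it) + strlen(name) + 1);

            if (!it)
                    return NULL;
            it->type = type;
            strcpy(it->name, name); /* private copy: the caller's string may be
                                     * freed or renamed later without harm */
            return it;
    }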
Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/rfkill/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index b41e9ea..f53bf3b6 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -49,7 +49,6 @@ struct rfkill { spinlock_t lock; - const char *name; enum rfkill_type type; unsigned long state; @@ -73,6 +72,7 @@ struct rfkill { struct delayed_work poll_work; struct work_struct uevent_work; struct work_struct sync_work; + char name[]; }; #define to_rfkill(d) container_of(d, struct rfkill, dev) @@ -876,14 +876,14 @@ struct rfkill * __must_check rfkill_alloc(const char *name, if (WARN_ON(type == RFKILL_TYPE_ALL || type >= NUM_RFKILL_TYPES)) return NULL; - rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL); + rfkill = kzalloc(sizeof(*rfkill) + strlen(name) + 1, GFP_KERNEL); if (!rfkill) return NULL; spin_lock_init(&rfkill->lock); INIT_LIST_HEAD(&rfkill->node); rfkill->type = type; - rfkill->name = name; + strcpy(rfkill->name, name); rfkill->ops = ops; rfkill->data = ops_data; -- cgit v1.1 From 633c9a840d0bf1cce690f3165bdacd8ab412949e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 9 Dec 2015 12:08:26 +0100 Subject: netfilter: nfnetlink: avoid recurrent netns lookups in call_batch Pass the net pointer to the call_batch callback functions so we can skip recurrent lookups. Signed-off-by: Pablo Neira Ayuso Tested-by: Arturo Borrero Gonzalez --- net/netfilter/nf_tables_api.c | 96 ++++++++++++++++++++----------------------- net/netfilter/nfnetlink.c | 2 +- 2 files changed, 46 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 93cc473..f1002dc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -89,6 +89,7 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) } static void nft_ctx_init(struct nft_ctx *ctx, + struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, struct nft_af_info *afi, @@ -96,7 +97,7 @@ static void nft_ctx_init(struct nft_ctx *ctx, struct nft_chain *chain, const struct nlattr * const *nla) { - ctx->net = sock_net(skb->sk); + ctx->net = net; ctx->afi = afi; ctx->table = table; ctx->chain = chain; @@ -672,15 +673,14 @@ err: return ret; } -static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_newtable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nlattr *name; struct nft_af_info *afi; struct nft_table *table; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; u32 flags = 0; struct nft_ctx ctx; @@ -706,7 +706,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); return nf_tables_updtable(&ctx); } @@ -730,7 +730,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->sets); table->flags = flags; - nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); if (err < 0) goto err3; @@ -810,18 +810,17 @@ out: return err; } -static int nf_tables_deltable(struct sock *nlsk, struct 
sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_deltable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; struct nft_table *table; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nft_ctx ctx; - nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla); if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL) return nft_flush(&ctx, family); @@ -1221,8 +1220,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) } } -static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_newchain(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1232,7 +1231,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nft_chain *chain; struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct net_device *dev = NULL; u8 policy = NF_ACCEPT; @@ -1313,7 +1311,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(stats); } - nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN, sizeof(struct nft_trans_chain)); if (trans == NULL) { @@ -1461,7 +1459,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (err < 0) goto err1; - nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN); if (err < 0) goto err2; @@ -1476,15 +1474,14 @@ err1: return err; } -static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_delchain(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nft_ctx ctx; @@ -1506,7 +1503,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (chain->use > 0) return -EBUSY; - nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); return nft_delchain(&ctx); } @@ -2010,13 +2007,12 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, static struct nft_expr_info *info; -static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_newrule(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; - struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; @@ -2075,7 +2071,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(old_rule); } - nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); 
n = 0; size = 0; @@ -2176,13 +2172,12 @@ err1: return err; } -static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_delrule(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; - struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain = NULL; struct nft_rule *rule; @@ -2205,7 +2200,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(chain); } - nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); if (chain) { if (nla[NFTA_RULE_HANDLE]) { @@ -2344,12 +2339,11 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 }, }; -static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, +static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi = NULL; struct nft_table *table = NULL; @@ -2371,7 +2365,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, return -ENOENT; } - nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); return 0; } @@ -2623,6 +2617,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; @@ -2630,7 +2625,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, int err; /* Verify existence before starting dump */ - err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla); if (err < 0) return err; @@ -2693,14 +2688,13 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, return 0; } -static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_newset(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_set_ops *ops; struct nft_af_info *afi; - struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; @@ -2798,7 +2792,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) { @@ -2882,8 +2876,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set nft_set_destroy(set); } -static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_delset(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -2896,7 +2890,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_TABLE] == NULL) return -EINVAL; - err = 
nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla); if (err < 0) return err; @@ -3024,7 +3018,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; -static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, +static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -3033,7 +3027,6 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; struct nft_table *table; - struct net *net = sock_net(skb->sk); afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); if (IS_ERR(afi)) @@ -3045,7 +3038,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, if (!trans && (table->flags & NFT_TABLE_INACTIVE)) return -ENOENT; - nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); return 0; } @@ -3135,6 +3128,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_set_dump_args args; struct nft_ctx ctx; @@ -3150,8 +3144,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla, - false); + err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, + (void *)nla, false); if (err < 0) return err; @@ -3212,11 +3206,12 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_ctx ctx; int err; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false); if (err < 0) return err; @@ -3528,11 +3523,10 @@ err1: return err; } -static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; @@ -3541,7 +3535,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, true); if (err < 0) return err; @@ -3623,8 +3617,8 @@ err1: return err; } -static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nlattr *attr; @@ -3635,7 +3629,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false); if (err < 0) return err; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 
46453ab..445590f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -381,7 +381,7 @@ replay: goto ack; if (nc->call_batch) { - err = nc->call_batch(net->nfnl, skb, nlh, + err = nc->call_batch(net, net->nfnl, skb, nlh, (const struct nlattr **)cda); } -- cgit v1.1 From bd678e09dc1797bc0e2e536b6b268e7cf46e0184 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 9 Dec 2015 12:09:56 +0100 Subject: netfilter: nfnetlink: fix splat due to incorrect socket memory accounting in skbuff clones If we attach the sk to the skb from nfnetlink_rcv_batch(), then netlink_skb_destructor() will underflow the socket receive memory counter and we get warning splat when releasing the socket. $ cat /proc/net/netlink sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode ffff8800ca903000 12 0 00000000 -54144 0 0 2 0 17942 ^^^^^^ Rmem above shows an underflow. And here below the warning splat: [ 1363.815976] WARNING: CPU: 2 PID: 1356 at net/netlink/af_netlink.c:958 netlink_sock_destruct+0x80/0xb9() [...] [ 1363.816152] CPU: 2 PID: 1356 Comm: kworker/u16:1 Tainted: G W 4.4.0-rc1+ #153 [ 1363.816155] Hardware name: LENOVO 23259H1/23259H1, BIOS G2ET32WW (1.12 ) 05/30/2012 [ 1363.816160] Workqueue: netns cleanup_net [ 1363.816163] 0000000000000000 ffff880119203dd0 ffffffff81240204 0000000000000000 [ 1363.816169] ffff880119203e08 ffffffff8104db4b ffffffff813d49a1 ffff8800ca771000 [ 1363.816174] ffffffff81a42b00 0000000000000000 ffff8800c0afe1e0 ffff880119203e18 [ 1363.816179] Call Trace: [ 1363.816181] [] dump_stack+0x4e/0x79 [ 1363.816193] [] warn_slowpath_common+0x9a/0xb3 [ 1363.816197] [] ? netlink_sock_destruct+0x80/0xb9 skb->sk was only needed to lookup for the netns, however we don't need this anymore since 633c9a840d0b ("netfilter: nfnetlink: avoid recurrent netns lookups in call_batch") so this patch removes this manual socket assignment to resolve this problem. Reported-by: Arturo Borrero Gonzalez Reported-by: Ben Hutchings Signed-off-by: Pablo Neira Ayuso Tested-by: Arturo Borrero Gonzalez --- net/netfilter/nfnetlink.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 445590f..77afe91 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -295,8 +295,6 @@ replay: if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); - skb->sk = oskb->sk; - nfnl_lock(subsys_id); ss = rcu_dereference_protected(table[subsys_id].subsys, lockdep_is_held(&table[subsys_id].mutex)); -- cgit v1.1 From d3340b79ec8222d20453b1e7f261b017d1d09dc9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 9 Dec 2015 22:06:59 +0100 Subject: netfilter: nf_dup: add missing dependencies with NF_CONNTRACK CONFIG_NF_CONNTRACK=m CONFIG_NF_DUP_IPV4=y results in: net/built-in.o: In function `nf_dup_ipv4': >> (.text+0xd434f): undefined reference to `nf_conntrack_untracked' Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 1 + net/ipv6/netfilter/Kconfig | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index a355841..c187c60 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -60,6 +60,7 @@ config NFT_REJECT_IPV4 config NFT_DUP_IPV4 tristate "IPv4 nf_tables packet duplication support" + depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DUP_IPV4 help This module enables IPv4 packet duplication support for nf_tables. 
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index f6a024e..e10a04c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -49,6 +49,7 @@ config NFT_REJECT_IPV6 config NFT_DUP_IPV6 tristate "IPv6 nf_tables packet duplication support" + depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DUP_IPV6 help This module enables IPv6 packet duplication support for nf_tables. -- cgit v1.1 From 56f047305dd4b6b61771ac4f523718e4111052a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Dec 2015 07:22:01 -0800 Subject: xfrm: add rcu grace period in xfrm_policy_destroy() We will soon switch sk->sk_policy[] to RCU protection, as SYNACK packets are sent while listener socket is not locked. This patch simply adds RCU grace period before struct xfrm_policy freeing, and the corresponding rcu_head in struct xfrm_policy. Signed-off-by: Eric Dumazet Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 18276f0..f57a571 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -303,6 +303,14 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) } EXPORT_SYMBOL(xfrm_policy_alloc); +static void xfrm_policy_destroy_rcu(struct rcu_head *head) +{ + struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu); + + security_xfrm_policy_free(policy->security); + kfree(policy); +} + /* Destroy xfrm_policy: descendant resources must be released to this moment. */ void xfrm_policy_destroy(struct xfrm_policy *policy) @@ -312,8 +320,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) BUG(); - security_xfrm_policy_free(policy->security); - kfree(policy); + call_rcu(&policy->rcu, xfrm_policy_destroy_rcu); } EXPORT_SYMBOL(xfrm_policy_destroy); -- cgit v1.1 From d188ba86dd07a72ebebfa22fe9cb0b0572e57740 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Dec 2015 07:22:02 -0800 Subject: xfrm: add rcu protection to sk->sk_policy[] XFRM can deal with SYNACK messages, sent while listener socket is not locked. We add proper rcu protection to __xfrm_sk_clone_policy() and xfrm_sk_policy_lookup() This might serve as the first step to remove xfrm.xfrm_policy_lock use in fast path. Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Signed-off-by: Eric Dumazet Acked-by: Steffen Klassert Signed-off-by: David S. 
Miller --- net/core/sock.c | 2 +- net/xfrm/xfrm_policy.c | 37 +++++++++++++++++++++++++------------ 2 files changed, 26 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index d01c8f4..765be83 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1550,7 +1550,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ is_charged = sk_filter_charge(newsk, filter); - if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) { + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f57a571..948fa55 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1221,8 +1221,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, struct xfrm_policy *pol; struct net *net = sock_net(sk); + rcu_read_lock(); read_lock_bh(&net->xfrm.xfrm_policy_lock); - if ((pol = sk->sk_policy[dir]) != NULL) { + pol = rcu_dereference(sk->sk_policy[dir]); + if (pol != NULL) { bool match = xfrm_selector_match(&pol->selector, fl, sk->sk_family); int err = 0; @@ -1246,6 +1248,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, } out: read_unlock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_unlock(); return pol; } @@ -1314,13 +1317,14 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) #endif write_lock_bh(&net->xfrm.xfrm_policy_lock); - old_pol = sk->sk_policy[dir]; - sk->sk_policy[dir] = pol; + old_pol = rcu_dereference_protected(sk->sk_policy[dir], + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { pol->curlft.add_time = get_seconds(); pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); xfrm_sk_policy_link(pol, dir); } + rcu_assign_pointer(sk->sk_policy[dir], pol); if (old_pol) { if (pol) xfrm_policy_requeue(old_pol, pol); @@ -1368,17 +1372,26 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) return newp; } -int __xfrm_sk_clone_policy(struct sock *sk) +int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { - struct xfrm_policy *p0 = sk->sk_policy[0], - *p1 = sk->sk_policy[1]; + const struct xfrm_policy *p; + struct xfrm_policy *np; + int i, ret = 0; - sk->sk_policy[0] = sk->sk_policy[1] = NULL; - if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL) - return -ENOMEM; - if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL) - return -ENOMEM; - return 0; + rcu_read_lock(); + for (i = 0; i < 2; i++) { + p = rcu_dereference(osk->sk_policy[i]); + if (p) { + np = clone_policy(p, i); + if (unlikely(!np)) { + ret = -ENOMEM; + break; + } + rcu_assign_pointer(sk->sk_policy[i], np); + } + } + rcu_read_unlock(); + return ret; } static int -- cgit v1.1 From 9470e24f35ab81574da54e69df90c1eb4a96b43f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Dec 2015 07:25:06 -0800 Subject: ipv6: sctp: clone options to avoid use after free SCTP is lacking proper np->opt cloning at accept() time. TCP and DCCP use ipv6_dup_options() helper, do the same in SCTP. We might later factorize this code in a common helper to avoid future mistakes. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/ipv6.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index d28c0b4..ec52912 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -641,6 +641,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct sock *newsk; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; + struct ipv6_txoptions *opt; newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) @@ -660,6 +661,13 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); + /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname() * and getpeername(). */ -- cgit v1.1 From 2f3ab9f9fc23811188b9d07d86e4d99ffee887f4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 9 Dec 2015 14:07:39 -0800 Subject: openvswitch: Fix helper reference leak If the actions (re)allocation fails, or the actions list is larger than the maximum size, and the conntrack action is the last action when these problems are hit, then references to helper modules may be leaked. Fix the issue. Fixes: cae3a2627520 ("openvswitch: Allow attaching helpers to ct action") Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c2cc111..585a5aa 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -53,6 +53,8 @@ struct ovs_conntrack_info { struct md_labels labels; }; +static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); + static u16 key_to_nfproto(const struct sw_flow_key *key) { switch (ntohs(key->eth.type)) { @@ -708,7 +710,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, nf_conntrack_get(&ct_info.ct->ct_general); return 0; err_free_ct: - nf_conntrack_free(ct_info.ct); + __ovs_ct_free_action(&ct_info); return err; } @@ -750,6 +752,11 @@ void ovs_ct_free_action(const struct nlattr *a) { struct ovs_conntrack_info *ct_info = nla_data(a); + __ovs_ct_free_action(ct_info); +} + +static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) +{ if (ct_info->helper) module_put(ct_info->helper->me); if (ct_info->ct) -- cgit v1.1 From d110986c5ddb1caf576e8576044c0c831e3e7fa4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 9 Dec 2015 14:07:40 -0800 Subject: openvswitch: Respect conntrack zone even if invalid If userspace executes ct(zone=1), and the connection tracker determines that the packet is invalid, then the ct_zone flow key field is populated with the default zone rather than the zone that was specified. Even though connection tracking failed, this field should be updated with the value that the action specified. Fix the issue. Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 585a5aa..3e88922 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -143,6 +143,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, * previously sent the packet to conntrack via the ct action. */ static void ovs_ct_update_key(const struct sk_buff *skb, + const struct ovs_conntrack_info *info, struct sw_flow_key *key, bool post_ct) { const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; @@ -160,13 +161,15 @@ static void ovs_ct_update_key(const struct sk_buff *skb, zone = nf_ct_zone(ct); } else if (post_ct) { state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; + if (info) + zone = &info->zone; } __ovs_ct_update_key(key, state, zone, ct); } void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) { - ovs_ct_update_key(skb, key, false); + ovs_ct_update_key(skb, NULL, key, false); } int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) @@ -420,7 +423,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, } } - ovs_ct_update_key(skb, key, true); + ovs_ct_update_key(skb, info, key, true); return 0; } -- cgit v1.1 From a3e948e83a302c63f196512e253067ded6bbc832 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 10 Dec 2015 19:30:48 +0000 Subject: mpls: validate L2 via address length If an L2 via address for an mpls nexthop is specified, the length of the L2 address must match that expected by the output device, otherwise it could access memory beyond the end of the via address buffer in the route. This check was present prior to commit f8efb73c97e2 ("mpls: multipath route support"), but got lost in the refactoring, so add it back, applying it to all nexthops in multipath routes. Fixes: f8efb73c97e2 ("mpls: multipath route support") Signed-off-by: Robert Shearman Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750..3be29cb 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -534,6 +534,10 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, if (!mpls_dev_get(dev)) goto errout; + if ((nh->nh_via_table == NEIGH_LINK_TABLE) && + (dev->addr_len != nh->nh_via_alen)) + goto errout; + RCU_INIT_POINTER(nh->nh_dev, dev); return 0; -- cgit v1.1 From 72dcac96c7f8320caf80dfaa559331174060a1ce Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 10 Dec 2015 19:30:49 +0000 Subject: mpls: don't dump RTA_VIA attribute if not specified The problem seen is that when adding a route with a nexthop with no via address specified, iproute2 generates bogus output: # ip -f mpls route add 100 dev lo # ip -f mpls route list 100 via inet 0.0.8.0 dev lo The reason for this is that the kernel generates an RTA_VIA attribute with the family set to AF_INET, but the via address data having zero length. The cause of family being AF_INET is that on route insert cfg->rc_via_table is left set to 0, which just happens to be NEIGH_ARP_TABLE which is then translated into AF_INET. iproute2 doesn't validate the length prior to printing and so prints garbage. Although it could be fixed to do the validation, I would argue that AF_INET addresses should always be exactly 4 bytes so the kernel is really giving userspace bogus data. 
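As an aside, the length validation that userspace (or any RTA_VIA consumer) could apply is simple: the via payload length has to match what the stated address family implies. A minimal, self-contained userspace sketch of that check, purely illustrative and not the iproute2 or kernel code:

```c
#include <stdbool.h>
#include <stdio.h>
#include <sys/socket.h>	/* AF_INET, AF_INET6 */

/* Hypothetical helper: does the via address length match its family? */
static bool via_len_ok(unsigned short family, size_t addr_len)
{
	switch (family) {
	case AF_INET:
		return addr_len == 4;	/* IPv4 addresses are always 4 bytes */
	case AF_INET6:
		return addr_len == 16;	/* IPv6 addresses are always 16 bytes */
	default:
		return addr_len > 0;	/* link-layer via: only require it to be non-empty */
	}
}

int main(void)
{
	/* The bogus case from the report: family AF_INET with a zero-length payload. */
	printf("AF_INET with 0 bytes: %s\n", via_len_ok(AF_INET, 0) ? "ok" : "reject");
	printf("AF_INET with 4 bytes: %s\n", via_len_ok(AF_INET, 4) ? "ok" : "reject");
	return 0;
}
```

Either way, the cleaner fix is on the kernel side, as the patch goes on to do.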
Therefore, avoid generating the RTA_VIA attribute when dumping the route if the via address wasn't specified on add/modify. This is indicated by NEIGH_ARP_TABLE and a zero via address length - if the user specified a via address the address length would have been validated such that it was 4 bytes. Although this is a change in behaviour that is visible to userspace, I believe that what was generated before was invalid and as such userspace wouldn't be expecting it. Signed-off-by: Robert Shearman Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 3be29cb..ac1c116 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1235,7 +1235,9 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, nh->nh_label)) goto nla_put_failure; - if (nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), + if ((nh->nh_via_table != NEIGH_ARP_TABLE || + nh->nh_via_alen != 0) && + nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), nh->nh_via_alen)) goto nla_put_failure; dev = rtnl_dereference(nh->nh_dev); @@ -1323,7 +1325,9 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) if (nh->nh_dev) payload += nla_total_size(4); /* RTA_OIF */ - payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */ + if (nh->nh_via_table != NEIGH_ARP_TABLE || + nh->nh_via_alen != 0) /* RTA_VIA */ + payload += nla_total_size(2 + nh->nh_via_alen); if (nh->nh_labels) /* RTA_NEWDST */ payload += nla_total_size(nh->nh_labels * 4); } else { -- cgit v1.1 From eb7809f093b109a7db7454dc775423675d075653 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 10 Dec 2015 19:30:50 +0000 Subject: mpls: fix out-of-bounds access when via address not specified When a via address isn't specified, the via table is left initialised to 0 (NEIGH_ARP_TABLE), and the via address length also left initialised to 0. This results in a via address array of length 0 being allocated (contiguous with route and nexthop array), meaning that when a packet is sent using neigh_xmit the neighbour lookup and creation will cause an out-of-bounds access when accessing the 4 bytes of the IPv4 address it assumes it has been given a pointer to. This could be fixed by allocating the 4 bytes of via address necessary and leaving it as all zeroes. However, it seems wrong to me to use an ipv4 nexthop (including possibly ARPing for 0.0.0.0) when the user didn't specify to do so. Instead, set the via address table to NEIGH_NR_TABLES to signify it hasn't been specified and use this at forwarding time to signify a neigh_xmit using an L2 address consisting of the device address. This mechanism is the same as that used for both ARP and ND for loopback interfaces and those flagged as no-arp, which are all we can really support in this case. Fixes: cf4b24f0024f ("mpls: reduce memory usage of routes") Signed-off-by: Robert Shearman Signed-off-by: David S. 
Miller --- net/mpls/af_mpls.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index ac1c116..7bfc85f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -27,6 +27,8 @@ */ #define MAX_MP_SELECT_LABELS 4 +#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1) + static int zero = 0; static int label_limit = (1 << 20) - 1; @@ -317,7 +319,13 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, } } - err = neigh_xmit(nh->nh_via_table, out_dev, mpls_nh_via(rt, nh), skb); + /* If via wasn't specified then send out using device address */ + if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC) + err = neigh_xmit(NEIGH_LINK_TABLE, out_dev, + out_dev->dev_addr, skb); + else + err = neigh_xmit(nh->nh_via_table, out_dev, + mpls_nh_via(rt, nh), skb); if (err) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); @@ -1122,6 +1130,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->rc_label = LABEL_NOT_SPECIFIED; cfg->rc_protocol = rtm->rtm_protocol; + cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC; cfg->rc_nlflags = nlh->nlmsg_flags; cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->rc_nlinfo.nlh = nlh; @@ -1235,8 +1244,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, nh->nh_label)) goto nla_put_failure; - if ((nh->nh_via_table != NEIGH_ARP_TABLE || - nh->nh_via_alen != 0) && + if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC && nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), nh->nh_via_alen)) goto nla_put_failure; @@ -1325,8 +1333,7 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) if (nh->nh_dev) payload += nla_total_size(4); /* RTA_OIF */ - if (nh->nh_via_table != NEIGH_ARP_TABLE || - nh->nh_via_alen != 0) /* RTA_VIA */ + if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */ payload += nla_total_size(2 + nh->nh_via_alen); if (nh->nh_labels) /* RTA_NEWDST */ payload += nla_total_size(nh->nh_labels * 4); -- cgit v1.1 From f20367df1af8f6a4e85b7e586213b8508796fe79 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 10 Dec 2015 19:30:51 +0000 Subject: mpls: make via address optional for multipath routes The via address is optional for a single path route, yet is mandatory when the multipath attribute is used: # ip -f mpls route add 100 dev lo # ip -f mpls route add 101 nexthop dev lo RTNETLINK answers: Invalid argument Make them consistent by making the via address optional when the RTA_MULTIPATH attribute is being parsed so that both forms of specifying the route work. Signed-off-by: Robert Shearman Signed-off-by: David S. 
Miller --- net/mpls/af_mpls.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7bfc85f..c32fc41 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -604,10 +604,14 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, goto errout; } - err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table, - __mpls_nh_via(rt, nh)); - if (err) - goto errout; + if (via) { + err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table, + __mpls_nh_via(rt, nh)); + if (err) + goto errout; + } else { + nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC; + } err = mpls_nh_assign_dev(net, rt, nh, oif); if (err) @@ -689,9 +693,6 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST); } - if (!nla_via) - goto errout; - err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, rtnh->rtnh_ifindex, nla_via, nla_newdst); @@ -1271,7 +1272,8 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, nh->nh_labels, nh->nh_label)) goto nla_put_failure; - if (nla_put_via(skb, nh->nh_via_table, + if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC && + nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), nh->nh_via_alen)) goto nla_put_failure; @@ -1343,7 +1345,9 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) for_nexthops(rt) { nhsize += nla_total_size(sizeof(struct rtnexthop)); - nhsize += nla_total_size(2 + nh->nh_via_alen); + /* RTA_VIA */ + if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) + nhsize += nla_total_size(2 + nh->nh_via_alen); if (nh->nh_labels) nhsize += nla_total_size(nh->nh_labels * 4); } endfor_nexthops(rt); -- cgit v1.1 From a907e36d54e0ff836e55e04531be201bf6b4d8c8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 7 Dec 2015 18:48:07 +0800 Subject: netfilter: nf_tables: use reverse traversal commit_list in nf_tables_abort When we use 'nft -f' to submit rules, it builds multiple rules into one netlink skb to send to the kernel, and the kernel processes them one by one; meanwhile, it adds a trans to commit_list to record every commit. If one of them returns -EAGAIN, status |= NFNL_BATCH_REPLAY is marked, and after all the processing is done it rolls back all the commits. Currently the kernel uses list_add_tail to add a trans to the commit list and list_for_each_entry_safe to roll back, which means the order of adding and rollback is the same. That breaks some cases and can even trigger a call trace, like: 1. add a set into table foo [return -EAGAIN]: commit_list = 'add set trans' 2. del foo: commit_list = 'add set trans' -> 'del set trans' -> 'del tab trans' then nf_tables_abort will be called to roll back: it first processes 'add set trans': case NFT_MSG_NEWSET: trans->ctx.table->use--; list_del_rcu(&nft_trans_set(trans)->list); This deletes the set from table foo, but the set was already removed when table foo was deleted [step 2], so the kernel will panic. The right order of rollback is: 'del tab trans' -> 'del set trans' -> 'add set trans', which is the opposite of the commit_list order. So fix it by rolling back the commits in reverse order in nf_tables_abort.
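The fix relies on a general rule for undoing a transaction log: roll back in LIFO order, because later entries can depend on objects touched by earlier ones. A tiny self-contained C sketch of the idea, using a plain array instead of the kernel's list and hypothetical operation names:

```c
#include <stdio.h>

enum op { ADD_TABLE, ADD_SET, DEL_SET, DEL_TABLE };

static const char *op_name(enum op op)
{
	static const char *names[] = { "add table", "add set", "del set", "del table" };
	return names[op];
}

int main(void)
{
	/* Commit log in the order the operations were recorded (the failing case above). */
	enum op log[] = { ADD_SET, DEL_SET, DEL_TABLE };
	int n = sizeof(log) / sizeof(log[0]);

	/* Abort path: walk the log backwards so every undo sees the state its
	 * operation left behind (what list_for_each_entry_safe_reverse achieves).
	 */
	for (int i = n - 1; i >= 0; i--)
		printf("undo: %s\n", op_name(log[i]));

	return 0;
}
```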
Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f1002dc..2cb429d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4024,7 +4024,8 @@ static int nf_tables_abort(struct sk_buff *skb) struct nft_trans *trans, *next; struct nft_trans_elem *te; - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, + list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { -- cgit v1.1 From dfd01f026058a59a513f8a365b439a0681b803af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 13 Dec 2015 22:11:16 +0100 Subject: sched/wait: Fix the signal handling fix Jan Stancek reported that I wrecked things for him by fixing things for Vladimir :/ His report was due to an UNINTERRUPTIBLE wait getting -EINTR, which should not be possible, however my previous patch made this possible by unconditionally checking signal_pending(). We cannot use current->state as was done previously, because it can be changed by the instruction after the store to that variable. We must instead pass the initial state along and use that. Fixes: 68985633bccb ("sched/wait: Fix signal handling in bit wait helpers") Reported-by: Jan Stancek Reported-by: Chris Mason Tested-by: Jan Stancek Tested-by: Vladimir Murzin Tested-by: Chris Mason Reviewed-by: Paul Turner Cc: Ingo Molnar Cc: tglx@linutronix.de Cc: Oleg Nesterov Cc: hpa@zytor.com Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- net/sunrpc/sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f14f24e..73ad57a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -250,11 +250,11 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) } EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); -static int rpc_wait_bit_killable(struct wait_bit_key *key) +static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode) { - if (fatal_signal_pending(current)) - return -ERESTARTSYS; freezable_schedule_unsafe(); + if (signal_pending_state(mode, current)) + return -ERESTARTSYS; return 0; } -- cgit v1.1 From 7f49e7a38b77a7538acf48762c22ccbd05d9535c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 10 Dec 2015 10:25:24 -0800 Subject: net: Flush local routes when device changes vrf association The VRF driver cycles netdevs when an interface is enslaved or released: the down event is used to flush neighbor and route tables and the up event (if the interface was already up) effectively moves local and connected routes to the proper table. As of 4f823defdd5b the local route is left hanging around after a link down, so when a netdev is moved from one VRF to another (or released from a VRF altogether) local routes are left in the wrong table. Fix by handling the NETDEV_CHANGEUPPER event. When the upper dev is an L3mdev then call fib_disable_ip to flush all routes, local ones too. Fixes: 4f823defdd5b ("ipv4: fix to not remove local route on link down") Cc: Julian Anastasov Signed-off-by: David Ahern Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/fib_frontend.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cc8f3e5..4734475 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1155,6 +1155,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@ -1193,6 +1194,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo case NETDEV_CHANGEMTU: rt_cache_flush(net); break; + case NETDEV_CHANGEUPPER: + info = ptr; + /* flush all routes if dev is linked to or unlinked from + * an L3 master device (e.g., VRF) + */ + if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + fib_disable_ip(dev, NETDEV_DOWN, true); + break; } return NOTIFY_DONE; } -- cgit v1.1 From 79462ad02e861803b3840cc782248c7359451cd9 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 14 Dec 2015 22:03:39 +0100 Subject: net: add validation for the socket syscall protocol argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 郭永刚 reported that one could simply crash the kernel as root by using a simple program: int socket_fd; struct sockaddr_in addr; addr.sin_port = 0; addr.sin_addr.s_addr = INADDR_ANY; addr.sin_family = 10; socket_fd = socket(10,3,0x40000000); connect(socket_fd , &addr,16); AF_INET, AF_INET6 sockets actually only support 8-bit protocol identifiers. inet_sock's skc_protocol field thus is sized accordingly, thus larger protocol identifiers simply cut off the higher bits and store a zero in the protocol fields. This could lead to e.g. NULL function pointer because as a result of the cut off inet_num is zero and we call down to inet_autobind, which is NULL for raw sockets. kernel: Call Trace: kernel: [] ? inet_autobind+0x2e/0x70 kernel: [] inet_dgram_connect+0x54/0x80 kernel: [] SYSC_connect+0xd9/0x110 kernel: [] ? ptrace_notify+0x5b/0x80 kernel: [] ? syscall_trace_enter_phase2+0x108/0x200 kernel: [] SyS_connect+0xe/0x10 kernel: [] tracesys_phase2+0x84/0x89 I found no particular commit which introduced this problem. CVE: CVE-2015-8543 Cc: Cong Wang Reported-by: 郭永刚 Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ax25/af_ax25.c | 3 +++ net/decnet/af_decnet.c | 3 +++ net/ipv4/af_inet.c | 3 +++ net/ipv6/af_inet6.c | 3 +++ net/irda/af_irda.c | 3 +++ 5 files changed, 15 insertions(+) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ae3a47f..fbd0acf 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -805,6 +805,9 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, struct sock *sk; ax25_cb *ax25; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index eebf5ac..13d6b1a 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -678,6 +678,9 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, { struct sock *sk; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 11c4ca1..5c5db66 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -257,6 +257,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; + if (protocol < 0 || protocol >= IPPROTO_MAX) + return -EINVAL; + sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8ec0df7..9f5137c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; + if (protocol < 0 || protocol >= IPPROTO_MAX) + return -EINVAL; + /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index e6aa48b..923abd6 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1086,6 +1086,9 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, struct sock *sk; struct irda_sock *self; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (net != &init_net) return -EAFNOSUPPORT; -- cgit v1.1 From 5037e9ef9454917b047f9f3a19b4dd179fbf7cd4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Dec 2015 14:08:53 -0800 Subject: net: fix IP early demux races David Wilder reported crashes caused by dst reuse. I am seeing a crash on a distro V4.2.3 kernel caused by a double release of a dst_entry. In ipv4_dst_destroy() the call to list_empty() finds a poisoned next pointer, indicating the dst_entry has already been removed from the list and freed. The crash occurs 18 to 24 hours into a run of a network stress exerciser. Thanks to his detailed report and analysis, we were able to understand the core issue. IP early demux can associate a dst to skb, after a lookup in TCP/UDP sockets. When socket cache is not properly set, we want to store into sk->sk_dst_cache the dst for future IP early demux lookups, by acquiring a stable refcount on the dst. Problem is this acquisition is simply using an atomic_inc(), which works well, unless the dst was queued for destruction from dst_release() noticing dst refcount went to zero, if DST_NOCACHE was set on dst. We need to make sure current refcount is not zero before incrementing it, or risk double free as David reported. This patch, being a stable candidate, adds two new helpers, and use them only from IP early demux problematic paths. 
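The idiom behind the new helpers is a conditional reference grab: take a new reference only while the count is still non-zero, because a count that already hit zero means the object is queued for destruction and must not be resurrected. A hedged userspace sketch of that pattern with C11 atomics (illustrative names, not the kernel's dst_hold_safe()/skb_dst_force_safe()):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Take a reference only if the object still has one; return false if the
 * refcount already dropped to zero (object is on its way to being freed).
 */
static bool ref_get_unless_zero(atomic_int *refcnt)
{
	int old = atomic_load(refcnt);

	while (old != 0) {
		if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
			return true;
		/* the failed CAS reloaded 'old'; retry with the fresh value */
	}
	return false;
}

int main(void)
{
	atomic_int live = 2, dying = 0;

	printf("live object : %s\n", ref_get_unless_zero(&live) ? "ref taken" : "skipped");
	printf("dying object: %s\n", ref_get_unless_zero(&dying) ? "ref taken" : "skipped");
	return 0;
}
```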
It might be possible to merge in net-next skb_dst_force() and skb_dst_force_safe(), but I prefer having the smallest patch for stable kernels: Maybe some skb_dst_force() callers do not expect skb->dst can suddenly be cleared. Can probably be backported to linux-3.6 kernels. Reported-by: David J. Wilder Tested-by: David J. Wilder Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 5 ++--- net/ipv6/tcp_ipv6.c | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index db00343..d8841a2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1493,7 +1493,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) if (likely(sk->sk_rx_dst)) skb_dst_drop(skb); else - skb_dst_force(skb); + skb_dst_force_safe(skb); __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; @@ -1721,8 +1721,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { - dst_hold(dst); + if (dst && dst_hold_safe(dst)) { sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e7aab56..6b8a8a9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { + if (dst && dst_hold_safe(dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - dst_hold(dst); sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); -- cgit v1.1 From f654861569872d10dcb79d9d7ca219b316f94ff0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 14 Dec 2015 17:44:10 -0500 Subject: skbuff: Fix offset error in skb_reorder_vlan_header skb_reorder_vlan_header is called after the vlan header has been pulled. As a result the offset of the beginning of the mac header has been increased by 4 bytes (VLAN_HLEN). When moving the mac addresses, include this increase in the offset calculation so that the mac addresses are copied correctly. Fixes: a6e18ff1117 (vlan: Fix untag operations of stacked vlans with REORDER_HEADER off) CC: Nicolas Dichtel CC: Patrick McHardy Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 152b9c7..5cc43d37 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4268,7 +4268,7 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len, + memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; -- cgit v1.1 From 09d118008f9815181d2114b84800e68019cd7b7d Mon Sep 17 00:00:00 2001 From: Ola Olsson Date: Sun, 13 Dec 2015 19:12:03 +0100 Subject: nl80211: fix a few memory leaks in reg.c The first leak occurs when entering the default case in the switch for the initiator in set_regdom. The second leaks a platform_device struct if the platform registration in regulatory_init succeeds but the subsequent regulatory hint fails due to no memory. 
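Both leaks follow from the same error-handling rule: every early return taken after a successful allocation or registration has to release what was acquired before it. A generic sketch of the unwind pattern in plain C, with made-up resources rather than the cfg80211 objects:

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Two-step setup: if the second step fails, the first must be undone. */
static int setup(void)
{
	char *regdom = malloc(64);		/* step 1: acquire something */
	if (!regdom)
		return -1;

	char *hint = malloc(SIZE_MAX);		/* step 2: simulate an -ENOMEM failure */
	if (!hint)
		goto err_free_regdom;		/* unwind step 1 on the error path */

	free(hint);
	free(regdom);
	return 0;

err_free_regdom:
	free(regdom);
	return -1;
}

int main(void)
{
	printf("setup: %s\n", setup() == 0 ? "ok" : "failed, earlier resources released");
	return 0;
}
```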
Signed-off-by: Ola Olsson Signed-off-by: Johannes Berg --- net/wireless/reg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2e8d6f3..06d050d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3029,6 +3029,7 @@ int set_regdom(const struct ieee80211_regdomain *rd, break; default: WARN(1, "invalid initiator %d\n", lr->initiator); + kfree(rd); return -EINVAL; } @@ -3221,8 +3222,10 @@ int __init regulatory_init(void) /* We always try to get an update for the static regdomain */ err = regulatory_hint_core(cfg80211_world_regdom->alpha2); if (err) { - if (err == -ENOMEM) + if (err == -ENOMEM) { + platform_device_unregister(reg_pdev); return err; + } /* * N.B. kobject_uevent_env() can fail mainly for when we're out * memory which is handled and propagated appropriately above -- cgit v1.1 From e5dbe0701a0d7c6127f313a0f68f960176f0209e Mon Sep 17 00:00:00 2001 From: Ola Olsson Date: Sat, 12 Dec 2015 23:17:17 +0100 Subject: nl80211: Fix potential memory leak in nl80211_set_wowlan Compared to cfg80211_rdev_free_wowlan in core.h, the error goto label lacks the freeing of nd_config. Fix that. Signed-off-by: Ola Olsson Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c71e274..624174f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9503,6 +9503,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (new_triggers.tcp && new_triggers.tcp->sock) sock_release(new_triggers.tcp->sock); kfree(new_triggers.tcp); + kfree(new_triggers.nd_config); return err; } #endif -- cgit v1.1 From 707554b4d117330e58374990b6c27ded650dc684 Mon Sep 17 00:00:00 2001 From: Ola Olsson Date: Fri, 11 Dec 2015 21:04:52 +0100 Subject: nl80211: Fix potential memory leak in nl80211_connect Free cached keys if the last early return path is taken. Signed-off-by: Ola Olsson Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 624174f..75b0d23 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7941,8 +7941,10 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) { if (!(rdev->wiphy.features & NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) || - !(rdev->wiphy.features & NL80211_FEATURE_QUIET)) + !(rdev->wiphy.features & NL80211_FEATURE_QUIET)) { + kzfree(connkeys); return -EINVAL; + } connect.flags |= ASSOC_REQ_USE_RRM; } -- cgit v1.1 From 74430f9489a3b6116a5c144eea2bc62cd52012f6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Dec 2015 16:04:38 +0200 Subject: mac80211: run scan completed work on reconfig failure When reconfiguration during resume fails while a scan is pending for completion work, that work will never run, and the scan will be stuck forever. Factor out the code to recover this and call it also in ieee80211_handle_reconfig_failure(). 
Signed-off-by: Johannes Berg --- net/mac80211/util.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7405802..f9a8d14 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1641,6 +1641,29 @@ void ieee80211_stop_device(struct ieee80211_local *local) drv_stop(local); } +static void ieee80211_flush_completed_scan(struct ieee80211_local *local, + bool aborted) +{ + /* It's possible that we don't handle the scan completion in + * time during suspend, so if it's still marked as completed + * here, queue the work and flush it to clean things up. + * Instead of calling the worker function directly here, we + * really queue it to avoid potential races with other flows + * scheduling the same work. + */ + if (test_bit(SCAN_COMPLETED, &local->scanning)) { + /* If coming from reconfiguration failure, abort the scan so + * we don't attempt to continue a partial HW scan - which is + * possible otherwise if (e.g.) the 2.4 GHz portion was the + * completed scan, and a 5 GHz portion is still pending. + */ + if (aborted) + set_bit(SCAN_ABORTED, &local->scanning); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + flush_delayed_work(&local->scan_work); + } +} + static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; @@ -1660,6 +1683,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) local->suspended = false; local->in_reconfig = false; + ieee80211_flush_completed_scan(local, true); + /* scheduled scan clearly can't be running any more, but tell * cfg80211 and clear local state */ @@ -2074,17 +2099,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mb(); local->resuming = false; - /* It's possible that we don't handle the scan completion in - * time during suspend, so if it's still marked as completed - * here, queue the work and flush it to clean things up. - * Instead of calling the worker function directly here, we - * really queue it to avoid potential races with other flows - * scheduling the same work. - */ - if (test_bit(SCAN_COMPLETED, &local->scanning)) { - ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); - flush_delayed_work(&local->scan_work); - } + ieee80211_flush_completed_scan(local, false); if (local->open_count && !reconfig_due_to_wowlan) drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_SUSPEND); -- cgit v1.1 From 1ea2c864808e525247d2b6cfdb61b93fe669145e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Dec 2015 16:04:39 +0200 Subject: mac80211: reprogram in interface order During reprogramming, mac80211 currently first adds all the channel contexts, then binds them to the vifs and then goes to reconfigure all the interfaces. Drivers might, perhaps implicitly, rely on the operation order for certain things that typically happen within a single function elsewhere in mac80211. To avoid problems with that, reorder the code in mac80211's restart/reprogramming to work fully within the interface loop so that the order of operations is like in normal operation. For iwlwifi, this fixes a firmware crash when reprogramming with an AP/GO interface active. 
Reported-by: David Spinadel Signed-off-by: Johannes Berg --- net/mac80211/util.c | 76 ++++++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f9a8d14..33344f5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1723,6 +1723,27 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local, mutex_unlock(&local->chanctx_mtx); } +static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + /* add STAs back */ + mutex_lock(&local->sta_mtx); + list_for_each_entry(sta, &local->sta_list, list) { + enum ieee80211_sta_state state; + + if (!sta->uploaded || sta->sdata != sdata) + continue; + + for (state = IEEE80211_STA_NOTEXIST; + state < sta->sta_state; state++) + WARN_ON(drv_sta_state(local, sta->sdata, sta, state, + state + 1)); + } + mutex_unlock(&local->sta_mtx); +} + int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; @@ -1858,50 +1879,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(drv_add_chanctx(local, ctx)); mutex_unlock(&local->chanctx_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - ieee80211_assign_chanctx(local, sdata); - } - sdata = rtnl_dereference(local->monitor_sdata); if (sdata && ieee80211_sdata_running(sdata)) ieee80211_assign_chanctx(local, sdata); } - /* add STAs back */ - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) { - enum ieee80211_sta_state state; - - if (!sta->uploaded) - continue; - - /* AP-mode stations will be added later */ - if (sta->sdata->vif.type == NL80211_IFTYPE_AP) - continue; - - for (state = IEEE80211_STA_NOTEXIST; - state < sta->sta_state; state++) - WARN_ON(drv_sta_state(local, sta->sdata, sta, state, - state + 1)); - } - mutex_unlock(&local->sta_mtx); - - /* reconfigure tx conf */ - if (hw->queues >= IEEE80211_NUM_ACS) { - list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_MONITOR || - !ieee80211_sdata_running(sdata)) - continue; - - for (i = 0; i < IEEE80211_NUM_ACS; i++) - drv_conf_tx(local, sdata, i, - &sdata->tx_conf[i]); - } - } - /* reconfigure hardware */ ieee80211_hw_config(local, ~0); @@ -1914,6 +1896,22 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; + ieee80211_assign_chanctx(local, sdata); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + break; + default: + ieee80211_reconfig_stations(sdata); + /* fall through */ + case NL80211_IFTYPE_AP: /* AP stations are handled later */ + for (i = 0; i < IEEE80211_NUM_ACS; i++) + drv_conf_tx(local, sdata, i, + &sdata->tx_conf[i]); + break; + } + /* common change flags for all interface types */ changed = BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE | -- cgit v1.1 From a87da0cbc42949cefc8282c39ab4cb8c460bd6ea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Dec 2015 16:04:37 +0200 Subject: mac80211: suppress unchanged "limiting TX power" messages When the AP is advertising limited TX power, the message can be printed over and over again. Suppress it when the power level isn't changing. 
This fixes https://bugzilla.kernel.org/show_bug.cgi?id=106011 Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b140cc6..f360b77 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1379,21 +1379,26 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, */ if (has_80211h_pwr && (!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) { + new_ap_level = pwr_level_80211h; + + if (sdata->ap_power_level == new_ap_level) + return 0; + sdata_dbg(sdata, "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", pwr_level_80211h, chan_pwr, pwr_reduction_80211h, sdata->u.mgd.bssid); - new_ap_level = pwr_level_80211h; } else { /* has_cisco_pwr is always true here. */ + new_ap_level = pwr_level_cisco; + + if (sdata->ap_power_level == new_ap_level) + return 0; + sdata_dbg(sdata, "Limiting TX power to %d dBm as advertised by %pM\n", pwr_level_cisco, sdata->u.mgd.bssid); - new_ap_level = pwr_level_cisco; } - if (sdata->ap_power_level == new_ap_level) - return 0; - sdata->ap_power_level = new_ap_level; if (__ieee80211_recalc_txpower(sdata)) return BSS_CHANGED_TXPOWER; -- cgit v1.1 From cf1e05c63642ce65821a6277adfc2157f7334c9d Mon Sep 17 00:00:00 2001 From: Eyal Shapira Date: Tue, 8 Dec 2015 16:04:36 +0200 Subject: mac80211: handle width changes from opmode notification IE in beacon An AP can send an operating channel width change in a beacon opmode notification IE as long as there's a change in the nss as well (See 802.11ac-2013 section 10.41). So don't limit updating to nss only from an opmode notification IE. Signed-off-by: Eyal Shapira Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +-- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/mlme.c | 2 +- net/mac80211/rx.c | 3 +-- net/mac80211/vht.c | 10 +++------- 5 files changed, 8 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index da471ee..c12f348 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1169,8 +1169,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, * rc isn't initialized here yet, so ignore it */ __ieee80211_vht_handle_opmode(sdata, sta, - params->opmode_notif, - band, false); + params->opmode_notif, band); } if (ieee80211_vif_is_mesh(&sdata->vif)) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d832bd5..5322b4c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1709,10 +1709,10 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band, bool nss_only); + enum ieee80211_band band); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band, bool nss_only); + enum ieee80211_band band); void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap); void ieee80211_get_vht_mask_from_cap(__le16 vht_cap, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f360b77..3aa0434 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3580,7 +3580,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (sta && 
elems.opmode_notif) ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif, - rx_status->band, true); + rx_status->band); mutex_unlock(&local->sta_mtx); changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8bae5de..82af407 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2736,8 +2736,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; ieee80211_vht_handle_opmode(rx->sdata, rx->sta, - opmode, status->band, - false); + opmode, status->band); goto handled; } default: diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index ff1c798..c38b2f0 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -378,7 +378,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band, bool nss_only) + enum ieee80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -401,9 +401,6 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, changed |= IEEE80211_RC_NSS_CHANGED; } - if (nss_only) - return changed; - switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; @@ -430,13 +427,12 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band, bool nss_only) + enum ieee80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; - u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, - band, nss_only); + u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, band); if (changed > 0) rate_control_rate_update(local, sband, sta, changed); -- cgit v1.1 From 225734de70cd0a9e0b978f3583a4a87939271d5e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Dec 2015 09:43:12 -0800 Subject: net_sched: make qdisc_tree_decrease_qlen() work for non mq Stas Nichiporovich reported a regression in his HFSC qdisc setup on a non multi queue device. It turns out I mistakenly added a TCQ_F_NOPARENT flag on all qdisc allocated in qdisc_create() for non multi queue devices, which was rather buggy. I was clearly mislead by the TCQ_F_ONETXQUEUE that is also set here for no good reason, since it only matters for the root qdisc. Fixes: 4eaf3b84f288 ("net_sched: fix qdisc_tree_decrease_qlen() races") Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7ec667d..b5c2cf2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -950,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, } lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); if (!netif_is_multiqueue(dev)) - sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + sch->flags |= TCQ_F_ONETXQUEUE; } sch->handle = handle; -- cgit v1.1 From 5233252fce714053f0151680933571a2da9cbfb4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Dec 2015 15:39:08 -0500 Subject: bluetooth: Validate socket address length in sco_sock_bind(). Signed-off-by: David S. 
Miller --- net/bluetooth/sco.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index fe12966..f52bcbf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -526,6 +526,9 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, if (!addr || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + if (addr_len < sizeof(struct sockaddr_sco)) + return -EINVAL; + lock_sock(sk); if (sk->sk_state != BT_OPEN) { -- cgit v1.1 From 130ed5d105dde141e7fe60d5440aa53e0a84f13b Mon Sep 17 00:00:00 2001 From: "tadeusz.struk@intel.com" Date: Tue, 15 Dec 2015 10:46:17 -0800 Subject: net: fix uninitialized variable issue msg_iocb needs to be initialized on the recv/recvfrom path. Otherwise afalg will wrongly interpret it as an async call. Cc: stable@vger.kernel.org Reported-by: Harald Freudenberger Signed-off-by: Tadeusz Struk Signed-off-by: David S. Miller --- net/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 456fadb..29822d6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1695,6 +1695,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_name = addr ? (struct sockaddr *)&address : NULL; /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; + msg.msg_iocb = NULL; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); -- cgit v1.1 From 9b29c6962b70f232cde4076b1020191e1be0889d Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 15 Dec 2015 22:59:12 +0100 Subject: ipv6: automatically enable stable privacy mode if stable_secret set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bjørn reported that while we switch all interfaces to privacy stable mode when setting the secret, we don't set this mode for new interfaces. This does not make sense, so change this behaviour. Fixes: 622c81d57b392cc ("ipv6: generation of stable privacy addresses for link-local and autoconf") Reported-by: Bjørn Mork Cc: Bjørn Mork Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a57d3d1..17f8e7e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -350,6 +350,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) setup_timer(&ndev->rs_timer, addrconf_rs_timer, (unsigned long)ndev); memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); + + if (ndev->cnf.stable_secret.initialized) + ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + else + ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; + ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); -- cgit v1.1 From 3036facbb7be3a169e35be3b271162b0fa564a2d Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 15 Dec 2015 21:01:53 +0100 Subject: fou: clean up socket with kfree_rcu fou->udp_offloads is managed by RCU. As it is actually included inside the fou sockets, we cannot let the memory go out of scope before a grace period. We either can synchronize_rcu or switch over to kfree_rcu to manage the sockets. kfree_rcu seems appropriate as it is used by vxlan and geneve. 
Fixes: 23461551c00628c ("fou: Support for foo-over-udp RX path") Cc: Tom Herbert Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/fou.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index e0fcbbb..bd903fe 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -24,6 +24,7 @@ struct fou { u16 type; struct udp_offload udp_offloads; struct list_head list; + struct rcu_head rcu; }; #define FOU_F_REMCSUM_NOPARTIAL BIT(0) @@ -417,7 +418,7 @@ static void fou_release(struct fou *fou) list_del(&fou->list); udp_tunnel_sock_release(sock); - kfree(fou); + kfree_rcu(fou, rcu); } static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) -- cgit v1.1 From 3822b5c2fc62e3de8a0f33806ff279fb7df92432 Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Wed, 16 Dec 2015 20:09:25 +0000 Subject: af_unix: Revert 'lock_interruptible' in stream receive code With b3ca9b02b00704053a38bfe4c31dbbb9c13595d0, the AF_UNIX SOCK_STREAM receive code was changed from using mutex_lock(&u->readlock) to mutex_lock_interruptible(&u->readlock) to prevent signals from being delayed for an indefinite time if a thread sleeping on the mutex happened to be selected for handling the signal. But this was never a problem with the stream receive code (as opposed to its datagram counterpart) as that never went to sleep waiting for new messages with the mutex held and thus, wouldn't cause secondary readers to block on the mutex waiting for the sleeping primary reader. As the interruptible locking makes the code more complicated in exchange for no benefit, change it back to using mutex_lock. Signed-off-by: Rainer Weikusat Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/unix/af_unix.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 45aebd9..a4631477 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2256,14 +2256,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ - err = mutex_lock_interruptible(&u->readlock); - if (unlikely(err)) { - /* recvmsg() in non blocking mode is supposed to return -EAGAIN - * sk_rcvtimeo is not honored by mutex_lock_interruptible() - */ - err = noblock ? -EAGAIN : -ERESTARTSYS; - goto out; - } + mutex_lock(&u->readlock); if (flags & MSG_PEEK) skip = sk_peek_offset(sk, flags); @@ -2307,12 +2300,12 @@ again: timeo = unix_stream_data_wait(sk, timeo, last, last_len); - if (signal_pending(current) || - mutex_lock_interruptible(&u->readlock)) { + if (signal_pending(current)) { err = sock_intr_errno(timeo); goto out; } + mutex_lock(&u->readlock); continue; unlock: unix_state_unlock(sk); -- cgit v1.1 From 07e100f984975cb0417a7d5e626d0409efbad478 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Dec 2015 13:53:10 -0800 Subject: tcp: restore fastopen with no data in SYN packet Yuchung tracked a regression caused by commit 57be5bdad759 ("ip: convert tcp_sendmsg() to iov_iter primitives") for TCP Fast Open. Some Fast Open users do not actually add any data in the SYN packet. Fixes: 57be5bdad759 ("ip: convert tcp_sendmsg() to iov_iter primitives") Reported-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Al Viro Acked-by: Yuchung Cheng Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cb7ca56..9bfc39f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3150,7 +3150,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0, copied; + int syn_loss = 0, space, err = 0; unsigned long last_syn_loss = 0; struct sk_buff *syn_data; @@ -3188,17 +3188,18 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); - copied = copy_from_iter(skb_put(syn_data, space), space, - &fo->data->msg_iter); - if (unlikely(!copied)) { - kfree_skb(syn_data); - goto fallback; - } - if (copied != space) { - skb_trim(syn_data, copied); - space = copied; + if (space) { + int copied = copy_from_iter(skb_put(syn_data, space), space, + &fo->data->msg_iter); + if (unlikely(!copied)) { + kfree_skb(syn_data); + goto fallback; + } + if (copied != space) { + skb_trim(syn_data, copied); + space = copied; + } } - /* No more data pending in inet_wait_for_connect() */ if (space == fo->size) fo->data = NULL; -- cgit v1.1 From ac5cc977991d2dce85fc734a6c71ddb33f6fe3c1 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 16 Dec 2015 23:39:04 -0800 Subject: net: check both type and protocol for tcp sockets Dmitry reported the following out-of-bounds access: Call Trace: [] __asan_report_load4_noabort+0x3e/0x40 mm/kasan/report.c:294 [] sock_setsockopt+0x1284/0x13d0 net/core/sock.c:880 [< inline >] SYSC_setsockopt net/socket.c:1746 [] SyS_setsockopt+0x1fe/0x240 net/socket.c:1729 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 This is because we mistake a raw socket for a tcp socket. We should check both sk->sk_type and sk->sk_protocol to ensure it is a tcp socket. Willem points out __skb_complete_tx_timestamp() needs to be fixed as well. Reported-by: Dmitry Vyukov Cc: Willem de Bruijn Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- net/core/sock.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5cc43d37..b2df375 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3643,7 +3643,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_info = tstype; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) serr->ee.ee_data -= sk->sk_tskey; } diff --git a/net/core/sock.c b/net/core/sock.c index 765be83..0d91f7d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -872,7 +872,8 @@ set_rcvbuf: if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { - if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) { if (sk->sk_state != TCP_ESTABLISHED) { ret = -EINVAL; break; -- cgit v1.1
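The pitfall closed by the last patch is easy to reproduce from userspace: a raw socket can be created with IPPROTO_TCP as its protocol, so the protocol field alone does not identify a TCP stream socket. A small illustrative program (the helper name is local to this example; creating the raw socket needs CAP_NET_RAW):

```c
#include <stdbool.h>
#include <stdio.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

/* A socket is a real TCP socket only if both the type and the protocol say so. */
static bool is_tcp_stream(int type, int protocol)
{
	return type == SOCK_STREAM && protocol == IPPROTO_TCP;
}

int main(void)
{
	/* Both sockets carry IPPROTO_TCP, but only one of them is a TCP stream socket. */
	int tcp = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	int raw = socket(AF_INET, SOCK_RAW, IPPROTO_TCP);	/* fails without CAP_NET_RAW */

	printf("SOCK_STREAM + IPPROTO_TCP -> %s\n",
	       is_tcp_stream(SOCK_STREAM, IPPROTO_TCP) ? "tcp" : "not tcp");
	printf("SOCK_RAW    + IPPROTO_TCP -> %s\n",
	       is_tcp_stream(SOCK_RAW, IPPROTO_TCP) ? "tcp" : "not tcp");

	if (tcp >= 0)
		close(tcp);
	if (raw >= 0)
		close(raw);
	return 0;
}
```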