From 9c5a18a31b321f120efda412281bb9f610f84aa0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Jun 2015 21:35:44 +0200 Subject: cfg80211: wext: clear sinfo struct before calling driver Until recently, mac80211 overwrote all the statistics it could provide when getting called, but it now relies on the struct having been zeroed by the caller. This was always the case in nl80211, but wext used a static struct which could even cause values from one device leak to another. Using a static struct is OK (as even documented in a comment) since the whole usage of this function and its return value is always locked under RTNL. Not clearing the struct for calling the driver has always been wrong though, since drivers were free to only fill values they could report, so calling this for one device and then for another would always have leaked values from one to the other. Fix this by initializing the structure in question before the driver method call. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=99691 Cc: stable@vger.kernel.org Reported-by: Gerrit Renker Reported-by: Alexander Kaltsas Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/wireless/wext-compat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index fff1bef..fd68283 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1333,6 +1333,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); wdev_unlock(wdev); + memset(&sinfo, 0, sizeof(sinfo)); + if (rdev_get_station(rdev, dev, bssid, &sinfo)) return NULL; -- cgit v1.1 From 1b0ccfe54a6abd1bc4d7bdd1c33e61e2c58f72c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 Jun 2015 15:29:31 -0700 Subject: Revert "ipv6: Fix protocol resubmission" This reverts commit 0243508edd317ff1fa63b495643a7c192fbfcd92. It introduces new regressions. Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 41a73da..f2e464e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -212,13 +212,13 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) */ rcu_read_lock(); +resubmit: idev = ip6_dst_idev(skb_dst(skb)); if (!pskb_pull(skb, skb_transport_offset(skb))) goto discard; nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; -resubmit: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { @@ -246,12 +246,10 @@ resubmit: goto discard; ret = ipprot->handler(skb); - if (ret < 0) { - nexthdr = -ret; + if (ret > 0) goto resubmit; - } else if (ret == 0) { + else if (ret == 0) IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); - } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { -- cgit v1.1 From b3be5e3e726a6cc849f40c70c3ae525e4146e9df Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 9 Jun 2015 17:27:12 +0200 Subject: tipc: disconnect socket directly after probe failure If the TIPC connection timer expires in a probing state, a self abort message is supposed to be generated and delivered to the local socket. This is currently broken, and the abort message is actually sent out to the peer node with invalid addressing information. This will cause the link to enter a constant retransmission state and eventually reset. We fix this by removing the self-abort message creation and tear down connection immediately instead. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9074b5c..f485600 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2142,11 +2142,17 @@ static void tipc_sk_timeout(unsigned long data) peer_node = tsk_peer_node(tsk); if (tsk->probing_state == TIPC_CONN_PROBING) { - /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, 0, - own_node, peer_node, tsk->portid, - peer_port, TIPC_ERR_NO_PORT); + if (!sock_owned_by_user(sk)) { + sk->sk_socket->state = SS_DISCONNECTING; + tsk->connected = 0; + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + } else { + /* Try again later */ + sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); + } + } else { skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, own_node, -- cgit v1.1 From 1a040eaca1a22f8da8285ceda6b5e4a2cb704867 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 9 Jun 2015 10:23:57 -0700 Subject: bridge: fix multicast router rlist endless loop Since the addition of sysfs multicast router support if one set multicast_router to "2" more than once, then the port would be added to the hlist every time and could end up linking to itself and thus causing an endless loop for rlist walkers. So to reproduce just do: echo 2 > multicast_router; echo 2 > multicast_router; in a bridge port and let some igmp traffic flow, for me it hangs up in br_multicast_flood(). Fix this by adding a check in br_multicast_add_router() if the port is already linked. The reason this didn't happen before the addition of multicast_router sysfs entries is because there's a !hlist_unhashed check that prevents it. Signed-off-by: Nikolay Aleksandrov Fixes: 0909e11758bd ("bridge: Add multicast_router sysfs entries") Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 22fd041..ff667e1 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1167,6 +1167,9 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *p; struct hlist_node *slot = NULL; + if (!hlist_unhashed(&port->rlist)) + return; + hlist_for_each_entry(p, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; @@ -1194,12 +1197,8 @@ static void br_multicast_mark_router(struct net_bridge *br, if (port->multicast_router != 1) return; - if (!hlist_unhashed(&port->rlist)) - goto timer; - br_multicast_add_router(br, port); -timer: mod_timer(&port->multicast_router_timer, now + br->multicast_querier_interval); } -- cgit v1.1 From 5d753610277862fd8050901f38b6571b9500cdb6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 10 Jun 2015 21:02:04 -0400 Subject: net, swap: Remove a warning and clarify why sk_mem_reclaim is required when deactivating swap Jeff Layton reported the following; [ 74.232485] ------------[ cut here ]------------ [ 74.233354] WARNING: CPU: 2 PID: 754 at net/core/sock.c:364 sk_clear_memalloc+0x51/0x80() [ 74.234790] Modules linked in: cts rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache xfs libcrc32c snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device nfsd snd_pcm snd_timer snd e1000 ppdev parport_pc joydev parport pvpanic soundcore floppy serio_raw i2c_piix4 pcspkr nfs_acl lockd virtio_balloon acpi_cpufreq auth_rpcgss grace sunrpc qxl drm_kms_helper ttm drm virtio_console virtio_blk virtio_pci ata_generic virtio_ring pata_acpi virtio [ 74.243599] CPU: 2 PID: 754 Comm: swapoff Not tainted 4.1.0-rc6+ #5 [ 74.244635] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 74.245546] 0000000000000000 0000000079e69e31 ffff8800d066bde8 ffffffff8179263d [ 74.246786] 0000000000000000 0000000000000000 ffff8800d066be28 ffffffff8109e6fa [ 74.248175] 0000000000000000 ffff880118d48000 ffff8800d58f5c08 ffff880036e380a8 [ 74.249483] Call Trace: [ 74.249872] [] dump_stack+0x45/0x57 [ 74.250703] [] warn_slowpath_common+0x8a/0xc0 [ 74.251655] [] warn_slowpath_null+0x1a/0x20 [ 74.252585] [] sk_clear_memalloc+0x51/0x80 [ 74.253519] [] xs_disable_swap+0x42/0x80 [sunrpc] [ 74.254537] [] rpc_clnt_swap_deactivate+0x7e/0xc0 [sunrpc] [ 74.255610] [] nfs_swap_deactivate+0x27/0x30 [nfs] [ 74.256582] [] destroy_swap_extents+0x74/0x80 [ 74.257496] [] SyS_swapoff+0x222/0x5c0 [ 74.258318] [] ? syscall_trace_leave+0xc7/0x140 [ 74.259253] [] system_call_fastpath+0x12/0x71 [ 74.260158] ---[ end trace 2530722966429f10 ]--- The warning in question was unnecessary but with Jeff's series the rules are also clearer. This patch removes the warning and updates the comment to explain why sk_mem_reclaim() may still be called. [jlayton: remove if (sk->sk_forward_alloc) conditional. As Leon points out that it's not needed.] Cc: Leon Romanovsky Signed-off-by: Mel Gorman Signed-off-by: Jeff Layton Signed-off-by: David S. Miller --- net/core/sock.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 292f422..469d603 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -354,15 +354,12 @@ void sk_clear_memalloc(struct sock *sk) /* * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward - * progress of swapping. However, if SOCK_MEMALLOC is cleared while - * it has rmem allocations there is a risk that the user of the - * socket cannot make forward progress due to exceeding the rmem - * limits. By rights, sk_clear_memalloc() should only be called - * on sockets being torn down but warn and reset the accounting if - * that assumption breaks. + * progress of swapping. SOCK_MEMALLOC may be cleared while + * it has rmem allocations due to the last swapfile being deactivated + * but there is a risk that the socket is unusable due to exceeding + * the rmem limits. Reclaim the reserves and obey rmem limits again. */ - if (WARN_ON(sk->sk_forward_alloc)) - sk_mem_reclaim(sk); + sk_mem_reclaim(sk); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); -- cgit v1.1 From 0fae3bf018d97b210051c8797a49d66d31071847 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 11 Jun 2015 19:58:26 +0100 Subject: mpls: handle device renames for per-device sysctls If a device is renamed and the original name is subsequently reused for a new device, the following warning is generated: sysctl duplicate entry: /net/mpls/conf/veth0//input CPU: 3 PID: 1379 Comm: ip Not tainted 4.1.0-rc4+ #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 0000000000000000 0000000000000000 ffffffff81566aaf 0000000000000000 ffffffff81236279 ffff88002f7d7f00 0000000000000000 ffff88000db336d8 ffff88000db33698 0000000000000005 ffff88002e046000 ffff8800168c9280 Call Trace: [] ? dump_stack+0x40/0x50 [] ? __register_sysctl_table+0x289/0x5a0 [] ? mpls_dev_notify+0x1ff/0x300 [mpls_router] [] ? notifier_call_chain+0x4f/0x70 [] ? register_netdevice+0x2b2/0x480 [] ? veth_newlink+0x178/0x2d3 [veth] [] ? rtnl_newlink+0x73c/0x8e0 [] ? rtnl_newlink+0x16a/0x8e0 [] ? __kmalloc_reserve.isra.30+0x32/0x90 [] ? rtnetlink_rcv_msg+0x8d/0x250 [] ? __alloc_skb+0x47/0x1f0 [] ? __netlink_lookup+0xab/0xe0 [] ? rtnetlink_rcv+0x30/0x30 [] ? netlink_rcv_skb+0xb0/0xd0 [] ? rtnetlink_rcv+0x24/0x30 [] ? netlink_unicast+0x107/0x1a0 [] ? netlink_sendmsg+0x50e/0x630 [] ? sock_sendmsg+0x3c/0x50 [] ? ___sys_sendmsg+0x27b/0x290 [] ? mem_cgroup_try_charge+0x88/0x110 [] ? mem_cgroup_commit_charge+0x56/0xa0 [] ? do_filp_open+0x30/0xa0 [] ? __sys_sendmsg+0x3e/0x80 [] ? system_call_fastpath+0x16/0x75 Fix this by unregistering the previous sysctl table (registered for the path containing the original device name) and re-registering the table for the path containing the new device name. Fixes: 37bde79979c3 ("mpls: Per-device enabling of packet input") Reported-by: Scott Feldman Signed-off-by: Robert Shearman Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bff427f..1f93a59 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -564,6 +564,17 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_UNREGISTER: mpls_ifdown(dev); break; + case NETDEV_CHANGENAME: + mdev = mpls_dev_get(dev); + if (mdev) { + int err; + + mpls_dev_sysctl_unregister(mdev); + err = mpls_dev_sysctl_register(dev, mdev); + if (err) + return notifier_from_errno(err); + } + break; } return NOTIFY_OK; } -- cgit v1.1 From fb05e7a89f500cfc06ae277bdc911b281928995d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 11 Jun 2015 16:50:48 -0700 Subject: net: don't wait for order-3 page allocation We saw excessive direct memory compaction triggered by skb_page_frag_refill. This causes performance issues and add latency. Commit 5640f7685831e0 introduces the order-3 allocation. According to the changelog, the order-3 allocation isn't a must-have but to improve performance. But direct memory compaction has high overhead. The benefit of order-3 allocation can't compensate the overhead of direct memory compaction. This patch makes the order-3 page allocation atomic. If there is no memory pressure and memory isn't fragmented, the alloction will still success, so we don't sacrifice the order-3 benefit here. If the atomic allocation fails, direct memory compaction will not be triggered, skb_page_frag_refill will fallback to order-0 immediately, hence the direct memory compaction overhead is avoided. In the allocation failure case, kswapd is waken up and doing compaction, so chances are allocation could success next time. alloc_skb_with_frags is the same. The mellanox driver does similar thing, if this is accepted, we must fix the driver too. V3: fix the same issue in alloc_skb_with_frags as pointed out by Eric V2: make the changelog clearer Cc: Eric Dumazet Cc: Chris Mason Cc: Debabrata Banerjee Signed-off-by: Shaohua Li Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- net/core/sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3cfff2a..41ec022 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4398,7 +4398,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, while (order) { if (npages >= 1 << order) { - page = alloc_pages(gfp_mask | + page = alloc_pages((gfp_mask & ~__GFP_WAIT) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, diff --git a/net/core/sock.c b/net/core/sock.c index 469d603..dc30dc5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1880,7 +1880,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) pfrag->offset = 0; if (SKB_FRAG_PAGE_ORDER) { - pfrag->page = alloc_pages(gfp | __GFP_COMP | + pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { -- cgit v1.1 From ae36806a622aea5ac79f279cfccc82144967b6e7 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 11 Jun 2015 14:49:46 -0300 Subject: sctp: allow authenticating DATA chunks that are bundled with COOKIE_ECHO Currently, we can ask to authenticate DATA chunks and we can send DATA chunks on the same packet as COOKIE_ECHO, but if you try to combine both, the DATA chunk will be sent unauthenticated and peer won't accept it, leading to a communication failure. This happens because even though the data was queued after it was requested to authenticate DATA chunks, it was also queued before we could know that remote peer can handle authenticating, so sctp_auth_send_cid() returns false. The fix is whenever we set up an active key, re-check send queue for chunks that now should be authenticated. As a result, such packet will now contain COOKIE_ECHO + AUTH + DATA chunks, in that order. Reported-by: Liu Wei Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/auth.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fb7976a..4f15b7d 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -381,13 +381,14 @@ nomem: } -/* Public interface to creat the association shared key. +/* Public interface to create the association shared key. * See code above for the algorithm. */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; + struct sctp_chunk *chunk; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. @@ -410,6 +411,14 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) sctp_auth_key_put(asoc->asoc_shared_key); asoc->asoc_shared_key = secret; + /* Update send queue in case any chunk already in there now + * needs authenticating + */ + list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) { + if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) + chunk->auth = 1; + } + return 0; } -- cgit v1.1