From 45efccdbec3cd465c4776ed9ca1d7b1bba1b7e34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Oct 2015 18:02:01 -0700 Subject: netfilter: xt_TEE: fix NULL dereference iptables -I INPUT ... -j TEE --gateway 10.1.2.3 because --oif was not specified tee_tg_check() sets ->priv pointer to NULL in this case. Fixes: bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TEE.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index fd980aa..c5fdea1 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -31,8 +31,9 @@ static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; + int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif); + nf_dup_ipv4(skb, par->hooknum, &info->gw.in, oif); return XT_CONTINUE; } @@ -42,8 +43,9 @@ static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; + int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif); + nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, oif); return XT_CONTINUE; } -- cgit v1.1 From 94f9cd81436c85d8c3a318ba92e236ede73752fc Mon Sep 17 00:00:00 2001 From: Munehisa Kamata Date: Mon, 26 Oct 2015 19:10:52 -0700 Subject: netfilter: nf_nat_redirect: add missing NULL pointer check Commit 8b13eddfdf04cbfa561725cfc42d6868fe896f56 ("netfilter: refactor NAT redirect IPv4 to use it from nf_tables") has introduced a trivial logic change which can result in the following crash. BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 IP: [] nf_nat_redirect_ipv4+0x2d/0xa0 [nf_nat_redirect] PGD 3ba662067 PUD 3ba661067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: ipv6(E) xt_REDIRECT(E) nf_nat_redirect(E) xt_tcpudp(E) iptable_nat(E) nf_conntrack_ipv4(E) nf_defrag_ipv4(E) nf_nat_ipv4(E) nf_nat(E) nf_conntrack(E) ip_tables(E) x_tables(E) binfmt_misc(E) xfs(E) libcrc32c(E) evbug(E) evdev(E) psmouse(E) i2c_piix4(E) i2c_core(E) acpi_cpufreq(E) button(E) ext4(E) crc16(E) jbd2(E) mbcache(E) dm_mirror(E) dm_region_hash(E) dm_log(E) dm_mod(E) CPU: 0 PID: 2536 Comm: ip Tainted: G E 4.1.7-15.23.amzn1.x86_64 #1 Hardware name: Xen HVM domU, BIOS 4.2.amazon 05/06/2015 task: ffff8800eb438000 ti: ffff8803ba664000 task.ti: ffff8803ba664000 [...] Call Trace: [] redirect_tg4+0x15/0x20 [xt_REDIRECT] [] ipt_do_table+0x2b9/0x5e1 [ip_tables] [] iptable_nat_do_chain+0x25/0x30 [iptable_nat] [] nf_nat_ipv4_fn+0x13d/0x1f0 [nf_nat_ipv4] [] ? iptable_nat_ipv4_fn+0x20/0x20 [iptable_nat] [] nf_nat_ipv4_in+0x2e/0x90 [nf_nat_ipv4] [] iptable_nat_ipv4_in+0x15/0x20 [iptable_nat] [] nf_iterate+0x57/0x80 [] nf_hook_slow+0x97/0x100 [] ip_rcv+0x314/0x400 unsigned int nf_nat_redirect_ipv4(struct sk_buff *skb, ... { ... rcu_read_lock(); indev = __in_dev_get_rcu(skb->dev); if (indev != NULL) { ifa = indev->ifa_list; newdst = ifa->ifa_local; <--- } rcu_read_unlock(); ... } Before the commit, 'ifa' had been always checked before access. After the commit, however, it could be accessed even if it's NULL. Interestingly, this was once fixed in 2003. 
http://marc.info/?l=netfilter-devel&m=106668497403047&w=2 In addition to the original one, we have seen the crash when packets that need to be redirected somehow arrive on an interface which hasn't been yet fully configured. This change just reverts the logic to the old behavior to avoid the crash. Fixes: 8b13eddfdf04 ("netfilter: refactor NAT redirect IPv4 to use it from nf_tables") Signed-off-by: Munehisa Kamata Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_redirect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index 97b75f9..d438698 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -55,7 +55,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb, rcu_read_lock(); indev = __in_dev_get_rcu(skb->dev); - if (indev != NULL) { + if (indev && indev->ifa_list) { ifa = indev->ifa_list; newdst = ifa->ifa_local; } -- cgit v1.1 From dbc3617f4c1f9fcbe63612048cb9583fea1e11ab Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Oct 2015 13:20:25 +0100 Subject: netfilter: nfnetlink: don't probe module if it exists nfnetlink_bind request_module()s all the time as nfnetlink_get_subsys() shifts the argument by 8 to obtain the subsys id. So using type instead of type << 8 always returns NULL. Fixes: 03292745b02d11 ("netlink: add nlk->netlink_bind hook for module auto-loading") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 70277b1..27b93da 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -492,7 +492,7 @@ static int nfnetlink_bind(struct net *net, int group) type = nfnl_group2type[group]; rcu_read_lock(); - ss = nfnetlink_get_subsys(type); + ss = nfnetlink_get_subsys(type << 8); rcu_read_unlock(); if (!ss) request_module("nfnetlink-subsys-%d", type); -- cgit v1.1 From 4ee3bd4a8c7463cdef0b82ebc33fc94a9170a7e0 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 3 Nov 2015 14:32:57 -0800 Subject: ipv4: disable BH when changing ip local port range This fixes the following lockdep warning: [ INFO: inconsistent lock state ] 4.3.0-rc7+ #1197 Not tainted --------------------------------- inconsistent {IN-SOFTIRQ-R} -> {SOFTIRQ-ON-W} usage. sysctl/1019 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&net->ipv4.ip_local_ports.lock)->seqcount){+.+-..}, at: [] ipv4_local_port_range+0xb4/0x12a {IN-SOFTIRQ-R} state was registered at: [] __lock_acquire+0x2f6/0xdf0 [] lock_acquire+0x11c/0x1a4 [] inet_get_local_port_range+0x4e/0xae [] udp_flow_src_port.constprop.40+0x23/0x116 [] vxlan_xmit_one+0x219/0xa6a [] vxlan_xmit+0xa6b/0xaa5 [] dev_hard_start_xmit+0x2ae/0x465 [] __dev_queue_xmit+0x531/0x633 [] dev_queue_xmit_sk+0x13/0x15 [] neigh_resolve_output+0x12f/0x14d [] ip6_finish_output2+0x344/0x39f [] ip6_finish_output+0x88/0x8e [] ip6_output+0x91/0xe5 [] dst_output_sk+0x47/0x4c [] NF_HOOK_THRESH.constprop.30+0x38/0x82 [] mld_sendpack+0x189/0x266 [] mld_ifc_timer_expire+0x1ef/0x223 [] call_timer_fn+0xfb/0x28c [] run_timer_softirq+0x1c7/0x1f1 Fixes: b8f1a55639e6 ("udp: Add function to make source port for UDP tunnels") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/ipv4/sysctl_net_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 25300c5..a0bd7a5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,14 +48,14 @@ static void set_local_port_range(struct net *net, int range[2]) { bool same_parity = !((range[0] ^ range[1]) & 1); - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock_bh(&net->ipv4.ip_local_ports.lock); if (same_parity && !net->ipv4.ip_local_ports.warned) { net->ipv4.ip_local_ports.warned = true; pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n"); } net->ipv4.ip_local_ports.range[0] = range[0]; net->ipv4.ip_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ -- cgit v1.1 From 87e9f0315952b0dd8b5e51ba04beda03efc009d9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 3 Nov 2015 15:41:16 -0800 Subject: ipv4: fix a potential deadlock in mcast getsockopt() path Sasha reported the following lockdep warning: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sk_lock-AF_INET); lock(rtnl_mutex); lock(sk_lock-AF_INET); lock(rtnl_mutex); This is due to that for IP_MSFILTER and MCAST_MSFILTER, we take rtnl lock before the socket lock in setsockopt() path, but take the socket lock before rtnl lock in getsockopt() path. All the rest optnames are setsockopt()-only. Fix this by aligning the getsockopt() path with the setsockopt() path, so that all mcast socket path would be locked in the same order. Note, IPv6 part is different where rtnl lock is not held. Fixes: 54ff9ef36bdf ("ipv4, ipv6: kill ip_mc_{join, leave}_group and ipv6_sock_mc_{join, drop}") Reported-by: Sasha Levin Cc: Marcelo Ricardo Leitner Signed-off-by: Cong Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- net/ipv4/igmp.c | 12 ++++-------- net/ipv4/ip_sockglue.c | 45 ++++++++++++++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 64aaf35..6baf36e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2392,11 +2392,11 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct ip_sf_socklist *psl; struct net *net = sock_net(sk); + ASSERT_RTNL(); + if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); - imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = 0; @@ -2417,7 +2417,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, goto done; msf->imsf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); - rtnl_unlock(); if (!psl) { len = 0; count = 0; @@ -2436,7 +2435,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, return -EFAULT; return 0; done: - rtnl_unlock(); return err; } @@ -2450,6 +2448,8 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + ASSERT_RTNL(); + psin = (struct sockaddr_in *)&gsf->gf_group; if (psin->sin_family != AF_INET) return -EINVAL; @@ -2457,8 +2457,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); - err = -EADDRNOTAVAIL; for_each_pmc_rtnl(inet, pmc) { @@ -2470,7 +2468,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, goto done; gsf->gf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); - rtnl_unlock(); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; @@ -2490,7 +2487,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, } return 0; done: - rtnl_unlock(); return err; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c3c359a..5f73a7c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1251,11 +1251,22 @@ EXPORT_SYMBOL(compat_ip_setsockopt); * the _received_ ones. The set sets the _sent_ ones. 
*/ +static bool getsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IP_MSFILTER: + case MCAST_MSFILTER: + return true; + } + return false; +} + static int do_ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); - int val; + bool needs_rtnl = getsockopt_needs_rtnl(optname); + int val, err = 0; int len; if (level != SOL_IP) @@ -1269,6 +1280,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (len < 0) return -EINVAL; + if (needs_rtnl) + rtnl_lock(); lock_sock(sk); switch (optname) { @@ -1386,39 +1399,35 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MSFILTER: { struct ip_msfilter msf; - int err; if (len < IP_MSFILTER_SIZE(0)) { - release_sock(sk); - return -EINVAL; + err = -EINVAL; + goto out; } if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { - release_sock(sk); - return -EFAULT; + err = -EFAULT; + goto out; } err = ip_mc_msfget(sk, &msf, (struct ip_msfilter __user *)optval, optlen); - release_sock(sk); - return err; + goto out; } case MCAST_MSFILTER: { struct group_filter gsf; - int err; if (len < GROUP_FILTER_SIZE(0)) { - release_sock(sk); - return -EINVAL; + err = -EINVAL; + goto out; } if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { - release_sock(sk); - return -EFAULT; + err = -EFAULT; + goto out; } err = ip_mc_gsfget(sk, &gsf, (struct group_filter __user *)optval, optlen); - release_sock(sk); - return err; + goto out; } case IP_MULTICAST_ALL: val = inet->mc_all; @@ -1485,6 +1494,12 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; } return 0; + +out: + release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); + return err; } int ip_getsockopt(struct sock *sk, int level, -- cgit v1.1 From e1b8d903c6c3862160d2d5036806a94786c8fc4e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Nov 2015 15:59:28 -0800 Subject: net: Fix prefsrc lookups A bug report (https://bugzilla.kernel.org/show_bug.cgi?id=107071) noted that the follwoing ip command is failing with v4.3: $ ip route add 10.248.5.0/24 dev bond0.250 table vlan_250 src 10.248.5.154 RTNETLINK answers: Invalid argument 021dd3b8a142d changed the lookup of the given preferred source address to use the table id passed in, but this assumes the local entries are in the given table which is not necessarily true for non-VRF use cases. When validating the preferred source fallback to the local table on failure. Fixes: 021dd3b8a142d ("net: Add routes to the table associated with the device") Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/fib_semantics.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3e87447..d97268e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -923,14 +923,21 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || fib_prefsrc != cfg->fc_dst) { u32 tb_id = cfg->fc_table; + int rc; if (tb_id == RT_TABLE_MAIN) tb_id = RT_TABLE_LOCAL; - if (inet_addr_type_table(cfg->fc_nlinfo.nl_net, - fib_prefsrc, tb_id) != RTN_LOCAL) { - return false; + rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, + fib_prefsrc, tb_id); + + if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { + rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, + fib_prefsrc, RT_TABLE_LOCAL); } + + if (rc != RTN_LOCAL) + return false; } return true; } -- cgit v1.1 From e7868a85e1b26bcb2e71088841eec1d310a97ac9 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 3 Nov 2015 23:09:32 -0500 Subject: net/core: ensure features get disabled on new lower devs With moving netdev_sync_lower_features() after the .ndo_set_features calls, I neglected to verify that devices added *after* a flag had been disabled on an upper device were properly added with that flag disabled as well. This currently happens, because we exit __netdev_update_features() when we see dev->features == features for the upper dev. We can retain the optimization of leaving without calling .ndo_set_features with a bit of tweaking and a goto here. Fixes: fd867d51f889 ("net/core: generic support for disabling netdev features down stack") CC: "David S. Miller" CC: Eric Dumazet CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Jiri Pirko CC: Nikolay Aleksandrov CC: Michal Kubecek CC: Alexander Duyck CC: netdev@vger.kernel.org Reported-by: Nikolay Aleksandrov Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- net/core/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8ce3f74..ab9b8d0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6402,7 +6402,7 @@ int __netdev_update_features(struct net_device *dev) struct net_device *upper, *lower; netdev_features_t features; struct list_head *iter; - int err = 0; + int err = -1; ASSERT_RTNL(); @@ -6419,7 +6419,7 @@ int __netdev_update_features(struct net_device *dev) features = netdev_sync_upper_features(dev, upper, features); if (dev->features == features) - return 0; + goto sync_lower; netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", &dev->features, &features); @@ -6434,6 +6434,7 @@ int __netdev_update_features(struct net_device *dev) return -1; } +sync_lower: /* some features must be disabled on lower devices when disabled * on an upper device (think: bonding master or bridge) */ @@ -6443,7 +6444,7 @@ int __netdev_update_features(struct net_device *dev) if (!err) dev->features = features; - return 1; + return err < 0 ? 0 : 1; } /** -- cgit v1.1 From 7362945aea72fccc9df4f4e7643c971e0a8c33dc Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 4 Nov 2015 12:58:42 +0000 Subject: VSOCK: call sk->sk_data_ready() on accept() When a listen socket enqueues a connection for userspace to accept(), the sk->sk_data_ready() callback should be invoked. In-kernel socket users rely on this callback to detect when incoming connections are available. Currently the sk->sk_state_change() callback is invoked by vmci_transport.c. 
This happens to work for userspace applications since sk->sk_state_change = sock_def_wakeup() and sk->sk_data_ready = sock_def_readable() both wake up the accept() waiter. In-kernel socket users, on the other hand, fail to detect incoming connections. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 400d872..0a369bb 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1234,7 +1234,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, /* Callers of accept() will be be waiting on the listening socket, not * the pending socket. */ - listener->sk_state_change(listener); + listener->sk_data_ready(listener); return 0; -- cgit v1.1 From 2ab216a7a9ca89d77388ad3f22a31f752dec5897 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 1 Nov 2015 09:39:48 +0100 Subject: Bluetooth: Check for supported white list before issuing commands The white list commands might not be implemented if the controller does not actually support the white list. So check the supported commands first before issuing these commands. Not supporting the white list is the same as supporting a white list with zero size. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 83a6aac..62edbf1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -508,12 +508,6 @@ static void le_setup(struct hci_request *req) /* Read LE Supported States */ hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); - /* Read LE White List Size */ - hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); - - /* Clear LE White List */ - hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL); - /* LE-only controllers have LE implicitly enabled */ if (!lmp_bredr_capable(hdev)) hci_dev_set_flag(hdev, HCI_LE_ENABLED); @@ -832,6 +826,17 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); } + if (hdev->commands[26] & 0x40) { + /* Read LE White List Size */ + hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, + 0, NULL); + } + + if (hdev->commands[26] & 0x80) { + /* Clear LE White List */ + hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL); + } + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { /* Read LE Maximum Data Length */ hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); -- cgit v1.1 From 8a7889cc6e2dbbace114130f4efd9b77452069cd Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 2 Nov 2015 14:39:15 +0200 Subject: Bluetooth: L2CAP: Fix returning correct LE CoC response codes The core spec defines specific response codes for situations when the received CID is incorrect. Add the defines for these and return them as appropriate from the LE Connect Request handler function. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7c65ee2..8fd36f5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5437,9 +5437,16 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response_unlock; } + /* Check for valid dynamic CID range */ + if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { + result = L2CAP_CR_INVALID_SCID; + chan = NULL; + goto response_unlock; + } + /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { - result = L2CAP_CR_NO_MEM; + result = L2CAP_CR_SCID_IN_USE; chan = NULL; goto response_unlock; } -- cgit v1.1 From ab0c127fbb21c19adb34b78ba26b84748d0cd4de Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 2 Nov 2015 14:39:16 +0200 Subject: Bluetooth: L2CAP: Fix checked range when allocating new CID The 'dyn_end' value is also a valid CID so it should be included in the range of values checked. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8fd36f5..fdb7989 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -239,7 +239,7 @@ static u16 l2cap_alloc_cid(struct l2cap_conn *conn) else dyn_end = L2CAP_CID_DYN_END; - for (cid = L2CAP_CID_DYN_START; cid < dyn_end; cid++) { + for (cid = L2CAP_CID_DYN_START; cid <= dyn_end; cid++) { if (!__l2cap_get_chan_by_scid(conn, cid)) return cid; } -- cgit v1.1 From 40624183c202278e7e0edd01d1273efc87ddd1f2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 2 Nov 2015 14:39:17 +0200 Subject: Bluetooth: L2CAP: Add missing checks for invalid LE DCID When receiving a connect response we should make sure that the DCID is within the valid range and that we don't already have another channel allocated for the same DCID. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fdb7989..66e8b6e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5250,7 +5250,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, credits = __le16_to_cpu(rsp->credits); result = __le16_to_cpu(rsp->result); - if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23)) + if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 || + dcid < L2CAP_CID_DYN_START || + dcid > L2CAP_CID_LE_DYN_END)) return -EPROTO; BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", @@ -5270,6 +5272,11 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, switch (result) { case L2CAP_CR_SUCCESS: + if (__l2cap_get_chan_by_dcid(conn, dcid)) { + err = -EBADSLT; + break; + } + chan->ident = 0; chan->dcid = dcid; chan->omtu = mtu; -- cgit v1.1 From 2a189f9e57650e9f310ddf4aad75d66c1233a064 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 4 Nov 2015 14:47:53 +0100 Subject: ipv6: clean up dev_snmp6 proc entry when we fail to initialize inet6_dev In ipv6_add_dev, when addrconf_sysctl_register fails, we do not clean up the dev_snmp6 entry that we have already registered for this device. Call snmp6_unregister_dev in this case. 
Fixes: a317a2f19da7d ("ipv6: fail early when creating netdev named all or default") Reported-by: Dmitry Vyukov Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d72fa90..d84742f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -418,6 +418,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (err) { ipv6_mc_destroy_dev(ndev); del_timer(&ndev->regen_timer); + snmp6_unregister_dev(ndev); goto err_release; } /* protected by rtnl_lock */ -- cgit v1.1 From 805c4bc05705fb2b71ec970960b456eee9900953 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Nov 2015 11:07:13 -0800 Subject: tcp: fix req->saved_syn race For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix ireq->pktopts race"), we need to make sure we do not access req->saved_syn unless we own the request sock. This fixes races for listeners using TCP_SAVE_SYN option. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Ying Cai Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_minisocks.c | 3 --- net/ipv6/tcp_ipv6.c | 20 ++++++++++++-------- 3 files changed, 14 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1c2648b..59aff63 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1326,6 +1326,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + if (*own_req) + tcp_move_syn(newtp, req_unhash); return newsk; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3575dd1..ac6b196 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -551,9 +551,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rack.mstamp.v64 = 0; newtp->rack.advanced = 0; - newtp->saved_syn = req->saved_syn; - req->saved_syn = NULL; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ea2f4d5..c509e55 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1140,14 +1140,18 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * goto out; } *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - /* Clone pktoptions received with SYN, if we own the req */ - if (*own_req && ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); - consume_skb(ireq->pktopts); - ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); + if (*own_req) { + tcp_move_syn(newtp, req_unhash); + + /* Clone pktoptions received with SYN, if we own the req */ + if (ireq->pktopts) { + newnp->pktoptions = skb_clone(ireq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; + if (newnp->pktoptions) + skb_set_owner_r(newnp->pktoptions, newsk); + } } return newsk; -- cgit v1.1 From f668f5f7e0861087ef9d64d473a9c1399fc25471 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Nov 2015 11:34:06 -0800 Subject: ipv4: use sk_fullsock() in ipv4_conntrack_defrag() Before converting a 'socket pointer' into inet socket, use sk_fullsock() to detect 
timewait or request sockets. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_defrag_ipv4.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 0e5591c..6fb869f6 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -67,10 +67,9 @@ static unsigned int ipv4_conntrack_defrag(void *priv, const struct nf_hook_state *state) { struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(skb->sk); - if (sk && (sk->sk_family == PF_INET) && - inet->nodefrag) + if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) && + inet_sk(sk)->nodefrag) return NF_ACCEPT; #if IS_ENABLED(CONFIG_NF_CONNTRACK) -- cgit v1.1 From 30f7ea1c2b5f5fb7462c5ae44fe2e40cb2d6a474 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Thu, 5 Nov 2015 08:16:14 -0800 Subject: packet: race condition in packet_bind There is a race conditions between packet_notifier and packet_bind{_spkt}. It happens if packet_notifier(NETDEV_UNREGISTER) executes between the time packet_bind{_spkt} takes a reference on the new netdevice and the time packet_do_bind sets po->ifindex. In this case the notification can be missed. If this happens during a dev_change_net_namespace this can result in the netdevice to be moved to the new namespace while the packet_sock in the old namespace still holds a reference on it. When the netdevice is later deleted in the new namespace the deletion hangs since the packet_sock is not found in the new namespace' &net->packet.sklist. It can be reproduced with the script below. This patch makes packet_do_bind check again for the presence of the netdevice in the packet_sock's namespace after the synchronize_net in unregister_prot_hook. More in general it also uses the rcu lock for the duration of the bind to stop dev_change_net_namespace/rollback_registered_many from going past the synchronize_net following unlist_netdevice, so that no NETDEV_UNREGISTER notifications can happen on the new netdevice while the bind is executing. In order to do this some code from packet_bind{_spkt} is consolidated into packet_do_dev. import socket, os, time, sys proto=7 realDev='em1' vlanId=400 if len(sys.argv) > 1: vlanId=int(sys.argv[1]) dev='vlan%d' % vlanId os.system('taskset -p 0x10 %d' % os.getpid()) s = socket.socket(socket.PF_PACKET, socket.SOCK_RAW, proto) os.system('ip link add link %s name %s type vlan id %d' % (realDev, dev, vlanId)) os.system('ip netns add dummy') pid=os.fork() if pid == 0: # dev should be moved while packet_do_bind is in synchronize net os.system('taskset -p 0x20000 %d' % os.getpid()) os.system('ip link set %s netns dummy' % dev) os.system('ip netns exec dummy ip link del %s' % dev) s.close() sys.exit(0) time.sleep(.004) try: s.bind(('%s' % dev, proto+1)) except: print 'Could not bind socket' s.close() os.system('ip netns del dummy') sys.exit(0) os.waitpid(pid, 0) s.close() os.system('ip netns del dummy') sys.exit(0) Signed-off-by: Francesco Ruggeri Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 80 +++++++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 691660b..af399ca 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2911,22 +2911,40 @@ static int packet_release(struct socket *sock) * Attach a packet hook. */ -static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) +static int packet_do_bind(struct sock *sk, const char *name, int ifindex, + __be16 proto) { struct packet_sock *po = pkt_sk(sk); struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; + struct net_device *dev = NULL; + int ret = 0; + bool unlisted = false; - if (po->fanout) { - if (dev) - dev_put(dev); - + if (po->fanout) return -EINVAL; - } lock_sock(sk); spin_lock(&po->bind_lock); + rcu_read_lock(); + + if (name) { + dev = dev_get_by_name_rcu(sock_net(sk), name); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } else if (ifindex) { + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } + + if (dev) + dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; @@ -2934,14 +2952,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) need_rehook = proto_curr != proto || dev_curr != dev; if (need_rehook) { - unregister_prot_hook(sk, true); + if (po->running) { + rcu_read_unlock(); + __unregister_prot_hook(sk, true); + rcu_read_lock(); + dev_curr = po->prot_hook.dev; + if (dev) + unlisted = !dev_get_by_index_rcu(sock_net(sk), + dev->ifindex); + } po->num = proto; po->prot_hook.type = proto; - po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; - packet_cached_dev_assign(po, dev); + if (unlikely(unlisted)) { + dev_put(dev); + po->prot_hook.dev = NULL; + po->ifindex = -1; + packet_cached_dev_reset(po); + } else { + po->prot_hook.dev = dev; + po->ifindex = dev ? 
dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + } } if (dev_curr) dev_put(dev_curr); @@ -2949,7 +2982,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) if (proto == 0 || !need_rehook) goto out_unlock; - if (!dev || (dev->flags & IFF_UP)) { + if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; @@ -2958,9 +2991,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) } out_unlock: + rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); - return 0; + return ret; } /* @@ -2972,8 +3006,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; char name[15]; - struct net_device *dev; - int err = -ENODEV; /* * Check legality @@ -2983,19 +3015,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; strlcpy(name, uaddr->sa_data, sizeof(name)); - dev = dev_get_by_name(sock_net(sk), name); - if (dev) - err = packet_do_bind(sk, dev, pkt_sk(sk)->num); - return err; + return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; - struct net_device *dev = NULL; - int err; - /* * Check legality @@ -3006,16 +3032,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_family != AF_PACKET) return -EINVAL; - if (sll->sll_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); - if (dev == NULL) - goto out; - } - err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); - -out: - return err; + return packet_do_bind(sk, NULL, sll->sll_ifindex, + sll->sll_protocol ? : pkt_sk(sk)->num); } static struct proto packet_proto = { -- cgit v1.1 From 49a496c97d035f2eab7cef4894dd46202184fc81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Nov 2015 12:50:19 -0800 Subject: tcp: use correct req pointer in tcp_move_syn() calls I mistakenly took wrong request sock pointer when calling tcp_move_syn() @req_unhash is either a copy of @req, or a NULL value for FastOpen connexions (as we do not expect to unhash the temporary request sock from ehash table) Fixes: 805c4bc05705 ("tcp: fix req->saved_syn race") Signed-off-by: Eric Dumazet Cc: Ying Cai Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59aff63..950e28c0c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1327,7 +1327,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); if (*own_req) - tcp_move_syn(newtp, req_unhash); + tcp_move_syn(newtp, req); return newsk; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c509e55..5baa8e7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1141,7 +1141,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * } *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); if (*own_req) { - tcp_move_syn(newtp, req_unhash); + tcp_move_syn(newtp, req); /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { -- cgit v1.1 From fdd723e2a856b6132d5e7beb2a2d3ec1e6a6297f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2015 10:54:09 -0800 Subject: netfilter: xt_owner: use skb_to_full_sk() helper SYNACK packets might be attached to a request socket, xt_owner wants to gte the listener in this case. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netfilter/xt_owner.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index ca2e577..1302b47 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -33,8 +34,9 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; + struct sock *sk = skb_to_full_sk(skb); - if (skb->sk == NULL || skb->sk->sk_socket == NULL) + if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; else if (info->match & info->invert & XT_OWNER_SOCKET) /* @@ -43,7 +45,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = skb->sk->sk_socket->file; + filp = sk->sk_socket->file; if (filp == NULL) return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; -- cgit v1.1 From 743b2a66744635b6d91e3d9da1fff29ad5ceb456 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2015 10:54:10 -0800 Subject: sched: cls_flow: use skb_to_full_sk() helper SYNACK packets might be attached to request sockets. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/sched/cls_flow.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 536838b..fbfec6a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -197,8 +198,11 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb) static u32 flow_get_skuid(const struct sk_buff *skb) { - if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { - kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid; + struct sock *sk = skb_to_full_sk(skb); + + if (sk && sk->sk_socket && sk->sk_socket->file) { + kuid_t skuid = sk->sk_socket->file->f_cred->fsuid; + return from_kuid(&init_user_ns, skuid); } return 0; @@ -206,8 +210,11 @@ static u32 flow_get_skuid(const struct sk_buff *skb) static u32 flow_get_skgid(const struct sk_buff *skb) { - if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { - kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid; + struct sock *sk = skb_to_full_sk(skb); + + if (sk && sk->sk_socket && sk->sk_socket->file) { + kgid_t skgid = sk->sk_socket->file->f_cred->fsgid; + return from_kgid(&init_user_ns, skgid); } return 0; -- cgit v1.1 From 02a56c81cf33dea892da1f8a5231b0f7d7e714fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2015 10:54:11 -0800 Subject: net_sched: em_meta: use skb_to_full_sk() helper SYNACK packets might be attached to request sockets. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/em_meta.c | 138 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 92 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index b5294ce..f2aabc0 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -343,119 +343,145 @@ META_COLLECTOR(int_sk_refcnt) META_COLLECTOR(int_sk_rcvbuf) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_rcvbuf; + dst->value = sk->sk_rcvbuf; } META_COLLECTOR(int_sk_shutdown) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_shutdown; + dst->value = sk->sk_shutdown; } META_COLLECTOR(int_sk_proto) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_protocol; + dst->value = sk->sk_protocol; } META_COLLECTOR(int_sk_type) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_type; + dst->value = sk->sk_type; } META_COLLECTOR(int_sk_rmem_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = sk_rmem_alloc_get(skb->sk); + dst->value = sk_rmem_alloc_get(sk); } META_COLLECTOR(int_sk_wmem_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = sk_wmem_alloc_get(skb->sk); + dst->value = sk_wmem_alloc_get(sk); } META_COLLECTOR(int_sk_omem_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = atomic_read(&skb->sk->sk_omem_alloc); + dst->value = atomic_read(&sk->sk_omem_alloc); } 
META_COLLECTOR(int_sk_rcv_qlen) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_receive_queue.qlen; + dst->value = sk->sk_receive_queue.qlen; } META_COLLECTOR(int_sk_snd_qlen) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_write_queue.qlen; + dst->value = sk->sk_write_queue.qlen; } META_COLLECTOR(int_sk_wmem_queued) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_wmem_queued; + dst->value = sk->sk_wmem_queued; } META_COLLECTOR(int_sk_fwd_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_forward_alloc; + dst->value = sk->sk_forward_alloc; } META_COLLECTOR(int_sk_sndbuf) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_sndbuf; + dst->value = sk->sk_sndbuf; } META_COLLECTOR(int_sk_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = (__force int) skb->sk->sk_allocation; + dst->value = (__force int) sk->sk_allocation; } META_COLLECTOR(int_sk_hash) @@ -469,92 +495,112 @@ META_COLLECTOR(int_sk_hash) META_COLLECTOR(int_sk_lingertime) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_lingertime / HZ; + dst->value = sk->sk_lingertime / HZ; } META_COLLECTOR(int_sk_err_qlen) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_error_queue.qlen; + dst->value = sk->sk_error_queue.qlen; } META_COLLECTOR(int_sk_ack_bl) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_ack_backlog; + dst->value = sk->sk_ack_backlog; } META_COLLECTOR(int_sk_max_ack_bl) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_max_ack_backlog; + dst->value = sk->sk_max_ack_backlog; } META_COLLECTOR(int_sk_prio) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_priority; + dst->value = sk->sk_priority; } META_COLLECTOR(int_sk_rcvlowat) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_rcvlowat; + dst->value = sk->sk_rcvlowat; } META_COLLECTOR(int_sk_rcvtimeo) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_rcvtimeo / HZ; + dst->value = sk->sk_rcvtimeo / HZ; } META_COLLECTOR(int_sk_sndtimeo) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_sndtimeo / HZ; + dst->value = sk->sk_sndtimeo / HZ; } META_COLLECTOR(int_sk_sendmsg_off) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_frag.offset; + dst->value = sk->sk_frag.offset; } META_COLLECTOR(int_sk_write_pend) { - if (skip_nonlocal(skb)) { + const struct 
sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_write_pending; + dst->value = sk->sk_write_pending; } /************************************************************************** -- cgit v1.1 From 3aed822591556f93169ff532fda8c71b9b596de5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2015 10:54:12 -0800 Subject: netfilter: nft_meta: use skb_to_full_sk() helper SYNACK packets might be attached to request sockets. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netfilter/nft_meta.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e4ad2c2..9dfaf4d 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -31,6 +31,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; const struct net_device *in = pkt->in, *out = pkt->out; + struct sock *sk; u32 *dest = ®s->data[priv->dreg]; switch (priv->key) { @@ -86,33 +87,35 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(u16 *)dest = out->type; break; case NFT_META_SKUID: - if (skb->sk == NULL || !sk_fullsock(skb->sk)) + sk = skb_to_full_sk(skb); + if (!sk || !sk_fullsock(sk)) goto err; - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket == NULL || - skb->sk->sk_socket->file == NULL) { - read_unlock_bh(&skb->sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket == NULL || + sk->sk_socket->file == NULL) { + read_unlock_bh(&sk->sk_callback_lock); goto err; } *dest = from_kuid_munged(&init_user_ns, - skb->sk->sk_socket->file->f_cred->fsuid); - read_unlock_bh(&skb->sk->sk_callback_lock); + sk->sk_socket->file->f_cred->fsuid); + read_unlock_bh(&sk->sk_callback_lock); break; case NFT_META_SKGID: - if (skb->sk == NULL || !sk_fullsock(skb->sk)) + sk = skb_to_full_sk(skb); + if (!sk || !sk_fullsock(sk)) goto err; - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket == NULL || - skb->sk->sk_socket->file == NULL) { - read_unlock_bh(&skb->sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket == NULL || + sk->sk_socket->file == NULL) { + read_unlock_bh(&sk->sk_callback_lock); goto err; } *dest = from_kgid_munged(&init_user_ns, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); + sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&sk->sk_callback_lock); break; #ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: { @@ -168,9 +171,10 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; #ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: - if (skb->sk == NULL || !sk_fullsock(skb->sk)) + sk = skb_to_full_sk(skb); + if (!sk || !sk_fullsock(sk)) goto err; - *dest = skb->sk->sk_classid; + *dest = sk->sk_classid; break; #endif default: -- cgit v1.1 From d69bbf88c8d0b367cf3e3a052f6daadf630ee566 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Nov 2015 17:51:23 -0800 Subject: net: fix a race in dst_release() Only cpu seeing dst refcount going to 0 can safely dereference dst->flags. Otherwise an other cpu might already have freed the dst. Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst") Reported-by: Greg Thelen Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 2a18180..e6dc772 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -306,7 +306,7 @@ void dst_release(struct dst_entry *dst) if (unlikely(newrefcnt < 0)) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) call_rcu(&dst->rcu_head, dst_destroy_rcu); } } -- cgit v1.1 From 8a921265e2cd31e61a0c2eda582af54c5bfef897 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 10 Nov 2015 06:15:32 -0500 Subject: Revert "bridge: Allow forward delay to be cfgd when STP enabled" This reverts commit 34c2d9fb0498c066afbe610b15e18995fd8be792. There are 2 reasons for this revert: 1) The commit in question doesn't do what it says it does. The description reads: "Allow bridge forward delay to be configured when Spanning Tree is enabled." This was already the case before the commit was made. What the commit actually do was disallow invalid values or 'forward_delay' when STP was turned off. 2) The above change was actually a change in the user observed behavior and broke things like libvirt and other network configs that set 'forward_delay' to 0 without enabling STP. The value of 0 is actually used when STP is turned off to immediately mark the bridge as forwarding. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_stp.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 80c34d7..f7e8dee 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -600,12 +600,17 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t) int br_set_forward_delay(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); - - if (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY) - return -ERANGE; + int err = -ERANGE; spin_lock_bh(&br->lock); + if (br->stp_enabled != BR_NO_STP && + (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) + goto unlock; + __br_set_forward_delay(br, t); + err = 0; + +unlock: spin_unlock_bh(&br->lock); - return 0; + return err; } -- cgit v1.1
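For context on the user-visible breakage that this last revert addresses, here is a minimal reproducer-style sketch in the same spirit as the packet_bind script above. It is illustrative only: the bridge name br0 is hypothetical, the commands must run as root, and it relies on the standard bridge sysfs attributes (stp_state, forward_delay) plus iproute2.

import os

# Hypothetical bridge name; STP left disabled and forward_delay set to 0 --
# the libvirt-style configuration described in the commit message.
os.system('ip link add name br0 type bridge')
os.system('echo 0 > /sys/class/net/br0/bridge/stp_state')
# With the reverted commit applied this write failed with ERANGE; with the
# revert in place a forward_delay of 0 is accepted again when STP is off,
# so the bridge is marked forwarding immediately.
os.system('echo 0 > /sys/class/net/br0/bridge/forward_delay')
os.system('ip link del br0')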