From 7c258670ce655659a4c8d1013676c55e74d09ee7 Mon Sep 17 00:00:00 2001 From: Tedd Ho-Jeong An Date: Thu, 25 Jun 2015 23:27:55 -0700 Subject: Bluetooth: hidp: Initialize list header of hidp session user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When new hidp session is created, list header in l2cap_user is not initialized and this causes list_empty() to fail in l2cap_register_user() even if l2cap_user list is empty. Signed-off-by: Tedd Ho-Jeong An Tested-by: Jörg Otte Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 9070dfd..f1a117f 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -915,6 +915,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr, session->conn = l2cap_conn_get(conn); session->user.probe = hidp_session_probe; session->user.remove = hidp_session_remove; + INIT_LIST_HEAD(&session->user.list); session->ctrl_sock = ctrl_sock; session->intr_sock = intr_sock; skb_queue_head_init(&session->ctrl_transmit); -- cgit v1.1 From ab944c83f6690df0c7f67e6bcc29fc0c82ef6021 Mon Sep 17 00:00:00 2001 From: Tedd Ho-Jeong An Date: Tue, 30 Jun 2015 11:43:40 -0700 Subject: Bluetooth: Reinitialize the list after deletion for session user list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user->list is deleted with list_del(), it doesn't initialize the entry which can cause the issue with list_empty(). According to the comment from the list.h, list_empty() returns false even if the list is empty and put the entry in an undefined state. /** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. */ Because of this behavior, list_empty() returns false even if list is empty when the device is reconnected. So, user->list needs to be re-initialized after list_del(). list.h already have a macro list_del_init() which deletes the entry and initailze it again. Signed-off-by: Tedd Ho-Jeong An Tested-by: Jörg Otte Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 51594fb..45fffa4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1634,7 +1634,7 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) if (list_empty(&user->list)) goto out_unlock; - list_del(&user->list); + list_del_init(&user->list); user->remove(conn, user); out_unlock: @@ -1648,7 +1648,7 @@ static void l2cap_unregister_all_users(struct l2cap_conn *conn) while (!list_empty(&conn->users)) { user = list_first_entry(&conn->users, struct l2cap_user, list); - list_del(&user->list); + list_del_init(&user->list); user->remove(conn, user); } } -- cgit v1.1 From f307170d6e591a48529425b1ed6ca835790995a9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Jun 2015 17:23:37 -0500 Subject: netfilter: nf_queue: Don't recompute the hook_list head If someone sends packets from one of the netdevice ingress hooks to the a userspace queue, and then userspace later accepts the packet, the netfilter code can enter an infinite loop as the list head will never be found. Pass in the saved list_head to avoid this. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index cd60d39..8a8b2ab 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -213,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook], + verdict = nf_iterate(entry->state.hook_list, skb, &entry->state, &elem); } -- cgit v1.1 From a1bc1b356a9d21bf29bc7c873718b5cacdf119b4 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 20 Jun 2015 00:17:50 +0200 Subject: netfilter: bridge: fix CONFIG_NF_DEFRAG_IPV4/6 related warnings/errors br_nf_ip_fragment() is not needed when neither CONFIG_NF_DEFRAG_IPV4 nor CONFIG_NF_DEFRAG_IPV6 is set. struct brnf_frag_data must be available if either CONFIG_NF_DEFRAG_IPV4 or CONFIG_NF_DEFRAG_IPV6 is set. Fixes: efb6de9b4ba0 ("netfilter: bridge: forward IPv6 fragmented packets") Reported-by: kbuild test robot Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index d89f4fa..8a394bd 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -111,7 +111,7 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct brnf_frag_data { char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; @@ -694,6 +694,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) } #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)) { @@ -712,6 +713,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, return ip_do_fragment(sk, skb, output); } +#endif static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { -- cgit v1.1 From 3bd229976f64bea64c60803f9fc8d9f0059ba2f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2015 22:21:00 +0200 Subject: netfilter: arptables: use percpu jumpstack commit 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication") Unlike ip and ip6tables, arp tables were never converted to use the percpu jump stack. It still uses the rule blob to store return address, which isn't safe anymore since we now share this blob among all processors. Because there is no TEE support for arptables, we don't need to cope with reentrancy, so we can use loocal variable to hold stack offset. Fixes: 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 95c9b6e..92305a1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -254,9 +254,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; - struct arpt_entry *e, *back; + struct arpt_entry *e, **jumpstack; const char *indev, *outdev; const void *table_base; + unsigned int cpu, stackidx = 0; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; @@ -270,15 +271,16 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + cpu = smp_processor_id(); /* * Ensure we load private-> members after we've fetched the base * pointer. */ smp_read_barrier_depends(); table_base = private->entries; + jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; e = get_entry(table_base, private->hook_entry[hook]); - back = get_entry(table_base, private->underflow[hook]); acpar.in = state->in; acpar.out = state->out; @@ -312,18 +314,23 @@ unsigned int arpt_do_table(struct sk_buff *skb, verdict = (unsigned int)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (stackidx == 0) { + e = get_entry(table_base, + private->underflow[hook]); + } else { + e = jumpstack[--stackidx]; + e = arpt_next_entry(e); + } continue; } if (table_base + v != arpt_next_entry(e)) { - /* Save old back ptr in next entry */ - struct arpt_entry *next = arpt_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (stackidx >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[stackidx++] = e; } e = get_entry(table_base, v); -- cgit v1.1 From dd302b59bde0149c20df7278c0d36c765e66afbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2015 22:27:51 +0200 Subject: netfilter: bridge: don't leak skb in error paths br_nf_dev_queue_xmit must free skb in its error path. NF_DROP is misleading -- its an okfn, not a netfilter hook. Fixes: 462fb2af9788a ("bridge : Sanitize skb before it enters the IP stack") Fixes: efb6de9b4ba00 ("netfilter: bridge: forward IPv6 fragmented packets") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8a394bd..c8b9bcf 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -744,7 +744,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv4(skb)) - return NF_DROP; + goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -769,7 +769,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv6(skb)) - return NF_DROP; + goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -784,12 +784,16 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (v6ops) return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); - else - return -EMSGSIZE; + + kfree_skb(skb); + return -EMSGSIZE; } #endif nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); + drop: + kfree_skb(skb); + return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ -- cgit v1.1 From 6742b9e310bcf511b876532846e5302b07b7fedc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Jul 2015 16:14:25 +0200 Subject: netfilter: nfnetlink: keep going batch handling on missing modules After a fresh boot with no modules in place at all and a large rulesets, the existing nfnetlink_rcv_batch() funcion can take long time to commit the ruleset due to the many abort path. This is specifically a problem for the existing client of this code, ie. nf_tables, since it results in several synchronize_rcu() call in a row. This patch changes the policy to keep full batch processing on missing modules errors so we abort only once. Reported-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8b117c9..0c0e8ec 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -269,6 +269,12 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) } } +enum { + NFNL_BATCH_FAILURE = (1 << 0), + NFNL_BATCH_DONE = (1 << 1), + NFNL_BATCH_REPLAY = (1 << 2), +}; + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -276,13 +282,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; - bool success = true, done = false; static LIST_HEAD(err_list); + u32 status; int err; if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); replay: + status = 0; + skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); @@ -336,10 +344,10 @@ replay: if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ nfnl_err_reset(&err_list); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } else if (type == NFNL_MSG_BATCH_END) { - done = true; + status |= NFNL_BATCH_DONE; goto done; } else if (type < NLMSG_MIN_TYPE) { err = -EINVAL; @@ -382,11 +390,8 @@ replay: * original skb. */ if (err == -EAGAIN) { - nfnl_err_reset(&err_list); - ss->abort(oskb); - nfnl_unlock(subsys_id); - kfree_skb(skb); - goto replay; + status |= NFNL_BATCH_REPLAY; + goto next; } } ack: @@ -402,7 +407,7 @@ ack: */ nfnl_err_reset(&err_list); netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } /* We don't stop processing the batch on errors, thus, @@ -410,19 +415,26 @@ ack: * triggers. */ if (err) - success = false; + status |= NFNL_BATCH_FAILURE; } - +next: msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; skb_pull(skb, msglen); } done: - if (success && done) + if (status & NFNL_BATCH_REPLAY) { + ss->abort(oskb); + nfnl_err_reset(&err_list); + nfnl_unlock(subsys_id); + kfree_skb(skb); + goto replay; + } else if (status == NFNL_BATCH_DONE) { ss->commit(oskb); - else + } else { ss->abort(oskb); + } nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); -- cgit v1.1 From 462e1ead9296a8452499fb10cf3b51903ffe24ac Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 2 Jul 2015 05:48:17 -0700 Subject: bridge: vlan: fix usage of vlan 0 and 4095 again Vlan ids 0 and 4095 were disallowed by commit: 8adff41c3d25 ("bridge: Don't use VID 0 and 4095 in vlan filtering") but then the check was removed when vlan ranges were introduced by: bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") So reintroduce the vlan range check. Before patch: [root@testvm ~]# bridge vlan add vid 0 dev eth0 master (succeeds) After Patch: [root@testvm ~]# bridge vlan add vid 0 dev eth0 master RTNETLINK answers: Invalid argument Signed-off-by: Nikolay Aleksandrov Fixes: bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") Acked-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6b67ed3..364bdc9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -457,6 +457,8 @@ static int br_afspec(struct net_bridge *br, if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; vinfo = nla_data(attr); + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) + return -EINVAL; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { if (vinfo_start) return -EINVAL; -- cgit v1.1 From 87775312a86bcf213e3b21f6f7c79e2e00d96f7b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 16:30:24 +0200 Subject: net-ipv6: Delete an unnecessary check before the function call "free_percpu" The free_percpu() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- net/ipv6/route.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1a1122a..6090969 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -369,10 +369,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev; dst_destroy_metrics_generic(dst); - - if (rt->rt6i_pcpu) - free_percpu(rt->rt6i_pcpu); - + free_percpu(rt->rt6i_pcpu); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; -- cgit v1.1 From f6e1c91666990d06ba37c76129495c724202144d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 17:58:21 +0200 Subject: net-RDS: Delete an unnecessary check before the function call "module_put" The module_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- net/rds/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/transport.c b/net/rds/transport.c index 8b4a6cd..83498e1 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL_GPL(rds_trans_unregister); void rds_trans_put(struct rds_transport *trans) { - if (trans && trans->t_owner) + if (trans) module_put(trans->t_owner); } -- cgit v1.1 From 92b80eb33c52583b48ed289bd993579b87b52d9a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 18:38:12 +0200 Subject: netlink: Delete an unnecessary check before the function call "module_put" The module_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dea9253..9a0ae71 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -158,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt) out: spin_unlock(&netlink_tap_lock); - if (found && nt->module) + if (found) module_put(nt->module); return found ? 0 : -ENODEV; -- cgit v1.1 From 4c938d22c88a9ddccc8c55a85e0430e9c62b1ac5 Mon Sep 17 00:00:00 2001 From: Angga Date: Fri, 3 Jul 2015 14:40:52 +1200 Subject: ipv6: Make MLD packets to only be processed locally Before commit daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") MLD packets were only processed locally. After the change, a copy of MLD packet goes through ip6_mr_input, causing MRT6MSG_NOCACHE message to be generated to user space. Make MLD packet only processed locally. Fixes: daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") Signed-off-by: Hermin Anggawijaya Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f2e464e..57990c9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } -- cgit v1.1 From 86e8971800381c3a8d8d9327f83b1f97ccb04a4f Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 7 Jul 2015 15:55:21 +0100 Subject: netfilter: bridge: Use __in6_dev_get rather than in6_dev_get in br_validate_ipv6 The commit efb6de9b4ba0092b2c55f6a52d16294a8a698edd "netfilter: bridge: forward IPv6 fragmented packets" introduced a new function br_validate_ipv6 which take a reference on the inet6 device. Although, the reference is not released at the end. This will result to the impossibility to destroy any netdevice using ipv6 and bridge. It's possible to directly retrieve the inet6 device without taking a reference as all netfilter hooks are protected by rcu_read_lock via nf_hook_slow. Spotted while trying to destroy a Xen guest on the upstream Linux: "unregister_netdevice: waiting for vif1.0 to become free. Usage count = 1" Signed-off-by: Julien Grall Cc: Bernhard Thaler Cc: Pablo Neira Ayuso Cc: fw@strlen.de Cc: ian.campbell@citrix.com Cc: wei.liu2@citrix.com Cc: Bob Liu Acked-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6d12d26..13b7d1e 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -104,7 +104,7 @@ int br_validate_ipv6(struct sk_buff *skb) { const struct ipv6hdr *hdr; struct net_device *dev = skb->dev; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev = __in6_dev_get(skb->dev); u32 pkt_len; u8 ip6h_len = sizeof(struct ipv6hdr); -- cgit v1.1 From 32f675bbc9be14a40d972820e190ac56341ce198 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jul 2015 15:57:19 +0200 Subject: net_sched: gen_estimator: extend pps limit rate estimators are limited to 4 Mpps, which was fine years ago, but too small with current hardware generation. Lets use 2^5 scaling instead of 2^10 to get 128 Mpps new limit. On 64bit arch, use an "unsigned long" for temp storage and remove limit. (We do not expect 32bit arches to be able to reach this point) Tested: tc -s -d filter sh dev eth0 parent ffff: filter protocol ip pref 1 u32 filter protocol ip pref 1 u32 fh 800: ht divisor 1 filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15 match 07000000/ff000000 at 12 action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 166 sec Action statistics: Sent 39734251496 bytes 863788076 pkt (dropped 863788117, overlimits 0 requeues 0) rate 4067Mbit 11053596pps backlog 0b 0p requeues 0 Signed-off-by: Eric Dumazet Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9dfb88a..92d886f 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,7 +66,7 @@ NOTES. - * avbps is scaled by 2^5, avpps is scaled by 2^10. + * avbps and avpps are scaled by 2^5. * both values are reported as 32 bit unsigned values. bps can overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor @@ -85,10 +85,10 @@ struct gen_estimator struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; + u32 last_packets; + unsigned long avpps; u64 last_bytes; u64 avbps; - u32 last_packets; - u32 avpps; struct rcu_head e_rcu; struct rb_node node; struct gnet_stats_basic_cpu __percpu *cpu_bstats; @@ -118,8 +118,8 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { struct gnet_stats_basic_packed b = {0}; + unsigned long rate; u64 brate; - u32 rate; spin_lock(e->stats_lock); read_lock(&est_lock); @@ -133,10 +133,11 @@ static void est_timer(unsigned long arg) e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); e->rate_est->bps = (e->avbps+0xF)>>5; - rate = (b.packets - e->last_packets)<<(12 - idx); + rate = b.packets - e->last_packets; + rate <<= (7 - idx); e->last_packets = b.packets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); - e->rate_est->pps = (e->avpps+0x1FF)>>10; + e->rate_est->pps = (e->avpps + 0xF) >> 5; skip: read_unlock(&est_lock); spin_unlock(e->stats_lock); -- cgit v1.1 From f7e2965db17dd3b60f05fad88e7afc79ea75b48f Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Mon, 6 Jul 2015 05:53:35 -0700 Subject: bridge: mdb: start delete timer for temp static entries Start the delete timer when adding temp static entries so they can expire. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Fixes: ccb1c31a7a87 ("bridge: add flags to distinguish permanent mdb entires") Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index e29ad70..3bfc675 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -323,6 +323,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; struct net_bridge_mdb_htable *mdb; + unsigned long now = jiffies; int err; mdb = mlock_dereference(br->mdb, br); @@ -347,6 +348,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); + if (state == MDB_TEMPORARY) + mod_timer(&p->timer, now + br->multicast_membership_interval); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; -- cgit v1.1 From d339727c2b1a10f25e6636670ab6e1841170e328 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 17:13:26 +0200 Subject: net: graceful exit from netif_alloc_netdev_queues() User space can crash kernel with ip link add ifb10 numtxqueues 100000 type ifb We must replace a BUG_ON() by proper test and return -EINVAL for crazy values. Fixes: 60877a32bce00 ("net: allow large number of tx queues") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6778a99..0ad6262 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6409,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { -- cgit v1.1 From 95ec655bc465ccb2a3329d4aff9a45e3c8188db5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 6 Jul 2015 17:25:10 +0200 Subject: Revert "dev: set iflink to 0 for virtual interfaces" This reverts commit e1622baf54df8cc958bf29d71de5ad545ea7d93c. The side effect of this commit is to add a '@NONE' after each virtual interface name with a 'ip link'. It may break existing scripts. Reported-by: Olivier Hartkopp Signed-off-by: Nicolas Dichtel Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/core/dev.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0ad6262..1e57efd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -677,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - /* If dev->rtnl_link_ops is set, it's a virtual interface. */ - if (dev->rtnl_link_ops) - return 0; - return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); -- cgit v1.1 From 4f7d2cdfdde71ffe962399b7020c674050329423 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Jul 2015 00:07:52 +0200 Subject: rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes anymore with respect to their policy, that is, ifla_vfinfo_policy[]. Before, they were part of ifla_policy[], but they have been nested since placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO, which is another nested attribute for the actual VF attributes such as IFLA_VF_MAC, IFLA_VF_VLAN, etc. Despite the policy being split out from ifla_policy[] in this commit, it's never applied anywhere. nla_for_each_nested() only does basic nla_ok() testing for struct nlattr, but it doesn't know about the data context and their requirements. Fix, on top of Jason's initial work, does 1) parsing of the attributes with the right policy, and 2) using the resulting parsed attribute table from 1) instead of the nla_for_each_nested() loop (just like we used to do when still part of ifla_policy[]). Reference: http://thread.gmane.org/gmane.linux.network/368913 Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric") Reported-by: Jason Gunthorpe Cc: Chris Wright Cc: Sucheta Chakraborty Cc: Greg Rose Cc: Jeff Kirsher Cc: Rony Efraim Cc: Vlad Zolotarov Cc: Nicolas Dichtel Cc: Thomas Graf Signed-off-by: Jason Gunthorpe Signed-off-by: Daniel Borkmann Acked-by: Vlad Zolotarov Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 187 ++++++++++++++++++++++++++------------------------- 1 file changed, 96 insertions(+), 91 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 01ced4a..9e433d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1328,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1488,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - struct ifla_vf_info ivf; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_get_vf_config) - err = ops->ndo_get_vf_config(dev, ivt->vf, - &ivf); - if (err) - break; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); - break; - } - case IFLA_VF_RATE: { - struct ifla_vf_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); - break; - } - case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - case IFLA_VF_RSS_QUERY_EN: { - struct ifla_vf_rss_query_en *ivrssq_en; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); - ivrssq_en = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rss_query_en) - err = ops->ndo_set_vf_rss_query_en(dev, - ivrssq_en->vf, - ivrssq_en->setting); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + struct ifla_vf_info ivf; + + err = -EOPNOTSUPP; + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); + if (err < 0) + return err; + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RATE]) { + struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); + if (err < 0) + return err; } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RSS_QUERY_EN]) { + struct ifla_vf_rss_query_en *ivrssq_en; + + err = -EOPNOTSUPP; + ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, + ivrssq_en->setting); + if (err < 0) + return err; + } + return err; } @@ -1773,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; -- cgit v1.1 From fc24f2b2094366da8786f59f2606307e934cea17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Tue, 7 Jul 2015 08:34:13 +0300 Subject: ip_tunnel: fix ipv4 pmtu check to honor inner ip header df MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frag needed should be sent only if the inner header asked to not fragment. Currently fragmentation is broken if the tunnel has df set, but df was not asked in the original packet. The tunnel's df needs to be still checked to update internally the pmtu cache. Commit 23a3647bc4f93bac broke it, and this commit fixes the ipv4 df check back to the way it was. Fixes: 23a3647bc4f93bac ("ip_tunnels: Use skb-len to PMTU check.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4c2c3ba..626d9e5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, EXPORT_SYMBOL(ip_tunnel_encap); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } -- cgit v1.1 From fdd75ea8df370f206a8163786e7470c1277a5064 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 7 Jul 2015 09:43:45 -0400 Subject: net/tipc: initialize security state for new connection socket Calling connect() with an AF_TIPC socket would trigger a series of error messages from SELinux along the lines of: SELinux: Invalid class 0 type=AVC msg=audit(1434126658.487:34500): avc: denied { } for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass= permissive=0 This was due to a failure to initialize the security state of the new connection sock by the tipc code, leaving it with junk in the security class field and an unlabeled secid. Add a call to security_sk_clone() to inherit the security state from the parent socket. Reported-by: Tim Shearer Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 46b6ed5..3a7567f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2007,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); -- cgit v1.1 From f1158b74e54f2e2462ba5e2f45a118246d9d5b43 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 7 Jul 2015 15:55:56 +0200 Subject: bridge: mdb: zero out the local br_ip variable before use Since commit b0e9a30dd669 ("bridge: Add vlan id to multicast groups") there's a check in br_ip_equal() for a matching vlan id, but the mdb functions were not modified to use (or at least zero it) so when an entry was added it would have a garbage vlan id (from the local br_ip variable in __br_mdb_add/del) and this would prevent it from being matched and also deleted. So zero out the whole local ip var to protect ourselves from future changes and also to fix the current bug, since there's no vlan id support in the mdb uapi - use always vlan id 0. Example before patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent RTNETLINK answers: Invalid argument After patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb Signed-off-by: Nikolay Aleksandrov Fixes: b0e9a30dd669 ("bridge: Add vlan id to multicast groups") Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 3bfc675..60868c2 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -374,6 +374,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -420,6 +421,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { if (timer_pending(&br->ip4_other_query.timer)) -- cgit v1.1 From 974d7af5fcc295dcf8315255142b2fe44fd74b0c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 7 Jul 2015 13:56:57 -0400 Subject: ipv4: add support for linkdown sysctl to netconf This kernel patch exports the value of the new ignore_routes_with_linkdown via netconf. v2: changes to notify userspace via netlink when sysctl values change and proposed for 'net' since this could be considered a bugfix Signed-off-by: Andy Gospodarek Suggested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7498716..e813196 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1740,6 +1740,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) + size += nla_total_size(4); return size; } @@ -1780,6 +1782,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && + nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -1819,6 +1825,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, + [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -2048,6 +2055,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, ifindex, cnf); } + if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && + new_value != old_value) { + ifindex = devinet_conf_ifindex(net, cnf); + inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + ifindex, cnf); + } } return ret; -- cgit v1.1 From fecdf8be2d91e04b0a9a4f79ff06499a36f5d14f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:11 +0200 Subject: net: pktgen: fix race between pktgen_thread_worker() and kthread_stop() pktgen_thread_worker() is obviously racy, kthread_stop() can come between the kthread_should_stop() check and set_current_state(). Signed-off-by: Oleg Nesterov Reported-by: Jan Stancek Reported-by: Marcelo Leitner Signed-off-by: David S. Miller --- net/core/pktgen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 05badbb..41489e3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3572,8 +3572,10 @@ static int pktgen_thread_worker(void *arg) pktgen_rem_thread(t); /* Wait for kthread_stop */ - while (!kthread_should_stop()) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } __set_current_state(TASK_RUNNING); -- cgit v1.1 From 1fbe4b46caca5b01b070af93d513031ffbcc480c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:13 +0200 Subject: net: pktgen: kill the "Wait for kthread_stop" code in pktgen_thread_worker() pktgen_thread_worker() doesn't need to wait for kthread_stop(), it can simply exit. Just pktgen_create_thread() and pg_net_exit() should do get_task_struct()/put_task_struct(). kthread_stop(dead_thread) is fine. Signed-off-by: Oleg Nesterov Signed-off-by: David S. Miller --- net/core/pktgen.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 41489e3..1ebdf1c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3571,15 +3571,6 @@ static int pktgen_thread_worker(void *arg) pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); - /* Wait for kthread_stop */ - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); - } - __set_current_state(TASK_RUNNING); - return 0; } @@ -3771,6 +3762,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) } t->net = pn; + get_task_struct(p); wake_up_process(p); wait_for_completion(&t->start_done); @@ -3893,6 +3885,7 @@ static void __net_exit pg_net_exit(struct net *net) t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); kthread_stop(t->tsk); + put_task_struct(t->tsk); kfree(t); } -- cgit v1.1 From a7d35f9d73e9ffa74a02304b817e579eec632f67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jul 2015 18:56:07 +0200 Subject: bridge: fix potential crash in __netdev_pick_tx() Commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") fixed an issue in normal forward path, caused by sender_cpu & napi_id skb fields being an union. Bridge is another point where skb can be forwarded, so we need the same cure. Bug triggers if packet was received on a NIC using skb_mark_napi_id() Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Signed-off-by: Eric Dumazet Reported-by: Bob Liu Tested-by: Bob Liu Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e97572b..0ff6e1b 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) } else { skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); dev_queue_xmit(skb); } -- cgit v1.1 From e9e4dd3267d0c5234c5c0f47440456b10875dec9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:09 +0300 Subject: net: do not process device backlog during unregistration commit 381c759d9916 ("ipv4: Avoid crashing in ip_error") fixes a problem where processed packet comes from device with destroyed inetdev (dev->ip_ptr). This is not expected because inetdev_destroy is called in NETDEV_UNREGISTER phase and packets should not be processed after dev_close_many() and synchronize_net(). Above fix is still required because inetdev_destroy can be called for other reasons. But it shows the real problem: backlog can keep packets for long time and they do not hold reference to device. Such packets are then delivered to upper levels at the same time when device is unregistered. Calling flush_backlog after NETDEV_UNREGISTER_FINAL still accounts all packets from backlog but before that some packets continue to be delivered to upper levels long after the synchronize_net call which is supposed to wait the last ones. Also, as Eric pointed out, processed packets, mostly from other devices, can continue to add new packets to backlog. Fix the problem by moving flush_backlog early, after the device driver is stopped and before the synchronize_net() call. Then use netif_running check to make sure we do not add more packets to backlog. We have to do it in enqueue_to_backlog context when the local IRQ is disabled. As result, after the flush_backlog and synchronize_net sequence all packets should be accounted. Thanks to Eric W. Biederman for the test script and his valuable feedback! Reported-by: Vittorio Gambaletta Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1e57efd..6dd126a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3448,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (qlen) { @@ -3469,6 +3471,7 @@ enqueue: goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -6135,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -6770,8 +6774,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ -- cgit v1.1 From 2c17d27c36dcce2b6bf689f41a46b9e909877c21 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:10 +0300 Subject: net: call rcu_read_lock early in process_backlog Incoming packet should be either in backlog queue or in RCU read-side section. Otherwise, the final sequence of flush_backlog() and synchronize_net() may miss packets that can run without device reference: CPU 1 CPU 2 skb->dev: no reference process_backlog:__skb_dequeue process_backlog:local_irq_enable on_each_cpu for flush_backlog => IPI(hardirq): flush_backlog - packet not found in backlog CPU delayed ... synchronize_net - no ongoing RCU read-side sections netdev_run_todo, rcu_barrier: no ongoing callbacks __netif_receive_skb_core:rcu_read_lock - too late free dev process packet for freed dev Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/core/dev.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6dd126a..a8e4dd4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3774,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3785,7 +3783,7 @@ another_round: skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3815,10 +3813,10 @@ skip_taps: if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) - goto unlock; + goto out; } #endif #ifdef CONFIG_NET_CLS_ACT @@ -3836,7 +3834,7 @@ ncls: if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3848,7 +3846,7 @@ ncls: switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3902,8 +3900,7 @@ drop: ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); +out: return ret; } @@ -3934,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb) static int netif_receive_skb_internal(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } /** @@ -4501,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota) struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { -- cgit v1.1 From 51ed7f3e7d33824820837ad784801973f147c51a Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Thu, 9 Jul 2015 04:12:45 -0700 Subject: bridge: mdb: allow the user to delete mdb entry if there's a querier Until now when a querier was present static entries couldn't be deleted. Fix this and allow the user to manipulate the mdb with or without a querier. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 60868c2..c11cf26 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -423,19 +423,12 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) { - if (timer_pending(&br->ip4_other_query.timer)) - return -EBUSY; - + if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - } else { - if (timer_pending(&br->ip6_other_query.timer)) - return -EBUSY; - + else ip.u.ip6 = entry->addr.u.ip6; #endif - } spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); -- cgit v1.1 From 8220ea23243178c16b87fc3ccadf071647b64e04 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 10 Jul 2015 11:39:57 +0200 Subject: net: inet_diag: always export IPV6_V6ONLY sockopt for listening sockets Reconsidering my commit 20462155 "net: inet_diag: export IPV6_V6ONLY sockopt", I am not happy with the limitations it causes for socket analysing code in userspace. Exporting the value only if it is set makes it hard for userspace to decide whether the option is not set or the kernel does not support exporting the option at all. >From an auditor's perspective, the interesting question for listening AF_INET6 sockets is: "Does it NOT have IPV6_V6ONLY set?" Because it is the unexpected case. This patch allows to answer this question reliably. Signed-off-by: Phil Sutter Cc: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9bc2667..c3b1f3a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -152,8 +152,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, inet6_sk(sk)->tclass) < 0) goto errout; - if (ipv6_only_sock(sk) && - nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1)) + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) goto errout; } #endif -- cgit v1.1 From 2ee94014d9bd3868b1c0d17405f96d63bec83f28 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 10 Jul 2015 19:48:58 -0400 Subject: net: switchdev: don't abort unsupported operations There is no need to abort attribute setting or object addition, if the prepare phase returned operation not supported. Thus, abort these two transactions only if the error is not -EOPNOTSUPP. Signed-off-by: Vivien Didelot Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 84f77a0..9f2add3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -171,8 +171,10 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * released. */ - attr->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_attr_set(dev, attr); + if (err != -EOPNOTSUPP) { + attr->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_attr_set(dev, attr); + } return err; } @@ -249,8 +251,10 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * released. */ - obj->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_obj_add(dev, obj); + if (err != -EOPNOTSUPP) { + obj->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_obj_add(dev, obj); + } return err; } -- cgit v1.1 From 8f5063e97f393d49611151d3cf7dcbeb41397f12 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 11 Jul 2015 18:02:10 -0700 Subject: net: dsa: Test array index before use port_index is used an index into an array, and this information comes from Device Tree, make sure that port_index is not equal to the array size before using it. Move the check against port_index earlier in the loop. Fixes: 5e95329b701c: ("dsa: add device tree bindings to register DSA switches") Reported-by: Dan Carpenter Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 392e29a..52beeb8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -642,6 +642,8 @@ static int dsa_of_probe(struct device *dev) continue; port_index = be32_to_cpup(port_reg); + if (port_index >= DSA_MAX_PORTS) + break; port_name = of_get_property(port, "label", NULL); if (!port_name) @@ -666,8 +668,6 @@ static int dsa_of_probe(struct device *dev) goto out_free_chip; } - if (port_index == DSA_MAX_PORTS) - break; } } -- cgit v1.1 From c8cf89f73f3d9ecbdea479778f0ac714be79be33 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 11 Jul 2015 18:02:11 -0700 Subject: net: dsa: Fix off-by-one in switch address parsing cd->sw_addr is used as a MDIO bus address, which cannot exceed PHY_MAX_ADDR (32), our check was off-by-one. Fixes: 5e95329b701c ("dsa: add device tree bindings to register DSA switches") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 52beeb8..b445d49 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -630,7 +630,7 @@ static int dsa_of_probe(struct device *dev) continue; cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr > PHY_MAX_ADDR) + if (cd->sw_addr >= PHY_MAX_ADDR) continue; if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) -- cgit v1.1 From d3b58c47d330de8c29898fe9746f7530408f8a59 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 26 Jun 2015 11:58:19 +0200 Subject: can: replace timestamp as unique skb attribute Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. Without timestamping to be required by user space applications this timestamp was not generated which lead to commit 36c01245eb8 "can: fix loss of CAN frames in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs by introducing several __net_timestamp() calls. This forces e.g. out of tree drivers which are not using alloc_can{,fd}_skb() to add __net_timestamp() after skbuff creation to prevent the frame loss fixed in mainline Linux. This patch removes the timestamp dependency and uses an atomic counter to create an unique identifier together with the skbuff pointer. Btw: the new skbcnt element introduced in struct can_skb_priv has to be initialized with zero in out-of-tree drivers which are not using alloc_can{,fd}_skb() too. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 12 +++++++----- net/can/bcm.c | 2 ++ net/can/raw.c | 7 ++++--- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 7933e62..166d436 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,6 +89,8 @@ struct timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ struct s_pstats can_pstats; /* receive list statistics */ +static atomic_t skbcounter = ATOMIC_INIT(0); + /* * af_can socket functions */ @@ -310,12 +312,8 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) { - if (!(newskb->tstamp.tv64)) - __net_timestamp(newskb); - + if (newskb) netif_rx_ni(newskb); - } /* update statistics */ can_stats.tx_frames++; @@ -683,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) can_stats.rx_frames++; can_stats.rx_frames_delta++; + /* create non-zero unique skb identifier together with *skb */ + while (!(can_skb_prv(skb)->skbcnt)) + can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter); + rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ diff --git a/net/can/bcm.c b/net/can/bcm.c index b523453..a1ba687 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) } can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/raw.c b/net/can/raw.c index 31b9748..2e67b14 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1"); */ struct uniqframe { - ktime_t tstamp; + int skbcnt; const struct sk_buff *skb; unsigned int join_rx_count; }; @@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && - ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { + this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (ro->join_filters) { this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ @@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) } } else { this_cpu_ptr(ro->uniq)->skb = oskb; - this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop first frame to check all enabled filters? */ if (ro->join_filters && ro->count > 1) @@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) -- cgit v1.1