From 7c258670ce655659a4c8d1013676c55e74d09ee7 Mon Sep 17 00:00:00 2001
From: Tedd Ho-Jeong An <tedd.an@intel.com>
Date: Thu, 25 Jun 2015 23:27:55 -0700
Subject: Bluetooth: hidp: Initialize list header of hidp session user
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When new hidp session is created, list header in l2cap_user is
not initialized and this causes list_empty() to fail in
l2cap_register_user() even if l2cap_user list is empty.

Signed-off-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Tested-by: Jörg Otte <jrg.otte@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hidp/core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 9070dfd..f1a117f 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -915,6 +915,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
 	session->conn = l2cap_conn_get(conn);
 	session->user.probe = hidp_session_probe;
 	session->user.remove = hidp_session_remove;
+	INIT_LIST_HEAD(&session->user.list);
 	session->ctrl_sock = ctrl_sock;
 	session->intr_sock = intr_sock;
 	skb_queue_head_init(&session->ctrl_transmit);
-- 
cgit v1.1


From ab944c83f6690df0c7f67e6bcc29fc0c82ef6021 Mon Sep 17 00:00:00 2001
From: Tedd Ho-Jeong An <tedd.an@intel.com>
Date: Tue, 30 Jun 2015 11:43:40 -0700
Subject: Bluetooth: Reinitialize the list after deletion for session user list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the user->list is deleted with list_del(), it doesn't initialize the
entry which can cause the issue with list_empty(). According to the
comment from the list.h, list_empty() returns false even if the list is
empty and put the entry in an undefined state.

/**
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 * Note: list_empty() on entry does not return true after this, the entry is
 * in an undefined state.
 */

Because of this behavior, list_empty() returns false even if list is empty
when the device is reconnected.

So, user->list needs to be re-initialized after list_del(). list.h already
have a macro list_del_init() which deletes the entry and initailze it again.

Signed-off-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Tested-by: Jörg Otte <jrg.otte@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 51594fb..45fffa4 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1634,7 +1634,7 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user)
 	if (list_empty(&user->list))
 		goto out_unlock;
 
-	list_del(&user->list);
+	list_del_init(&user->list);
 	user->remove(conn, user);
 
 out_unlock:
@@ -1648,7 +1648,7 @@ static void l2cap_unregister_all_users(struct l2cap_conn *conn)
 
 	while (!list_empty(&conn->users)) {
 		user = list_first_entry(&conn->users, struct l2cap_user, list);
-		list_del(&user->list);
+		list_del_init(&user->list);
 		user->remove(conn, user);
 	}
 }
-- 
cgit v1.1


From f307170d6e591a48529425b1ed6ca835790995a9 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 19 Jun 2015 17:23:37 -0500
Subject: netfilter: nf_queue: Don't recompute the hook_list head

If someone sends packets from one of the netdevice ingress hooks to
the a userspace queue, and then userspace later accepts the packet,
the netfilter code can enter an infinite loop as the list head will
never be found.

Pass in the saved list_head to avoid this.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index cd60d39..8a8b2ab 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -213,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook],
+		verdict = nf_iterate(entry->state.hook_list,
 				     skb, &entry->state, &elem);
 	}
 
-- 
cgit v1.1


From a1bc1b356a9d21bf29bc7c873718b5cacdf119b4 Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Sat, 20 Jun 2015 00:17:50 +0200
Subject: netfilter: bridge: fix CONFIG_NF_DEFRAG_IPV4/6 related
 warnings/errors

br_nf_ip_fragment() is not needed when neither CONFIG_NF_DEFRAG_IPV4 nor
CONFIG_NF_DEFRAG_IPV6 is set.

struct brnf_frag_data must be available if either CONFIG_NF_DEFRAG_IPV4
or CONFIG_NF_DEFRAG_IPV6 is set.

Fixes: efb6de9b4ba0 ("netfilter: bridge: forward IPv6 fragmented packets")
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter_hooks.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index d89f4fa..8a394bd 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -111,7 +111,7 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
 /* largest possible L2 header, see br_nf_dev_queue_xmit() */
 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
 
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 struct brnf_frag_data {
 	char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
 	u8 encap_size;
@@ -694,6 +694,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
 static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
 			     int (*output)(struct sock *, struct sk_buff *))
 {
@@ -712,6 +713,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
 
 	return ip_do_fragment(sk, skb, output);
 }
+#endif
 
 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 {
-- 
cgit v1.1


From 3bd229976f64bea64c60803f9fc8d9f0059ba2f2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 30 Jun 2015 22:21:00 +0200
Subject: netfilter: arptables: use percpu jumpstack

commit 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication")

Unlike ip and ip6tables, arp tables were never converted to use the percpu
jump stack.

It still uses the rule blob to store return address, which isn't safe
anymore since we now share this blob among all processors.

Because there is no TEE support for arptables, we don't need to cope
with reentrancy, so we can use loocal variable to hold stack offset.

Fixes: 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 95c9b6e..92305a1 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -254,9 +254,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	unsigned int verdict = NF_DROP;
 	const struct arphdr *arp;
-	struct arpt_entry *e, *back;
+	struct arpt_entry *e, **jumpstack;
 	const char *indev, *outdev;
 	const void *table_base;
+	unsigned int cpu, stackidx = 0;
 	const struct xt_table_info *private;
 	struct xt_action_param acpar;
 	unsigned int addend;
@@ -270,15 +271,16 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
 	private = table->private;
+	cpu     = smp_processor_id();
 	/*
 	 * Ensure we load private-> members after we've fetched the base
 	 * pointer.
 	 */
 	smp_read_barrier_depends();
 	table_base = private->entries;
+	jumpstack  = (struct arpt_entry **)private->jumpstack[cpu];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
-	back = get_entry(table_base, private->underflow[hook]);
 
 	acpar.in      = state->in;
 	acpar.out     = state->out;
@@ -312,18 +314,23 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 					verdict = (unsigned int)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (stackidx == 0) {
+					e = get_entry(table_base,
+						      private->underflow[hook]);
+				} else {
+					e = jumpstack[--stackidx];
+					e = arpt_next_entry(e);
+				}
 				continue;
 			}
 			if (table_base + v
 			    != arpt_next_entry(e)) {
-				/* Save old back ptr in next entry */
-				struct arpt_entry *next = arpt_next_entry(e);
-				next->comefrom = (void *)back - table_base;
 
-				/* set back pointer to next entry */
-				back = next;
+				if (stackidx >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[stackidx++] = e;
 			}
 
 			e = get_entry(table_base, v);
-- 
cgit v1.1


From dd302b59bde0149c20df7278c0d36c765e66afbd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 30 Jun 2015 22:27:51 +0200
Subject: netfilter: bridge: don't leak skb in error paths

br_nf_dev_queue_xmit must free skb in its error path.
NF_DROP is misleading -- its an okfn, not a netfilter hook.

Fixes: 462fb2af9788a ("bridge : Sanitize skb before it enters the IP stack")
Fixes: efb6de9b4ba00 ("netfilter: bridge: forward IPv6 fragmented packets")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter_hooks.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 8a394bd..c8b9bcf 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -744,7 +744,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
 		struct brnf_frag_data *data;
 
 		if (br_validate_ipv4(skb))
-			return NF_DROP;
+			goto drop;
 
 		IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
 
@@ -769,7 +769,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
 		struct brnf_frag_data *data;
 
 		if (br_validate_ipv6(skb))
-			return NF_DROP;
+			goto drop;
 
 		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
 
@@ -784,12 +784,16 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
 
 		if (v6ops)
 			return v6ops->fragment(sk, skb, br_nf_push_frag_xmit);
-		else
-			return -EMSGSIZE;
+
+		kfree_skb(skb);
+		return -EMSGSIZE;
 	}
 #endif
 	nf_bridge_info_free(skb);
 	return br_dev_queue_push_xmit(sk, skb);
+ drop:
+	kfree_skb(skb);
+	return 0;
 }
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
-- 
cgit v1.1


From 6742b9e310bcf511b876532846e5302b07b7fedc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 1 Jul 2015 16:14:25 +0200
Subject: netfilter: nfnetlink: keep going batch handling on missing modules

After a fresh boot with no modules in place at all and a large rulesets, the
existing nfnetlink_rcv_batch() funcion can take long time to commit the ruleset
due to the many abort path. This is specifically a problem for the existing
client of this code, ie. nf_tables, since it results in several
synchronize_rcu() call in a row.

This patch changes the policy to keep full batch processing on missing modules
errors so we abort only once.

Reported-by: Eric Leblond <eric@regit.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 8b117c9..0c0e8ec 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -269,6 +269,12 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
 	}
 }
 
+enum {
+	NFNL_BATCH_FAILURE	= (1 << 0),
+	NFNL_BATCH_DONE		= (1 << 1),
+	NFNL_BATCH_REPLAY	= (1 << 2),
+};
+
 static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
 				u_int16_t subsys_id)
 {
@@ -276,13 +282,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	const struct nfnetlink_subsystem *ss;
 	const struct nfnl_callback *nc;
-	bool success = true, done = false;
 	static LIST_HEAD(err_list);
+	u32 status;
 	int err;
 
 	if (subsys_id >= NFNL_SUBSYS_COUNT)
 		return netlink_ack(skb, nlh, -EINVAL);
 replay:
+	status = 0;
+
 	skb = netlink_skb_clone(oskb, GFP_KERNEL);
 	if (!skb)
 		return netlink_ack(oskb, nlh, -ENOMEM);
@@ -336,10 +344,10 @@ replay:
 		if (type == NFNL_MSG_BATCH_BEGIN) {
 			/* Malformed: Batch begin twice */
 			nfnl_err_reset(&err_list);
-			success = false;
+			status |= NFNL_BATCH_FAILURE;
 			goto done;
 		} else if (type == NFNL_MSG_BATCH_END) {
-			done = true;
+			status |= NFNL_BATCH_DONE;
 			goto done;
 		} else if (type < NLMSG_MIN_TYPE) {
 			err = -EINVAL;
@@ -382,11 +390,8 @@ replay:
 			 * original skb.
 			 */
 			if (err == -EAGAIN) {
-				nfnl_err_reset(&err_list);
-				ss->abort(oskb);
-				nfnl_unlock(subsys_id);
-				kfree_skb(skb);
-				goto replay;
+				status |= NFNL_BATCH_REPLAY;
+				goto next;
 			}
 		}
 ack:
@@ -402,7 +407,7 @@ ack:
 				 */
 				nfnl_err_reset(&err_list);
 				netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
-				success = false;
+				status |= NFNL_BATCH_FAILURE;
 				goto done;
 			}
 			/* We don't stop processing the batch on errors, thus,
@@ -410,19 +415,26 @@ ack:
 			 * triggers.
 			 */
 			if (err)
-				success = false;
+				status |= NFNL_BATCH_FAILURE;
 		}
-
+next:
 		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (msglen > skb->len)
 			msglen = skb->len;
 		skb_pull(skb, msglen);
 	}
 done:
-	if (success && done)
+	if (status & NFNL_BATCH_REPLAY) {
+		ss->abort(oskb);
+		nfnl_err_reset(&err_list);
+		nfnl_unlock(subsys_id);
+		kfree_skb(skb);
+		goto replay;
+	} else if (status == NFNL_BATCH_DONE) {
 		ss->commit(oskb);
-	else
+	} else {
 		ss->abort(oskb);
+	}
 
 	nfnl_err_deliver(&err_list, oskb);
 	nfnl_unlock(subsys_id);
-- 
cgit v1.1


From 462e1ead9296a8452499fb10cf3b51903ffe24ac Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Thu, 2 Jul 2015 05:48:17 -0700
Subject: bridge: vlan: fix usage of vlan 0 and 4095 again

Vlan ids 0 and 4095 were disallowed by commit:
8adff41c3d25 ("bridge: Don't use VID 0 and 4095 in vlan filtering")
but then the check was removed when vlan ranges were introduced by:
bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests")
So reintroduce the vlan range check.
Before patch:
[root@testvm ~]# bridge vlan add vid 0 dev eth0 master
(succeeds)
After Patch:
[root@testvm ~]# bridge vlan add vid 0 dev eth0 master
RTNETLINK answers: Invalid argument

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Fixes: bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests")
Acked-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6b67ed3..364bdc9 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -457,6 +457,8 @@ static int br_afspec(struct net_bridge *br,
 		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
 			return -EINVAL;
 		vinfo = nla_data(attr);
+		if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
+			return -EINVAL;
 		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
 			if (vinfo_start)
 				return -EINVAL;
-- 
cgit v1.1


From 87775312a86bcf213e3b21f6f7c79e2e00d96f7b Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Thu, 2 Jul 2015 16:30:24 +0200
Subject: net-ipv6: Delete an unnecessary check before the function call
 "free_percpu"

The free_percpu() function tests whether its argument is NULL and then
returns immediately. Thus the test around the call is not needed.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1a1122a..6090969 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -369,10 +369,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 	struct inet6_dev *idev;
 
 	dst_destroy_metrics_generic(dst);
-
-	if (rt->rt6i_pcpu)
-		free_percpu(rt->rt6i_pcpu);
-
+	free_percpu(rt->rt6i_pcpu);
 	rt6_uncached_list_del(rt);
 
 	idev = rt->rt6i_idev;
-- 
cgit v1.1


From f6e1c91666990d06ba37c76129495c724202144d Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Thu, 2 Jul 2015 17:58:21 +0200
Subject: net-RDS: Delete an unnecessary check before the function call
 "module_put"

The module_put() function tests whether its argument is NULL and then
returns immediately. Thus the test around the call is not needed.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/transport.c b/net/rds/transport.c
index 8b4a6cd..83498e1 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL_GPL(rds_trans_unregister);
 
 void rds_trans_put(struct rds_transport *trans)
 {
-	if (trans && trans->t_owner)
+	if (trans)
 		module_put(trans->t_owner);
 }
 
-- 
cgit v1.1


From 92b80eb33c52583b48ed289bd993579b87b52d9a Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Thu, 2 Jul 2015 18:38:12 +0200
Subject: netlink: Delete an unnecessary check before the function call
 "module_put"

The module_put() function tests whether its argument is NULL and then
returns immediately. Thus the test around the call is not needed.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index dea9253..9a0ae71 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -158,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt)
 out:
 	spin_unlock(&netlink_tap_lock);
 
-	if (found && nt->module)
+	if (found)
 		module_put(nt->module);
 
 	return found ? 0 : -ENODEV;
-- 
cgit v1.1


From 4c938d22c88a9ddccc8c55a85e0430e9c62b1ac5 Mon Sep 17 00:00:00 2001
From: Angga <Hermin.Anggawijaya@alliedtelesis.co.nz>
Date: Fri, 3 Jul 2015 14:40:52 +1200
Subject: ipv6: Make MLD packets to only be processed locally

Before commit daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it
from ip6_mc_input().") MLD packets were only processed locally. After the
change, a copy of MLD packet goes through ip6_mr_input, causing
MRT6MSG_NOCACHE message to be generated to user space.

Make MLD packet only processed locally.

Fixes: daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().")
Signed-off-by: Hermin Anggawijaya <hermin.anggawijaya@alliedtelesis.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_input.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f2e464e..57990c9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb)
 				if (offset < 0)
 					goto out;
 
-				if (!ipv6_is_mld(skb, nexthdr, offset))
-					goto out;
+				if (ipv6_is_mld(skb, nexthdr, offset))
+					deliver = true;
 
-				deliver = true;
+				goto out;
 			}
 			/* unknown RA - process it normally */
 		}
-- 
cgit v1.1


From 86e8971800381c3a8d8d9327f83b1f97ccb04a4f Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@citrix.com>
Date: Tue, 7 Jul 2015 15:55:21 +0100
Subject: netfilter: bridge: Use __in6_dev_get rather than in6_dev_get in
 br_validate_ipv6

The commit efb6de9b4ba0092b2c55f6a52d16294a8a698edd "netfilter: bridge:
forward IPv6 fragmented packets" introduced a new function
br_validate_ipv6 which take a reference on the inet6 device. Although,
the reference is not released at the end.

This will result to the impossibility to destroy any netdevice using
ipv6 and bridge.

It's possible to directly retrieve the inet6 device without taking a
reference as all netfilter hooks are protected by rcu_read_lock via
nf_hook_slow.

Spotted while trying to destroy a Xen guest on the upstream Linux:
"unregister_netdevice: waiting for vif1.0 to become free. Usage count = 1"

Signed-off-by: Julien Grall <julien.grall@citrix.com>
Cc: Bernhard Thaler <bernhard.thaler@wvnet.at>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: fw@strlen.de
Cc: ian.campbell@citrix.com
Cc: wei.liu2@citrix.com
Cc: Bob Liu <bob.liu@oracle.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 6d12d26..13b7d1e 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -104,7 +104,7 @@ int br_validate_ipv6(struct sk_buff *skb)
 {
 	const struct ipv6hdr *hdr;
 	struct net_device *dev = skb->dev;
-	struct inet6_dev *idev = in6_dev_get(skb->dev);
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
 	u32 pkt_len;
 	u8 ip6h_len = sizeof(struct ipv6hdr);
 
-- 
cgit v1.1


From 32f675bbc9be14a40d972820e190ac56341ce198 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Jul 2015 15:57:19 +0200
Subject: net_sched: gen_estimator: extend pps limit

rate estimators are limited to 4 Mpps, which was fine years ago, but
too small with current hardware generation.

Lets use 2^5 scaling instead of 2^10 to get 128 Mpps new limit.

On 64bit arch, use an "unsigned long" for temp storage and remove limit.
(We do not expect 32bit arches to be able to reach this point)

Tested:

tc -s -d filter sh dev eth0 parent ffff:

filter protocol ip pref 1 u32
filter protocol ip pref 1 u32 fh 800: ht divisor 1
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15
  match 07000000/ff000000 at 12
	action order 1: gact action drop
	 random type none pass val 0
	 index 1 ref 1 bind 1 installed 166 sec
 	Action statistics:
	Sent 39734251496 bytes 863788076 pkt (dropped 863788117, overlimits 0 requeues 0)
	rate 4067Mbit 11053596pps backlog 0b 0p requeues 0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9dfb88a..92d886f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,7 +66,7 @@
 
    NOTES.
 
-   * avbps is scaled by 2^5, avpps is scaled by 2^10.
+   * avbps and avpps are scaled by 2^5.
    * both values are reported as 32 bit unsigned values. bps can
      overflow for fast links : max speed being 34360Mbit/sec
    * Minimal interval is HZ/4=250msec (it is the greatest common divisor
@@ -85,10 +85,10 @@ struct gen_estimator
 	struct gnet_stats_rate_est64	*rate_est;
 	spinlock_t		*stats_lock;
 	int			ewma_log;
+	u32			last_packets;
+	unsigned long		avpps;
 	u64			last_bytes;
 	u64			avbps;
-	u32			last_packets;
-	u32			avpps;
 	struct rcu_head		e_rcu;
 	struct rb_node		node;
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
@@ -118,8 +118,8 @@ static void est_timer(unsigned long arg)
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &elist[idx].list, list) {
 		struct gnet_stats_basic_packed b = {0};
+		unsigned long rate;
 		u64 brate;
-		u32 rate;
 
 		spin_lock(e->stats_lock);
 		read_lock(&est_lock);
@@ -133,10 +133,11 @@ static void est_timer(unsigned long arg)
 		e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
 		e->rate_est->bps = (e->avbps+0xF)>>5;
 
-		rate = (b.packets - e->last_packets)<<(12 - idx);
+		rate = b.packets - e->last_packets;
+		rate <<= (7 - idx);
 		e->last_packets = b.packets;
 		e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
-		e->rate_est->pps = (e->avpps+0x1FF)>>10;
+		e->rate_est->pps = (e->avpps + 0xF) >> 5;
 skip:
 		read_unlock(&est_lock);
 		spin_unlock(e->stats_lock);
-- 
cgit v1.1


From f7e2965db17dd3b60f05fad88e7afc79ea75b48f Mon Sep 17 00:00:00 2001
From: Satish Ashok <sashok@cumulusnetworks.com>
Date: Mon, 6 Jul 2015 05:53:35 -0700
Subject: bridge: mdb: start delete timer for temp static entries

Start the delete timer when adding temp static entries so they can expire.

Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Fixes: ccb1c31a7a87 ("bridge: add flags to distinguish permanent mdb entires")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mdb.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index e29ad70..3bfc675 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -323,6 +323,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
 	struct net_bridge_mdb_htable *mdb;
+	unsigned long now = jiffies;
 	int err;
 
 	mdb = mlock_dereference(br->mdb, br);
@@ -347,6 +348,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 	if (unlikely(!p))
 		return -ENOMEM;
 	rcu_assign_pointer(*pp, p);
+	if (state == MDB_TEMPORARY)
+		mod_timer(&p->timer, now + br->multicast_membership_interval);
 
 	br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
 	return 0;
-- 
cgit v1.1


From d339727c2b1a10f25e6636670ab6e1841170e328 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 6 Jul 2015 17:13:26 +0200
Subject: net: graceful exit from netif_alloc_netdev_queues()

User space can crash kernel with

ip link add ifb10 numtxqueues 100000 type ifb

We must replace a BUG_ON() by proper test and return -EINVAL for
crazy values.

Fixes: 60877a32bce00 ("net: allow large number of tx queues")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 6778a99..0ad6262 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6409,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	struct netdev_queue *tx;
 	size_t sz = count * sizeof(*tx);
 
-	BUG_ON(count < 1 || count > 0xffff);
+	if (count < 1 || count > 0xffff)
+		return -EINVAL;
 
 	tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
 	if (!tx) {
-- 
cgit v1.1


From 95ec655bc465ccb2a3329d4aff9a45e3c8188db5 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 6 Jul 2015 17:25:10 +0200
Subject: Revert "dev: set iflink to 0 for virtual interfaces"

This reverts commit e1622baf54df8cc958bf29d71de5ad545ea7d93c.

The side effect of this commit is to add a '@NONE' after each virtual
interface name with a 'ip link'. It may break existing scripts.

Reported-by: Olivier Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0ad6262..1e57efd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -677,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev)
 	if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
 		return dev->netdev_ops->ndo_get_iflink(dev);
 
-	/* If dev->rtnl_link_ops is set, it's a virtual interface. */
-	if (dev->rtnl_link_ops)
-		return 0;
-
 	return dev->ifindex;
 }
 EXPORT_SYMBOL(dev_get_iflink);
-- 
cgit v1.1


From 4f7d2cdfdde71ffe962399b7020c674050329423 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 7 Jul 2015 00:07:52 +0200
Subject: rtnetlink: verify IFLA_VF_INFO attributes before passing them to
 driver

Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make
SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes
anymore with respect to their policy, that is, ifla_vfinfo_policy[].

Before, they were part of ifla_policy[], but they have been nested since
placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO,
which is another nested attribute for the actual VF attributes such as
IFLA_VF_MAC, IFLA_VF_VLAN, etc.

Despite the policy being split out from ifla_policy[] in this commit,
it's never applied anywhere. nla_for_each_nested() only does basic nla_ok()
testing for struct nlattr, but it doesn't know about the data context and
their requirements.

Fix, on top of Jason's initial work, does 1) parsing of the attributes
with the right policy, and 2) using the resulting parsed attribute table
from 1) instead of the nla_for_each_nested() loop (just like we used to
do when still part of ifla_policy[]).

Reference: http://thread.gmane.org/gmane.linux.network/368913
Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Rony Efraim <ronye@mellanox.com>
Cc: Vlad Zolotarov <vladz@cloudius-systems.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 187 ++++++++++++++++++++++++++-------------------------
 1 file changed, 96 insertions(+), 91 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 01ced4a..9e433d5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1328,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 	[IFLA_INFO_SLAVE_DATA]	= { .type = NLA_NESTED },
 };
 
-static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
-	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
-};
-
 static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_MAC]		= { .len = sizeof(struct ifla_vf_mac) },
 	[IFLA_VF_VLAN]		= { .len = sizeof(struct ifla_vf_vlan) },
@@ -1488,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 	return 0;
 }
 
-static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 {
-	int rem, err = -EINVAL;
-	struct nlattr *vf;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int err = -EINVAL;
 
-	nla_for_each_nested(vf, attr, rem) {
-		switch (nla_type(vf)) {
-		case IFLA_VF_MAC: {
-			struct ifla_vf_mac *ivm;
-			ivm = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_mac)
-				err = ops->ndo_set_vf_mac(dev, ivm->vf,
-							  ivm->mac);
-			break;
-		}
-		case IFLA_VF_VLAN: {
-			struct ifla_vf_vlan *ivv;
-			ivv = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_vlan)
-				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
-							   ivv->vlan,
-							   ivv->qos);
-			break;
-		}
-		case IFLA_VF_TX_RATE: {
-			struct ifla_vf_tx_rate *ivt;
-			struct ifla_vf_info ivf;
-			ivt = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_get_vf_config)
-				err = ops->ndo_get_vf_config(dev, ivt->vf,
-							     &ivf);
-			if (err)
-				break;
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_rate)
-				err = ops->ndo_set_vf_rate(dev, ivt->vf,
-							   ivf.min_tx_rate,
-							   ivt->rate);
-			break;
-		}
-		case IFLA_VF_RATE: {
-			struct ifla_vf_rate *ivt;
-			ivt = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_rate)
-				err = ops->ndo_set_vf_rate(dev, ivt->vf,
-							   ivt->min_tx_rate,
-							   ivt->max_tx_rate);
-			break;
-		}
-		case IFLA_VF_SPOOFCHK: {
-			struct ifla_vf_spoofchk *ivs;
-			ivs = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_spoofchk)
-				err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
-							       ivs->setting);
-			break;
-		}
-		case IFLA_VF_LINK_STATE: {
-			struct ifla_vf_link_state *ivl;
-			ivl = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_link_state)
-				err = ops->ndo_set_vf_link_state(dev, ivl->vf,
-								 ivl->link_state);
-			break;
-		}
-		case IFLA_VF_RSS_QUERY_EN: {
-			struct ifla_vf_rss_query_en *ivrssq_en;
+	if (tb[IFLA_VF_MAC]) {
+		struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
 
-			ivrssq_en = nla_data(vf);
-			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_rss_query_en)
-				err = ops->ndo_set_vf_rss_query_en(dev,
-							    ivrssq_en->vf,
-							    ivrssq_en->setting);
-			break;
-		}
-		default:
-			err = -EINVAL;
-			break;
-		}
-		if (err)
-			break;
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_mac)
+			err = ops->ndo_set_vf_mac(dev, ivm->vf,
+						  ivm->mac);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_VLAN]) {
+		struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_vlan)
+			err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
+						   ivv->qos);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_TX_RATE]) {
+		struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
+		struct ifla_vf_info ivf;
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_get_vf_config)
+			err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
+		if (err < 0)
+			return err;
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_rate)
+			err = ops->ndo_set_vf_rate(dev, ivt->vf,
+						   ivf.min_tx_rate,
+						   ivt->rate);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_RATE]) {
+		struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_rate)
+			err = ops->ndo_set_vf_rate(dev, ivt->vf,
+						   ivt->min_tx_rate,
+						   ivt->max_tx_rate);
+		if (err < 0)
+			return err;
 	}
+
+	if (tb[IFLA_VF_SPOOFCHK]) {
+		struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_spoofchk)
+			err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+						       ivs->setting);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_LINK_STATE]) {
+		struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_link_state)
+			err = ops->ndo_set_vf_link_state(dev, ivl->vf,
+							 ivl->link_state);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_RSS_QUERY_EN]) {
+		struct ifla_vf_rss_query_en *ivrssq_en;
+
+		err = -EOPNOTSUPP;
+		ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
+		if (ops->ndo_set_vf_rss_query_en)
+			err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
+							   ivrssq_en->setting);
+		if (err < 0)
+			return err;
+	}
+
 	return err;
 }
 
@@ -1773,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb,
 	}
 
 	if (tb[IFLA_VFINFO_LIST]) {
+		struct nlattr *vfinfo[IFLA_VF_MAX + 1];
 		struct nlattr *attr;
 		int rem;
+
 		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
-			if (nla_type(attr) != IFLA_VF_INFO) {
+			if (nla_type(attr) != IFLA_VF_INFO ||
+			    nla_len(attr) < NLA_HDRLEN) {
 				err = -EINVAL;
 				goto errout;
 			}
-			err = do_setvfinfo(dev, attr);
+			err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
+					       ifla_vf_policy);
+			if (err < 0)
+				goto errout;
+			err = do_setvfinfo(dev, vfinfo);
 			if (err < 0)
 				goto errout;
 			status |= DO_SETLINK_NOTIFY;
-- 
cgit v1.1


From fc24f2b2094366da8786f59f2606307e934cea17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Tue, 7 Jul 2015 08:34:13 +0300
Subject: ip_tunnel: fix ipv4 pmtu check to honor inner ip header df
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Frag needed should be sent only if the inner header asked
to not fragment. Currently fragmentation is broken if the
tunnel has df set, but df was not asked in the original
packet. The tunnel's df needs to be still checked to update
internally the pmtu cache.

Commit 23a3647bc4f93bac broke it, and this commit fixes
the ipv4 df check back to the way it was.

Fixes: 23a3647bc4f93bac ("ip_tunnels: Use skb-len to PMTU check.")
Cc: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 4c2c3ba..626d9e5 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
 EXPORT_SYMBOL(ip_tunnel_encap);
 
 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
-			    struct rtable *rt, __be16 df)
+			    struct rtable *rt, __be16 df,
+			    const struct iphdr *inner_iph)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
@@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		if (!skb_is_gso(skb) &&
-		    (df & htons(IP_DF)) && mtu < pkt_size) {
+		    (inner_iph->frag_off & htons(IP_DF)) &&
+		    mtu < pkt_size) {
 			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 			return -E2BIG;
@@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		goto tx_error;
 	}
 
-	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
+	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
 		ip_rt_put(rt);
 		goto tx_error;
 	}
-- 
cgit v1.1


From fdd75ea8df370f206a8163786e7470c1277a5064 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Tue, 7 Jul 2015 09:43:45 -0400
Subject: net/tipc: initialize security state for new connection socket

Calling connect() with an AF_TIPC socket would trigger a series
of error messages from SELinux along the lines of:
SELinux: Invalid class 0
type=AVC msg=audit(1434126658.487:34500): avc:  denied  { <unprintable> }
  for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0
  tcontext=system_u:object_r:unlabeled_t:s0 tclass=<unprintable>
  permissive=0

This was due to a failure to initialize the security state of the new
connection sock by the tipc code, leaving it with junk in the security
class field and an unlabeled secid.  Add a call to security_sk_clone()
to inherit the security state from the parent socket.

Reported-by: Tim Shearer <tim.shearer@overturenetworks.com>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 46b6ed5..3a7567f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2007,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
 	if (res)
 		goto exit;
+	security_sk_clone(sock->sk, new_sock->sk);
 
 	new_sk = new_sock->sk;
 	new_tsock = tipc_sk(new_sk);
-- 
cgit v1.1


From f1158b74e54f2e2462ba5e2f45a118246d9d5b43 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Tue, 7 Jul 2015 15:55:56 +0200
Subject: bridge: mdb: zero out the local br_ip variable before use

Since commit b0e9a30dd669 ("bridge: Add vlan id to multicast groups")
there's a check in br_ip_equal() for a matching vlan id, but the mdb
functions were not modified to use (or at least zero it) so when an
entry was added it would have a garbage vlan id (from the local br_ip
variable in __br_mdb_add/del) and this would prevent it from being
matched and also deleted. So zero out the whole local ip var to protect
ourselves from future changes and also to fix the current bug, since
there's no vlan id support in the mdb uapi - use always vlan id 0.
Example before patch:
root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent
root@debian:~# bridge mdb
dev br0 port eth1 grp 239.0.0.1 permanent
root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent
RTNETLINK answers: Invalid argument

After patch:
root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent
root@debian:~# bridge mdb
dev br0 port eth1 grp 239.0.0.1 permanent
root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent
root@debian:~# bridge mdb

Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Fixes: b0e9a30dd669 ("bridge: Add vlan id to multicast groups")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mdb.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 3bfc675..60868c2 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -374,6 +374,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
 	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
 		return -EINVAL;
 
+	memset(&ip, 0, sizeof(ip));
 	ip.proto = entry->addr.proto;
 	if (ip.proto == htons(ETH_P_IP))
 		ip.u.ip4 = entry->addr.u.ip4;
@@ -420,6 +421,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 	if (!netif_running(br->dev) || br->multicast_disabled)
 		return -EINVAL;
 
+	memset(&ip, 0, sizeof(ip));
 	ip.proto = entry->addr.proto;
 	if (ip.proto == htons(ETH_P_IP)) {
 		if (timer_pending(&br->ip4_other_query.timer))
-- 
cgit v1.1


From 974d7af5fcc295dcf8315255142b2fe44fd74b0c Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Tue, 7 Jul 2015 13:56:57 -0400
Subject: ipv4: add support for linkdown sysctl to netconf

This kernel patch exports the value of the new
ignore_routes_with_linkdown via netconf.

v2: changes to notify userspace via netlink when sysctl values change
and proposed for 'net' since this could be considered a bugfix

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Suggested-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7498716..e813196 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1740,6 +1740,8 @@ static int inet_netconf_msgsize_devconf(int type)
 		size += nla_total_size(4);
 	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
 		size += nla_total_size(4);
+	if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
+		size += nla_total_size(4);
 
 	return size;
 }
@@ -1780,6 +1782,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
 		goto nla_put_failure;
+	if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
+	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
+		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -1819,6 +1825,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
+	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
 };
 
 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
@@ -2048,6 +2055,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
 						    ifindex, cnf);
 		}
+		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
+		    new_value != old_value) {
+			ifindex = devinet_conf_ifindex(net, cnf);
+			inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+						    ifindex, cnf);
+		}
 	}
 
 	return ret;
-- 
cgit v1.1


From fecdf8be2d91e04b0a9a4f79ff06499a36f5d14f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 8 Jul 2015 21:42:11 +0200
Subject: net: pktgen: fix race between pktgen_thread_worker() and
 kthread_stop()

pktgen_thread_worker() is obviously racy, kthread_stop() can come
between the kthread_should_stop() check and set_current_state().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Reported-by: Marcelo Leitner <mleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 05badbb..41489e3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3572,8 +3572,10 @@ static int pktgen_thread_worker(void *arg)
 	pktgen_rem_thread(t);
 
 	/* Wait for kthread_stop */
-	while (!kthread_should_stop()) {
+	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
-- 
cgit v1.1


From 1fbe4b46caca5b01b070af93d513031ffbcc480c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 8 Jul 2015 21:42:13 +0200
Subject: net: pktgen: kill the "Wait for kthread_stop" code in
 pktgen_thread_worker()

pktgen_thread_worker() doesn't need to wait for kthread_stop(), it
can simply exit. Just pktgen_create_thread() and pg_net_exit() should
do get_task_struct()/put_task_struct(). kthread_stop(dead_thread) is
fine.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 41489e3..1ebdf1c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3571,15 +3571,6 @@ static int pktgen_thread_worker(void *arg)
 	pr_debug("%s removing thread\n", t->tsk->comm);
 	pktgen_rem_thread(t);
 
-	/* Wait for kthread_stop */
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (kthread_should_stop())
-			break;
-		schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-
 	return 0;
 }
 
@@ -3771,6 +3762,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
 	}
 
 	t->net = pn;
+	get_task_struct(p);
 	wake_up_process(p);
 	wait_for_completion(&t->start_done);
 
@@ -3893,6 +3885,7 @@ static void __net_exit pg_net_exit(struct net *net)
 		t = list_entry(q, struct pktgen_thread, th_list);
 		list_del(&t->th_list);
 		kthread_stop(t->tsk);
+		put_task_struct(t->tsk);
 		kfree(t);
 	}
 
-- 
cgit v1.1


From a7d35f9d73e9ffa74a02304b817e579eec632f67 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Jul 2015 18:56:07 +0200
Subject: bridge: fix potential crash in __netdev_pick_tx()

Commit c29390c6dfee ("xps: must clear sender_cpu before forwarding")
fixed an issue in normal forward path, caused by sender_cpu & napi_id
skb fields being an union.

Bridge is another point where skb can be forwarded, so we need
the same cure.

Bug triggers if packet was received on a NIC using skb_mark_napi_id()

Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Bob Liu <bob.liu@oracle.com>
Tested-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e97572b..0ff6e1b 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
 	} else {
 		skb_push(skb, ETH_HLEN);
 		br_drop_fake_rtable(skb);
+		skb_sender_cpu_clear(skb);
 		dev_queue_xmit(skb);
 	}
 
-- 
cgit v1.1


From e9e4dd3267d0c5234c5c0f47440456b10875dec9 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Thu, 9 Jul 2015 09:59:09 +0300
Subject: net: do not process device backlog during unregistration

commit 381c759d9916 ("ipv4: Avoid crashing in ip_error")
fixes a problem where processed packet comes from device
with destroyed inetdev (dev->ip_ptr). This is not expected
because inetdev_destroy is called in NETDEV_UNREGISTER
phase and packets should not be processed after
dev_close_many() and synchronize_net(). Above fix is still
required because inetdev_destroy can be called for other
reasons. But it shows the real problem: backlog can keep
packets for long time and they do not hold reference to
device. Such packets are then delivered to upper levels
at the same time when device is unregistered.
Calling flush_backlog after NETDEV_UNREGISTER_FINAL still
accounts all packets from backlog but before that some packets
continue to be delivered to upper levels long after the
synchronize_net call which is supposed to wait the last
ones. Also, as Eric pointed out, processed packets, mostly
from other devices, can continue to add new packets to backlog.

Fix the problem by moving flush_backlog early, after the
device driver is stopped and before the synchronize_net() call.
Then use netif_running check to make sure we do not add more
packets to backlog. We have to do it in enqueue_to_backlog
context when the local IRQ is disabled. As result, after the
flush_backlog and synchronize_net sequence all packets
should be accounted.

Thanks to Eric W. Biederman for the test script and his
valuable feedback!

Reported-by: Vittorio Gambaletta <linuxbugs@vittgam.net>
Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue")
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 1e57efd..6dd126a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3448,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	local_irq_save(flags);
 
 	rps_lock(sd);
+	if (!netif_running(skb->dev))
+		goto drop;
 	qlen = skb_queue_len(&sd->input_pkt_queue);
 	if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
 		if (qlen) {
@@ -3469,6 +3471,7 @@ enqueue:
 		goto enqueue;
 	}
 
+drop:
 	sd->dropped++;
 	rps_unlock(sd);
 
@@ -6135,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head)
 		unlist_netdevice(dev);
 
 		dev->reg_state = NETREG_UNREGISTERING;
+		on_each_cpu(flush_backlog, dev, 1);
 	}
 
 	synchronize_net();
@@ -6770,8 +6774,6 @@ void netdev_run_todo(void)
 
 		dev->reg_state = NETREG_UNREGISTERED;
 
-		on_each_cpu(flush_backlog, dev, 1);
-
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
-- 
cgit v1.1


From 2c17d27c36dcce2b6bf689f41a46b9e909877c21 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Thu, 9 Jul 2015 09:59:10 +0300
Subject: net: call rcu_read_lock early in process_backlog

Incoming packet should be either in backlog queue or
in RCU read-side section. Otherwise, the final sequence of
flush_backlog() and synchronize_net() may miss packets
that can run without device reference:

CPU 1                  CPU 2
                       skb->dev: no reference
                       process_backlog:__skb_dequeue
                       process_backlog:local_irq_enable

on_each_cpu for
flush_backlog =>       IPI(hardirq): flush_backlog
                       - packet not found in backlog

                       CPU delayed ...
synchronize_net
- no ongoing RCU
read-side sections

netdev_run_todo,
rcu_barrier: no
ongoing callbacks
                       __netif_receive_skb_core:rcu_read_lock
                       - too late
free dev
                       process packet for freed dev

Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue")
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 6dd126a..a8e4dd4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3774,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
 	pt_prev = NULL;
 
-	rcu_read_lock();
-
 another_round:
 	skb->skb_iif = skb->dev->ifindex;
 
@@ -3785,7 +3783,7 @@ another_round:
 	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 		skb = skb_vlan_untag(skb);
 		if (unlikely(!skb))
-			goto unlock;
+			goto out;
 	}
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -3815,10 +3813,10 @@ skip_taps:
 	if (static_key_false(&ingress_needed)) {
 		skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
 		if (!skb)
-			goto unlock;
+			goto out;
 
 		if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
-			goto unlock;
+			goto out;
 	}
 #endif
 #ifdef CONFIG_NET_CLS_ACT
@@ -3836,7 +3834,7 @@ ncls:
 		if (vlan_do_receive(&skb))
 			goto another_round;
 		else if (unlikely(!skb))
-			goto unlock;
+			goto out;
 	}
 
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
@@ -3848,7 +3846,7 @@ ncls:
 		switch (rx_handler(&skb)) {
 		case RX_HANDLER_CONSUMED:
 			ret = NET_RX_SUCCESS;
-			goto unlock;
+			goto out;
 		case RX_HANDLER_ANOTHER:
 			goto another_round;
 		case RX_HANDLER_EXACT:
@@ -3902,8 +3900,7 @@ drop:
 		ret = NET_RX_DROP;
 	}
 
-unlock:
-	rcu_read_unlock();
+out:
 	return ret;
 }
 
@@ -3934,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
+	int ret;
+
 	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
 
+	rcu_read_lock();
+
 #ifdef CONFIG_RPS
 	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
-		int cpu, ret;
-
-		rcu_read_lock();
-
-		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		int cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
 		if (cpu >= 0) {
 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			rcu_read_unlock();
 			return ret;
 		}
-		rcu_read_unlock();
 	}
 #endif
-	return __netif_receive_skb(skb);
+	ret = __netif_receive_skb(skb);
+	rcu_read_unlock();
+	return ret;
 }
 
 /**
@@ -4501,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		struct sk_buff *skb;
 
 		while ((skb = __skb_dequeue(&sd->process_queue))) {
+			rcu_read_lock();
 			local_irq_enable();
 			__netif_receive_skb(skb);
+			rcu_read_unlock();
 			local_irq_disable();
 			input_queue_head_incr(sd);
 			if (++work >= quota) {
-- 
cgit v1.1


From 51ed7f3e7d33824820837ad784801973f147c51a Mon Sep 17 00:00:00 2001
From: Satish Ashok <sashok@cumulusnetworks.com>
Date: Thu, 9 Jul 2015 04:12:45 -0700
Subject: bridge: mdb: allow the user to delete mdb entry if there's a querier

Until now when a querier was present static entries couldn't be deleted.
Fix this and allow the user to manipulate the mdb with or without a
querier.

Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mdb.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 60868c2..c11cf26 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -423,19 +423,12 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 
 	memset(&ip, 0, sizeof(ip));
 	ip.proto = entry->addr.proto;
-	if (ip.proto == htons(ETH_P_IP)) {
-		if (timer_pending(&br->ip4_other_query.timer))
-			return -EBUSY;
-
+	if (ip.proto == htons(ETH_P_IP))
 		ip.u.ip4 = entry->addr.u.ip4;
 #if IS_ENABLED(CONFIG_IPV6)
-	} else {
-		if (timer_pending(&br->ip6_other_query.timer))
-			return -EBUSY;
-
+	else
 		ip.u.ip6 = entry->addr.u.ip6;
 #endif
-	}
 
 	spin_lock_bh(&br->multicast_lock);
 	mdb = mlock_dereference(br->mdb, br);
-- 
cgit v1.1


From 8220ea23243178c16b87fc3ccadf071647b64e04 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Fri, 10 Jul 2015 11:39:57 +0200
Subject: net: inet_diag: always export IPV6_V6ONLY sockopt for listening
 sockets

Reconsidering my commit 20462155 "net: inet_diag: export IPV6_V6ONLY
sockopt", I am not happy with the limitations it causes for socket
analysing code in userspace. Exporting the value only if it is set makes
it hard for userspace to decide whether the option is not set or the
kernel does not support exporting the option at all.

>From an auditor's perspective, the interesting question for listening
AF_INET6 sockets is: "Does it NOT have IPV6_V6ONLY set?" Because it is
the unexpected case. This patch allows to answer this question reliably.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9bc2667..c3b1f3a 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -152,8 +152,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 				       inet6_sk(sk)->tclass) < 0)
 				goto errout;
 
-		if (ipv6_only_sock(sk) &&
-		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1))
+		if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
+		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
 			goto errout;
 	}
 #endif
-- 
cgit v1.1


From 2ee94014d9bd3868b1c0d17405f96d63bec83f28 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Fri, 10 Jul 2015 19:48:58 -0400
Subject: net: switchdev: don't abort unsupported operations

There is no need to abort attribute setting or object addition, if the
prepare phase returned operation not supported.

Thus, abort these two transactions only if the error is not -EOPNOTSUPP.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/switchdev/switchdev.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 84f77a0..9f2add3 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -171,8 +171,10 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
 		 * released.
 		 */
 
-		attr->trans = SWITCHDEV_TRANS_ABORT;
-		__switchdev_port_attr_set(dev, attr);
+		if (err != -EOPNOTSUPP) {
+			attr->trans = SWITCHDEV_TRANS_ABORT;
+			__switchdev_port_attr_set(dev, attr);
+		}
 
 		return err;
 	}
@@ -249,8 +251,10 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
 		 * released.
 		 */
 
-		obj->trans = SWITCHDEV_TRANS_ABORT;
-		__switchdev_port_obj_add(dev, obj);
+		if (err != -EOPNOTSUPP) {
+			obj->trans = SWITCHDEV_TRANS_ABORT;
+			__switchdev_port_obj_add(dev, obj);
+		}
 
 		return err;
 	}
-- 
cgit v1.1


From 8f5063e97f393d49611151d3cf7dcbeb41397f12 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 11 Jul 2015 18:02:10 -0700
Subject: net: dsa: Test array index before use

port_index is used an index into an array, and this information comes
from Device Tree, make sure that port_index is not equal to the array
size before using it. Move the check against port_index earlier in the
loop.

Fixes: 5e95329b701c: ("dsa: add device tree bindings to register DSA switches")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 392e29a..52beeb8 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -642,6 +642,8 @@ static int dsa_of_probe(struct device *dev)
 				continue;
 
 			port_index = be32_to_cpup(port_reg);
+			if (port_index >= DSA_MAX_PORTS)
+				break;
 
 			port_name = of_get_property(port, "label", NULL);
 			if (!port_name)
@@ -666,8 +668,6 @@ static int dsa_of_probe(struct device *dev)
 					goto out_free_chip;
 			}
 
-			if (port_index == DSA_MAX_PORTS)
-				break;
 		}
 	}
 
-- 
cgit v1.1


From c8cf89f73f3d9ecbdea479778f0ac714be79be33 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 11 Jul 2015 18:02:11 -0700
Subject: net: dsa: Fix off-by-one in switch address parsing

cd->sw_addr is used as a MDIO bus address, which cannot exceed
PHY_MAX_ADDR (32), our check was off-by-one.

Fixes: 5e95329b701c ("dsa: add device tree bindings to register DSA switches")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 52beeb8..b445d49 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -630,7 +630,7 @@ static int dsa_of_probe(struct device *dev)
 			continue;
 
 		cd->sw_addr = be32_to_cpup(sw_addr);
-		if (cd->sw_addr > PHY_MAX_ADDR)
+		if (cd->sw_addr >= PHY_MAX_ADDR)
 			continue;
 
 		if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
-- 
cgit v1.1


From d3b58c47d330de8c29898fe9746f7530408f8a59 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Fri, 26 Jun 2015 11:58:19 +0200
Subject: can: replace timestamp as unique skb attribute

Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for
overlapping CAN filters" requires the skb->tstamp to be set to check for
identical CAN skbs.

Without timestamping to be required by user space applications this timestamp
was not generated which lead to commit 36c01245eb8 "can: fix loss of CAN frames
in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs
by introducing several __net_timestamp() calls.

This forces e.g. out of tree drivers which are not using alloc_can{,fd}_skb()
to add __net_timestamp() after skbuff creation to prevent the frame loss fixed
in mainline Linux.

This patch removes the timestamp dependency and uses an atomic counter to
create an unique identifier together with the skbuff pointer.

Btw: the new skbcnt element introduced in struct can_skb_priv has to be
initialized with zero in out-of-tree drivers which are not using
alloc_can{,fd}_skb() too.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/af_can.c | 12 +++++++-----
 net/can/bcm.c    |  2 ++
 net/can/raw.c    |  7 ++++---
 3 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7933e62..166d436 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -89,6 +89,8 @@ struct timer_list can_stattimer;   /* timer for statistics update */
 struct s_stats    can_stats;       /* packet statistics */
 struct s_pstats   can_pstats;      /* receive list statistics */
 
+static atomic_t skbcounter = ATOMIC_INIT(0);
+
 /*
  * af_can socket functions
  */
@@ -310,12 +312,8 @@ int can_send(struct sk_buff *skb, int loop)
 		return err;
 	}
 
-	if (newskb) {
-		if (!(newskb->tstamp.tv64))
-			__net_timestamp(newskb);
-
+	if (newskb)
 		netif_rx_ni(newskb);
-	}
 
 	/* update statistics */
 	can_stats.tx_frames++;
@@ -683,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
 	can_stats.rx_frames++;
 	can_stats.rx_frames_delta++;
 
+	/* create non-zero unique skb identifier together with *skb */
+	while (!(can_skb_prv(skb)->skbcnt))
+		can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter);
+
 	rcu_read_lock();
 
 	/* deliver the packet to sockets listening on all devices */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index b523453..a1ba687 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op)
 
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = dev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
 
 	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
 
@@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 	}
 
 	can_skb_prv(skb)->ifindex = dev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
 	skb->dev = dev;
 	can_skb_set_owner(skb, sk);
 	err = can_send(skb, 1); /* send with loopback */
diff --git a/net/can/raw.c b/net/can/raw.c
index 31b9748..2e67b14 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1");
  */
 
 struct uniqframe {
-	ktime_t tstamp;
+	int skbcnt;
 	const struct sk_buff *skb;
 	unsigned int join_rx_count;
 };
@@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
 
 	/* eliminate multiple filter matches for the same skb */
 	if (this_cpu_ptr(ro->uniq)->skb == oskb &&
-	    ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) {
+	    this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) {
 		if (ro->join_filters) {
 			this_cpu_inc(ro->uniq->join_rx_count);
 			/* drop frame until all enabled filters matched */
@@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
 		}
 	} else {
 		this_cpu_ptr(ro->uniq)->skb = oskb;
-		this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp;
+		this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt;
 		this_cpu_ptr(ro->uniq)->join_rx_count = 1;
 		/* drop first frame to check all enabled filters? */
 		if (ro->join_filters && ro->count > 1)
@@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = dev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
 
 	err = memcpy_from_msg(skb_put(skb, size), msg, size);
 	if (err < 0)
-- 
cgit v1.1