From bf798657eb5ba57552096843c315f096fdf9b715 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 12 Aug 2015 17:41:00 +0200
Subject: netfilter: nf_tables: Use 32 bit addressing register from
 nft_type_to_reg()

nft_type_to_reg() needs to return the register in the new 32 bit addressing,
otherwise we hit EINVAL when using mappings.

Fixes: 49499c3 ("netfilter: nf_tables: switch registers to 32 bit addressing")
Reported-by: Andreas Schultz <aschultz@tpip.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2a24668..aa8bee7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -125,7 +125,7 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)
 
 static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
 {
-	return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1;
+	return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
 }
 
 unsigned int nft_parse_register(const struct nlattr *attr);
-- 
cgit v1.1


From 18e1db67e93ed75d9dc0d34c8d783ccf10547c2b Mon Sep 17 00:00:00 2001
From: Bernhard Thaler <bernhard.thaler@wvnet.at>
Date: Thu, 13 Aug 2015 08:58:15 +0200
Subject: netfilter: bridge: fix IPv6 packets not being bridged with
 CONFIG_IPV6=n

230ac490f7fba introduced a dependency to CONFIG_IPV6 which breaks bridging
of IPv6 packets on a bridge with CONFIG_IPV6=n.

Sysctl entry /proc/sys/net/bridge/bridge-nf-call-ip6tables defaults to 1,
for this reason packets are handled by br_nf_pre_routing_ipv6(). When compiled
with CONFIG_IPV6=n this function returns NF_DROP but should return NF_ACCEPT
to let packets through.

Change CONFIG_IPV6=n br_nf_pre_routing_ipv6() return value to NF_ACCEPT.

Tested with a simple bridge with two interfaces and IPv6 packets trying
to pass from host on left side to host on right side of the bridge.

Fixes: 230ac490f7fba ("netfilter: bridge: split ipv6 code into separated file")
Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/br_netfilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index bab824b..d4c6b5f 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -59,7 +59,7 @@ static inline unsigned int
 br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		       const struct nf_hook_state *state)
 {
-	return NF_DROP;
+	return NF_ACCEPT;
 }
 #endif
 
-- 
cgit v1.1


From 6fe7ccfd77415a6ba250c10c580eb3f9acf79753 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 25 Aug 2015 11:17:51 +0200
Subject: netfilter: ipset: Out of bound access in hash:net* types fixed

Dave Jones reported that KASan detected out of bounds access in hash:net*
types:

[   23.139532] ==================================================================
[   23.146130] BUG: KASan: out of bounds access in hash_net4_add_cidr+0x1db/0x220 at addr ffff8800d4844b58
[   23.152937] Write of size 4 by task ipset/457
[   23.159742] =============================================================================
[   23.166672] BUG kmalloc-512 (Not tainted): kasan: bad access detected
[   23.173641] -----------------------------------------------------------------------------
[   23.194668] INFO: Allocated in hash_net_create+0x16a/0x470 age=7 cpu=1 pid=456
[   23.201836]  __slab_alloc.constprop.66+0x554/0x620
[   23.208994]  __kmalloc+0x2f2/0x360
[   23.216105]  hash_net_create+0x16a/0x470
[   23.223238]  ip_set_create+0x3e6/0x740
[   23.230343]  nfnetlink_rcv_msg+0x599/0x640
[   23.237454]  netlink_rcv_skb+0x14f/0x190
[   23.244533]  nfnetlink_rcv+0x3f6/0x790
[   23.251579]  netlink_unicast+0x272/0x390
[   23.258573]  netlink_sendmsg+0x5a1/0xa50
[   23.265485]  SYSC_sendto+0x1da/0x2c0
[   23.272364]  SyS_sendto+0xe/0x10
[   23.279168]  entry_SYSCALL_64_fastpath+0x12/0x6f

The bug is fixed in the patch and the testsuite is extended in ipset
to check cidr handling more thoroughly.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 net/netfilter/ipset/ip_set_hash_gen.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index afe905c..691b54f 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -152,9 +152,13 @@ htable_bits(u32 hashsize)
 #define SET_HOST_MASK(family)	(family == AF_INET ? 32 : 128)
 
 #ifdef IP_SET_HASH_WITH_NET0
+/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */
 #define NLEN(family)		(SET_HOST_MASK(family) + 1)
+#define CIDR_POS(c)		((c) - 1)
 #else
+/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */
 #define NLEN(family)		SET_HOST_MASK(family)
+#define CIDR_POS(c)		((c) - 2)
 #endif
 
 #else
@@ -305,7 +309,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 		} else if (h->nets[i].cidr[n] < cidr) {
 			j = i;
 		} else if (h->nets[i].cidr[n] == cidr) {
-			h->nets[cidr - 1].nets[n]++;
+			h->nets[CIDR_POS(cidr)].nets[n]++;
 			return;
 		}
 	}
@@ -314,7 +318,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
 	}
 	h->nets[i].cidr[n] = cidr;
-	h->nets[cidr - 1].nets[n] = 1;
+	h->nets[CIDR_POS(cidr)].nets[n] = 1;
 }
 
 static void
@@ -325,8 +329,8 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 	for (i = 0; i < nets_length; i++) {
 		if (h->nets[i].cidr[n] != cidr)
 			continue;
-		h->nets[cidr - 1].nets[n]--;
-		if (h->nets[cidr - 1].nets[n] > 0)
+		h->nets[CIDR_POS(cidr)].nets[n]--;
+		if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
 			return;
 		for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
 			h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
-- 
cgit v1.1


From 96be5f2806cd65a2ebced3bfcdf7df0116e6c4a6 Mon Sep 17 00:00:00 2001
From: Elad Raz <eladr@mellanox.com>
Date: Sat, 22 Aug 2015 08:44:11 +0300
Subject: netfilter: ipset: Fixing unnamed union init

In continue to proposed Vinson Lee's post [1], this patch fixes compilation
issues founded at gcc 4.4.7. The initialization of .cidr field of unnamed
unions causes compilation error in gcc 4.4.x.

References

Visible links
[1] https://lkml.org/lkml/2015/7/5/74

Signed-off-by: Elad Raz <eladr@mellanox.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_netnet.c     | 20 ++++++++++++++++++--
 net/netfilter/ipset/ip_set_hash_netportnet.c | 20 ++++++++++++++++++--
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 3c862c0..a93dfeb 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -131,6 +131,13 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next,
 #define HOST_MASK	32
 #include "ip_set_hash_gen.h"
 
+static void
+hash_netnet4_init(struct hash_netnet4_elem *e)
+{
+	e->cidr[0] = HOST_MASK;
+	e->cidr[1] = HOST_MASK;
+}
+
 static int
 hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
@@ -160,7 +167,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 {
 	const struct hash_netnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
+	struct hash_netnet4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, last;
 	u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
@@ -169,6 +176,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
+	hash_netnet4_init(&e);
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
@@ -357,6 +365,13 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next,
 #define IP_SET_EMIT_CREATE
 #include "ip_set_hash_gen.h"
 
+static void
+hash_netnet6_init(struct hash_netnet6_elem *e)
+{
+	e->cidr[0] = HOST_MASK;
+	e->cidr[1] = HOST_MASK;
+}
+
 static int
 hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
@@ -385,13 +400,14 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
+	struct hash_netnet6_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
+	hash_netnet6_init(&e);
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 0c68734..9a14c23 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -142,6 +142,13 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
 #define HOST_MASK	32
 #include "ip_set_hash_gen.h"
 
+static void
+hash_netportnet4_init(struct hash_netportnet4_elem *e)
+{
+	e->cidr[0] = HOST_MASK;
+	e->cidr[1] = HOST_MASK;
+}
+
 static int
 hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		      const struct xt_action_param *par,
@@ -175,7 +182,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netportnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
+	struct hash_netportnet4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
 	u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
@@ -185,6 +192,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
+	hash_netportnet4_init(&e);
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -412,6 +420,13 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
 #define IP_SET_EMIT_CREATE
 #include "ip_set_hash_gen.h"
 
+static void
+hash_netportnet6_init(struct hash_netportnet6_elem *e)
+{
+	e->cidr[0] = HOST_MASK;
+	e->cidr[1] = HOST_MASK;
+}
+
 static int
 hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		      const struct xt_action_param *par,
@@ -445,7 +460,7 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netportnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
+	struct hash_netportnet6_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
@@ -454,6 +469,7 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
+	hash_netportnet6_init(&e);
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
-- 
cgit v1.1


From a9de9777d613500b089a7416f936bf3ae5f070d2 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 28 Aug 2015 21:01:43 +0200
Subject: netfilter: nfnetlink: work around wrong endianess in res_id field

The convention in nfnetlink is to use network byte order in every header field
as well as in the attribute payload. The initial version of the batching
infrastructure assumes that res_id comes in host byte order though.

The only client of the batching infrastructure is nf_tables, so let's add a
workaround to address this inconsistency. We currently have 11 nfnetlink
subsystems according to NFNL_SUBSYS_COUNT, so we can assume that the subsystem
2560, ie. htons(10), will not be allocated anytime soon, so it can be an alias
of nf_tables from the nfnetlink batching path when interpreting the res_id
field.

Based on original patch from Florian Westphal.

Reported-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 0c0e8ec..70277b1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -444,6 +444,7 @@ done:
 static void nfnetlink_rcv(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(skb);
+	u_int16_t res_id;
 	int msglen;
 
 	if (nlh->nlmsg_len < NLMSG_HDRLEN ||
@@ -468,7 +469,12 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 
 		nfgenmsg = nlmsg_data(nlh);
 		skb_pull(skb, msglen);
-		nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
+		/* Work around old nft using host byte order */
+		if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
+			res_id = NFNL_SUBSYS_NFTABLES;
+		else
+			res_id = ntohs(nfgenmsg->res_id);
+		nfnetlink_rcv_batch(skb, nlh, res_id);
 	} else {
 		netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
 	}
-- 
cgit v1.1


From 9cf94eab8b309e8bcc78b41dd1561c75b537dd0b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 31 Aug 2015 19:11:02 +0200
Subject: netfilter: conntrack: use nf_ct_tmpl_free in CT/synproxy error paths

Commit 0838aa7fcfcd ("netfilter: fix netns dependencies with conntrack
templates") migrated templates to the new allocator api, but forgot to
update error paths for them in CT and synproxy to use nf_ct_tmpl_free()
instead of nf_conntrack_free().

Due to that, memory is being freed into the wrong kmemcache, but also
we drop the per net reference count of ct objects causing an imbalance.

In Brad's case, this leads to a wrap-around of net->ct.count and thus
lets __nf_conntrack_alloc() refuse to create a new ct object:

  [   10.340913] xt_addrtype: ipv6 does not support BROADCAST matching
  [   10.810168] nf_conntrack: table full, dropping packet
  [   11.917416] r8169 0000:07:00.0 eth0: link up
  [   11.917438] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
  [   12.815902] nf_conntrack: table full, dropping packet
  [   15.688561] nf_conntrack: table full, dropping packet
  [   15.689365] nf_conntrack: table full, dropping packet
  [   15.690169] nf_conntrack: table full, dropping packet
  [   15.690967] nf_conntrack: table full, dropping packet
  [...]

With slab debugging, it also reports the wrong kmemcache (kmalloc-512 vs.
nf_conntrack_ffffffff81ce75c0) and reports poison overwrites, etc. Thus,
to fix the problem, export and use nf_ct_tmpl_free() instead.

Fixes: 0838aa7fcfcd ("netfilter: fix netns dependencies with conntrack templates")
Reported-by: Brad Jackson <bjackson0971@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 1 +
 net/netfilter/nf_conntrack_core.c    | 3 ++-
 net/netfilter/nf_synproxy_core.c     | 2 +-
 net/netfilter/xt_CT.c                | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 37cd391..4023c4c 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -292,6 +292,7 @@ extern unsigned int nf_conntrack_hash_rnd;
 void init_nf_conntrack_hash_rnd(void);
 
 struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
+void nf_ct_tmpl_free(struct nf_conn *tmpl);
 
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3c20d02..0625a42 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -320,12 +320,13 @@ out_free:
 }
 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
 
-static void nf_ct_tmpl_free(struct nf_conn *tmpl)
+void nf_ct_tmpl_free(struct nf_conn *tmpl)
 {
 	nf_ct_ext_destroy(tmpl);
 	nf_ct_ext_free(tmpl);
 	kfree(tmpl);
 }
+EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
 
 static void
 destroy_conntrack(struct nf_conntrack *nfct)
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index d7f1685..d6ee8f8 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -378,7 +378,7 @@ static int __net_init synproxy_net_init(struct net *net)
 err3:
 	free_percpu(snet->stats);
 err2:
-	nf_conntrack_free(ct);
+	nf_ct_tmpl_free(ct);
 err1:
 	return err;
 }
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 43ddeee..f3377ce 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -233,7 +233,7 @@ out:
 	return 0;
 
 err3:
-	nf_conntrack_free(ct);
+	nf_ct_tmpl_free(ct);
 err2:
 	nf_ct_l3proto_module_put(par->family);
 err1:
-- 
cgit v1.1


From 4548a697e4969d695047cebd6d9af5e2f6cc728e Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
Date: Wed, 2 Sep 2015 17:49:29 +0900
Subject: net: eth: altera: fix napi poll_list corruption

tse_poll() calls __napi_complete() with irq enabled.  This leads napi
poll_list corruption and may stop all napi drivers working.
Use napi_complete() instead of __napi_complete().

Signed-off-by: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/altera/altera_tse_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index da48e66..8207877 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -511,8 +511,7 @@ static int tse_poll(struct napi_struct *napi, int budget)
 
 	if (rxcomplete < budget) {
 
-		napi_gro_flush(napi, false);
-		__napi_complete(napi);
+		napi_complete(napi);
 
 		netdev_dbg(priv->dev,
 			   "NAPI Complete, did %d packets with budget %d\n",
-- 
cgit v1.1


From d82f0f1fc8a4f214a50c9dfc64e3896f9894afb7 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Wed, 2 Sep 2015 16:20:21 -0300
Subject: sctp: fix dst leak

Commit 0ca50d12fe46 failed to release the reference to dst entries that
it decided to skip.

Fixes: 0ca50d12fe46 ("sctp: fix src address selection if using secondary addresses")
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4345790..4abf94d 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -511,8 +511,10 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		 */
 		odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr,
 				     false);
-		if (!odev || odev->ifindex != fl4->flowi4_oif)
+		if (!odev || odev->ifindex != fl4->flowi4_oif) {
+			dst_release(&rt->dst);
 			continue;
+		}
 
 		dst = &rt->dst;
 		break;
-- 
cgit v1.1


From 410f03831c0768f2b1850d28ba697b167ddcb89b Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Wed, 2 Sep 2015 16:20:22 -0300
Subject: sctp: add routing output fallback

Commit 0ca50d12fe46 added a restriction that the address must belong to
the output interface, so that sctp will use the right interface even
when using secondary addresses.

But it breaks IPVS setups, on which people is used to attach VIP
addresses to loopback interface on real servers. It's preferred to
attach to the interface actually in use, but it's a very common setup
and that used to work.

This patch then saves the first routing good result, even if it would be
going out through an interface that doesn't have that address. If no
better hit found, it's then used. This effectively restores the original
behavior if no better interface could be found.

Fixes: 0ca50d12fe46 ("sctp: fix src address selection if using secondary addresses")
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4abf94d..b714333 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -506,16 +506,22 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		if (IS_ERR(rt))
 			continue;
 
+		if (!dst)
+			dst = &rt->dst;
+
 		/* Ensure the src address belongs to the output
 		 * interface.
 		 */
 		odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr,
 				     false);
 		if (!odev || odev->ifindex != fl4->flowi4_oif) {
-			dst_release(&rt->dst);
+			if (&rt->dst != dst)
+				dst_release(&rt->dst);
 			continue;
 		}
 
+		if (dst != &rt->dst)
+			dst_release(dst);
 		dst = &rt->dst;
 		break;
 	}
-- 
cgit v1.1


From 98a1f8282b8c37378c1b947d661a58942331ca90 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 26 Aug 2015 12:22:14 +0200
Subject: mac80211: Do not use sizeof() on pointer type

The rate_control_cap_mask() function takes a parameter mcs_mask, which
GCC will take to be u8 * even though it was declared with a fixed size.
This causes the following warning:

	net/mac80211/rate.c: In function 'rate_control_cap_mask':
	net/mac80211/rate.c:719:25: warning: 'sizeof' on array function parameter 'mcs_mask' will return size of 'u8 * {aka unsigned char *}' [-Wsizeof-array-argument]
	   for (i = 0; i < sizeof(mcs_mask); i++)
	                         ^
	net/mac80211/rate.c:684:10: note: declared here
	       u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN],
	          ^

This can be easily fixed by using the IEEE80211_HT_MCS_MASK_LEN directly
within the loop condition.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 9857693..9ce8883 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -716,7 +716,7 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata,
 
 		/* Filter out rates that the STA does not support */
 		*mask &= sta->supp_rates[sband->band];
-		for (i = 0; i < sizeof(mcs_mask); i++)
+		for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
 			mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i];
 
 		sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map;
-- 
cgit v1.1


From 22f66895e60cfc55b92f6fa93f05bb3fbdbd0bed Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@intel.com>
Date: Tue, 18 Aug 2015 16:52:07 +0300
Subject: mac80211: protect non-HT BSS when HT TDLS traffic exists

HT TDLS traffic should be protected in a non-HT BSS to avoid
collisions. Therefore, when TDLS peers join/leave, check if
protection is (now) needed and set the ht_operation_mode of
the virtual interface according to the HT capabilities of the
TDLS peer(s).

This works because a non-HT BSS connection never sets (or
otherwise uses) the ht_operation_mode; it just means that
drivers must be aware that this field applies to all HT
traffic for this virtual interface, not just the traffic
within the BSS. Document that.

Signed-off-by: Avri Altman <avri.altman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  4 ++-
 net/mac80211/tdls.c    | 70 +++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e3314e5..bfc5694 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -477,7 +477,9 @@ struct ieee80211_event {
  * @chandef: Channel definition for this BSS -- the hardware might be
  *	configured a higher bandwidth than this BSS uses, for example.
  * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation.
- *	This field is only valid when the channel type is one of the HT types.
+ *	This field is only valid when the channel is a wide HT/VHT channel.
+ *	Note that with TDLS this can be the case (channel is HT, protection must
+ *	be used from this field) even when the BSS association isn't using HT.
  * @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value
  *	implies disabled
  * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index aee701a..4e202d0 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1249,6 +1249,58 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata)
 	mutex_unlock(&local->chanctx_mtx);
 }
 
+static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata)
+{
+	struct sta_info *sta;
+	bool result = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+		if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
+		    !test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
+		    !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH) ||
+		    !sta->sta.ht_cap.ht_supported)
+			continue;
+		result = true;
+		break;
+	}
+	rcu_read_unlock();
+
+	return result;
+}
+
+static void
+iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata,
+				   struct sta_info *sta)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	bool tdls_ht;
+	u16 protection = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED |
+			 IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
+			 IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
+	u16 opmode;
+
+	/* Nothing to do if the BSS connection uses HT */
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
+		return;
+
+	tdls_ht = (sta && sta->sta.ht_cap.ht_supported) ||
+		  iee80211_tdls_have_ht_peers(sdata);
+
+	opmode = sdata->vif.bss_conf.ht_operation_mode;
+
+	if (tdls_ht)
+		opmode |= protection;
+	else
+		opmode &= ~protection;
+
+	if (opmode == sdata->vif.bss_conf.ht_operation_mode)
+		return;
+
+	sdata->vif.bss_conf.ht_operation_mode = opmode;
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
+}
+
 int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
 			const u8 *peer, enum nl80211_tdls_operation oper)
 {
@@ -1274,6 +1326,10 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
 		return -ENOTSUPP;
 	}
 
+	/* protect possible bss_conf changes and avoid concurrency in
+	 * ieee80211_bss_info_change_notify()
+	 */
+	sdata_lock(sdata);
 	mutex_lock(&local->mtx);
 	tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
 
@@ -1287,16 +1343,18 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
 
 		iee80211_tdls_recalc_chanctx(sdata);
 
-		rcu_read_lock();
+		mutex_lock(&local->sta_mtx);
 		sta = sta_info_get(sdata, peer);
 		if (!sta) {
-			rcu_read_unlock();
+			mutex_unlock(&local->sta_mtx);
 			ret = -ENOLINK;
 			break;
 		}
 
+		iee80211_tdls_recalc_ht_protection(sdata, sta);
+
 		set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
-		rcu_read_unlock();
+		mutex_unlock(&local->sta_mtx);
 
 		WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) ||
 			     !ether_addr_equal(sdata->u.mgd.tdls_peer, peer));
@@ -1318,6 +1376,11 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
 		ieee80211_flush_queues(local, sdata, false);
 
 		ret = sta_info_destroy_addr(sdata, peer);
+
+		mutex_lock(&local->sta_mtx);
+		iee80211_tdls_recalc_ht_protection(sdata, NULL);
+		mutex_unlock(&local->sta_mtx);
+
 		iee80211_tdls_recalc_chanctx(sdata);
 		break;
 	default:
@@ -1335,6 +1398,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
 				     &sdata->u.mgd.request_smps_work);
 
 	mutex_unlock(&local->mtx);
+	sdata_unlock(sdata);
 	return ret;
 }
 
-- 
cgit v1.1


From 4c0778933a3d7c35a94e8c35847acd9bb59a257d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= <jprvita@gmail.com>
Date: Tue, 25 Aug 2015 08:56:43 -0400
Subject: rfkill: Copy "all" global state to other types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When switching the state of all RFKill switches of type all we need to
replicate the RFKILL_TYPE_ALL global state to all the other types global
state, so it is used to initialize persistent RFKill switches on
register.

Signed-off-by: João Paulo Rechi Vita <jprvita@endlessm.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/rfkill/core.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index f12149a..b41e9ea 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -341,7 +341,15 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
 {
 	struct rfkill *rfkill;
 
-	rfkill_global_states[type].cur = blocked;
+	if (type == RFKILL_TYPE_ALL) {
+		int i;
+
+		for (i = 0; i < NUM_RFKILL_TYPES; i++)
+			rfkill_global_states[i].cur = blocked;
+	} else {
+		rfkill_global_states[type].cur = blocked;
+	}
+
 	list_for_each_entry(rfkill, &rfkill_list, node) {
 		if (rfkill->type != type && type != RFKILL_TYPE_ALL)
 			continue;
-- 
cgit v1.1


From 549cc1c560128d583698ba9a73af283fe87dbab8 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
Date: Wed, 2 Sep 2015 19:00:31 +0200
Subject: cfg80211: regulatory: restore proper user alpha2

restore_regulatory_settings() should restore alpha2
as computed in restore_alpha2(), not raw user_alpha2 to
behave as described in the comment just above that code.

This fixes endless loop of calling CRDA for "00" and "97"
countries after resume from suspend on my laptop.

Looks like others had the same problem, too:
http://ath9k-devel.ath9k.narkive.com/knY5W6St/ath9k-and-crda-messages-in-logs
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/899335
https://forum.porteus.org/viewtopic.php?t=4975&p=36436
https://forums.opensuse.org/showthread.php/483356-Authentication-Regulatory-Domain-issues-ath5k-12-2

Signed-off-by: Maciej Szmigiero <mail@maciej.szmigiero.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index b144485..2510b231 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2625,7 +2625,7 @@ static void restore_regulatory_settings(bool reset_user)
 	 * settings, user regulatory settings takes precedence.
 	 */
 	if (is_an_alpha2(alpha2))
-		regulatory_hint_user(user_alpha2, NL80211_USER_REG_HINT_USER);
+		regulatory_hint_user(alpha2, NL80211_USER_REG_HINT_USER);
 
 	spin_lock(&reg_requests_lock);
 	list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list);
-- 
cgit v1.1


From 52a45f38ca5998db0394e782d137595a82a08b43 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Sat, 15 Aug 2015 22:39:53 +0300
Subject: mac80211: avoid VHT usage with no 80MHz chans allowed

Currently if 80MHz channels are not allowed for use, the VHT IE is not
included in the probe request for an AP. This is not good enough if the
AP is configured with the wrong regulatory and supports VHT even where
prohibited or in TDLS scenarios.
Mark the ifmgd with the DISABLE_VHT flag for the misbehaving-AP case, and
unset VHT support from the peer-station entry for the TDLS case.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 16 ++++++++++++++++
 net/mac80211/vht.c  | 15 +++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 705ef1d..cd7e55e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4267,6 +4267,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_supported_band *sband;
 	struct cfg80211_chan_def chandef;
 	int ret;
+	u32 i;
+	bool have_80mhz;
 
 	sband = local->hw.wiphy->bands[cbss->channel->band];
 
@@ -4317,6 +4319,20 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
+	/* Allow VHT if at least one channel on the sband supports 80 MHz */
+	have_80mhz = false;
+	for (i = 0; i < sband->n_channels; i++) {
+		if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
+						IEEE80211_CHAN_NO_80MHZ))
+			continue;
+
+		have_80mhz = true;
+		break;
+	}
+
+	if (!have_80mhz)
+		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+
 	ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
 						     cbss->channel,
 						     ht_cap, ht_oper, vht_oper,
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 834ccdb..ff1c798 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -120,6 +120,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
 	struct ieee80211_sta_vht_cap own_cap;
 	u32 cap_info, i;
+	bool have_80mhz;
 
 	memset(vht_cap, 0, sizeof(*vht_cap));
 
@@ -129,6 +130,20 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 	if (!vht_cap_ie || !sband->vht_cap.vht_supported)
 		return;
 
+	/* Allow VHT if at least one channel on the sband supports 80 MHz */
+	have_80mhz = false;
+	for (i = 0; i < sband->n_channels; i++) {
+		if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
+						IEEE80211_CHAN_NO_80MHZ))
+			continue;
+
+		have_80mhz = true;
+		break;
+	}
+
+	if (!have_80mhz)
+		return;
+
 	/*
 	 * A VHT STA must support 40 MHz, but if we verify that here
 	 * then we break a few things - some APs (e.g. Netgear R6300v2
-- 
cgit v1.1


From ef9be10c8c999e00b239eec24cf01952a308f8e7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 28 Aug 2015 10:44:20 +0200
Subject: mac80211: reject software RSSI CQM with beacon filtering

When beacon filtering is enabled the mac80211 software implementation
for RSSI CQM cannot work as beacons will not be available. Rather than
accepting such a configuration without proper effect, reject it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 685ec13..17b1fe9 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2468,6 +2468,10 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
 	    rssi_hyst == bss_conf->cqm_rssi_hyst)
 		return 0;
 
+	if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER &&
+	    !(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI))
+		return -EOPNOTSUPP;
+
 	bss_conf->cqm_rssi_thold = rssi_thold;
 	bss_conf->cqm_rssi_hyst = rssi_hyst;
 
-- 
cgit v1.1


From 25b4a44c19c83d98e8c0807a7ede07c1f28eab8b Mon Sep 17 00:00:00 2001
From: Richard Laing <richard.laing@alliedtelesis.co.nz>
Date: Thu, 3 Sep 2015 13:52:31 +1200
Subject: net/ipv6: Correct PIM6 mrt_lock handling

In the IPv6 multicast routing code the mrt_lock was not being released
correctly in the MFC iterator, as a result adding or deleting a MIF would
cause a hang because the mrt_lock could not be acquired.

This fix is a copy of the code for the IPv4 case and ensures that the lock
is released correctly.

Signed-off-by: Richard Laing <richard.laing@alliedtelesis.co.nz>
Acked-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 74ceb73..5f36266 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -550,7 +550,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 
 	if (it->cache == &mrt->mfc6_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == mrt->mfc6_cache_array)
+	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
 		read_unlock(&mrt_lock);
 }
 
-- 
cgit v1.1


From bd516bd1feeb3890502178a74228ec05271f2b6d Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Thu, 3 Sep 2015 11:30:30 +0530
Subject: net: wan: sbni: fix device usage count

dev_get_by_name() will increment the usage count if the matching device
is found. But we were not decrementing the count if we have got the
device and the device is non-active.

Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/sbni.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index 758c4ba..8fef8d8 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -1358,6 +1358,8 @@ sbni_ioctl( struct net_device  *dev,  struct ifreq  *ifr,  int  cmd )
 		if( !slave_dev  ||  !(slave_dev->flags & IFF_UP) ) {
 			netdev_err(dev, "trying to enslave non-active device %s\n",
 				   slave_name);
+			if (slave_dev)
+				dev_put(slave_dev);
 			return  -EPERM;
 		}
 
-- 
cgit v1.1


From b65888796b72b677928527b17eeb8e935b767146 Mon Sep 17 00:00:00 2001
From: Sathya Perla <sathya.perla@avagotech.com>
Date: Thu, 3 Sep 2015 07:41:53 -0400
Subject: be2net: Revert "make the RX_FILTER command asynchronous" commit

The be_cmd_rx_filter() routine sends a non-embedded cmd to the FW and used
a pre-allocated dma memory to hold the cmd payload. This worked fine when
this cmd was synchronous. This cmd was changed to asynchronous mode by the
commit 8af65c2f4("make the RX_FILTER command asynchronous"). So now when
there are two quick invocations of this cmd, the 2nd request may end up
overwriting the first request, causing FW cmd corruption.

This patch reverts the offending commit and hence fixes the regression.

Fixes: 8af65c2f4("be2net: make the RX_FILTER command asynchronous")
Signed-off-by: Sathya Perla <sathya.perla@avagotech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 3be1fbd..eb32391 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1968,7 +1968,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value)
 			memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
 	}
 
-	status = be_mcc_notify(adapter);
+	status = be_mcc_notify_wait(adapter);
 err:
 	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
-- 
cgit v1.1


From 0890cf6cb6ab1af650025670b1a839671a9a3fcb Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 3 Sep 2015 14:04:17 +0200
Subject: switchdev: fix return value of switchdev_port_fdb_dump in case of
 error

switchdev_port_fdb_dump is used as .ndo_fdb_dump. Its return value is
idx, so we cannot return errval.

Fixes: 45d4122ca7cd ("switchdev: add support for fdb add/del/dump via switchdev_port_obj ops.")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Acked-by: Scott Feldman<sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/switchdev/switchdev.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 16c1c43..fda38f8 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -853,12 +853,8 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		.cb = cb,
 		.idx = idx,
 	};
-	int err;
-
-	err = switchdev_port_obj_dump(dev, &dump.obj);
-	if (err)
-		return err;
 
+	switchdev_port_obj_dump(dev, &dump.obj);
 	return dump.idx;
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
-- 
cgit v1.1


From 42ea4457aea7aaeddf0c0b06724f297608f5e9d2 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
Date: Thu, 3 Sep 2015 21:38:30 +0200
Subject: net: fec: normalize return value of pm_runtime_get_sync() in MDIO
 write

If fec MDIO write method succeeds its return value comes from
call to pm_runtime_get_sync().
But pm_runtime_get_sync() can also return 1.

In case of Micrel KSZ9031 PHY this value will then
be returned along the call chain of phy_write() ->
ksz9031_extended_write() -> ksz9031_center_flp_timing() ->
ksz9031_config_init() -> phy_init_hw() -> phy_attach_direct() ->
phy_connect_direct().

Then phy_connect() will cast it into a pointer using ERR_PTR(),
which then fec_enet_mii_probe() will try to dereference
resulting in an oops.

Fix it by normalizing return value of pm_runtime_get_sync()
to be zero if positive in MDIO write method.

Fixes: 8fff755e9f8d ("net: fec: Ensure clocks are enabled while using mdio bus")
Signed-off-by: Maciej Szmigiero <mail@maciej.szmigiero.name>
Acked-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 91925e3..6cc3340 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1816,11 +1816,13 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
 	unsigned long time_left;
-	int ret = 0;
+	int ret;
 
 	ret = pm_runtime_get_sync(dev);
 	if (ret < 0)
 		return ret;
+	else
+		ret = 0;
 
 	fep->mii_timeout = 0;
 	reinit_completion(&fep->mdio_done);
-- 
cgit v1.1


From 8f384c0177a03640312b9cb3638c998b32243b63 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 3 Sep 2015 16:24:52 -0400
Subject: RDS: rds_conn_lookup() should factor in the struct net for a match

Only return a conn if the rds_conn_net(conn) matches the struct
net passed to rds_conn_lookup().

Fixes: 467fa15356ac ("RDS-TCP: Support multiple RDS-TCP listen endpoints,
       one per netns.")

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/connection.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index a50e652..9b2de5e 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -70,7 +70,8 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
 } while (0)
 
 /* rcu read lock must be held or the connection spinlock */
-static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
+static struct rds_connection *rds_conn_lookup(struct net *net,
+					      struct hlist_head *head,
 					      __be32 laddr, __be32 faddr,
 					      struct rds_transport *trans)
 {
@@ -78,7 +79,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
 
 	hlist_for_each_entry_rcu(conn, head, c_hash_node) {
 		if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
-				conn->c_trans == trans) {
+		    conn->c_trans == trans && net == rds_conn_net(conn)) {
 			ret = conn;
 			break;
 		}
@@ -132,7 +133,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
 		goto new_conn;
 	rcu_read_lock();
-	conn = rds_conn_lookup(head, laddr, faddr, trans);
+	conn = rds_conn_lookup(net, head, laddr, faddr, trans);
 	if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
 	    laddr == faddr && !is_outgoing) {
 		/* This is a looped back IB connection, and we're
@@ -239,7 +240,7 @@ new_conn:
 		if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
 			found = NULL;
 		else
-			found = rds_conn_lookup(head, laddr, faddr, trans);
+			found = rds_conn_lookup(net, head, laddr, faddr, trans);
 		if (found) {
 			trans->conn_free(conn->c_transport_data);
 			kmem_cache_free(rds_conn_slab, conn);
-- 
cgit v1.1


From 99c79eceb152e2ac7f8a81ff55d4a810f730ec7b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 4 Sep 2015 12:47:28 +0200
Subject: lan78xx: Fix ladv/radv error handling in lan78xx_link_reset()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/usb/lan78xx.c: In function ‘lan78xx_link_reset’:
net/usb/lan78xx.c:1107: warning: comparison is always false due to limited range of data type
net/usb/lan78xx.c:1111: warning: comparison is always false due to limited range of data type

Assigning return values that can be negative error codes to "u16"
variables makes them positive, ignoring the errors.  Hence use "int"
instead.

Drop the "unlikely"s (unlikely considered harmful) and propagate the
actual error values instead of overriding them to -EIO while we're at
it.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 39364a4..a39518f 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1049,8 +1049,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 {
 	struct mii_if_info *mii = &dev->mii;
 	struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
-	u16 ladv, radv;
-	int ret;
+	int ladv, radv, ret;
 	u32 buf;
 
 	/* clear PHY interrupt status */
@@ -1104,12 +1103,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 		}
 
 		ladv = lan78xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE);
-		if (unlikely(ladv < 0))
-			return -EIO;
+		if (ladv < 0)
+			return ladv;
 
 		radv = lan78xx_mdio_read(dev->net, mii->phy_id, MII_LPA);
-		if (unlikely(radv < 0))
-			return -EIO;
+		if (radv < 0)
+			return radv;
 
 		netif_dbg(dev, link, dev->net,
 			  "speed: %u duplex: %d anadv: 0x%04x anlpa: 0x%04x",
-- 
cgit v1.1


From 0f1b7354e0d65ad528b820a8a46c15d70954f699 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 4 Sep 2015 12:49:32 +0200
Subject: vxlan: Refactor vxlan_udp_encap_recv() to kill compiler warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/net/vxlan.c: In function ‘vxlan_udp_encap_recv’:
drivers/net/vxlan.c:1226: warning: ‘info’ may be used uninitialized in this function

While this warning is a false positive, it can be killed easily by
getting rid of the pointer intermediary and referring directly to the
ip_tunnel_info structure.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ce988fd..cf8b7f0 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1223,7 +1223,6 @@ drop:
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct metadata_dst *tun_dst = NULL;
-	struct ip_tunnel_info *info;
 	struct vxlan_sock *vs;
 	struct vxlanhdr *vxh;
 	u32 flags, vni;
@@ -1270,8 +1269,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		if (!tun_dst)
 			goto drop;
 
-		info = &tun_dst->u.tun_info;
-		md = ip_tunnel_info_opts(info);
+		md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
 	} else {
 		memset(md, 0, sizeof(*md));
 	}
@@ -1286,7 +1284,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		md->gbp = ntohs(gbp->policy_id);
 
 		if (tun_dst)
-			info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+			tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
 
 		if (gbp->dont_learn)
 			md->gbp |= VXLAN_GBP_DONT_LEARN;
-- 
cgit v1.1


From e5a5837da756b4826d40636239718eb8f76775fd Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 4 Sep 2015 14:44:12 +0200
Subject: ethernet: synopsys: SYNOPSYS_DWC_ETH_QOS should depend on HAS_DMA

If NO_DMA=y:

    ERROR: "dma_alloc_coherent" [drivers/net/ethernet/synopsys/dwc_eth_qos.ko] undefined!
    ERROR: "dma_free_coherent" [drivers/net/ethernet/synopsys/dwc_eth_qos.ko] undefined!
    ERROR: "dma_unmap_single" [drivers/net/ethernet/synopsys/dwc_eth_qos.ko] undefined!
    ERROR: "dma_map_page" [drivers/net/ethernet/synopsys/dwc_eth_qos.ko] undefined!
    ERROR: "dma_mapping_error" [drivers/net/ethernet/synopsys/dwc_eth_qos.ko] undefined!
    ERROR: "dma_map_single" [drivers/net/ethernet/synopsys/dwc_eth_qos.ko] undefined!

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Lars Persson <larper@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/synopsys/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/synopsys/Kconfig b/drivers/net/ethernet/synopsys/Kconfig
index a8f3151..8276ee5 100644
--- a/drivers/net/ethernet/synopsys/Kconfig
+++ b/drivers/net/ethernet/synopsys/Kconfig
@@ -20,7 +20,7 @@ config SYNOPSYS_DWC_ETH_QOS
 	select PHYLIB
 	select CRC32
 	select MII
-	depends on OF
+	depends on OF && HAS_DMA
 	---help---
 	  This driver supports the DWC Ethernet QoS from Synopsys
 
-- 
cgit v1.1


From 585e7e1a544c5b13b2a0014c23f3cb6622e8c995 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Fri, 4 Sep 2015 11:22:24 -0400
Subject: net: dsa: mv88e6171: add hardware 802.1Q support

The Marvell 88E6171 switch is in the 88E6351 family, which supports
802.1Q, thus add support from the generic mv88e6xxx functions.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6171.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index d54b740..c2daaf0 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -117,6 +117,11 @@ struct dsa_switch_driver mv88e6171_switch_driver = {
 	.port_join_bridge       = mv88e6xxx_join_bridge,
 	.port_leave_bridge      = mv88e6xxx_leave_bridge,
 	.port_stp_update        = mv88e6xxx_port_stp_update,
+	.port_pvid_get		= mv88e6xxx_port_pvid_get,
+	.port_pvid_set		= mv88e6xxx_port_pvid_set,
+	.port_vlan_add		= mv88e6xxx_port_vlan_add,
+	.port_vlan_del		= mv88e6xxx_port_vlan_del,
+	.vlan_getnext		= mv88e6xxx_vlan_getnext,
 	.port_fdb_add		= mv88e6xxx_port_fdb_add,
 	.port_fdb_del		= mv88e6xxx_port_fdb_del,
 	.port_fdb_getnext	= mv88e6xxx_port_fdb_getnext,
-- 
cgit v1.1


From f88f69dd17f150e2abcc7e2d95f895f2546fa381 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joestringer@nicira.com>
Date: Fri, 4 Sep 2015 13:07:40 -0700
Subject: openvswitch: Remove conntrack Kconfig option.

There's no particular desire to have conntrack action support in Open
vSwitch as an independently configurable bit, rather just to ensure
there is not a hard dependency. This exposed option doesn't accurately
reflect the conntrack dependency when enabled, so simplify this by
removing the option. Compile the support if NF_CONNTRACK is enabled.

Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/Kconfig     | 12 +-----------
 net/openvswitch/Makefile    |  4 +++-
 net/openvswitch/conntrack.h |  4 ++--
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index af7cdef..2a071f4 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -5,6 +5,7 @@
 config OPENVSWITCH
 	tristate "Open vSwitch"
 	depends on INET
+	depends on (!NF_CONNTRACK || NF_CONNTRACK)
 	select LIBCRC32C
 	select MPLS
 	select NET_MPLS_GSO
@@ -31,17 +32,6 @@ config OPENVSWITCH
 
 	  If unsure, say N.
 
-config OPENVSWITCH_CONNTRACK
-	bool "Open vSwitch conntrack action support"
-	depends on OPENVSWITCH
-	depends on NF_CONNTRACK
-	default OPENVSWITCH
-	---help---
-	  If you say Y here, then Open vSwitch module will be able to pass
-	  packets through conntrack.
-
-	  Say N to exclude this support and reduce the binary size.
-
 config OPENVSWITCH_GRE
 	tristate "Open vSwitch GRE tunneling support"
 	depends on OPENVSWITCH
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 5b5913b..60f8090 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,7 +15,9 @@ openvswitch-y := \
 	vport-internal_dev.o \
 	vport-netdev.o
 
-openvswitch-$(CONFIG_OPENVSWITCH_CONNTRACK) += conntrack.o
+ifneq ($(CONFIG_NF_CONNTRACK),)
+openvswitch-y += conntrack.o
+endif
 
 obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
 obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index 3cb3066..43f5dd7 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -19,7 +19,7 @@
 struct ovs_conntrack_info;
 enum ovs_key_attr;
 
-#if defined(CONFIG_OPENVSWITCH_CONNTRACK)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 void ovs_ct_init(struct net *);
 void ovs_ct_exit(struct net *);
 bool ovs_ct_verify(struct net *, enum ovs_key_attr attr);
@@ -82,5 +82,5 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key,
 }
 
 static inline void ovs_ct_free_action(const struct nlattr *a) { }
-#endif
+#endif /* CONFIG_NF_CONNTRACK */
 #endif /* ovs_conntrack.h */
-- 
cgit v1.1


From bd1a05ee98b06c9a20138c45f96ccfddf3163f93 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Thu, 3 Sep 2015 23:22:16 +0300
Subject: fixed_phy: pass 'irq' to fixed_phy_add()

I've noticed  that fixed_phy_register() ignores its 'irq' parameter instead of
passing it to fixed_phy_add(). Luckily, fixed_phy_register()  seems to  always
be  called with PHY_POLL  for 'irq'... :-)

Fixes: a75951217472 ("net: phy: extend fixed driver with fixed_phy_register()")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/fixed_phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 12c7eb2..fb1299c 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -325,7 +325,7 @@ struct phy_device *fixed_phy_register(unsigned int irq,
 	phy_addr = phy_fixed_addr++;
 	spin_unlock(&phy_fixed_addr_lock);
 
-	ret = fixed_phy_add(PHY_POLL, phy_addr, status, link_gpio);
+	ret = fixed_phy_add(irq, phy_addr, status, link_gpio);
 	if (ret < 0)
 		return ERR_PTR(ret);
 
-- 
cgit v1.1


From 46cdc9be0841b30ba612aec1878cb746faf280a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?fran=C3=A7ois=20romieu?= <romieu@fr.zoreil.com>
Date: Fri, 4 Sep 2015 23:05:42 +0200
Subject: cxgb4: fix usage of uninitialized variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c: In function ‘init_one’:
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:4579:8: warning: ‘chip’ may be used uninitialized in this function [-Wmaybe-uninitialized]
   chip |= CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev);
        ^
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:4571:11: note: ‘chip’ was declared here
  int ver, chip;
           ^

Fixes: d86bd29e0b31 ("cxgb4/cxgb4vf: read the correct bits of PL Who Am I register")
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Cc: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index eb22d58..592a4d6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4568,28 +4568,23 @@ static void free_some_resources(struct adapter *adapter)
 
 static int get_chip_type(struct pci_dev *pdev, u32 pl_rev)
 {
-	int ver, chip;
 	u16 device_id;
 
 	/* Retrieve adapter's device ID */
 	pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
-	ver = device_id >> 12;
-	switch (ver) {
+
+	switch (device_id >> 12) {
 	case CHELSIO_T4:
-		chip |= CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev);
-		break;
+		return CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev);
 	case CHELSIO_T5:
-		chip |= CHELSIO_CHIP_CODE(CHELSIO_T5, pl_rev);
-		break;
+		return CHELSIO_CHIP_CODE(CHELSIO_T5, pl_rev);
 	case CHELSIO_T6:
-		chip |= CHELSIO_CHIP_CODE(CHELSIO_T6, pl_rev);
-		break;
+		return CHELSIO_CHIP_CODE(CHELSIO_T6, pl_rev);
 	default:
 		dev_err(&pdev->dev, "Device %d is not supported\n",
 			device_id);
-		return -EINVAL;
 	}
-	return chip;
+	return -EINVAL;
 }
 
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-- 
cgit v1.1


From fcb0bb6aab256288a4e0a8650d26e4096ec30319 Mon Sep 17 00:00:00 2001
From: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
Date: Tue, 1 Sep 2015 17:05:33 +0300
Subject: usbnet: Fix a race between usbnet_stop() and the BH
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The race may happen when a device (e.g. YOTA 4G LTE Modem) is
unplugged while the system is downloading a large file from the Net.

Hardware breakpoints and Kprobes with delays were used to confirm that
the race does actually happen.

The race is on skb_queue ('next' pointer) between usbnet_stop()
and rx_complete(), which, in turn, calls usbnet_bh().

Here is a part of the call stack with the code where the changes to the
queue happen. The line numbers are for the kernel 4.1.0:

*0 __skb_unlink (skbuff.h:1517)
    prev->next = next;
*1 defer_bh (usbnet.c:430)
    spin_lock_irqsave(&list->lock, flags);
    old_state = entry->state;
    entry->state = state;
    __skb_unlink(skb, list);
    spin_unlock(&list->lock);
    spin_lock(&dev->done.lock);
    __skb_queue_tail(&dev->done, skb);
    if (dev->done.qlen == 1)
        tasklet_schedule(&dev->bh);
    spin_unlock_irqrestore(&dev->done.lock, flags);
*2 rx_complete (usbnet.c:640)
    state = defer_bh(dev, skb, &dev->rxq, state);

At the same time, the following code repeatedly checks if the queue is
empty and reads these values concurrently with the above changes:

*0  usbnet_terminate_urbs (usbnet.c:765)
    /* maybe wait for deletions to finish. */
    while (!skb_queue_empty(&dev->rxq)
        && !skb_queue_empty(&dev->txq)
        && !skb_queue_empty(&dev->done)) {
            schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
            set_current_state(TASK_UNINTERRUPTIBLE);
            netif_dbg(dev, ifdown, dev->net,
                  "waited for %d urb completions\n", temp);
    }
*1  usbnet_stop (usbnet.c:806)
    if (!(info->flags & FLAG_AVOID_UNLINK_URBS))
        usbnet_terminate_urbs(dev);

As a result, it is possible, for example, that the skb is removed from
dev->rxq by __skb_unlink() before the check
"!skb_queue_empty(&dev->rxq)" in usbnet_terminate_urbs() is made. It is
also possible in this case that the skb is added to dev->done queue
after "!skb_queue_empty(&dev->done)" is checked. So
usbnet_terminate_urbs() may stop waiting and return while dev->done
queue still has an item.

Locking in defer_bh() and usbnet_terminate_urbs() was revisited to avoid
this race.

Signed-off-by: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
Reviewed-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index e049857..b4cf107 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -428,12 +428,18 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb,
 	old_state = entry->state;
 	entry->state = state;
 	__skb_unlink(skb, list);
-	spin_unlock(&list->lock);
-	spin_lock(&dev->done.lock);
+
+	/* defer_bh() is never called with list == &dev->done.
+	 * spin_lock_nested() tells lockdep that it is OK to take
+	 * dev->done.lock here with list->lock held.
+	 */
+	spin_lock_nested(&dev->done.lock, SINGLE_DEPTH_NESTING);
+
 	__skb_queue_tail(&dev->done, skb);
 	if (dev->done.qlen == 1)
 		tasklet_schedule(&dev->bh);
-	spin_unlock_irqrestore(&dev->done.lock, flags);
+	spin_unlock(&dev->done.lock);
+	spin_unlock_irqrestore(&list->lock, flags);
 	return old_state;
 }
 
@@ -749,6 +755,20 @@ EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs);
 
 /*-------------------------------------------------------------------------*/
 
+static void wait_skb_queue_empty(struct sk_buff_head *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->lock, flags);
+	while (!skb_queue_empty(q)) {
+		spin_unlock_irqrestore(&q->lock, flags);
+		schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		spin_lock_irqsave(&q->lock, flags);
+	}
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+
 // precondition: never called in_interrupt
 static void usbnet_terminate_urbs(struct usbnet *dev)
 {
@@ -762,14 +782,11 @@ static void usbnet_terminate_urbs(struct usbnet *dev)
 		unlink_urbs(dev, &dev->rxq);
 
 	/* maybe wait for deletions to finish. */
-	while (!skb_queue_empty(&dev->rxq)
-		&& !skb_queue_empty(&dev->txq)
-		&& !skb_queue_empty(&dev->done)) {
-			schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			netif_dbg(dev, ifdown, dev->net,
-				  "waited for %d urb completions\n", temp);
-	}
+	wait_skb_queue_empty(&dev->rxq);
+	wait_skb_queue_empty(&dev->txq);
+	wait_skb_queue_empty(&dev->done);
+	netif_dbg(dev, ifdown, dev->net,
+		  "waited for %d urb completions\n", temp);
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&dev->wait, &wait);
 }
-- 
cgit v1.1


From 5b902d6f97f573fde911338e5d943e6b07fac7f9 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@citrix.com>
Date: Thu, 3 Sep 2015 23:59:50 +0100
Subject: device property: Don't overwrite addr when failing in
 device_get_mac_address

The function device_get_mac_address is trying different property names
in order to get the mac address. To check the return value, the variable
addr (which contain the buffer pass by the caller) will be re-used. This
means that if the previous property is not found, the next property will
be read using a NULL buffer.

Therefore it's only possible to retrieve the mac if node contains a
property "mac-address". Fix it by using a temporary buffer for the
return value.

This has been introduced by commit 4c96b7dc0d393f12c17e0d81db15aa4a820a6ab3
"Add a matching set of device_ functions for determining mac/phy"

Signed-off-by: Julien Grall <julien.grall@citrix.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Reviewed-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/base/property.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index ff03f23..2d75366 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -611,13 +611,15 @@ static void *device_get_mac_addr(struct device *dev,
 */
 void *device_get_mac_address(struct device *dev, char *addr, int alen)
 {
-	addr = device_get_mac_addr(dev, "mac-address", addr, alen);
-	if (addr)
-		return addr;
+	char *res;
 
-	addr = device_get_mac_addr(dev, "local-mac-address", addr, alen);
-	if (addr)
-		return addr;
+	res = device_get_mac_addr(dev, "mac-address", addr, alen);
+	if (res)
+		return res;
+
+	res = device_get_mac_addr(dev, "local-mac-address", addr, alen);
+	if (res)
+		return res;
 
 	return device_get_mac_addr(dev, "address", addr, alen);
 }
-- 
cgit v1.1


From 39797a279d62972cd914ef580fdfacb13e508bf8 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 5 Sep 2015 13:07:27 -0700
Subject: net: dsa: bcm_sf2: Fix ageing conditions and operation

The comparison check between cur_hw_state and hw_state is currently
invalid because cur_hw_state is right shifted by G_MISTP_SHIFT, while
hw_state is not, so we end-up comparing bits 2:0 with bits 7:5, which is
going to cause an additional aging to occur. Fix this by not shifting
cur_hw_state while reading it, but instead, mask the value with the
appropriately shitfted bitmask.

The other problem with the fast-ageing process is that we did not set
the EN_AGE_DYNAMIC bit to request the ageing to occur for dynamically
learned MAC addresses. Finally, write back 0 to the FAST_AGE_CTRL
register to avoid leaving spurious bits sets from one operation to the
other.

Fixes: 12f460f23423 ("net: dsa: bcm_sf2: add HW bridging support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 289e2044..9d56515 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -418,7 +418,7 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch  *ds, int port)
 	core_writel(priv, port, CORE_FAST_AGE_PORT);
 
 	reg = core_readl(priv, CORE_FAST_AGE_CTRL);
-	reg |= EN_AGE_PORT | FAST_AGE_STR_DONE;
+	reg |= EN_AGE_PORT | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE;
 	core_writel(priv, reg, CORE_FAST_AGE_CTRL);
 
 	do {
@@ -432,6 +432,8 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch  *ds, int port)
 	if (!timeout)
 		return -ETIMEDOUT;
 
+	core_writel(priv, 0, CORE_FAST_AGE_CTRL);
+
 	return 0;
 }
 
@@ -507,7 +509,7 @@ static int bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
 	u32 reg;
 
 	reg = core_readl(priv, CORE_G_PCTL_PORT(port));
-	cur_hw_state = reg >> G_MISTP_STATE_SHIFT;
+	cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT);
 
 	switch (state) {
 	case BR_STATE_DISABLED:
@@ -531,10 +533,12 @@ static int bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
 	}
 
 	/* Fast-age ARL entries if we are moving a port from Learning or
-	 * Forwarding state to Disabled, Blocking or Listening state
+	 * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening
+	 * state (hw_state)
 	 */
 	if (cur_hw_state != hw_state) {
-		if (cur_hw_state & 4 && !(hw_state & 4)) {
+		if (cur_hw_state >= G_MISTP_LEARN_STATE &&
+		    hw_state <= G_MISTP_LISTEN_STATE) {
 			ret = bcm_sf2_sw_fast_age_port(ds, port);
 			if (ret) {
 				pr_err("%s: fast-ageing failed\n", __func__);
-- 
cgit v1.1


From bf361ad38165939049a2649b1a0078f3268d4bd1 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Sat, 5 Sep 2015 21:27:57 -0400
Subject: net: bridge: check __vlan_vid_del for error

Since __vlan_del can return an error code, change its inner function
__vlan_vid_del to return an eventual error from switchdev_port_obj_del.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 3cd8cc9..5f5a02b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -117,10 +117,11 @@ out_filt:
 	return err;
 }
 
-static void __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
-			   u16 vid)
+static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
+			  u16 vid)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int err = 0;
 
 	/* If driver uses VLAN ndo ops, use 8021q to delete vid
 	 * on device, otherwise try switchdev ops to delete vid.
@@ -137,8 +138,12 @@ static void __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
 			},
 		};
 
-		switchdev_port_obj_del(dev, &vlan_obj);
+		err = switchdev_port_obj_del(dev, &vlan_obj);
+		if (err == -EOPNOTSUPP)
+			err = 0;
 	}
+
+	return err;
 }
 
 static int __vlan_del(struct net_port_vlans *v, u16 vid)
@@ -151,7 +156,11 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
 
 	if (v->port_idx) {
 		struct net_bridge_port *p = v->parent.port;
-		__vlan_vid_del(p->dev, p->br, vid);
+		int err;
+
+		err = __vlan_vid_del(p->dev, p->br, vid);
+		if (err)
+			return err;
 	}
 
 	clear_bit(vid, v->vlan_bitmap);
-- 
cgit v1.1


From 7a577f013d6745c800a11a2911ddc9a3214e7f09 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Sat, 5 Sep 2015 21:49:41 -0400
Subject: net: bridge: remove unnecessary switchdev include

Remove the unnecessary switchdev.h include from br_netlink.c.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index af5e187..ea748c9 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -16,7 +16,6 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
-#include <net/switchdev.h>
 #include <uapi/linux/if_bridge.h>
 
 #include "br_private.h"
-- 
cgit v1.1


From 7b9018738e43c7c7693632174c69fde83b8edc07 Mon Sep 17 00:00:00 2001
From: Barry Song <Baohua.Song@csr.com>
Date: Mon, 7 Sep 2015 03:15:20 +0000
Subject: dm9000: fix a typo

Signed-off-by: Barry Song <Baohua.Song@csr.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/davicom/dm9000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index c0a7813..cf94b72 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1226,7 +1226,7 @@ static irqreturn_t dm9000_interrupt(int irq, void *dev_id)
 	if (int_status & ISR_PRS)
 		dm9000_rx(dev);
 
-	/* Trnasmit Interrupt check */
+	/* Transmit Interrupt check */
 	if (int_status & ISR_PTS)
 		dm9000_tx_done(dev, db);
 
-- 
cgit v1.1


From 7845989cb4b3da1db903918c844fccb9817d34a0 Mon Sep 17 00:00:00 2001
From: Kolmakov Dmitriy <kolmakov.dmitriy@huawei.com>
Date: Mon, 7 Sep 2015 09:05:48 +0000
Subject: net: tipc: fix stall during bclink wakeup procedure

If an attempt to wake up users of broadcast link is made when there is
no enough place in send queue than it may hang up inside the
tipc_sk_rcv() function since the loop breaks only after the wake up
queue becomes empty. This can lead to complete CPU stall with the
following message generated by RCU:

INFO: rcu_sched self-detected stall on CPU { 0}  (t=2101 jiffies
					g=54225 c=54224 q=11465)
Task dump for CPU 0:
tpch            R  running task        0 39949  39948 0x0000000a
 ffffffff818536c0 ffff88181fa037a0 ffffffff8106a4be 0000000000000000
 ffffffff818536c0 ffff88181fa037c0 ffffffff8106d8a8 ffff88181fa03800
 0000000000000001 ffff88181fa037f0 ffffffff81094a50 ffff88181fa15680
Call Trace:
 <IRQ>  [<ffffffff8106a4be>] sched_show_task+0xae/0x120
 [<ffffffff8106d8a8>] dump_cpu_task+0x38/0x40
 [<ffffffff81094a50>] rcu_dump_cpu_stacks+0x90/0xd0
 [<ffffffff81097c3b>] rcu_check_callbacks+0x3eb/0x6e0
 [<ffffffff8106e53f>] ? account_system_time+0x7f/0x170
 [<ffffffff81099e64>] update_process_times+0x34/0x60
 [<ffffffff810a84d1>] tick_sched_handle.isra.18+0x31/0x40
 [<ffffffff810a851c>] tick_sched_timer+0x3c/0x70
 [<ffffffff8109a43d>] __run_hrtimer.isra.34+0x3d/0xc0
 [<ffffffff8109aa95>] hrtimer_interrupt+0xc5/0x1e0
 [<ffffffff81030d52>] ? native_smp_send_reschedule+0x42/0x60
 [<ffffffff81032f04>] local_apic_timer_interrupt+0x34/0x60
 [<ffffffff810335bc>] smp_apic_timer_interrupt+0x3c/0x60
 [<ffffffff8165a3fb>] apic_timer_interrupt+0x6b/0x70
 [<ffffffff81659129>] ? _raw_spin_unlock_irqrestore+0x9/0x10
 [<ffffffff8107eb9f>] __wake_up_sync_key+0x4f/0x60
 [<ffffffffa313ddd1>] tipc_write_space+0x31/0x40 [tipc]
 [<ffffffffa313dadf>] filter_rcv+0x31f/0x520 [tipc]
 [<ffffffffa313d699>] ? tipc_sk_lookup+0xc9/0x110 [tipc]
 [<ffffffff81659259>] ? _raw_spin_lock_bh+0x19/0x30
 [<ffffffffa314122c>] tipc_sk_rcv+0x2dc/0x3e0 [tipc]
 [<ffffffffa312e7ff>] tipc_bclink_wakeup_users+0x2f/0x40 [tipc]
 [<ffffffffa313ce26>] tipc_node_unlock+0x186/0x190 [tipc]
 [<ffffffff81597c1c>] ? kfree_skb+0x2c/0x40
 [<ffffffffa313475c>] tipc_rcv+0x2ac/0x8c0 [tipc]
 [<ffffffffa312ff58>] tipc_l2_rcv_msg+0x38/0x50 [tipc]
 [<ffffffff815a76d3>] __netif_receive_skb_core+0x5a3/0x950
 [<ffffffff815a98d3>] __netif_receive_skb+0x13/0x60
 [<ffffffff815a993e>] netif_receive_skb_internal+0x1e/0x90
 [<ffffffff815aa138>] napi_gro_receive+0x78/0xa0
 [<ffffffffa07f93f4>] tg3_poll_work+0xc54/0xf40 [tg3]
 [<ffffffff81597c8c>] ? consume_skb+0x2c/0x40
 [<ffffffffa07f9721>] tg3_poll_msix+0x41/0x160 [tg3]
 [<ffffffff815ab0f2>] net_rx_action+0xe2/0x290
 [<ffffffff8104b92a>] __do_softirq+0xda/0x1f0
 [<ffffffff8104bc26>] irq_exit+0x76/0xa0
 [<ffffffff81004355>] do_IRQ+0x55/0xf0
 [<ffffffff8165a12b>] common_interrupt+0x6b/0x6b
 <EOI>

The issue occurs only when tipc_sk_rcv() is used to wake up postponed
senders:

	tipc_bclink_wakeup_users()
		// wakeupq - is a queue which consists of special
		// 		 messages with SOCK_WAKEUP type.
		tipc_sk_rcv(wakeupq)
			...
			while (skb_queue_len(inputq)) {
				filter_rcv(skb)
					// Here the type of message is checked
					// and if it is SOCK_WAKEUP then
					// it tries to wake up a sender.
					tipc_write_space(sk)
						wake_up_interruptible_sync_poll()
			}

After the sender thread is woke up it can gather control and perform
an attempt to send a message. But if there is no enough place in send
queue it will call link_schedule_user() function which puts a message
of type SOCK_WAKEUP to the wakeup queue and put the sender to sleep.
Thus the size of the queue actually is not changed and the while()
loop never exits.

The approach I proposed is to wake up only senders for which there is
enough place in send queue so the described issue can't occur.
Moreover the same approach is already used to wake up senders on
unicast links.

I have got into the issue on our product code but to reproduce the
issue I changed a benchmark test application (from
tipcutils/demos/benchmark) to perform the following scenario:
	1. Run 64 instances of test application (nodes). It can be done
	   on the one physical machine.
	2. Each application connects to all other using TIPC sockets in
	   RDM mode.
	3. When setup is done all nodes start simultaneously send
	   broadcast messages.
	4. Everything hangs up.

The issue is reproducible only when a congestion on broadcast link
occurs. For example, when there are only 8 nodes it works fine since
congestion doesn't occur. Send queue limit is 40 in my case (I use a
critical importance level) and when 64 nodes send a message at the
same moment a congestion occurs every time.

Signed-off-by: Dmitry S Kolmakov <kolmakov.dmitriy@huawei.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 8b010c9..41042de 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -170,6 +170,30 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to)
 }
 
 /**
+ * bclink_prepare_wakeup - prepare users for wakeup after congestion
+ * @bcl: broadcast link
+ * @resultq: queue for users which can be woken up
+ * Move a number of waiting users, as permitted by available space in
+ * the send queue, from link wait queue to specified queue for wakeup
+ */
+static void bclink_prepare_wakeup(struct tipc_link *bcl, struct sk_buff_head *resultq)
+{
+	int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,};
+	int imp, lim;
+	struct sk_buff *skb, *tmp;
+
+	skb_queue_walk_safe(&bcl->wakeupq, skb, tmp) {
+		imp = TIPC_SKB_CB(skb)->chain_imp;
+		lim = bcl->window + bcl->backlog[imp].limit;
+		pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
+		if ((pnd[imp] + bcl->backlog[imp].len) >= lim)
+			continue;
+		skb_unlink(skb, &bcl->wakeupq);
+		skb_queue_tail(resultq, skb);
+	}
+}
+
+/**
  * tipc_bclink_wakeup_users - wake up pending users
  *
  * Called with no locks taken
@@ -177,8 +201,12 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to)
 void tipc_bclink_wakeup_users(struct net *net)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_link *bcl = tn->bcl;
+	struct sk_buff_head resultq;
 
-	tipc_sk_rcv(net, &tn->bclink->link.wakeupq);
+	skb_queue_head_init(&resultq);
+	bclink_prepare_wakeup(bcl, &resultq);
+	tipc_sk_rcv(net, &resultq);
 }
 
 /**
-- 
cgit v1.1


From d43cefcd68bbc9a67b2c0efe38eb9cf6b5170fe8 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
Date: Tue, 8 Sep 2015 18:15:41 +0900
Subject: net: eth: altera: Fix the initial device operstate

Call netif_carrier_off() prior to register_netdev(), otherwise
userspace can see incorrect link state.

Signed-off-by: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/altera/altera_tse_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 8207877..fe64482 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -1517,6 +1517,7 @@ static int altera_tse_probe(struct platform_device *pdev)
 	spin_lock_init(&priv->tx_lock);
 	spin_lock_init(&priv->rxdma_irq_lock);
 
+	netif_carrier_off(ndev);
 	ret = register_netdev(ndev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register TSE net device\n");
-- 
cgit v1.1


From fd1754fb8afbd9cf4ea279d533414aa6577b7e60 Mon Sep 17 00:00:00 2001
From: Hariprasad Shenai <hariprasad@chelsio.com>
Date: Tue, 8 Sep 2015 16:25:39 +0530
Subject: cxgb4: Fix tx flit calculation

In commit 0aac3f56d4a63f04 ("cxgb4: Add comment for calculate tx flits
and sge length code") introduced a regression where tx flit calculation
is going wrong, which can lead to data corruption, hang, stall and
write-combining failure. Fixing it.

Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 78f446c..9162746 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -807,7 +807,7 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
 	 * message or, if we're doing a Large Send Offload, an LSO CPL message
 	 * with an embedded TX Packet Write CPL message.
 	 */
-	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4;
+	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
 	if (skb_shinfo(skb)->gso_size)
 		flits += (sizeof(struct fw_eth_tx_pkt_wr) +
 			  sizeof(struct cpl_tx_pkt_lso_core) +
-- 
cgit v1.1


From 2a485cf7db2815ca0d1510143d9126c4475aab39 Mon Sep 17 00:00:00 2001
From: Hariprasad Shenai <hariprasad@chelsio.com>
Date: Tue, 8 Sep 2015 16:25:40 +0530
Subject: cxgb4: Fix for write-combining stats configuration

The write-combining configuration register SGE_STAT_CFG_A needs to
be configured after FW initializes the adapter, else FW will reset
the configuration

Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 592a4d6..f5dcde2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4719,8 +4719,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			err = -ENOMEM;
 			goto out_free_adapter;
 		}
-		t4_write_reg(adapter, SGE_STAT_CFG_A,
-			     STATSOURCE_T5_V(7) | STATMODE_V(0));
 	}
 
 	setup_memwin(adapter);
@@ -4732,6 +4730,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto out_unmap_bar;
 
+	/* configure SGE_STAT_CFG_A to read WC stats */
+	if (!is_t4(adapter->params.chip))
+		t4_write_reg(adapter, SGE_STAT_CFG_A,
+			     STATSOURCE_T5_V(7) | STATMODE_V(0));
+
 	for_each_port(adapter, i) {
 		struct net_device *netdev;
 
-- 
cgit v1.1


From 1d5d48523900a4b0f25d6b52f1a93c84bd671186 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Tue, 8 Sep 2015 14:25:14 +0100
Subject: xen-netback: require fewer guest Rx slots when not using GSO

Commit f48da8b14d04ca87ffcffe68829afd45f926ec6a (xen-netback: fix
unlimited guest Rx internal queue and carrier flapping) introduced a
regression.

The PV frontend in IPXE only places 4 requests on the guest Rx ring.
Since netback required at least (MAX_SKB_FRAGS + 1) slots, IPXE could
not receive any packets.

a) If GSO is not enabled on the VIF, fewer guest Rx slots are required
   for the largest possible packet.  Calculate the required slots
   based on the maximum GSO size or the MTU.

   This calculation of the number of required slots relies on
   1650d5455bd2 (xen-netback: always fully coalesce guest Rx packets)
   which present in 4.0-rc1 and later.

b) Reduce the Rx stall detection to checking for at least one
   available Rx request.  This is fine since we're predominately
   concerned with detecting interfaces which are down and thus have
   zero available Rx requests.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/common.h  | 10 ----------
 drivers/net/xen-netback/netback.c | 23 ++++++++++++++++-------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 6dc76c1..a7bf747 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -200,11 +200,6 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	struct xenvif_stats stats;
 };
 
-/* Maximum number of Rx slots a to-guest packet may use, including the
- * slot needed for GSO meta-data.
- */
-#define XEN_NETBK_RX_SLOTS_MAX (MAX_SKB_FRAGS + 1)
-
 enum state_bit_shift {
 	/* This bit marks that the vif is connected */
 	VIF_STATUS_CONNECTED,
@@ -317,11 +312,6 @@ int xenvif_dealloc_kthread(void *data);
 
 void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
 
-/* Determine whether the needed number of slots (req) are available,
- * and set req_event if not.
- */
-bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed);
-
 void xenvif_carrier_on(struct xenvif *vif);
 
 /* Callback from stack when TX packet can be released */
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 42569b9..b588b1a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -149,9 +149,20 @@ static inline pending_ring_idx_t pending_index(unsigned i)
 	return i & (MAX_PENDING_REQS-1);
 }
 
-bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
+static int xenvif_rx_ring_slots_needed(struct xenvif *vif)
+{
+	if (vif->gso_mask)
+		return DIV_ROUND_UP(vif->dev->gso_max_size, PAGE_SIZE) + 1;
+	else
+		return DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
+}
+
+static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
 {
 	RING_IDX prod, cons;
+	int needed;
+
+	needed = xenvif_rx_ring_slots_needed(queue->vif);
 
 	do {
 		prod = queue->rx.sring->req_prod;
@@ -513,7 +524,7 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
 
 	skb_queue_head_init(&rxq);
 
-	while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
+	while (xenvif_rx_ring_slots_available(queue)
 	       && (skb = xenvif_rx_dequeue(queue)) != NULL) {
 		queue->last_rx_time = jiffies;
 
@@ -1938,8 +1949,7 @@ static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
 	prod = queue->rx.sring->req_prod;
 	cons = queue->rx.req_cons;
 
-	return !queue->stalled
-		&& prod - cons < XEN_NETBK_RX_SLOTS_MAX
+	return !queue->stalled && prod - cons < 1
 		&& time_after(jiffies,
 			      queue->last_rx_time + queue->vif->stall_timeout);
 }
@@ -1951,14 +1961,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
 	prod = queue->rx.sring->req_prod;
 	cons = queue->rx.req_cons;
 
-	return queue->stalled
-		&& prod - cons >= XEN_NETBK_RX_SLOTS_MAX;
+	return queue->stalled && prod - cons >= 1;
 }
 
 static bool xenvif_have_rx_work(struct xenvif_queue *queue)
 {
 	return (!skb_queue_empty(&queue->rx_queue)
-		&& xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+		&& xenvif_rx_ring_slots_available(queue))
 		|| (queue->vif->stall_timeout &&
 		    (xenvif_rx_queue_stalled(queue)
 		     || xenvif_rx_queue_ready(queue)))
-- 
cgit v1.1


From 74e98eb085889b0d2d4908f59f6e00026063014f Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Tue, 8 Sep 2015 10:53:40 -0400
Subject: RDS: verify the underlying transport exists before creating a
 connection

There was no verification that an underlying transport exists when creating
a connection, this would cause dereferencing a NULL ptr.

It might happen on sockets that weren't properly bound before attempting to
send a message, which will cause a NULL ptr deref:

[135546.047719] kasan: GPF could be caused by NULL-ptr deref or user memory accessgeneral protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN
[135546.051270] Modules linked in:
[135546.051781] CPU: 4 PID: 15650 Comm: trinity-c4 Not tainted 4.2.0-next-20150902-sasha-00041-gbaa1222-dirty #2527
[135546.053217] task: ffff8800835bc000 ti: ffff8800bc708000 task.ti: ffff8800bc708000
[135546.054291] RIP: __rds_conn_create (net/rds/connection.c:194)
[135546.055666] RSP: 0018:ffff8800bc70fab0  EFLAGS: 00010202
[135546.056457] RAX: dffffc0000000000 RBX: 0000000000000f2c RCX: ffff8800835bc000
[135546.057494] RDX: 0000000000000007 RSI: ffff8800835bccd8 RDI: 0000000000000038
[135546.058530] RBP: ffff8800bc70fb18 R08: 0000000000000001 R09: 0000000000000000
[135546.059556] R10: ffffed014d7a3a23 R11: ffffed014d7a3a21 R12: 0000000000000000
[135546.060614] R13: 0000000000000001 R14: ffff8801ec3d0000 R15: 0000000000000000
[135546.061668] FS:  00007faad4ffb700(0000) GS:ffff880252000000(0000) knlGS:0000000000000000
[135546.062836] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[135546.063682] CR2: 000000000000846a CR3: 000000009d137000 CR4: 00000000000006a0
[135546.064723] Stack:
[135546.065048]  ffffffffafe2055c ffffffffafe23fc1 ffffed00493097bf ffff8801ec3d0008
[135546.066247]  0000000000000000 00000000000000d0 0000000000000000 ac194a24c0586342
[135546.067438]  1ffff100178e1f78 ffff880320581b00 ffff8800bc70fdd0 ffff880320581b00
[135546.068629] Call Trace:
[135546.069028] ? __rds_conn_create (include/linux/rcupdate.h:856 net/rds/connection.c:134)
[135546.069989] ? rds_message_copy_from_user (net/rds/message.c:298)
[135546.071021] rds_conn_create_outgoing (net/rds/connection.c:278)
[135546.071981] rds_sendmsg (net/rds/send.c:1058)
[135546.072858] ? perf_trace_lock (include/trace/events/lock.h:38)
[135546.073744] ? lockdep_init (kernel/locking/lockdep.c:3298)
[135546.074577] ? rds_send_drop_to (net/rds/send.c:976)
[135546.075508] ? __might_fault (./arch/x86/include/asm/current.h:14 mm/memory.c:3795)
[135546.076349] ? __might_fault (mm/memory.c:3795)
[135546.077179] ? rds_send_drop_to (net/rds/send.c:976)
[135546.078114] sock_sendmsg (net/socket.c:611 net/socket.c:620)
[135546.078856] SYSC_sendto (net/socket.c:1657)
[135546.079596] ? SYSC_connect (net/socket.c:1628)
[135546.080510] ? trace_dump_stack (kernel/trace/trace.c:1926)
[135546.081397] ? ring_buffer_unlock_commit (kernel/trace/ring_buffer.c:2479 kernel/trace/ring_buffer.c:2558 kernel/trace/ring_buffer.c:2674)
[135546.082390] ? trace_buffer_unlock_commit (kernel/trace/trace.c:1749)
[135546.083410] ? trace_event_raw_event_sys_enter (include/trace/events/syscalls.h:16)
[135546.084481] ? do_audit_syscall_entry (include/trace/events/syscalls.h:16)
[135546.085438] ? trace_buffer_unlock_commit (kernel/trace/trace.c:1749)
[135546.085515] rds_ib_laddr_check(): addr 36.74.25.172 ret -99 node type -1

Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/connection.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index 9b2de5e..49adeef 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -190,6 +190,12 @@ new_conn:
 		}
 	}
 
+	if (trans == NULL) {
+		kmem_cache_free(rds_conn_slab, conn);
+		conn = ERR_PTR(-ENODEV);
+		goto out;
+	}
+
 	conn->c_trans = trans;
 
 	ret = trans->conn_alloc(conn, gfp);
-- 
cgit v1.1


From 592867bfabe2fcb449393ba7eb0de4f972a08c63 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 8 Sep 2015 18:00:09 +0200
Subject: ebpf: fix fd refcount leaks related to maps in bpf syscall

We may already have gotten a proper fd struct through fdget(), so
whenever we return at the end of an map operation, we need to call
fdput(). However, each map operation from syscall side first probes
CHECK_ATTR() to verify that unused fields in the bpf_attr union are
zero.

In case of malformed input, we return with error, but the lookup to
the map_fd was already performed at that time, so that we return
without an corresponding fdput(). Fix it by performing an fdget()
only right before bpf_map_get(). The fdget() invocation on maps in
the verifier is not affected.

Fixes: db20fd2b0108 ("bpf: add lookup/update/delete/iterate methods to BPF maps")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/syscall.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index dc9b464..35bac8e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -155,14 +155,15 @@ static int map_lookup_elem(union bpf_attr *attr)
 	void __user *ukey = u64_to_ptr(attr->key);
 	void __user *uvalue = u64_to_ptr(attr->value);
 	int ufd = attr->map_fd;
-	struct fd f = fdget(ufd);
 	struct bpf_map *map;
 	void *key, *value, *ptr;
+	struct fd f;
 	int err;
 
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 		return -EINVAL;
 
+	f = fdget(ufd);
 	map = bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
@@ -213,14 +214,15 @@ static int map_update_elem(union bpf_attr *attr)
 	void __user *ukey = u64_to_ptr(attr->key);
 	void __user *uvalue = u64_to_ptr(attr->value);
 	int ufd = attr->map_fd;
-	struct fd f = fdget(ufd);
 	struct bpf_map *map;
 	void *key, *value;
+	struct fd f;
 	int err;
 
 	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
 		return -EINVAL;
 
+	f = fdget(ufd);
 	map = bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
@@ -265,14 +267,15 @@ static int map_delete_elem(union bpf_attr *attr)
 {
 	void __user *ukey = u64_to_ptr(attr->key);
 	int ufd = attr->map_fd;
-	struct fd f = fdget(ufd);
 	struct bpf_map *map;
+	struct fd f;
 	void *key;
 	int err;
 
 	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
 		return -EINVAL;
 
+	f = fdget(ufd);
 	map = bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
@@ -305,14 +308,15 @@ static int map_get_next_key(union bpf_attr *attr)
 	void __user *ukey = u64_to_ptr(attr->key);
 	void __user *unext_key = u64_to_ptr(attr->next_key);
 	int ufd = attr->map_fd;
-	struct fd f = fdget(ufd);
 	struct bpf_map *map;
 	void *key, *next_key;
+	struct fd f;
 	int err;
 
 	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
 		return -EINVAL;
 
+	f = fdget(ufd);
 	map = bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-- 
cgit v1.1


From 6b9ea5a64ed5eeb3f68f2e6fcce0ed1179801d1e Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 8 Sep 2015 10:53:04 -0700
Subject: ipv6: fix multipath route replace error recovery

Problem:
The ecmp route replace support for ipv6 in the kernel, deletes the
existing ecmp route too early, ie when it installs the first nexthop.
If there is an error in installing the subsequent nexthops, its too late
to recover the already deleted existing route leaving the fib
in an inconsistent state.

This patch reduces the possibility of this by doing the following:
a) Changes the existing multipath route add code to a two stage process:
  build rt6_infos + insert them
	ip6_route_add rt6_info creation code is moved into
	ip6_route_info_create.
b) This ensures that most errors are caught during building rt6_infos
  and we fail early
c) Separates multipath add and del code. Because add needs the special
  two stage mode in a) and delete essentially does not care.
d) In any event if the code fails during inserting a route again, a
  warning is printed (This should be unlikely)

Before the patch:
$ip -6 route show
3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024

/* Try replacing the route with a duplicate nexthop */
$ip -6 route change 3000:1000:1000:1000::2/128 nexthop via
fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev
swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1
RTNETLINK answers: File exists

$ip -6 route show
/* previously added ecmp route 3000:1000:1000:1000::2 dissappears from
 * kernel */

After the patch:
$ip -6 route show
3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024

/* Try replacing the route with a duplicate nexthop */
$ip -6 route change 3000:1000:1000:1000::2/128 nexthop via
fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev
swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1
RTNETLINK answers: File exists

$ip -6 route show
3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024

Fixes: 27596472473a ("ipv6: fix ECMP route replacement")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 175 insertions(+), 26 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f45cac6..34539d3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1748,7 +1748,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
 	return -EINVAL;
 }
 
-int ip6_route_add(struct fib6_config *cfg)
+int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
 {
 	int err;
 	struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1756,7 +1756,6 @@ int ip6_route_add(struct fib6_config *cfg)
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
 	struct fib6_table *table;
-	struct mx6_config mxc = { .mx = NULL, };
 	int addr_type;
 
 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
@@ -1981,6 +1980,32 @@ install_route:
 
 	cfg->fc_nlinfo.nl_net = dev_net(dev);
 
+	*rt_ret = rt;
+
+	return 0;
+out:
+	if (dev)
+		dev_put(dev);
+	if (idev)
+		in6_dev_put(idev);
+	if (rt)
+		dst_free(&rt->dst);
+
+	*rt_ret = NULL;
+
+	return err;
+}
+
+int ip6_route_add(struct fib6_config *cfg)
+{
+	struct mx6_config mxc = { .mx = NULL, };
+	struct rt6_info *rt = NULL;
+	int err;
+
+	err = ip6_route_info_create(cfg, &rt);
+	if (err)
+		goto out;
+
 	err = ip6_convert_metrics(&mxc, cfg);
 	if (err)
 		goto out;
@@ -1988,14 +2013,12 @@ install_route:
 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
 
 	kfree(mxc.mx);
+
 	return err;
 out:
-	if (dev)
-		dev_put(dev);
-	if (idev)
-		in6_dev_put(idev);
 	if (rt)
 		dst_free(&rt->dst);
+
 	return err;
 }
 
@@ -2776,19 +2799,78 @@ errout:
 	return err;
 }
 
-static int ip6_route_multipath(struct fib6_config *cfg, int add)
+struct rt6_nh {
+	struct rt6_info *rt6_info;
+	struct fib6_config r_cfg;
+	struct mx6_config mxc;
+	struct list_head next;
+};
+
+static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
+{
+	struct rt6_nh *nh;
+
+	list_for_each_entry(nh, rt6_nh_list, next) {
+		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
+		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
+		        nh->r_cfg.fc_ifindex);
+	}
+}
+
+static int ip6_route_info_append(struct list_head *rt6_nh_list,
+				 struct rt6_info *rt, struct fib6_config *r_cfg)
+{
+	struct rt6_nh *nh;
+	struct rt6_info *rtnh;
+	int err = -EEXIST;
+
+	list_for_each_entry(nh, rt6_nh_list, next) {
+		/* check if rt6_info already exists */
+		rtnh = nh->rt6_info;
+
+		if (rtnh->dst.dev == rt->dst.dev &&
+		    rtnh->rt6i_idev == rt->rt6i_idev &&
+		    ipv6_addr_equal(&rtnh->rt6i_gateway,
+				    &rt->rt6i_gateway))
+			return err;
+	}
+
+	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
+	if (!nh)
+		return -ENOMEM;
+	nh->rt6_info = rt;
+	err = ip6_convert_metrics(&nh->mxc, r_cfg);
+	if (err) {
+		kfree(nh);
+		return err;
+	}
+	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
+	list_add_tail(&nh->next, rt6_nh_list);
+
+	return 0;
+}
+
+static int ip6_route_multipath_add(struct fib6_config *cfg)
 {
 	struct fib6_config r_cfg;
 	struct rtnexthop *rtnh;
+	struct rt6_info *rt;
+	struct rt6_nh *err_nh;
+	struct rt6_nh *nh, *nh_safe;
 	int remaining;
 	int attrlen;
-	int err = 0, last_err = 0;
+	int err = 1;
+	int nhn = 0;
+	int replace = (cfg->fc_nlinfo.nlh &&
+		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
+	LIST_HEAD(rt6_nh_list);
 
 	remaining = cfg->fc_mp_len;
-beginning:
 	rtnh = (struct rtnexthop *)cfg->fc_mp;
 
-	/* Parse a Multipath Entry */
+	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
+	 * rt6_info structs per nexthop
+	 */
 	while (rtnh_ok(rtnh, remaining)) {
 		memcpy(&r_cfg, cfg, sizeof(*cfg));
 		if (rtnh->rtnh_ifindex)
@@ -2808,22 +2890,32 @@ beginning:
 			if (nla)
 				r_cfg.fc_encap_type = nla_get_u16(nla);
 		}
-		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+
+		err = ip6_route_info_create(&r_cfg, &rt);
+		if (err)
+			goto cleanup;
+
+		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
 		if (err) {
-			last_err = err;
-			/* If we are trying to remove a route, do not stop the
-			 * loop when ip6_route_del() fails (because next hop is
-			 * already gone), we should try to remove all next hops.
-			 */
-			if (add) {
-				/* If add fails, we should try to delete all
-				 * next hops that have been already added.
-				 */
-				add = 0;
-				remaining = cfg->fc_mp_len - remaining;
-				goto beginning;
-			}
+			dst_free(&rt->dst);
+			goto cleanup;
+		}
+
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
+
+	err_nh = NULL;
+	list_for_each_entry(nh, &rt6_nh_list, next) {
+		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
+		/* nh->rt6_info is used or freed at this point, reset to NULL*/
+		nh->rt6_info = NULL;
+		if (err) {
+			if (replace && nhn)
+				ip6_print_replace_route_err(&rt6_nh_list);
+			err_nh = nh;
+			goto add_errout;
 		}
+
 		/* Because each route is added like a single route we remove
 		 * these flags after the first nexthop: if there is a collision,
 		 * we have already failed to add the first nexthop:
@@ -2833,6 +2925,63 @@ beginning:
 		 */
 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
 						     NLM_F_REPLACE);
+		nhn++;
+	}
+
+	goto cleanup;
+
+add_errout:
+	/* Delete routes that were already added */
+	list_for_each_entry(nh, &rt6_nh_list, next) {
+		if (err_nh == nh)
+			break;
+		ip6_route_del(&nh->r_cfg);
+	}
+
+cleanup:
+	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
+		if (nh->rt6_info)
+			dst_free(&nh->rt6_info->dst);
+		if (nh->mxc.mx)
+			kfree(nh->mxc.mx);
+		list_del(&nh->next);
+		kfree(nh);
+	}
+
+	return err;
+}
+
+static int ip6_route_multipath_del(struct fib6_config *cfg)
+{
+	struct fib6_config r_cfg;
+	struct rtnexthop *rtnh;
+	int remaining;
+	int attrlen;
+	int err = 1, last_err = 0;
+
+	remaining = cfg->fc_mp_len;
+	rtnh = (struct rtnexthop *)cfg->fc_mp;
+
+	/* Parse a Multipath Entry */
+	while (rtnh_ok(rtnh, remaining)) {
+		memcpy(&r_cfg, cfg, sizeof(*cfg));
+		if (rtnh->rtnh_ifindex)
+			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla) {
+				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+				r_cfg.fc_flags |= RTF_GATEWAY;
+			}
+		}
+		err = ip6_route_del(&r_cfg);
+		if (err)
+			last_err = err;
+
 		rtnh = rtnh_next(rtnh, &remaining);
 	}
 
@@ -2849,7 +2998,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return err;
 
 	if (cfg.fc_mp)
-		return ip6_route_multipath(&cfg, 0);
+		return ip6_route_multipath_del(&cfg);
 	else
 		return ip6_route_del(&cfg);
 }
@@ -2864,7 +3013,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return err;
 
 	if (cfg.fc_mp)
-		return ip6_route_multipath(&cfg, 1);
+		return ip6_route_multipath_add(&cfg);
 	else
 		return ip6_route_add(&cfg);
 }
-- 
cgit v1.1


From 687f07156b0c99205c21aa4e2986564046d342fe Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 8 Sep 2015 13:40:01 -0700
Subject: bpf: fix out of bounds access in verifier log

when the verifier log is enabled the print_bpf_insn() is doing
bpf_alu_string[BPF_OP(insn->code) >> 4]
and
bpf_jmp_string[BPF_OP(insn->code) >> 4]
where BPF_OP is a 4-bit instruction opcode.
Malformed insns can cause out of bounds access.
Fix it by sizing arrays appropriately.

The bug was found by clang address sanitizer with libfuzzer.

Reported-by: Yonghong Song <yhs@plumgrid.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ed12e38..b074b23 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -283,7 +283,7 @@ static const char *const bpf_class_string[] = {
 	[BPF_ALU64] = "alu64",
 };
 
-static const char *const bpf_alu_string[] = {
+static const char *const bpf_alu_string[16] = {
 	[BPF_ADD >> 4]  = "+=",
 	[BPF_SUB >> 4]  = "-=",
 	[BPF_MUL >> 4]  = "*=",
@@ -307,7 +307,7 @@ static const char *const bpf_ldst_string[] = {
 	[BPF_DW >> 3] = "u64",
 };
 
-static const char *const bpf_jmp_string[] = {
+static const char *const bpf_jmp_string[16] = {
 	[BPF_JA >> 4]   = "jmp",
 	[BPF_JEQ >> 4]  = "==",
 	[BPF_JGT >> 4]  = ">",
-- 
cgit v1.1


From 03679a14739a0d4c14b52ba65a69ff553bfba73b Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 8 Sep 2015 20:06:41 -0700
Subject: net: dsa: bcm_sf2: Fix 64-bits register writes

The macro to write 64-bits quantities to the 32-bits register swapped
the value and offsets arguments, we want to preserve the ordering of the
arguments with respect to how writel() is implemented for instance:
value first, offset/base second.

Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 22e2ebf..789d7b7 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -112,8 +112,8 @@ static inline u64 name##_readq(struct bcm_sf2_priv *priv, u32 off)	\
 	spin_unlock(&priv->indir_lock);					\
 	return (u64)indir << 32 | dir;					\
 }									\
-static inline void name##_writeq(struct bcm_sf2_priv *priv, u32 off,	\
-							u64 val)	\
+static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val,	\
+							u32 off)	\
 {									\
 	spin_lock(&priv->indir_lock);					\
 	reg_writel(priv, upper_32_bits(val), REG_DIR_DATA_WRITE);	\
-- 
cgit v1.1


From 444c5f92ed152346aef0952316e0ea855129846c Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 9 Sep 2015 11:24:29 +0200
Subject: net: ethoc: Remove unnecessary #ifdef CONFIG_OF

For !CONFIG_OF of_get_property() is defined to always return NULL. Thus
there's no need to protect the call to of_get_property() with #ifdef
CONFIG_OF.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ethoc.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 442410c..a2c96fd 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1132,10 +1132,6 @@ static int ethoc_probe(struct platform_device *pdev)
 		memcpy(netdev->dev_addr, pdata->hwaddr, IFHWADDRLEN);
 		priv->phy_id = pdata->phy_id;
 	} else {
-		priv->phy_id = -1;
-
-#ifdef CONFIG_OF
-		{
 		const uint8_t *mac;
 
 		mac = of_get_property(pdev->dev.of_node,
@@ -1143,8 +1139,7 @@ static int ethoc_probe(struct platform_device *pdev)
 				      NULL);
 		if (mac)
 			memcpy(netdev->dev_addr, mac, IFHWADDRLEN);
-		}
-#endif
+		priv->phy_id = -1;
 	}
 
 	/* Check that the given MAC address is valid. If it isn't, read the
-- 
cgit v1.1


From f53de1e9a4aaf8cbe08845da6f7ff26a078ac507 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 9 Sep 2015 14:20:56 +0200
Subject: net: ipv6: use common fib_default_rule_pref

This switches IPv6 policy routing to use the shared
fib_default_rule_pref() function of IPv4 and DECnet. It is also used in
multicast routing for IPv4 as well as IPv6.

The motivation for this patch is a complaint about iproute2 behaving
inconsistent between IPv4 and IPv6 when adding policy rules: Formerly,
IPv6 rules were assigned a fixed priority of 0x3FFF whereas for IPv4 the
assigned priority value was decreased with each rule added.

Since then all users of the default_pref field have been converted to
assign the generic function fib_default_rule_pref(), fib_nl_newrule()
may just use it directly instead. Therefore get rid of the function
pointer altogether and make fib_default_rule_pref() static, as it's not
used outside fib_rules.c anymore.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h |  2 --
 net/core/fib_rules.c    | 10 +++-------
 net/decnet/dn_rules.c   |  1 -
 net/ipv4/fib_rules.c    |  1 -
 net/ipv4/ipmr.c         |  1 -
 net/ipv6/fib6_rules.c   |  6 ------
 net/ipv6/ip6mr.c        |  1 -
 7 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 4e8f804..59160de 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -66,7 +66,6 @@ struct fib_rules_ops {
 					   struct nlattr **);
 	int			(*fill)(struct fib_rule *, struct sk_buff *,
 					struct fib_rule_hdr *);
-	u32			(*default_pref)(struct fib_rules_ops *ops);
 	size_t			(*nlmsg_payload)(struct fib_rule *);
 
 	/* Called after modifications to the rules set, must flush
@@ -118,5 +117,4 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
 		     struct fib_lookup_arg *);
 int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
 			 u32 flags);
-u32 fib_default_rule_pref(struct fib_rules_ops *ops);
 #endif
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ae8306e..bf77e36 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -44,7 +44,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 }
 EXPORT_SYMBOL(fib_default_rule_add);
 
-u32 fib_default_rule_pref(struct fib_rules_ops *ops)
+static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
 {
 	struct list_head *pos;
 	struct fib_rule *rule;
@@ -60,7 +60,6 @@ u32 fib_default_rule_pref(struct fib_rules_ops *ops)
 
 	return 0;
 }
-EXPORT_SYMBOL(fib_default_rule_pref);
 
 static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
@@ -299,8 +298,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 	}
 	rule->fr_net = net;
 
-	if (tb[FRA_PRIORITY])
-		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+	rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
+	                              : fib_default_rule_pref(ops);
 
 	if (tb[FRA_IIFNAME]) {
 		struct net_device *dev;
@@ -350,9 +349,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 	else
 		rule->suppress_ifgroup = -1;
 
-	if (!tb[FRA_PRIORITY] && ops->default_pref)
-		rule->pref = ops->default_pref(ops);
-
 	err = -EINVAL;
 	if (tb[FRA_GOTO]) {
 		if (rule->action != FR_ACT_GOTO)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 9d66a0f..295bbd6 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -229,7 +229,6 @@ static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = {
 	.configure	= dn_fib_rule_configure,
 	.compare	= dn_fib_rule_compare,
 	.fill		= dn_fib_rule_fill,
-	.default_pref	= fib_default_rule_pref,
 	.flush_cache	= dn_fib_rule_flush_cache,
 	.nlgroup	= RTNLGRP_DECnet_RULE,
 	.policy		= dn_fib_rule_policy,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 18123d5..f2bda9e 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -318,7 +318,6 @@ static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
 	.delete		= fib4_rule_delete,
 	.compare	= fib4_rule_compare,
 	.fill		= fib4_rule_fill,
-	.default_pref	= fib_default_rule_pref,
 	.nlmsg_payload	= fib4_rule_nlmsg_payload,
 	.flush_cache	= fib4_rule_flush_cache,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3a2c016..866ee89 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -233,7 +233,6 @@ static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
 	.match		= ipmr_rule_match,
 	.configure	= ipmr_rule_configure,
 	.compare	= ipmr_rule_compare,
-	.default_pref	= fib_default_rule_pref,
 	.fill		= ipmr_rule_fill,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
 	.policy		= ipmr_rule_policy,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2367a16..9f777ec 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -258,11 +258,6 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
-{
-	return 0x3FFF;
-}
-
 static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
 {
 	return nla_total_size(16) /* dst */
@@ -279,7 +274,6 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
 	.configure		= fib6_rule_configure,
 	.compare		= fib6_rule_compare,
 	.fill			= fib6_rule_fill,
-	.default_pref		= fib6_rule_default_pref,
 	.nlmsg_payload		= fib6_rule_nlmsg_payload,
 	.nlgroup		= RTNLGRP_IPV6_RULE,
 	.policy			= fib6_rule_policy,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 5f36266..0e004cc 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -217,7 +217,6 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 	.match		= ip6mr_rule_match,
 	.configure	= ip6mr_rule_configure,
 	.compare	= ip6mr_rule_compare,
-	.default_pref	= fib_default_rule_pref,
 	.fill		= ip6mr_rule_fill,
 	.nlgroup	= RTNLGRP_IPV6_RULE,
 	.policy		= ip6mr_rule_policy,
-- 
cgit v1.1


From ce8e5c7035098fa5b8fea910f14be59b8cace81f Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 9 Sep 2015 10:38:02 +0200
Subject: net: cavium: liquidio: use kzalloc in setup_glist()

We save a little .text and get rid of the sizeof(...) style
inconsistency.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 0660dee..f683d97 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -818,10 +818,9 @@ static int setup_glist(struct lio *lio)
 	INIT_LIST_HEAD(&lio->glist);
 
 	for (i = 0; i < lio->tx_qsize; i++) {
-		g = kmalloc(sizeof(*g), GFP_KERNEL);
+		g = kzalloc(sizeof(*g), GFP_KERNEL);
 		if (!g)
 			break;
-		memset(g, 0, sizeof(struct octnic_gather));
 
 		g->sg_size =
 			((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
-- 
cgit v1.1


From e9b5ac277e8f8dffa28f85a065e2fd890d9e48c7 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 9 Sep 2015 10:38:03 +0200
Subject: net: jme: use kzalloc() instead of kmalloc+memset

Using kzalloc saves a tiny bit on .text.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/jme.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 6e9a792..060dd39 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -583,7 +583,7 @@ jme_setup_tx_resources(struct jme_adapter *jme)
 	atomic_set(&txring->next_to_clean, 0);
 	atomic_set(&txring->nr_free, jme->tx_ring_size);
 
-	txring->bufinf		= kmalloc(sizeof(struct jme_buffer_info) *
+	txring->bufinf		= kzalloc(sizeof(struct jme_buffer_info) *
 					jme->tx_ring_size, GFP_ATOMIC);
 	if (unlikely(!(txring->bufinf)))
 		goto err_free_txring;
@@ -592,8 +592,6 @@ jme_setup_tx_resources(struct jme_adapter *jme)
 	 * Initialize Transmit Descriptors
 	 */
 	memset(txring->alloc, 0, TX_RING_ALLOC_SIZE(jme->tx_ring_size));
-	memset(txring->bufinf, 0,
-		sizeof(struct jme_buffer_info) * jme->tx_ring_size);
 
 	return 0;
 
@@ -845,7 +843,7 @@ jme_setup_rx_resources(struct jme_adapter *jme)
 	rxring->next_to_use	= 0;
 	atomic_set(&rxring->next_to_clean, 0);
 
-	rxring->bufinf		= kmalloc(sizeof(struct jme_buffer_info) *
+	rxring->bufinf		= kzalloc(sizeof(struct jme_buffer_info) *
 					jme->rx_ring_size, GFP_ATOMIC);
 	if (unlikely(!(rxring->bufinf)))
 		goto err_free_rxring;
@@ -853,8 +851,6 @@ jme_setup_rx_resources(struct jme_adapter *jme)
 	/*
 	 * Initiallize Receive Descriptors
 	 */
-	memset(rxring->bufinf, 0,
-		sizeof(struct jme_buffer_info) * jme->rx_ring_size);
 	for (i = 0 ; i < jme->rx_ring_size ; ++i) {
 		if (unlikely(jme_make_new_rx_buf(jme, i))) {
 			jme_free_rx_resources(jme);
-- 
cgit v1.1


From b66a60857ca4fae5900c5d81c2ba04e657509b99 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 9 Sep 2015 10:38:04 +0200
Subject: net: mv643xx_eth: use kzalloc

The double memset is a little ugly; using kzalloc avoids it altogether.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index d52639b..960169e 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1859,14 +1859,11 @@ oom:
 		return;
 	}
 
-	mc_spec = kmalloc(0x200, GFP_ATOMIC);
+	mc_spec = kzalloc(0x200, GFP_ATOMIC);
 	if (mc_spec == NULL)
 		goto oom;
 	mc_other = mc_spec + (0x100 >> 2);
 
-	memset(mc_spec, 0, 0x100);
-	memset(mc_other, 0, 0x100);
-
 	netdev_for_each_mc_addr(ha, dev) {
 		u8 *a = ha->addr;
 		u32 *table;
-- 
cgit v1.1


From 1f0ca208531a152e1da6aa43d095fe0b2039d9ca Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 9 Sep 2015 10:38:05 +0200
Subject: net: qlcnic: delete redundant memsets

In all cases, mbx->req.arg and mbx->rsp.arg have just been allocated
using kcalloc(), so these six memsets are redundant.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c      | 2 --
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c          | 2 --
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 2 --
 3 files changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 5ab3adf..9f0bdd9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -918,8 +918,6 @@ int qlcnic_83xx_alloc_mbx_args(struct qlcnic_cmd_args *mbx,
 				mbx->req.arg = NULL;
 				return -ENOMEM;
 			}
-			memset(mbx->req.arg, 0, sizeof(u32) * mbx->req.num);
-			memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num);
 			temp = adapter->ahw->fw_hal_version << 29;
 			mbx->req.arg[0] = (type | (mbx->req.num << 16) | temp);
 			mbx->cmd_op = type;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index 6e6f18f..a5f422f 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -73,8 +73,6 @@ int qlcnic_82xx_alloc_mbx_args(struct qlcnic_cmd_args *mbx,
 				mbx->req.arg = NULL;
 				return -ENOMEM;
 			}
-			memset(mbx->req.arg, 0, sizeof(u32) * mbx->req.num);
-			memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num);
 			mbx->req.arg[0] = type;
 			break;
 		}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 546cd5f..7327b72 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -729,8 +729,6 @@ static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *mbx, u32 type)
 				mbx->req.arg = NULL;
 				return -ENOMEM;
 			}
-			memset(mbx->req.arg, 0, sizeof(u32) * mbx->req.num);
-			memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num);
 			mbx->req.arg[0] = (type | (mbx->req.num << 16) |
 					   (3 << 29));
 			mbx->rsp.arg[0] = (type & 0xffff) | mbx->rsp.num << 16;
-- 
cgit v1.1


From dfc50fcaad574e5c8c85cbc83eca1426b2413fa4 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
Date: Wed, 9 Sep 2015 18:01:08 +0300
Subject: stmmac: fix check for phydev being open

Current check of phydev with IS_ERR(phydev) may make not much sense
because of_phy_connect() returns NULL on failure instead of error value.

Still for checking result of phy_connect() IS_ERR() makes perfect sense.

So let's use combined check IS_ERR_OR_NULL() that covers both cases.

Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 864b476..925f2f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -837,8 +837,11 @@ static int stmmac_init_phy(struct net_device *dev)
 				     interface);
 	}
 
-	if (IS_ERR(phydev)) {
+	if (IS_ERR_OR_NULL(phydev)) {
 		pr_err("%s: Could not attach to PHY\n", dev->name);
+		if (!phydev)
+			return -ENODEV;
+
 		return PTR_ERR(phydev);
 	}
 
-- 
cgit v1.1


From 792aec47d59d951865cc617a97b6e6be53d4b977 Mon Sep 17 00:00:00 2001
From: "Woojung.Huh@microchip.com" <Woojung.Huh@microchip.com>
Date: Wed, 9 Sep 2015 20:49:53 +0000
Subject: add microchip LAN88xx phy driver

Add Microchip LAN88XX phy driver for phylib.

Signed-off-by: Woojung Huh <woojung.huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig      |   5 ++
 drivers/net/phy/Makefile     |   1 +
 drivers/net/phy/microchip.c  | 148 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/microchipphy.h |  73 +++++++++++++++++++++
 4 files changed, 227 insertions(+)
 create mode 100644 drivers/net/phy/microchip.c
 create mode 100644 include/linux/microchipphy.h

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index c07030d..c5ad98a 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -127,6 +127,11 @@ config DP83867_PHY
 	---help---
 	  Currently supports the DP83867 PHY.
 
+config MICROCHIP_PHY
+	tristate "Drivers for Microchip PHYs"
+	help
+	  Supports the LAN88XX PHYs.
+
 config FIXED_PHY
 	tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
 	depends on PHYLIB
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 9bb1033..87f079c 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -37,3 +37,4 @@ obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
 obj-$(CONFIG_MDIO_SUN4I)	+= mdio-sun4i.o
 obj-$(CONFIG_MDIO_MOXART)	+= mdio-moxart.o
 obj-$(CONFIG_MDIO_BCM_UNIMAC)	+= mdio-bcm-unimac.o
+obj-$(CONFIG_MICROCHIP_PHY)	+= microchip.o
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
new file mode 100644
index 0000000..c0a20eb
--- /dev/null
+++ b/drivers/net/phy/microchip.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2015 Microchip Technology
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+#include <linux/microchipphy.h>
+
+#define DRIVER_AUTHOR	"WOOJUNG HUH <woojung.huh@microchip.com>"
+#define DRIVER_DESC	"Microchip LAN88XX PHY driver"
+
+struct lan88xx_priv {
+	int	chip_id;
+	int	chip_rev;
+	__u32	wolopts;
+};
+
+static int lan88xx_phy_config_intr(struct phy_device *phydev)
+{
+	int rc;
+
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		/* unmask all source and clear them before enable */
+		rc = phy_write(phydev, LAN88XX_INT_MASK, 0x7FFF);
+		rc = phy_read(phydev, LAN88XX_INT_STS);
+		rc = phy_write(phydev, LAN88XX_INT_MASK,
+			       LAN88XX_INT_MASK_MDINTPIN_EN_ |
+			       LAN88XX_INT_MASK_LINK_CHANGE_);
+	} else {
+		rc = phy_write(phydev, LAN88XX_INT_MASK, 0);
+	}
+
+	return rc < 0 ? rc : 0;
+}
+
+static int lan88xx_phy_ack_interrupt(struct phy_device *phydev)
+{
+	int rc = phy_read(phydev, LAN88XX_INT_STS);
+
+	return rc < 0 ? rc : 0;
+}
+
+int lan88xx_suspend(struct phy_device *phydev)
+{
+	struct lan88xx_priv *priv = phydev->priv;
+
+	/* do not power down PHY when WOL is enabled */
+	if (!priv->wolopts)
+		genphy_suspend(phydev);
+
+	return 0;
+}
+
+static int lan88xx_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->dev;
+	struct lan88xx_priv *priv;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->wolopts = 0;
+
+	/* these values can be used to identify internal PHY */
+	priv->chip_id = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_ID,
+					      3, phydev->addr);
+	priv->chip_rev = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_REV,
+					       3, phydev->addr);
+
+	phydev->priv = priv;
+
+	return 0;
+}
+
+static void lan88xx_remove(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->dev;
+	struct lan88xx_priv *priv = phydev->priv;
+
+	if (priv)
+		devm_kfree(dev, priv);
+}
+
+static int lan88xx_set_wol(struct phy_device *phydev,
+			   struct ethtool_wolinfo *wol)
+{
+	struct lan88xx_priv *priv = phydev->priv;
+
+	priv->wolopts = wol->wolopts;
+
+	return 0;
+}
+
+static struct phy_driver microchip_phy_driver[] = {
+{
+	.phy_id		= 0x0007c130,
+	.phy_id_mask	= 0xfffffff0,
+	.name		= "Microchip LAN88xx",
+
+	.features	= (PHY_GBIT_FEATURES |
+			   SUPPORTED_Pause | SUPPORTED_Asym_Pause),
+	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+
+	.probe		= lan88xx_probe,
+	.remove		= lan88xx_remove,
+
+	.config_init	= genphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+
+	.ack_interrupt	= lan88xx_phy_ack_interrupt,
+	.config_intr	= lan88xx_phy_config_intr,
+
+	.suspend	= lan88xx_suspend,
+	.resume		= genphy_resume,
+	.set_wol	= lan88xx_set_wol,
+
+	.driver		= { .owner = THIS_MODULE, }
+} };
+
+module_phy_driver(microchip_phy_driver);
+
+static struct mdio_device_id __maybe_unused microchip_tbl[] = {
+	{ 0x0007c130, 0xfffffff0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(mdio, microchip_tbl);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h
new file mode 100644
index 0000000..eb492d4
--- /dev/null
+++ b/include/linux/microchipphy.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2015 Microchip Technology
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _MICROCHIPPHY_H
+#define _MICROCHIPPHY_H
+
+#define LAN88XX_INT_MASK			(0x19)
+#define LAN88XX_INT_MASK_MDINTPIN_EN_		(0x8000)
+#define LAN88XX_INT_MASK_SPEED_CHANGE_		(0x4000)
+#define LAN88XX_INT_MASK_LINK_CHANGE_		(0x2000)
+#define LAN88XX_INT_MASK_FDX_CHANGE_		(0x1000)
+#define LAN88XX_INT_MASK_AUTONEG_ERR_		(0x0800)
+#define LAN88XX_INT_MASK_AUTONEG_DONE_		(0x0400)
+#define LAN88XX_INT_MASK_POE_DETECT_		(0x0200)
+#define LAN88XX_INT_MASK_SYMBOL_ERR_		(0x0100)
+#define LAN88XX_INT_MASK_FAST_LINK_FAIL_	(0x0080)
+#define LAN88XX_INT_MASK_WOL_EVENT_		(0x0040)
+#define LAN88XX_INT_MASK_EXTENDED_INT_		(0x0020)
+#define LAN88XX_INT_MASK_RESERVED_		(0x0010)
+#define LAN88XX_INT_MASK_FALSE_CARRIER_		(0x0008)
+#define LAN88XX_INT_MASK_LINK_SPEED_DS_		(0x0004)
+#define LAN88XX_INT_MASK_MASTER_SLAVE_DONE_	(0x0002)
+#define LAN88XX_INT_MASK_RX__ER_		(0x0001)
+
+#define LAN88XX_INT_STS				(0x1A)
+#define LAN88XX_INT_STS_INT_ACTIVE_		(0x8000)
+#define LAN88XX_INT_STS_SPEED_CHANGE_		(0x4000)
+#define LAN88XX_INT_STS_LINK_CHANGE_		(0x2000)
+#define LAN88XX_INT_STS_FDX_CHANGE_		(0x1000)
+#define LAN88XX_INT_STS_AUTONEG_ERR_		(0x0800)
+#define LAN88XX_INT_STS_AUTONEG_DONE_		(0x0400)
+#define LAN88XX_INT_STS_POE_DETECT_		(0x0200)
+#define LAN88XX_INT_STS_SYMBOL_ERR_		(0x0100)
+#define LAN88XX_INT_STS_FAST_LINK_FAIL_		(0x0080)
+#define LAN88XX_INT_STS_WOL_EVENT_		(0x0040)
+#define LAN88XX_INT_STS_EXTENDED_INT_		(0x0020)
+#define LAN88XX_INT_STS_RESERVED_		(0x0010)
+#define LAN88XX_INT_STS_FALSE_CARRIER_		(0x0008)
+#define LAN88XX_INT_STS_LINK_SPEED_DS_		(0x0004)
+#define LAN88XX_INT_STS_MASTER_SLAVE_DONE_	(0x0002)
+#define LAN88XX_INT_STS_RX_ER_			(0x0001)
+
+#define LAN88XX_EXT_PAGE_ACCESS			(0x1F)
+#define LAN88XX_EXT_PAGE_SPACE_0		(0x0000)
+#define LAN88XX_EXT_PAGE_SPACE_1		(0x0001)
+#define LAN88XX_EXT_PAGE_SPACE_2		(0x0002)
+
+/* Extended Register Page 1 space */
+#define LAN88XX_EXT_MODE_CTRL			(0x13)
+#define LAN88XX_EXT_MODE_CTRL_MDIX_MASK_	(0x000C)
+#define LAN88XX_EXT_MODE_CTRL_AUTO_MDIX_	(0x0000)
+#define LAN88XX_EXT_MODE_CTRL_MDI_		(0x0008)
+#define LAN88XX_EXT_MODE_CTRL_MDI_X_		(0x000C)
+
+/* MMD 3 Registers */
+#define	LAN88XX_MMD3_CHIP_ID			(32877)
+#define	LAN88XX_MMD3_CHIP_REV			(32878)
+
+#endif /* _MICROCHIPPHY_H */
-- 
cgit v1.1


From 52fe51f8523751da0e79c85350c47eb3bb94da5b Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Thu, 10 Sep 2015 06:57:12 +0800
Subject: ipv6: fix ifnullfree.cocci warnings

net/ipv6/route.c:2946:3-8: WARNING: NULL check before freeing functions like kfree, debugfs_remove, debugfs_remove_recursive or usb_free_urb is not needed. Maybe consider reorganizing relevant code to avoid passing NULL values.

 NULL check before some freeing functions is not needed.

 Based on checkpatch warning
 "kfree(NULL) is safe this check is probably not required"
 and kfreeaddr.cocci by Julia Lawall.

Generated by: scripts/coccinelle/free/ifnullfree.cocci

CC: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 34539d3..53617d7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2942,8 +2942,7 @@ cleanup:
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
 		if (nh->rt6_info)
 			dst_free(&nh->rt6_info->dst);
-		if (nh->mxc.mx)
-			kfree(nh->mxc.mx);
+		kfree(nh->mxc.mx);
 		list_del(&nh->next);
 		kfree(nh);
 	}
-- 
cgit v1.1


From d0942473e3ca4629a40bbf0c9fd74fc0c7ff2a79 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Mon, 7 Sep 2015 11:57:43 +0800
Subject: r8152: split DRIVER_VERSION

Split DRIVER_VERSION into NETNEXT_VERSION and NET_VERSION. Then,
according to the value of DRIVER_VERSION, we could know which
patches are used generally without comparing the source code.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index fe4ec32..6bb48bc 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -26,8 +26,13 @@
 #include <linux/mdio.h>
 #include <linux/usb/cdc.h>
 
-/* Version Information */
-#define DRIVER_VERSION "v1.08.1 (2015/07/28)"
+/* Information for net-next */
+#define NETNEXT_VERSION		"08"
+
+/* Information for net */
+#define NET_VERSION		"1"
+
+#define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
 #define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
 #define MODULENAME "r8152"
-- 
cgit v1.1


From 2dd49e0f16fb0e07c6fcc1322ebba310f5827072 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Mon, 7 Sep 2015 11:57:44 +0800
Subject: r8152: fix the runtime suspend issues

Fix the runtime suspend issues result from the linking change.

Case 1:
a) link down occurs.
b) driver disable tx/rx.
c) autosuspend occurs.
d) hw linking up.
e) device suspends without enabling tx/rx.
f) couldn't wake up when receiving packets.

Case 2:
a) Nway results in linking down.
b) autosuspend occurs.
c) device suspends.
d) device may not wake up when linking up.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 6bb48bc..d9427ca 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -30,7 +30,7 @@
 #define NETNEXT_VERSION		"08"
 
 /* Information for net */
-#define NET_VERSION		"1"
+#define NET_VERSION		"2"
 
 #define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -148,6 +148,7 @@
 #define OCP_EEE_ABLE		0xa5c4
 #define OCP_EEE_ADV		0xa5d0
 #define OCP_EEE_LPABLE		0xa5d2
+#define OCP_PHY_STATE		0xa708		/* nway state for 8153 */
 #define OCP_ADC_CFG		0xbc06
 
 /* SRAM Register */
@@ -432,6 +433,10 @@
 /* OCP_DOWN_SPEED */
 #define EN_10M_BGOFF		0x0080
 
+/* OCP_PHY_STATE */
+#define TXDIS_STATE		0x01
+#define ABD_STATE		0x02
+
 /* OCP_ADC_CFG */
 #define CKADSEL_L		0x0100
 #define ADC_EN			0x0080
@@ -609,6 +614,7 @@ struct r8152 {
 		void (*unload)(struct r8152 *);
 		int (*eee_get)(struct r8152 *, struct ethtool_eee *);
 		int (*eee_set)(struct r8152 *, struct ethtool_eee *);
+		bool (*in_nway)(struct r8152 *);
 	} rtl_ops;
 
 	int intr_interval;
@@ -2946,6 +2952,32 @@ static void rtl8153_down(struct r8152 *tp)
 	r8153_enable_aldps(tp);
 }
 
+static bool rtl8152_in_nway(struct r8152 *tp)
+{
+	u16 nway_state;
+
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, 0x2000);
+	tp->ocp_base = 0x2000;
+	ocp_write_byte(tp, MCU_TYPE_PLA, 0xb014, 0x4c);		/* phy state */
+	nway_state = ocp_read_word(tp, MCU_TYPE_PLA, 0xb01a);
+
+	/* bit 15: TXDIS_STATE, bit 14: ABD_STATE */
+	if (nway_state & 0xc000)
+		return false;
+	else
+		return true;
+}
+
+static bool rtl8153_in_nway(struct r8152 *tp)
+{
+	u16 phy_state = ocp_reg_read(tp, OCP_PHY_STATE) & 0xff;
+
+	if (phy_state == TXDIS_STATE || phy_state == ABD_STATE)
+		return false;
+	else
+		return true;
+}
+
 static void set_carrier(struct r8152 *tp)
 {
 	struct net_device *netdev = tp->netdev;
@@ -3410,6 +3442,27 @@ static int rtl8152_post_reset(struct usb_interface *intf)
 	return 0;
 }
 
+static bool delay_autosuspend(struct r8152 *tp)
+{
+	bool sw_linking = !!netif_carrier_ok(tp->netdev);
+	bool hw_linking = !!(rtl8152_get_speed(tp) & LINK_STATUS);
+
+	/* This means a linking change occurs and the driver doesn't detect it,
+	 * yet. If the driver has disabled tx/rx and hw is linking on, the
+	 * device wouldn't wake up by receiving any packet.
+	 */
+	if (work_busy(&tp->schedule.work) || sw_linking != hw_linking)
+		return true;
+
+	/* If the linking down is occurred by nway, the device may miss the
+	 * linking change event. And it wouldn't wake when linking on.
+	 */
+	if (!sw_linking && tp->rtl_ops.in_nway(tp))
+		return true;
+	else
+		return false;
+}
+
 static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
 {
 	struct r8152 *tp = usb_get_intfdata(intf);
@@ -3419,7 +3472,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
 	mutex_lock(&tp->control);
 
 	if (PMSG_IS_AUTO(message)) {
-		if (netif_running(netdev) && work_busy(&tp->schedule.work)) {
+		if (netif_running(netdev) && delay_autosuspend(tp)) {
 			ret = -EBUSY;
 			goto out1;
 		}
@@ -4049,6 +4102,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->unload		= rtl8152_unload;
 		ops->eee_get		= r8152_get_eee;
 		ops->eee_set		= r8152_set_eee;
+		ops->in_nway		= rtl8152_in_nway;
 		break;
 
 	case RTL_VER_03:
@@ -4063,6 +4117,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->unload		= rtl8153_unload;
 		ops->eee_get		= r8153_get_eee;
 		ops->eee_set		= r8153_set_eee;
+		ops->in_nway		= rtl8153_in_nway;
 		break;
 
 	default:
-- 
cgit v1.1


From 9638d19e481605217f95d9ab3c8896e499b1407d Mon Sep 17 00:00:00 2001
From: Nimrod Andy <B38611@freescale.com>
Date: Thu, 10 Sep 2015 09:35:39 +0800
Subject: net: fec: add netif status check before set mac address

There exist one issue by below case that case system hang:
ifconfig eth0 down
ifconfig eth0 hw ether 00:10:19:19:81:19

After eth0 down, all fec clocks are gated off. In the .fec_set_mac_address()
function, it will set new MAC address to registers, which causes system hang.

So it needs to add netif status check to avoid registers access when clocks are
gated off. Until eth0 up the new MAC address are wrote into related registers.

V2:
As Lucas Stach's suggestion, add a comment in the code to explain why it needed.

CC: Lucas Stach <l.stach@pengutronix.de>
CC: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 6cc3340..dd4ca39 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3031,6 +3031,14 @@ fec_set_mac_address(struct net_device *ndev, void *p)
 		memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
 	}
 
+	/* Add netif status check here to avoid system hang in below case:
+	 * ifconfig ethx down; ifconfig ethx hw ether xx:xx:xx:xx:xx:xx;
+	 * After ethx down, fec all clocks are gated off and then register
+	 * access causes system hang.
+	 */
+	if (!netif_running(ndev))
+		return 0;
+
 	writel(ndev->dev_addr[3] | (ndev->dev_addr[2] << 8) |
 		(ndev->dev_addr[1] << 16) | (ndev->dev_addr[0] << 24),
 		fep->hwp + FEC_ADDR_LOW);
-- 
cgit v1.1


From f2be053c83ee93888fc09d90df2bded0deb28947 Mon Sep 17 00:00:00 2001
From: Hariprasad Shenai <hariprasad@chelsio.com>
Date: Thu, 10 Sep 2015 09:55:13 +0530
Subject: cxgb4: changes for new firmware 1.14.4.0

Incorporate fw_ldst_cmd structure change for new firmware and also
update version string for the same

Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h     | 33 +++++++++++++++++++++--
 drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 12 ++++-----
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ab46746..a32de30 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -762,8 +762,6 @@ enum fw_ldst_func_mod_index {
 
 struct fw_ldst_cmd {
 	__be32 op_to_addrspace;
-#define FW_LDST_CMD_ADDRSPACE_S		0
-#define FW_LDST_CMD_ADDRSPACE_V(x)	((x) << FW_LDST_CMD_ADDRSPACE_S)
 	__be32 cycles_to_len16;
 	union fw_ldst {
 		struct fw_ldst_addrval {
@@ -788,6 +786,13 @@ struct fw_ldst_cmd {
 			__be16 vctl;
 			__be16 rval;
 		} mdio;
+		struct fw_ldst_cim_rq {
+			u8 req_first64[8];
+			u8 req_second64[8];
+			u8 resp_first64[8];
+			u8 resp_second64[8];
+			__be32 r3[2];
+		} cim_rq;
 		union fw_ldst_mps {
 			struct fw_ldst_mps_rplc {
 				__be16 fid_idx;
@@ -828,9 +833,33 @@ struct fw_ldst_cmd {
 			__be16 nset_pkd;
 			__be32 data[12];
 		} pcie;
+		struct fw_ldst_i2c_deprecated {
+			u8 pid_pkd;
+			u8 base;
+			u8 boffset;
+			u8 data;
+			__be32 r9;
+		} i2c_deprecated;
+		struct fw_ldst_i2c {
+			u8 pid;
+			u8 did;
+			u8 boffset;
+			u8 blen;
+			__be32 r9;
+			__u8   data[48];
+		} i2c;
+		struct fw_ldst_le {
+			__be32 index;
+			__be32 r9;
+			u8 val[33];
+			u8 r11[7];
+		} le;
 	} u;
 };
 
+#define FW_LDST_CMD_ADDRSPACE_S		0
+#define FW_LDST_CMD_ADDRSPACE_V(x)	((x) << FW_LDST_CMD_ADDRSPACE_S)
+
 #define FW_LDST_CMD_MSG_S       31
 #define FW_LDST_CMD_MSG_V(x)	((x) << FW_LDST_CMD_MSG_S)
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index 92bafa7..c4b262c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -36,8 +36,8 @@
 #define __T4FW_VERSION_H__
 
 #define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x0D
-#define T4FW_VERSION_MICRO 0x20
+#define T4FW_VERSION_MINOR 0x0E
+#define T4FW_VERSION_MICRO 0x04
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -45,8 +45,8 @@
 #define T4FW_MIN_VERSION_MICRO 0x00
 
 #define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x0D
-#define T5FW_VERSION_MICRO 0x20
+#define T5FW_VERSION_MINOR 0x0E
+#define T5FW_VERSION_MICRO 0x04
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -54,8 +54,8 @@
 #define T5FW_MIN_VERSION_MICRO 0x00
 
 #define T6FW_VERSION_MAJOR 0x01
-#define T6FW_VERSION_MINOR 0x0D
-#define T6FW_VERSION_MICRO 0x2D
+#define T6FW_VERSION_MINOR 0x0E
+#define T6FW_VERSION_MICRO 0x04
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
-- 
cgit v1.1


From a66e36568e30ed3714c0e3a12bd3b64696343ff5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 10 Sep 2015 01:20:46 +0200
Subject: netlink, mmap: don't walk rx ring on poll if receive queue non-empty

In case of netlink mmap, there can be situations where received frames
have to be placed into the normal receive queue. The ring buffer indicates
this through NL_MMAP_STATUS_COPY, so the user is asked to pick them up
via recvmsg(2) syscall, and to put the slot back to NL_MMAP_STATUS_UNUSED.

Commit 0ef707700f1c ("netlink: rx mmap: fix POLLIN condition") changed
polling, so that we walk in the worst case the whole ring through the
new netlink_has_valid_frame(), for example, when the ring would have no
NL_MMAP_STATUS_VALID, but at least one NL_MMAP_STATUS_COPY frame.

Since we do a datagram_poll() already earlier to pick up a mask that could
possibly contain POLLIN | POLLRDNORM already (due to NL_MMAP_STATUS_COPY),
we can skip checking the rx ring entirely.

In case the kernel is compiled with !CONFIG_NETLINK_MMAP, then all this is
irrelevant anyway as netlink_poll() is just defined as datagram_poll().

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 50889be..173817a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -674,12 +674,19 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
 
 	mask = datagram_poll(file, sock, wait);
 
-	spin_lock_bh(&sk->sk_receive_queue.lock);
-	if (nlk->rx_ring.pg_vec) {
-		if (netlink_has_valid_frame(&nlk->rx_ring))
-			mask |= POLLIN | POLLRDNORM;
+	/* We could already have received frames in the normal receive
+	 * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
+	 * so if mask contains pollin/etc already, there's no point
+	 * walking the ring.
+	 */
+	if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		if (nlk->rx_ring.pg_vec) {
+			if (netlink_has_valid_frame(&nlk->rx_ring))
+				mask |= POLLIN | POLLRDNORM;
+		}
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
 	}
-	spin_unlock_bh(&sk->sk_receive_queue.lock);
 
 	spin_lock_bh(&sk->sk_write_queue.lock);
 	if (nlk->tx_ring.pg_vec) {
-- 
cgit v1.1


From 6bb0fef489f667cf701853054f44579754f00a06 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 10 Sep 2015 02:10:57 +0200
Subject: netlink, mmap: fix edge-case leakages in nf queue zero-copy

When netlink mmap on receive side is the consumer of nf queue data,
it can happen that in some edge cases, we write skb shared info into
the user space mmap buffer:

Assume a possible rx ring frame size of only 4096, and the network skb,
which is being zero-copied into the netlink skb, contains page frags
with an overall skb->len larger than the linear part of the netlink
skb.

skb_zerocopy(), which is generic and thus not aware of the fact that
shared info cannot be accessed for such skbs then tries to write and
fill frags, thus leaking kernel data/pointers and in some corner cases
possibly writing out of bounds of the mmap area (when filling the
last slot in the ring buffer this way).

I.e. the ring buffer slot is then of status NL_MMAP_STATUS_VALID, has
an advertised length larger than 4096, where the linear part is visible
at the slot beginning, and the leaked sizeof(struct skb_shared_info)
has been written to the beginning of the next slot (also corrupting
the struct nl_mmap_hdr slot header incl. status etc), since skb->end
points to skb->data + ring->frame_size - NL_MMAP_HDRLEN.

The fix adds and lets __netlink_alloc_skb() take the actual needed
linear room for the network skb + meta data into account. It's completely
irrelevant for non-mmaped netlink sockets, but in case mmap sockets
are used, it can be decided whether the available skb_tailroom() is
really large enough for the buffer, or whether it needs to internally
fallback to a normal alloc_skb().

>From nf queue side, the information whether the destination port is
an mmap RX ring is not really available without extra port-to-socket
lookup, thus it can only be determined in lower layers i.e. when
__netlink_alloc_skb() is called that checks internally for this. I
chose to add the extra ldiff parameter as mmap will then still work:
We have data_len and hlen in nfqnl_build_packet_message(), data_len
is the full length (capped at queue->copy_range) for skb_zerocopy()
and hlen some possible part of data_len that needs to be copied; the
rem_len variable indicates the needed remaining linear mmap space.

The only other workaround in nf queue internally would be after
allocation time by f.e. cap'ing the data_len to the skb_tailroom()
iff we deal with an mmap skb, but that would 1) expose the fact that
we use a mmap skb to upper layers, and 2) trim the skb where we
otherwise could just have moved the full skb into the normal receive
queue.

After the patch, in my test case the ring slot doesn't fit and therefore
shows NL_MMAP_STATUS_COPY, where a full skb carries all the data and
thus needs to be picked up via recv().

Fixes: 3ab1f683bf8b ("nfnetlink: add support for memory mapped netlink")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h              | 13 +++++++++++--
 net/netfilter/nfnetlink_queue_core.c |  5 +++--
 net/netlink/af_netlink.c             | 18 ++++++++++++------
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 9120edb..639e9b8 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -68,8 +68,17 @@ extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_has_listeners(struct sock *sk, unsigned int group);
-extern struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
-					 u32 dst_portid, gfp_t gfp_mask);
+
+extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
+					   unsigned int ldiff, u32 dst_portid,
+					   gfp_t gfp_mask);
+static inline struct sk_buff *
+netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid,
+		  gfp_t gfp_mask)
+{
+	return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask);
+}
+
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
 extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
 			     __u32 group, gfp_t allocation);
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 685cc6a..a5cd6d9 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 			   __be32 **packet_id_ptr)
 {
 	size_t size;
-	size_t data_len = 0, cap_len = 0;
+	size_t data_len = 0, cap_len = 0, rem_len = 0;
 	unsigned int hlen = 0;
 	struct sk_buff *skb;
 	struct nlattr *nla;
@@ -360,6 +360,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		hlen = min_t(unsigned int, hlen, data_len);
 		size += sizeof(struct nlattr) + hlen;
 		cap_len = entskb->len;
+		rem_len = data_len - hlen;
 		break;
 	}
 
@@ -377,7 +378,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 			size += nla_total_size(seclen);
 	}
 
-	skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
+	skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
 				  GFP_ATOMIC);
 	if (!skb) {
 		skb_tx_error(entskb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 173817a..7f86d3b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1844,15 +1844,16 @@ retry:
 }
 EXPORT_SYMBOL(netlink_unicast);
 
-struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
-				  u32 dst_portid, gfp_t gfp_mask)
+struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
+				    unsigned int ldiff, u32 dst_portid,
+				    gfp_t gfp_mask)
 {
 #ifdef CONFIG_NETLINK_MMAP
+	unsigned int maxlen, linear_size;
 	struct sock *sk = NULL;
 	struct sk_buff *skb;
 	struct netlink_ring *ring;
 	struct nl_mmap_hdr *hdr;
-	unsigned int maxlen;
 
 	sk = netlink_getsockbyportid(ssk, dst_portid);
 	if (IS_ERR(sk))
@@ -1863,7 +1864,11 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
 	if (ring->pg_vec == NULL)
 		goto out_put;
 
-	if (ring->frame_size - NL_MMAP_HDRLEN < size)
+	/* We need to account the full linear size needed as a ring
+	 * slot cannot have non-linear parts.
+	 */
+	linear_size = size + ldiff;
+	if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
 		goto out_put;
 
 	skb = alloc_skb_head(gfp_mask);
@@ -1877,13 +1882,14 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
 
 	/* check again under lock */
 	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
-	if (maxlen < size)
+	if (maxlen < linear_size)
 		goto out_free;
 
 	netlink_forward_ring(ring);
 	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
 	if (hdr == NULL)
 		goto err2;
+
 	netlink_ring_setup_skb(skb, sk, ring, hdr);
 	netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
 	atomic_inc(&ring->pending);
@@ -1909,7 +1915,7 @@ out:
 #endif
 	return alloc_skb(size, gfp_mask);
 }
-EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
 
 int netlink_has_listeners(struct sock *sk, unsigned int group)
 {
-- 
cgit v1.1


From 1ab1e895492d8084dfc1c854efacde219e56b8c1 Mon Sep 17 00:00:00 2001
From: Henrik Austad <henrik@austad.us>
Date: Wed, 9 Sep 2015 12:25:17 +0200
Subject: ether: add IEEE 1722 ethertype - TSN

IEEE 1722 describes AVB (later renamed to TSN - Time Sensitive
Networking), a protocol, encapsualtion and synchronization to utilize
standard networks for audio/video (and later other time-sensitive)
streams.

This standard uses ethertype 0x22F0.

http://standards.ieee.org/develop/regauth/ethertype/eth.txt

This is a respin of a previous patch ("ether: add AVB frame type
ETH_P_AVB")

CC: "David S. Miller" <davem@davemloft.net>
CC: netdev@vger.kernel.org
CC: linux-api@vger.kernel.org
CC: linux-kernel@vger.kernel.org
Signed-off-by: Henrik Austad <henrik@austad.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index aa63ed0..ea9221b 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -42,6 +42,7 @@
 #define ETH_P_LOOP	0x0060		/* Ethernet Loopback packet	*/
 #define ETH_P_PUP	0x0200		/* Xerox PUP packet		*/
 #define ETH_P_PUPAT	0x0201		/* Xerox PUP Addr Trans packet	*/
+#define ETH_P_TSN	0x22F0		/* TSN (IEEE 1722) packet	*/
 #define ETH_P_IP	0x0800		/* Internet Protocol packet	*/
 #define ETH_P_X25	0x0805		/* CCITT X.25			*/
 #define ETH_P_ARP	0x0806		/* Address Resolution packet	*/
-- 
cgit v1.1


From 420203204eada39cfe0e8eb65e609da7b209cf33 Mon Sep 17 00:00:00 2001
From: Corinna Vinschen <vinschen@redhat.com>
Date: Thu, 10 Sep 2015 10:47:35 +0200
Subject: r8169: Fix sleeping function called during get_stats64, v2

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=104031
Fixes: 6e85d5ad36a26debc23a9a865c029cbe242b2dc8

Based on the discussion starting at
http://www.spinics.net/lists/netdev/msg342193.html

Tested locally on RTL8168evl/8111evl with various concurrent processes
accessing /proc/net/dev while changing the link state as well as
removing/reloading the r8169 module.

Signed-off-by: Corinna Vinschen <vinschen@redhat.com>
Tested-by: poma <pomidorabelisima@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 137 ++++++++++++++---------------------
 1 file changed, 54 insertions(+), 83 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 24dcbe6..2b32e0c 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -833,7 +833,8 @@ struct rtl8169_private {
 	unsigned features;
 
 	struct mii_if_info mii;
-	struct rtl8169_counters counters;
+	dma_addr_t counters_phys_addr;
+	struct rtl8169_counters *counters;
 	struct rtl8169_tc_offsets tc_offset;
 	u32 saved_wolopts;
 	u32 opts1_mask;
@@ -2190,53 +2191,37 @@ static int rtl8169_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
-static struct rtl8169_counters *rtl8169_map_counters(struct net_device *dev,
-						     dma_addr_t *paddr,
-						     u32 counter_cmd)
+DECLARE_RTL_COND(rtl_counters_cond)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
-	struct device *d = &tp->pci_dev->dev;
-	struct rtl8169_counters *counters;
-	u32 cmd;
 
-	counters = dma_alloc_coherent(d, sizeof(*counters), paddr, GFP_KERNEL);
-	if (counters) {
-		RTL_W32(CounterAddrHigh, (u64)*paddr >> 32);
-		cmd = (u64)*paddr & DMA_BIT_MASK(32);
-		RTL_W32(CounterAddrLow, cmd);
-		RTL_W32(CounterAddrLow, cmd | counter_cmd);
-	}
-	return counters;
+	return RTL_R32(CounterAddrLow) & (CounterReset | CounterDump);
 }
 
-static void rtl8169_unmap_counters (struct net_device *dev,
-				    dma_addr_t paddr,
-				    struct rtl8169_counters *counters)
+static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
-	struct device *d = &tp->pci_dev->dev;
+	dma_addr_t paddr = tp->counters_phys_addr;
+	u32 cmd;
+	bool ret;
 
-	RTL_W32(CounterAddrLow, 0);
-	RTL_W32(CounterAddrHigh, 0);
+	RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
+	cmd = (u64)paddr & DMA_BIT_MASK(32);
+	RTL_W32(CounterAddrLow, cmd);
+	RTL_W32(CounterAddrLow, cmd | counter_cmd);
 
-	dma_free_coherent(d, sizeof(*counters), counters, paddr);
-}
+	ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
 
-DECLARE_RTL_COND(rtl_reset_counters_cond)
-{
-	void __iomem *ioaddr = tp->mmio_addr;
+	RTL_W32(CounterAddrLow, 0);
+	RTL_W32(CounterAddrHigh, 0);
 
-	return RTL_R32(CounterAddrLow) & CounterReset;
+	return ret;
 }
 
 static bool rtl8169_reset_counters(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	struct rtl8169_counters *counters;
-	dma_addr_t paddr;
-	bool ret = true;
 
 	/*
 	 * Versions prior to RTL_GIGA_MAC_VER_19 don't support resetting the
@@ -2245,32 +2230,13 @@ static bool rtl8169_reset_counters(struct net_device *dev)
 	if (tp->mac_version < RTL_GIGA_MAC_VER_19)
 		return true;
 
-	counters = rtl8169_map_counters(dev, &paddr, CounterReset);
-	if (!counters)
-		return false;
-
-	if (!rtl_udelay_loop_wait_low(tp, &rtl_reset_counters_cond, 10, 1000))
-		ret = false;
-
-	rtl8169_unmap_counters(dev, paddr, counters);
-
-	return ret;
-}
-
-DECLARE_RTL_COND(rtl_counters_cond)
-{
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(CounterAddrLow) & CounterDump;
+	return rtl8169_do_counters(dev, CounterReset);
 }
 
 static bool rtl8169_update_counters(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
-	struct rtl8169_counters *counters;
-	dma_addr_t paddr;
-	bool ret = true;
 
 	/*
 	 * Some chips are unable to dump tally counters when the receiver
@@ -2279,23 +2245,13 @@ static bool rtl8169_update_counters(struct net_device *dev)
 	if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
 		return true;
 
-	counters = rtl8169_map_counters(dev, &paddr, CounterDump);
-	if (!counters)
-		return false;
-
-	if (rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000))
-		memcpy(&tp->counters, counters, sizeof(*counters));
-	else
-		ret = false;
-
-	rtl8169_unmap_counters(dev, paddr, counters);
-
-	return ret;
+	return rtl8169_do_counters(dev, CounterDump);
 }
 
 static bool rtl8169_init_counter_offsets(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_counters *counters = tp->counters;
 	bool ret = false;
 
 	/*
@@ -2323,9 +2279,9 @@ static bool rtl8169_init_counter_offsets(struct net_device *dev)
 	if (rtl8169_update_counters(dev))
 		ret = true;
 
-	tp->tc_offset.tx_errors = tp->counters.tx_errors;
-	tp->tc_offset.tx_multi_collision = tp->counters.tx_multi_collision;
-	tp->tc_offset.tx_aborted = tp->counters.tx_aborted;
+	tp->tc_offset.tx_errors = counters->tx_errors;
+	tp->tc_offset.tx_multi_collision = counters->tx_multi_collision;
+	tp->tc_offset.tx_aborted = counters->tx_aborted;
 	tp->tc_offset.inited = true;
 
 	return ret;
@@ -2335,24 +2291,25 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev,
 				      struct ethtool_stats *stats, u64 *data)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_counters *counters = tp->counters;
 
 	ASSERT_RTNL();
 
 	rtl8169_update_counters(dev);
 
-	data[0] = le64_to_cpu(tp->counters.tx_packets);
-	data[1] = le64_to_cpu(tp->counters.rx_packets);
-	data[2] = le64_to_cpu(tp->counters.tx_errors);
-	data[3] = le32_to_cpu(tp->counters.rx_errors);
-	data[4] = le16_to_cpu(tp->counters.rx_missed);
-	data[5] = le16_to_cpu(tp->counters.align_errors);
-	data[6] = le32_to_cpu(tp->counters.tx_one_collision);
-	data[7] = le32_to_cpu(tp->counters.tx_multi_collision);
-	data[8] = le64_to_cpu(tp->counters.rx_unicast);
-	data[9] = le64_to_cpu(tp->counters.rx_broadcast);
-	data[10] = le32_to_cpu(tp->counters.rx_multicast);
-	data[11] = le16_to_cpu(tp->counters.tx_aborted);
-	data[12] = le16_to_cpu(tp->counters.tx_underun);
+	data[0] = le64_to_cpu(counters->tx_packets);
+	data[1] = le64_to_cpu(counters->rx_packets);
+	data[2] = le64_to_cpu(counters->tx_errors);
+	data[3] = le32_to_cpu(counters->rx_errors);
+	data[4] = le16_to_cpu(counters->rx_missed);
+	data[5] = le16_to_cpu(counters->align_errors);
+	data[6] = le32_to_cpu(counters->tx_one_collision);
+	data[7] = le32_to_cpu(counters->tx_multi_collision);
+	data[8] = le64_to_cpu(counters->rx_unicast);
+	data[9] = le64_to_cpu(counters->rx_broadcast);
+	data[10] = le32_to_cpu(counters->rx_multicast);
+	data[11] = le16_to_cpu(counters->tx_aborted);
+	data[12] = le16_to_cpu(counters->tx_underun);
 }
 
 static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -7780,6 +7737,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
+	struct rtl8169_counters *counters = tp->counters;
 	unsigned int start;
 
 	if (netif_running(dev))
@@ -7816,11 +7774,11 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	 * Subtract values fetched during initalization.
 	 * See rtl8169_init_counter_offsets for a description why we do that.
 	 */
-	stats->tx_errors = le64_to_cpu(tp->counters.tx_errors) -
+	stats->tx_errors = le64_to_cpu(counters->tx_errors) -
 		le64_to_cpu(tp->tc_offset.tx_errors);
-	stats->collisions = le32_to_cpu(tp->counters.tx_multi_collision) -
+	stats->collisions = le32_to_cpu(counters->tx_multi_collision) -
 		le32_to_cpu(tp->tc_offset.tx_multi_collision);
-	stats->tx_aborted_errors = le16_to_cpu(tp->counters.tx_aborted) -
+	stats->tx_aborted_errors = le16_to_cpu(counters->tx_aborted) -
 		le16_to_cpu(tp->tc_offset.tx_aborted);
 
 	return stats;
@@ -8022,6 +7980,9 @@ static void rtl_remove_one(struct pci_dev *pdev)
 
 	unregister_netdev(dev);
 
+	dma_free_coherent(&tp->pci_dev->dev, sizeof(*tp->counters),
+			  tp->counters, tp->counters_phys_addr);
+
 	rtl_release_firmware(tp);
 
 	if (pci_dev_run_wake(pdev))
@@ -8447,9 +8408,16 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 
+	tp->counters = dma_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
+					   &tp->counters_phys_addr, GFP_KERNEL);
+	if (!tp->counters) {
+		rc = -ENOMEM;
+		goto err_out_msi_4;
+	}
+
 	rc = register_netdev(dev);
 	if (rc < 0)
-		goto err_out_msi_4;
+		goto err_out_cnt_5;
 
 	pci_set_drvdata(pdev, dev);
 
@@ -8483,6 +8451,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 out:
 	return rc;
 
+err_out_cnt_5:
+	dma_free_coherent(&pdev->dev, sizeof(*tp->counters), tp->counters,
+			  tp->counters_phys_addr);
 err_out_msi_4:
 	netif_napi_del(&tp->napi);
 	rtl_disable_msi(pdev, tp);
-- 
cgit v1.1


From 4c82ac3c37363e8c4ded6a5fe1ec5fa756b34df3 Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Thu, 10 Sep 2015 11:18:57 +0100
Subject: xen-netback: respect user provided max_queues

Originally that parameter was always reset to num_online_cpus during
module initialisation, which renders it useless.

The fix is to only set max_queues to num_online_cpus when user has not
provided a value.

Reported-by: Johnny Strom <johnny.strom@linuxsolutions.fi>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index b588b1a..abc1381 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -2114,8 +2114,11 @@ static int __init netback_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
-	/* Allow as many queues as there are CPUs, by default */
-	xenvif_max_queues = num_online_cpus();
+	/* Allow as many queues as there are CPUs if user has not
+	 * specified a value.
+	 */
+	if (xenvif_max_queues == 0)
+		xenvif_max_queues = num_online_cpus();
 
 	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
 		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
-- 
cgit v1.1


From 32a844056fd43dda647e1c3c6b9983bdfa04d17d Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Thu, 10 Sep 2015 11:18:58 +0100
Subject: xen-netfront: respect user provided max_queues

Originally that parameter was always reset to num_online_cpus during
module initialisation, which renders it useless.

The fix is to only set max_queues to num_online_cpus when user has not
provided a value.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Tested-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index e27e6d2..b9c637a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2132,8 +2132,11 @@ static int __init netif_init(void)
 
 	pr_info("Initialising Xen virtual ethernet driver\n");
 
-	/* Allow as many queues as there are CPUs, by default */
-	xennet_max_queues = num_online_cpus();
+	/* Allow as many queues as there are CPUs if user has not
+	 * specified a value.
+	 */
+	if (xennet_max_queues == 0)
+		xennet_max_queues = num_online_cpus();
 
 	return xenbus_register_frontend(&netfront_driver);
 }
-- 
cgit v1.1


From 05c5a46d71f621df620fbabbd7758ee1b44575ad Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 9 Sep 2015 21:54:37 -0700
Subject: tcp: generate CA_EVENT_TX_START on data frames

Issuing a CC TX_START event on control frames like pure ACK
is a waste of time, as a CC should not care.

Following patch needs this change, as we want CUBIC to properly track
idle time at a low cost, with a single TX_START being generated.

Yuchung might slightly refine the condition triggering TX_START
on a followup patch.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Cc: Jana Iyengar <jri@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Sangtae Ha <sangtae.ha@gmail.com>
Cc: Lawrence Brakmo <lawrence@brakmo.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1188e4f..f9a8a12 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -164,6 +164,9 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const u32 now = tcp_time_stamp;
 
+	if (tcp_packets_in_flight(tp) == 0)
+		tcp_ca_event(sk, CA_EVENT_TX_START);
+
 	tp->lsndtime = now;
 
 	/* If it is a reply for ato after last received
@@ -940,9 +943,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 							   &md5);
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
-	if (tcp_packets_in_flight(tp) == 0)
-		tcp_ca_event(sk, CA_EVENT_TX_START);
-
 	/* if no packet is in qdisc/device queue, then allow XPS to select
 	 * another queue. We can be called from tcp_tsq_handler()
 	 * which holds one reference to sk_wmem_alloc.
-- 
cgit v1.1


From 30927520dbae297182990bb21d08762bcc35ce1d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Sep 2015 21:55:07 -0700
Subject: tcp_cubic: better follow cubic curve after idle period

Jana Iyengar found an interesting issue on CUBIC :

The epoch is only updated/reset initially and when experiencing losses.
The delta "t" of now - epoch_start can be arbitrary large after app idle
as well as the bic_target. Consequentially the slope (inverse of
ca->cnt) would be really large, and eventually ca->cnt would be
lower-bounded in the end to 2 to have delayed-ACK slow-start behavior.

This particularly shows up when slow_start_after_idle is disabled
as a dangerous cwnd inflation (1.5 x RTT) after few seconds of idle
time.

Jana initial fix was to reset epoch_start if app limited,
but Neal pointed out it would ask the CUBIC algorithm to recalculate the
curve so that we again start growing steeply upward from where cwnd is
now (as CUBIC does just after a loss). Ideally we'd want the cwnd growth
curve to be the same shape, just shifted later in time by the amount of
the idle period.

Reported-by: Jana Iyengar <jri@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Sangtae Ha <sangtae.ha@gmail.com>
Cc: Lawrence Brakmo <lawrence@brakmo.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 28011fb..c6ded6b 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -151,6 +151,21 @@ static void bictcp_init(struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
+static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+{
+	if (event == CA_EVENT_TX_START) {
+		s32 delta = tcp_time_stamp - tcp_sk(sk)->lsndtime;
+		struct bictcp *ca = inet_csk_ca(sk);
+
+		/* We were application limited (idle) for a while.
+		 * Shift epoch_start to keep cwnd growth to cubic curve.
+		 */
+		if (ca->epoch_start && delta > 0)
+			ca->epoch_start += delta;
+		return;
+	}
+}
+
 /* calculate the cubic root of x using a table lookup followed by one
  * Newton-Raphson iteration.
  * Avg err ~= 0.195%
@@ -450,6 +465,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
 	.cong_avoid	= bictcp_cong_avoid,
 	.set_state	= bictcp_state,
 	.undo_cwnd	= bictcp_undo_cwnd,
+	.cwnd_event	= bictcp_cwnd_event,
 	.pkts_acked     = bictcp_acked,
 	.owner		= THIS_MODULE,
 	.name		= "cubic",
-- 
cgit v1.1