From e050dbeb0d1e19072d0d656b51f06af1af860f19 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 15 Aug 2014 10:19:39 +0200
Subject: batman-adv: Fix parameter order of hlist_add_behind

1d023284c31a4e40a94d5bbcb7dbb7a35ee0bcbc ("list: fix order of arguments for
hlist_add_after(_rcu)") was incorrectly rebased on top of
d9124268d84a836f14a6ead54ff9d8eee4c43be5 ("batman-adv: Fix out-of-order
fragmentation support"). The parameter order change of the rebased patch was
not re-applied as expected. This causes a memory leak and can cause crashes
when out-of-order packets are received. hlist_add_behind will try to access the
uninitialized list pointers of frag_entry_new to find the previous/next entry
and may modify/read random memory locations.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/batman-adv/fragmentation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 52c43f9..fc1835c 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
 
 	/* Reached the end of the list, so insert after 'frag_entry_last'. */
 	if (likely(frag_entry_last)) {
-		hlist_add_behind(&frag_entry_last->list, &frag_entry_new->list);
+		hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
 		chain->size += skb->len - hdr_size;
 		chain->timestamp = jiffies;
 		ret = true;
-- 
cgit v1.1


From ac32c7f705692b92fe12dcbe88fe87136fdfff6f Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@ericsson.com>
Date: Fri, 15 Aug 2014 16:44:35 +0200
Subject: tipc: fix message importance range check

Commit 3b4f302d8578 ("tipc: eliminate
redundant locking") introduced a bug by removing the sanity check
for message importance, allowing programs to assign any value to
the msg_user field. This will mess up the packet reception logic
and may cause random link resets.

Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.h   | 4 +++-
 net/tipc/socket.c | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.h b/net/tipc/port.h
index 3f93454..a69118f 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -179,8 +179,10 @@ static inline int tipc_port_importance(struct tipc_port *port)
 	return msg_importance(&port->phdr);
 }
 
-static inline void tipc_port_set_importance(struct tipc_port *port, int imp)
+static inline int tipc_port_set_importance(struct tipc_port *port, int imp)
 {
+	if (imp > TIPC_CRITICAL_IMPORTANCE)
+		return -EINVAL
 	msg_set_importance(&port->phdr, (u32)imp);
 }
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7d423ee..ff8c811 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1973,7 +1973,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 
 	switch (opt) {
 	case TIPC_IMPORTANCE:
-		tipc_port_set_importance(port, value);
+		res = tipc_port_set_importance(port, value);
 		break;
 	case TIPC_SRC_DROPPABLE:
 		if (sock->type != SOCK_STREAM)
-- 
cgit v1.1


From 8993cf8edf42527119186b558766539243b791a5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 11 Aug 2014 18:21:49 +0200
Subject: netfilter: move NAT Kconfig switches out of the iptables scope

Currently, the NAT configs depend on iptables and ip6tables. However,
users should be capable of enabling NAT for nft without having to
switch on iptables.

Fix this by adding new specific IP_NF_NAT and IP6_NF_NAT config
switches for iptables and ip6tables NAT support. I have also moved
the original NF_NAT_IPV4 and NF_NAT_IPV6 configs out of the scope
of iptables to make them independent of it.

This patch also adds NETFILTER_XT_NAT which selects the xt_nat
combo that provides snat/dnat for iptables. We cannot use NF_NAT
anymore since nf_tables can select this.

Reported-by: Matteo Croce <technoboy85@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/Kconfig  | 102 ++++++++++++++++++++++++--------------------
 net/ipv4/netfilter/Makefile |   2 +-
 net/ipv6/netfilter/Kconfig  |  26 ++++++++---
 net/ipv6/netfilter/Makefile |   2 +-
 net/netfilter/Makefile      |   2 +-
 5 files changed, 77 insertions(+), 57 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fb17312..7cbcaf4 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -82,6 +82,52 @@ config NF_TABLES_ARP
 	help
 	  This option enables the ARP support for nf_tables.
 
+config NF_NAT_IPV4
+	tristate "IPv4 NAT"
+	depends on NF_CONNTRACK_IPV4
+	default m if NETFILTER_ADVANCED=n
+	select NF_NAT
+	help
+	  The IPv4 NAT option allows masquerading, port forwarding and other
+	  forms of full Network Address Port Translation. This can be
+	  controlled by iptables or nft.
+
+if NF_NAT_IPV4
+
+config NF_NAT_SNMP_BASIC
+	tristate "Basic SNMP-ALG support"
+	depends on NF_CONNTRACK_SNMP
+	depends on NETFILTER_ADVANCED
+	default NF_NAT && NF_CONNTRACK_SNMP
+	---help---
+
+	  This module implements an Application Layer Gateway (ALG) for
+	  SNMP payloads.  In conjunction with NAT, it allows a network
+	  management system to access multiple private networks with
+	  conflicting addresses.  It works by modifying IP addresses
+	  inside SNMP payloads to match IP-layer NAT mapping.
+
+	  This is the "basic" form of SNMP-ALG, as described in RFC 2962
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config NF_NAT_PROTO_GRE
+	tristate
+	depends on NF_CT_PROTO_GRE
+
+config NF_NAT_PPTP
+	tristate
+	depends on NF_CONNTRACK
+	default NF_CONNTRACK_PPTP
+	select NF_NAT_PROTO_GRE
+
+config NF_NAT_H323
+	tristate
+	depends on NF_CONNTRACK
+	default NF_CONNTRACK_H323
+
+endif # NF_NAT_IPV4
+
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
 	default m if NETFILTER_ADVANCED=n
@@ -170,19 +216,21 @@ config IP_NF_TARGET_SYNPROXY
 	  To compile it as a module, choose M here. If unsure, say N.
 
 # NAT + specific targets: nf_conntrack
-config NF_NAT_IPV4
-	tristate "IPv4 NAT"
+config IP_NF_NAT
+	tristate "iptables NAT support"
 	depends on NF_CONNTRACK_IPV4
 	default m if NETFILTER_ADVANCED=n
 	select NF_NAT
+	select NF_NAT_IPV4
+	select NETFILTER_XT_NAT
 	help
-	  The IPv4 NAT option allows masquerading, port forwarding and other
-	  forms of full Network Address Port Translation.  It is controlled by
-	  the `nat' table in iptables: see the man page for iptables(8).
+	  This enables the `nat' table in iptables. This allows masquerading,
+	  port forwarding and other forms of full Network Address Port
+	  Translation.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-if NF_NAT_IPV4
+if IP_NF_NAT
 
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
@@ -214,47 +262,7 @@ config IP_NF_TARGET_REDIRECT
 	(e.g. when running oldconfig). It selects
 	CONFIG_NETFILTER_XT_TARGET_REDIRECT.
 
-endif
-
-config NF_NAT_SNMP_BASIC
-	tristate "Basic SNMP-ALG support"
-	depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4
-	depends on NETFILTER_ADVANCED
-	default NF_NAT && NF_CONNTRACK_SNMP
-	---help---
-
-	  This module implements an Application Layer Gateway (ALG) for
-	  SNMP payloads.  In conjunction with NAT, it allows a network
-	  management system to access multiple private networks with
-	  conflicting addresses.  It works by modifying IP addresses
-	  inside SNMP payloads to match IP-layer NAT mapping.
-
-	  This is the "basic" form of SNMP-ALG, as described in RFC 2962
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y),
-# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker.
-# From kconfig-language.txt:
-#
-#           <expr> '&&' <expr>                   (6)
-#
-# (6) Returns the result of min(/expr/, /expr/).
-
-config NF_NAT_PROTO_GRE
-	tristate
-	depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE
-
-config NF_NAT_PPTP
-	tristate
-	depends on NF_CONNTRACK && NF_NAT_IPV4
-	default NF_NAT_IPV4 && NF_CONNTRACK_PPTP
-	select NF_NAT_PROTO_GRE
-
-config NF_NAT_H323
-	tristate
-	depends on NF_CONNTRACK && NF_NAT_IPV4
-	default NF_NAT_IPV4 && NF_CONNTRACK_H323
+endif # IP_NF_NAT
 
 # mangle + specific targets
 config IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3300162..edf4af3 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ac93df1..cf0b88f 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -60,6 +60,16 @@ config NF_LOG_IPV6
 	depends on NETFILTER_ADVANCED
 	select NF_LOG_COMMON
 
+config NF_NAT_IPV6
+	tristate "IPv6 NAT"
+	depends on NF_CONNTRACK_IPV6
+	depends on NETFILTER_ADVANCED
+	select NF_NAT
+	help
+	  The IPv6 NAT option allows masquerading, port forwarding and other
+	  forms of full Network Address Port Translation. This can be
+	  controlled by iptables or nft.
+
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6
@@ -232,19 +242,21 @@ config IP6_NF_SECURITY
 
          If unsure, say N.
 
-config NF_NAT_IPV6
-	tristate "IPv6 NAT"
+config IP6_NF_NAT
+	tristate "ip6tables NAT support"
 	depends on NF_CONNTRACK_IPV6
 	depends on NETFILTER_ADVANCED
 	select NF_NAT
+	select NF_NAT_IPV6
+	select NETFILTER_XT_NAT
 	help
-	  The IPv6 NAT option allows masquerading, port forwarding and other
-	  forms of full Network Address Port Translation. It is controlled by
-	  the `nat' table in ip6tables, see the man page for ip6tables(8).
+	  This enables the `nat' table in ip6tables. This allows masquerading,
+	  port forwarding and other forms of full Network Address Port
+	  Translation.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-if NF_NAT_IPV6
+if IP6_NF_NAT
 
 config IP6_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
@@ -265,7 +277,7 @@ config IP6_NF_TARGET_NPT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-endif # NF_NAT_IPV6
+endif # IP6_NF_NAT
 
 endif # IP6_NF_IPTABLES
 
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c0b2631..c3d3286 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
 obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
-obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
+obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
 
 # objects for l3 independent conntrack
 nf_conntrack_ipv6-y  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 8308624..fad5fdb 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -95,7 +95,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o
-obj-$(CONFIG_NF_NAT) += xt_nat.o
+obj-$(CONFIG_NETFILTER_XT_NAT) += xt_nat.o
 
 # targets
 obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
-- 
cgit v1.1


From 7629d1eaf33672c9d35ba1e2ad12b459d56ca3b1 Mon Sep 17 00:00:00 2001
From: Martin Townsend <martin.townsend@xsilon.com>
Date: Tue, 19 Aug 2014 19:03:28 +0200
Subject: mac802154: fixed potential skb leak with mac802154_parse_frame_start

This patch fixes a memory leak when a received frame cannot be parsed.

Signed-off-by: Martin Townsend <martin.townsend@xsilon.com>
Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/mac802154/wpan.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 3c3069f..4c13323 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -573,6 +573,7 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
 	ret = mac802154_parse_frame_start(skb, &hdr);
 	if (ret) {
 		pr_debug("got invalid frame\n");
+		kfree_skb(skb);
 		return;
 	}
 
-- 
cgit v1.1


From c4cb901ac667f81786b402ca7d69a9063e770b3a Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Tue, 19 Aug 2014 19:03:29 +0200
Subject: ieee802154: 6lowpan_rtnl: fix correct errno value

This patch corrects the return value of lowpan_alloc_frag if an error
occurs. Errno numbers should always be negative.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/ieee802154/6lowpan_rtnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c
index 016b77e..71fa7d4 100644
--- a/net/ieee802154/6lowpan_rtnl.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -246,7 +246,7 @@ lowpan_alloc_frag(struct sk_buff *skb, int size,
 			return ERR_PTR(-rc);
 		}
 	} else {
-		frag = ERR_PTR(ENOMEM);
+		frag = ERR_PTR(-ENOMEM);
 	}
 
 	return frag;
-- 
cgit v1.1


From 6e361d6ffee322fcd092d97720e05032ffb98ae6 Mon Sep 17 00:00:00 2001
From: Martin Townsend <martin.townsend@xsilon.com>
Date: Tue, 19 Aug 2014 19:03:30 +0200
Subject: ieee802154: mac802154: handle the reserved dest mode by dropping the
 packet

If a received frame contains the reserved destination address mode, the
frame should be dropped and the skb freed.

Signed-off-by: Martin Townsend <martin.townsend@xsilon.com>
Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/mac802154/wpan.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 4c13323..5478388 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -462,7 +462,10 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb,
 			skb->pkt_type = PACKET_OTHERHOST;
 		break;
 	default:
-		break;
+		spin_unlock_bh(&sdata->mib_lock);
+		pr_debug("invalid dest mode\n");
+		kfree_skb(skb);
+		return NET_RX_DROP;
 	}
 
 	spin_unlock_bh(&sdata->mib_lock);
-- 
cgit v1.1


From 685d632804b89ea25d3339afad162c48646ada5c Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Tue, 19 Aug 2014 19:03:31 +0200
Subject: ieee802154: 6lowpan: ensure of sending 1280 packets

This patch changes the 1281 MTU to 1280. Other stacks have only a 1280-byte
array for uncompressed 6LoWPAN packets; this avoids overflowing those
stacks. Sending 1281-byte uncompressed 6LoWPAN packets is also not RFC
compliant.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/ieee802154/6lowpan_rtnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c
index 71fa7d4..6591d27 100644
--- a/net/ieee802154/6lowpan_rtnl.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -437,7 +437,7 @@ static void lowpan_setup(struct net_device *dev)
 	/* Frame Control + Sequence Number + Address fields + Security Header */
 	dev->hard_header_len	= 2 + 1 + 20 + 14;
 	dev->needed_tailroom	= 2; /* FCS */
-	dev->mtu		= 1281;
+	dev->mtu		= IPV6_MIN_MTU;
 	dev->tx_queue_len	= 0;
 	dev->flags		= IFF_BROADCAST | IFF_MULTICAST;
 	dev->watchdog_timeo	= 0;
-- 
cgit v1.1


From 6697dabe27e03302ddfddc975275e6401defe2dd Mon Sep 17 00:00:00 2001
From: Martin Townsend <martin.townsend@xsilon.com>
Date: Tue, 19 Aug 2014 19:03:32 +0200
Subject: ieee802154: 6lowpan: ensure MTU of 1280 for 6lowpan

This patch drops the userspace accessible sysfs entry for the maximum
datagram size of a 6LoWPAN fragment packet.

A fragment should not have a datagram size value greater than 1280 bytes.
Instead of making this value configurable, we only accept fragment packets
with a datagram size of up to 1280 bytes.

Signed-off-by: Martin Townsend <martin.townsend@xsilon.com>
Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/ieee802154/reassembly.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index ffec6ce..32755cb 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -355,8 +355,6 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
 	struct net *net = dev_net(skb->dev);
 	struct lowpan_frag_info *frag_info = lowpan_cb(skb);
 	struct ieee802154_addr source, dest;
-	struct netns_ieee802154_lowpan *ieee802154_lowpan =
-		net_ieee802154_lowpan(net);
 	int err;
 
 	source = mac_cb(skb)->source;
@@ -366,8 +364,10 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
 	if (err < 0)
 		goto err;
 
-	if (frag_info->d_size > ieee802154_lowpan->max_dsize)
+	if (frag_info->d_size > IPV6_MIN_MTU) {
+		net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n");
 		goto err;
+	}
 
 	fq = fq_find(net, frag_info, &source, &dest);
 	if (fq != NULL) {
@@ -415,13 +415,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
-	{
-		.procname	= "6lowpanfrag_max_datagram_size",
-		.data		= &init_net.ieee802154_lowpan.max_dsize,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
 	{ }
 };
 
@@ -458,7 +451,6 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
 		table[1].data = &ieee802154_lowpan->frags.low_thresh;
 		table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
 		table[2].data = &ieee802154_lowpan->frags.timeout;
-		table[3].data = &ieee802154_lowpan->max_dsize;
 
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
@@ -533,7 +525,6 @@ static int __net_init lowpan_frags_init_net(struct net *net)
 	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
-	ieee802154_lowpan->max_dsize = 0xFFFF;
 
 	inet_frags_init_net(&ieee802154_lowpan->frags);
 
-- 
cgit v1.1


From 73d0f37ac4ee5b60e6b9c1b3ccb8766bade9d9c5 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@parallels.com>
Date: Thu, 14 Aug 2014 12:27:47 +0400
Subject: cbq: incorrectly low bandwidth setting blocks limited traffic

Mainstream commit f0f6ee1f70c4 ("cbq: incorrect processing of high limits")
has a side effect: if the cbq bandwidth setting is less than the real
interface throughput, non-limited traffic can delay limited traffic for a
very long time.

This happens because q->now changes incorrectly in cbq_dequeue():
in the described scenario L2T is much greater than the real time delay,
and q->now gets an extra boost for each transmitted packet.

The accumulated boost prevents q->now from being updated, and a blocked
class can wait a very long time until (q->now >= cl->undertime) is true again.

To fix the problem the patch updates q->now on each cbq_update() call.
L2T-related pre-modification q->now was moved to cbq_update().

My testing confirmed that it fixes the problem and did not discover
any side-effects

Fixes: f0f6ee1f70c4 ("cbq: incorrect processing of high limits")

Signed-off-by: Vasily Averin <vvs@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c | 37 +++++++++++++------------------------
 1 file changed, 13 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index ead5264..550be95 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -700,8 +700,13 @@ cbq_update(struct cbq_sched_data *q)
 	struct cbq_class *this = q->tx_class;
 	struct cbq_class *cl = this;
 	int len = q->tx_len;
+	psched_time_t now;
 
 	q->tx_class = NULL;
+	/* Time integrator. We calculate EOS time
+	 * by adding expected packet transmission time.
+	 */
+	now = q->now + L2T(&q->link, len);
 
 	for ( ; cl; cl = cl->share) {
 		long avgidle = cl->avgidle;
@@ -717,7 +722,7 @@ cbq_update(struct cbq_sched_data *q)
 		 *	idle = (now - last) - last_pktlen/rate
 		 */
 
-		idle = q->now - cl->last;
+		idle = now - cl->last;
 		if ((unsigned long)idle > 128*1024*1024) {
 			avgidle = cl->maxidle;
 		} else {
@@ -761,7 +766,7 @@ cbq_update(struct cbq_sched_data *q)
 			idle -= L2T(&q->link, len);
 			idle += L2T(cl, len);
 
-			cl->undertime = q->now + idle;
+			cl->undertime = now + idle;
 		} else {
 			/* Underlimit */
 
@@ -771,7 +776,8 @@ cbq_update(struct cbq_sched_data *q)
 			else
 				cl->avgidle = avgidle;
 		}
-		cl->last = q->now;
+		if ((s64)(now - cl->last) > 0)
+			cl->last = now;
 	}
 
 	cbq_update_toplevel(q, this, q->tx_borrowed);
@@ -943,30 +949,13 @@ cbq_dequeue(struct Qdisc *sch)
 	struct sk_buff *skb;
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	psched_time_t now;
-	psched_tdiff_t incr;
 
 	now = psched_get_time();
-	incr = now - q->now_rt;
-
-	if (q->tx_class) {
-		psched_tdiff_t incr2;
-		/* Time integrator. We calculate EOS time
-		 * by adding expected packet transmission time.
-		 * If real time is greater, we warp artificial clock,
-		 * so that:
-		 *
-		 * cbq_time = max(real_time, work);
-		 */
-		incr2 = L2T(&q->link, q->tx_len);
-		q->now += incr2;
+
+	if (q->tx_class)
 		cbq_update(q);
-		if ((incr -= incr2) < 0)
-			incr = 0;
-		q->now += incr;
-	} else {
-		if (now > q->now)
-			q->now = now;
-	}
+
+	q->now = now;
 	q->now_rt = now;
 
 	for (;;) {
-- 
cgit v1.1


From 7201c1ddf774c12daa2dd5da098b8929db53f047 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@parallels.com>
Date: Thu, 14 Aug 2014 12:27:59 +0400
Subject: cbq: now_rt removal

Now q->now_rt is identical to q->now and is not required anymore.

Signed-off-by: Vasily Averin <vvs@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 550be95..762a04b 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -159,7 +159,6 @@ struct cbq_sched_data {
 	struct cbq_class	*tx_borrowed;
 	int			tx_len;
 	psched_time_t		now;		/* Cached timestamp */
-	psched_time_t		now_rt;		/* Cached real time */
 	unsigned int		pmask;
 
 	struct hrtimer		delay_timer;
@@ -353,12 +352,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 	int toplevel = q->toplevel;
 
 	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
-		psched_time_t now;
-		psched_tdiff_t incr;
-
-		now = psched_get_time();
-		incr = now - q->now_rt;
-		now = q->now + incr;
+		psched_time_t now = psched_get_time();
 
 		do {
 			if (cl->undertime < now) {
@@ -956,7 +950,6 @@ cbq_dequeue(struct Qdisc *sch)
 		cbq_update(q);
 
 	q->now = now;
-	q->now_rt = now;
 
 	for (;;) {
 		q->wd_expires = 0;
@@ -1212,7 +1205,6 @@ cbq_reset(struct Qdisc *sch)
 	hrtimer_cancel(&q->delay_timer);
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	q->now = psched_get_time();
-	q->now_rt = q->now;
 
 	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
 		q->active[prio] = NULL;
@@ -1396,7 +1388,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->delay_timer.function = cbq_undelay;
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	q->now = psched_get_time();
-	q->now_rt = q->now;
 
 	cbq_link_class(&q->link);
 
-- 
cgit v1.1


From 02784f1b05b8f241c8180af88869e717e2758593 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 19 Aug 2014 11:14:02 -0700
Subject: tipc: Fix build.

Missing semicolon in range check fix.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/port.h b/net/tipc/port.h
index a69118f..3087da3 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -182,8 +182,9 @@ static inline int tipc_port_importance(struct tipc_port *port)
 static inline int tipc_port_set_importance(struct tipc_port *port, int imp)
 {
 	if (imp > TIPC_CRITICAL_IMPORTANCE)
-		return -EINVAL
+		return -EINVAL;
 	msg_set_importance(&port->phdr, (u32)imp);
+	return 0;
 }
 
 #endif
-- 
cgit v1.1


From caa8ad94edf686d02b555c65a6162c0d1b434958 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 18 Aug 2014 15:46:28 +0200
Subject: netfilter: x_tables: allow to use default cgroup match

There's actually no good reason why we cannot use cgroup id 0,
so let's just remove this artificial barrier.

Reported-by: Alexey Perevalov <a.perevalov@samsung.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Tested-by: Alexey Perevalov <a.perevalov@samsung.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index f4e8330..7198d66 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -31,7 +31,7 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par)
 	if (info->invert & ~1)
 		return -EINVAL;
 
-	return info->id ? 0 : -EINVAL;
+	return 0;
 }
 
 static bool
-- 
cgit v1.1


From 1e8430f30b55a1f3f6925c9f37f8cc9afd141d2e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 11 Aug 2014 18:21:50 +0200
Subject: netfilter: nf_tables: nat expression must select CONFIG_NF_NAT

This enables the netfilter NAT engine in the first place; otherwise
you cannot ever select the nf_tables nat expression if iptables
is not selected.

Reported-by: Matteo Croce <technoboy85@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ad751fe..05eb177 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -499,7 +499,7 @@ config NFT_LIMIT
 config NFT_NAT
 	depends on NF_TABLES
 	depends on NF_CONNTRACK
-	depends on NF_NAT
+	select NF_NAT
 	tristate "Netfilter nf_tables nat module"
 	help
 	  This option adds the "nat" expression that you can use to perform
-- 
cgit v1.1


From f161dd4122ffa73e4e12000309dca65bec80d416 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 15 Aug 2014 21:06:54 +0300
Subject: Bluetooth: Fix hci_conn reference counting for auto-connections

Recently the LE passive scanning and auto-connections feature was
introduced. It uses the hci_connect_le() API which returns a hci_conn
along with a reference count to that object. All previous users would
tie this returned reference to some existing object, such as an L2CAP
channel, and there'd be no leaked references this way. For
auto-connections however the reference was returned but not stored
anywhere, leaving established connections with one higher reference
count than they should have.

Instead of playing special tricks with hci_conn_hold/drop this patch
associates the returned reference from hci_connect_le() with the object
that in practice does own this reference, i.e. the hci_conn_params
struct that caused us to initiate a connection in the first place. Once
the connection is established or fails to establish this reference is
removed appropriately.

One extra thing needed is to call hci_pend_le_actions_clear() before
calling hci_conn_hash_flush() so that the reference is cleared before
the hci_conn objects are fully removed.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c  |  8 ++++++++
 net/bluetooth/hci_core.c  | 14 ++++++++++++--
 net/bluetooth/hci_event.c | 17 +++++++++++++++--
 3 files changed, 35 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b50dabb..faff624 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -589,6 +589,14 @@ EXPORT_SYMBOL(hci_get_route);
 void hci_le_conn_failed(struct hci_conn *conn, u8 status)
 {
 	struct hci_dev *hdev = conn->hdev;
+	struct hci_conn_params *params;
+
+	params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
+					   conn->dst_type);
+	if (params && params->conn) {
+		hci_conn_drop(params->conn);
+		params->conn = NULL;
+	}
 
 	conn->state = BT_CLOSED;
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c32d361..1d9c29a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2536,8 +2536,13 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
 {
 	struct hci_conn_params *p;
 
-	list_for_each_entry(p, &hdev->le_conn_params, list)
+	list_for_each_entry(p, &hdev->le_conn_params, list) {
+		if (p->conn) {
+			hci_conn_drop(p->conn);
+			p->conn = NULL;
+		}
 		list_del_init(&p->action);
+	}
 
 	BT_DBG("All LE pending actions cleared");
 }
@@ -2578,8 +2583,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 
 	hci_dev_lock(hdev);
 	hci_inquiry_cache_flush(hdev);
-	hci_conn_hash_flush(hdev);
 	hci_pend_le_actions_clear(hdev);
+	hci_conn_hash_flush(hdev);
 	hci_dev_unlock(hdev);
 
 	hci_notify(hdev, HCI_DEV_DOWN);
@@ -3727,6 +3732,9 @@ void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
 	if (!params)
 		return;
 
+	if (params->conn)
+		hci_conn_drop(params->conn);
+
 	list_del(&params->action);
 	list_del(&params->list);
 	kfree(params);
@@ -3757,6 +3765,8 @@ void hci_conn_params_clear_all(struct hci_dev *hdev)
 	struct hci_conn_params *params, *tmp;
 
 	list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) {
+		if (params->conn)
+			hci_conn_drop(params->conn);
 		list_del(&params->action);
 		list_del(&params->list);
 		kfree(params);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index be35598..a600082 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4221,8 +4221,13 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_proto_connect_cfm(conn, ev->status);
 
 	params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
-	if (params)
+	if (params) {
 		list_del_init(&params->action);
+		if (params->conn) {
+			hci_conn_drop(params->conn);
+			params->conn = NULL;
+		}
+	}
 
 unlock:
 	hci_update_background_scan(hdev);
@@ -4304,8 +4309,16 @@ static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr,
 
 	conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
 			      HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER);
-	if (!IS_ERR(conn))
+	if (!IS_ERR(conn)) {
+		/* Store the pointer since we don't really have any
+		 * other owner of the object besides the params that
+		 * triggered it. This way we can abort the connection if
+		 * the parameters get removed and keep the reference
+		 * count consistent once the connection is established.
+		 */
+		params->conn = conn;
 		return;
+	}
 
 	switch (PTR_ERR(conn)) {
 	case -EBUSY:
-- 
cgit v1.1


From 6df378d2d1f87a249a88ac4a8c7a14861d9c9474 Mon Sep 17 00:00:00 2001
From: chas williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
Date: Thu, 14 Aug 2014 09:19:47 -0400
Subject: lec: Use rtnl lock/unlock when updating MTU

The LECS response contains the MTU that should be used.  Correctly
synchronize with other layers when updating.

Signed-off-by: Chas Williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/lec.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/atm/lec.c b/net/atm/lec.c
index e4853b5..4b98f89 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -410,9 +410,11 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 		priv->lane2_ops = NULL;
 		if (priv->lane_version > 1)
 			priv->lane2_ops = &lane2_ops;
+		rtnl_lock();
 		if (dev_set_mtu(dev, mesg->content.config.mtu))
 			pr_info("%s: change_mtu to %d failed\n",
 				dev->name, mesg->content.config.mtu);
+		rtnl_unlock();
 		priv->is_proxy = mesg->content.config.is_proxy;
 		break;
 	case l_flush_tran_id:
-- 
cgit v1.1


From dc808110bb62b64a448696ecac3938902c92e1ab Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 15 Aug 2014 09:16:04 -0700
Subject: packet: handle too big packets for PACKET_V3

af_packet can currently overwrite kernel memory by out of bound
accesses, because it assumed a [new] block can always hold one frame.

This is not generally the case, even if most existing tools do it right.

This patch clamps too-long frames, as the API permits, and issues a one-time
error to syslog.

[  394.357639] tpacket_rcv: packet too big, clamped from 5042 to 3966. macoff=82

In this example, packet header tp_snaplen was set to 3966,
and tp_len was set to 5042 (skb->len)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.")
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 17 +++++++++++++++++
 net/packet/internal.h  |  1 +
 2 files changed, 18 insertions(+)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8d9f804..93896d2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -632,6 +632,7 @@ static void init_prb_bdqc(struct packet_sock *po,
 	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
 	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
 
+	p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
 	prb_init_ft_ops(p1, req_u);
 	prb_setup_retire_blk_timer(po, tx_ring);
 	prb_open_block(p1, pbd);
@@ -1942,6 +1943,18 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 			if ((int)snaplen < 0)
 				snaplen = 0;
 		}
+	} else if (unlikely(macoff + snaplen >
+			    GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
+		u32 nval;
+
+		nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
+		pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
+			    snaplen, nval, macoff);
+		snaplen = nval;
+		if (unlikely((int)snaplen < 0)) {
+			snaplen = 0;
+			macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+		}
 	}
 	spin_lock(&sk->sk_receive_queue.lock);
 	h.raw = packet_current_rx_frame(po, skb,
@@ -3783,6 +3796,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 			goto out;
 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
 			goto out;
+		if (po->tp_version >= TPACKET_V3 &&
+		    (int)(req->tp_block_size -
+			  BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+			goto out;
 		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
 					po->tp_reserve))
 			goto out;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index eb9580a..cdddf6a 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -29,6 +29,7 @@ struct tpacket_kbdq_core {
 	char		*pkblk_start;
 	char		*pkblk_end;
 	int		kblk_size;
+	unsigned int	max_frame_len;
 	unsigned int	knum_blocks;
 	uint64_t	knxt_seq_num;
 	char		*prev;
-- 
cgit v1.1


From 061079ac0b9be7a578dcd09f7865c2c0d6ac894a Mon Sep 17 00:00:00 2001
From: zhuyj <zyjzyj2000@gmail.com>
Date: Wed, 20 Aug 2014 17:31:43 +0800
Subject: sctp: not send SCTP_PEER_ADDR_CHANGE notifications with failed probe

Since the transport has always been in state SCTP_UNCONFIRMED, it
wasn't active before and hasn't been used before, so it is unnecessary
to bug the user with a notification.

Reported-by: Deepak Khandelwal <khandelwal.deepak.1987@gmail.com>
Suggested-by: Vlad Yasevich <vyasevich@gmail.com>
Suggested-by: Michael Tuexen <tuexen@fh-muenster.de>
Suggested-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Zhu Yanjun <Yanjun.Zhu@windriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 06a9ee6..aaafb32 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -813,6 +813,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 		else {
 			dst_release(transport->dst);
 			transport->dst = NULL;
+			ulp_notify = false;
 		}
 
 		spc_state = SCTP_ADDR_UNREACHABLE;
-- 
cgit v1.1


From 793c3b4000a1ef611ae7e5c89bd2a9c6b776cb5e Mon Sep 17 00:00:00 2001
From: Benjamin Block <bebl@mageta.org>
Date: Thu, 21 Aug 2014 19:37:48 +0200
Subject: net: ipv6: fib: don't sleep inside atomic lock

The function fib6_commit_metrics() allocates a piece of memory in mode
GFP_KERNEL while holding an atomic lock from higher up in the stack, in
the function __ip6_ins_rt(). This produces the following BUG:

> BUG: sleeping function called from invalid context at mm/slub.c:1250
> in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: dhcpcd
> 2 locks held by dhcpcd/2909:
>  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff81978e67>] rtnl_lock+0x17/0x20
>  #1:  (&tb->tb6_lock){++--+.}, at: [<ffffffff81a6951a>] ip6_route_add+0x65a/0x800
> CPU: 1 PID: 2909 Comm: dhcpcd Not tainted 3.17.0-rc1 #1
> Hardware name: ASUS All Series/Q87T, BIOS 0216 10/16/2013
>  0000000000000008 ffff8800c8f13858 ffffffff81af135a 0000000000000000
>  ffff880212202430 ffff8800c8f13878 ffffffff810f8d3a ffff880212202c98
>  0000000000000010 ffff8800c8f138c8 ffffffff8121ad0e 0000000000000001
> Call Trace:
>  [<ffffffff81af135a>] dump_stack+0x4e/0x68
>  [<ffffffff810f8d3a>] __might_sleep+0x10a/0x120
>  [<ffffffff8121ad0e>] kmem_cache_alloc_trace+0x4e/0x190
>  [<ffffffff81a6bcd6>] ? fib6_commit_metrics+0x66/0x110
>  [<ffffffff81a6bcd6>] fib6_commit_metrics+0x66/0x110
>  [<ffffffff81a6cbf3>] fib6_add+0x883/0xa80
>  [<ffffffff81a6951a>] ? ip6_route_add+0x65a/0x800
>  [<ffffffff81a69535>] ip6_route_add+0x675/0x800
>  [<ffffffff81a68f2a>] ? ip6_route_add+0x6a/0x800
>  [<ffffffff81a6990c>] inet6_rtm_newroute+0x5c/0x80
>  [<ffffffff8197cf01>] rtnetlink_rcv_msg+0x211/0x260
>  [<ffffffff81978e67>] ? rtnl_lock+0x17/0x20
>  [<ffffffff81119708>] ? lock_release_holdtime+0x28/0x180
>  [<ffffffff81978e67>] ? rtnl_lock+0x17/0x20
>  [<ffffffff8197ccf0>] ? __rtnl_unlock+0x20/0x20
>  [<ffffffff819a989e>] netlink_rcv_skb+0x6e/0xd0
>  [<ffffffff81978ee5>] rtnetlink_rcv+0x25/0x40
>  [<ffffffff819a8e59>] netlink_unicast+0xd9/0x180
>  [<ffffffff819a9600>] netlink_sendmsg+0x700/0x770
>  [<ffffffff81103735>] ? local_clock+0x25/0x30
>  [<ffffffff8194e83c>] sock_sendmsg+0x6c/0x90
>  [<ffffffff811f98e3>] ? might_fault+0xa3/0xb0
>  [<ffffffff8195ca6d>] ? verify_iovec+0x7d/0xf0
>  [<ffffffff8194ec3e>] ___sys_sendmsg+0x37e/0x3b0
>  [<ffffffff8111ef15>] ? trace_hardirqs_on_caller+0x185/0x220
>  [<ffffffff81af979e>] ? mutex_unlock+0xe/0x10
>  [<ffffffff819a55ec>] ? netlink_insert+0xbc/0xe0
>  [<ffffffff819a65e5>] ? netlink_autobind.isra.30+0x125/0x150
>  [<ffffffff819a6520>] ? netlink_autobind.isra.30+0x60/0x150
>  [<ffffffff819a84f9>] ? netlink_bind+0x159/0x230
>  [<ffffffff811f989a>] ? might_fault+0x5a/0xb0
>  [<ffffffff8194f25e>] ? SYSC_bind+0x7e/0xd0
>  [<ffffffff8194f8cd>] __sys_sendmsg+0x4d/0x80
>  [<ffffffff8194f912>] SyS_sendmsg+0x12/0x20
>  [<ffffffff81afc692>] system_call_fastpath+0x16/0x1b

Fix this by replacing the allocation mode GFP_KERNEL with GFP_ATOMIC.

Signed-off-by: Benjamin Block <bebl@mageta.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cb4459b..76b7f5e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -643,7 +643,7 @@ static int fib6_commit_metrics(struct dst_entry *dst,
 	if (dst->flags & DST_HOST) {
 		mp = dst_metrics_write_ptr(dst);
 	} else {
-		mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+		mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
 		if (!mp)
 			return -ENOMEM;
 		dst_init_metrics(dst, mp, 0);
-- 
cgit v1.1


From 2ba5af42a7b59ef01f9081234d8855140738defd Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 21 Aug 2014 21:33:44 +0200
Subject: openvswitch: fix panic with multiple vlan headers

When there are multiple vlan headers present in a received frame, the first
one is put into vlan_tci and protocol is set to ETH_P_8021Q. Anything in the
skb beyond the VLAN TPID may still be non-linear, including the inner TCI
and ethertype. While ovs_flow_extract takes care of IP and IPv6 headers, it
does nothing with ETH_P_8021Q. Later, if OVS_ACTION_ATTR_POP_VLAN is
executed, __pop_vlan_tci pulls the next vlan header into vlan_tci.

This leads to two things:

1. Part of the resulting ethernet header is in the non-linear part of the
   skb. When eth_type_trans is called later as the result of
   OVS_ACTION_ATTR_OUTPUT, kernel BUGs in __skb_pull. Also, __pop_vlan_tci
   is in fact accessing random data when it reads past the TPID.

2. network_header points into the ethernet header instead of behind it.
   mac_len is set to a wrong value (10), too.

Reported-by: Yulong Pei <ypei@redhat.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/actions.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index fe5cda0..5231652 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -42,6 +42,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 
 static int make_writable(struct sk_buff *skb, int write_len)
 {
+	if (!pskb_may_pull(skb, write_len))
+		return -ENOMEM;
+
 	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
 		return 0;
 
@@ -70,6 +73,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 
 	vlan_set_encap_proto(skb, vhdr);
 	skb->mac_header += VLAN_HLEN;
+	if (skb_network_offset(skb) < ETH_HLEN)
+		skb_set_network_header(skb, ETH_HLEN);
 	skb_reset_mac_len(skb);
 
 	return 0;
-- 
cgit v1.1


From ea4f19c1f81d4bf709c74e3789ec785828bc6e51 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 22 Aug 2014 13:03:29 +0200
Subject: net: sctp: spare unnecessary comparison in sctp_trans_elect_best

When both transports are the same, we don't have to go down that
road only to realize that we will return the very same transport.
We are guaranteed that curr is always non-NULL. Therefore, just
short-circuit this special case.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index aaafb32..104fae4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1245,7 +1245,7 @@ static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
 {
 	u8 score_curr, score_best;
 
-	if (best == NULL)
+	if (best == NULL || curr == best)
 		return curr;
 
 	score_curr = sctp_trans_score(curr);
-- 
cgit v1.1


From aa4a83ee8bbc08342c4acfd59ef234cac51a1eef Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 22 Aug 2014 13:03:30 +0200
Subject: net: sctp: fix suboptimal edge-case on non-active active/retrans path
 selection

In SCTP, selection of active (T.ACT) and retransmission (T.RET)
transports is being done whenever transport control operations
(UP, DOWN, PF, ...) are engaged through sctp_assoc_control_transport().

Commits 4c47af4d5eb2 ("net: sctp: rework multihoming retransmission
path selection to rfc4960") and a7288c4dd509 ("net: sctp: improve
sctp_select_active_and_retran_path selection") have both improved
it towards a more fine-grained and optimal path selection.

Currently, the selection algorithm for T.ACT and T.RET is as follows:

1) Elect the two most recently used ACTIVE transports T1, T2 for
   T.ACT, T.RET, where T.ACT<-T1 and T1 is most recently used
2) In case primary path T.PRI not in {T1, T2} but ACTIVE, set
   T.ACT<-T.PRI and T.RET<-T1
3) If only T1 is ACTIVE from the set, set T.ACT<-T1 and T.RET<-T1
4) If none is ACTIVE, set T.ACT<-best(T.PRI, T.RET, T3) where
   T3 is the most recently used (if avail) in PF, set T.RET<-T.PRI

Prior to above commits, 4) was simply a camp on T.ACT<-T.PRI and
T.RET<-T.PRI, ignoring possible paths in PF. Camping on T.PRI is
still slightly suboptimal as it can lead to the following scenario:

Setup:
        <A>                                <B>
    T1: p1p1 (10.0.10.10) <==>  .'`)  <==> p1p1 (10.0.10.12)  <= T.PRI
    T2: p1p2 (10.0.10.20) <==> (_ . ) <==> p1p2 (10.0.10.22)

    net.sctp.rto_min = 1000
    net.sctp.path_max_retrans = 2
    net.sctp.pf_retrans = 0
    net.sctp.hb_interval = 1000

T.PRI is permanently down, T2 is put briefly into PF state (e.g. due to
link flapping). Here, the first time transmission is sent over PF path
T2 as it's the only non-INACTIVE path, but the retransmitted data-chunks
are sent over the INACTIVE path T1 (T.PRI), which is not good.

After the patch, it's choosing better transports in both cases by
modifying step 4):

4) If none is ACTIVE, set T.ACT_new<-best(T.ACT_old, T3) where T3 is
   the most recently used (if avail) in PF, set T.RET<-T.ACT_new

This will still select a best possible path in PF if available (which
can also include T.PRI/T.RET), and set both T.ACT/T.RET to it.

In case sctp_assoc_control_transport() *just* put T.ACT_old into INACTIVE
as it transitioned from ACTIVE->PF->INACTIVE and stays in INACTIVE just
for a very short while before going back ACTIVE, it will guarantee that
this path will be reselected for T.ACT/T.RET since T3 (PF) is not
available.

Previously, this was not possible, as we would only select between T.PRI
and T.RET, and a possible T3 would be NULL due to the fact that we have
just transitioned T3 in sctp_assoc_control_transport() from PF->INACTIVE
and would select a suboptimal path when T.PRI/T.RET have worse properties.

In the case that T.ACT_old permanently went to INACTIVE during this
transition and there's no PF path available, plus T.PRI and T.RET are
INACTIVE as well, we would now camp on T.ACT_old, but if everything is
INACTIVE there's really not much we can do except hope for a
successful HB to bring one of the transports back up again and thus
cause a new selection through sctp_assoc_control_transport().

Now both tests work fine:

Case 1:

 1. T1 S(ACTIVE) T.ACT
    T2 S(ACTIVE) T.RET

 2. T1 S(ACTIVE) T.ACT, T.RET
    T2 S(PF)

 3. T1 S(ACTIVE) T.ACT, T.RET
    T2 S(INACTIVE)

 5. T1 S(PF) T.ACT, T.RET
    T2 S(INACTIVE)

[ 5.1 T1 S(INACTIVE) T.ACT, T.RET
      T2 S(INACTIVE) ]

 6. T1 S(ACTIVE) T.ACT, T.RET
    T2 S(INACTIVE)

 7. T1 S(ACTIVE) T.ACT
    T2 S(ACTIVE) T.RET

Case 2:

 1. T1 S(ACTIVE) T.ACT
    T2 S(ACTIVE) T.RET

 2. T1 S(PF)
    T2 S(ACTIVE) T.ACT, T.RET

 3. T1 S(INACTIVE)
    T2 S(ACTIVE) T.ACT, T.RET

 5. T1 S(INACTIVE)
    T2 S(PF) T.ACT, T.RET

[ 5.1 T1 S(INACTIVE)
      T2 S(INACTIVE) T.ACT, T.RET ]

 6. T1 S(INACTIVE)
    T2 S(ACTIVE) T.ACT, T.RET

 7. T1 S(ACTIVE) T.ACT
    T2 S(ACTIVE) T.RET

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 104fae4..a88b852 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1356,14 +1356,11 @@ static void sctp_select_active_and_retran_path(struct sctp_association *asoc)
 		trans_sec = trans_pri;
 
 	/* If we failed to find a usable transport, just camp on the
-	 * primary or retran, even if they are inactive, if possible
-	 * pick a PF iff it's the better choice.
+	 * active or pick a PF iff it's the better choice.
 	 */
 	if (trans_pri == NULL) {
-		trans_pri = sctp_trans_elect_best(asoc->peer.primary_path,
-						  asoc->peer.retran_path);
-		trans_pri = sctp_trans_elect_best(trans_pri, trans_pf);
-		trans_sec = asoc->peer.primary_path;
+		trans_pri = sctp_trans_elect_best(asoc->peer.active_path, trans_pf);
+		trans_sec = trans_pri;
 	}
 
 	/* Set the active and retran transports. */
-- 
cgit v1.1


From 47e4df94d129cbca84de252ff63c4ded08a513e7 Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Mon, 18 Aug 2014 13:19:09 +0200
Subject: mac80211: fix channel switch for chanctx-based drivers

The new_ctx pointer is set only for non-chanctx drivers.  This yielded a
crash for chanctx-based drivers during channel switch finalization:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
  IP: ieee80211_vif_use_reserved_switch+0x71c/0xb00 [mac80211]

Use an adequate chanctx pointer to fix this.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/mac80211/chan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 6d537f0..0375009 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1444,7 +1444,7 @@ ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
 
 			list_del(&sdata->reserved_chanctx_list);
 			list_move(&sdata->assigned_chanctx_list,
-				  &new_ctx->assigned_vifs);
+				  &ctx->assigned_vifs);
 			sdata->reserved_chanctx = NULL;
 
 			ieee80211_vif_chanctx_reservation_complete(sdata);
-- 
cgit v1.1


From d1c85c2ebe7ffe1f1b27846bd1ba0944c513d822 Mon Sep 17 00:00:00 2001
From: Zhouyi Zhou <zhouzhouyi@gmail.com>
Date: Fri, 22 Aug 2014 10:40:15 +0800
Subject: netfilter: HAVE_JUMP_LABEL instead of CONFIG_JUMP_LABEL

Use HAVE_JUMP_LABEL as elsewhere in the kernel to ensure
that the toolchain has the required support in addition to
CONFIG_JUMP_LABEL being set.

Signed-off-by: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a93c97f..024a2e2 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
 struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
 
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks_needed);
 #endif
@@ -72,7 +72,7 @@ int nf_register_hook(struct nf_hook_ops *reg)
 	}
 	list_add_rcu(&reg->list, elem->list.prev);
 	mutex_unlock(&nf_hook_mutex);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 	static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
 	return 0;
@@ -84,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
 	mutex_lock(&nf_hook_mutex);
 	list_del_rcu(&reg->list);
 	mutex_unlock(&nf_hook_mutex);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 	static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
 	synchronize_net();
-- 
cgit v1.1


From fb70118c0e8b436eb0d957ef506a1d94028ae10c Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 19 Aug 2014 15:41:32 +0300
Subject: net: rfkill: gpio: Add more Broadcom bluetooth ACPI IDs

This adds one more ACPI ID of a Broadcom bluetooth chip.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill-gpio.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 14c98e4..02a86a2 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -158,6 +158,7 @@ static const struct acpi_device_id rfkill_acpi_match[] = {
 	{ "BCM2E1A", RFKILL_TYPE_BLUETOOTH },
 	{ "BCM2E39", RFKILL_TYPE_BLUETOOTH },
 	{ "BCM2E3D", RFKILL_TYPE_BLUETOOTH },
+	{ "BCM2E64", RFKILL_TYPE_BLUETOOTH },
 	{ "BCM4752", RFKILL_TYPE_GPS },
 	{ "LNV4752", RFKILL_TYPE_GPS },
 	{ },
-- 
cgit v1.1


From 4c75431ac3520631f1d9e74aa88407e6374dbbc4 Mon Sep 17 00:00:00 2001
From: "Alexander Y. Fomichev" <git.user@gmail.com>
Date: Mon, 25 Aug 2014 16:26:45 +0400
Subject: net: prevent of emerging cross-namespace symlinks

Code manipulating sysfs symlinks on adjacent net_device(s)
currently doesn't take into account that the devices may
belong to different namespaces.

This patch tries to fix the issue as follows:
- Check for net_ns before creating / deleting a symlink.
  For now only netdev_adjacent_rename_links and
  __netdev_adjacent_dev_remove are affected; afaics
  __netdev_adjacent_dev_insert implies both net_devs
  belong to the same namespace.
- Drop all existing symlinks to / from all adj_devs before
  switching namespace and recreate them just after.

Signed-off-by: Alexander Y. Fomichev <git.user@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index b65a505..66738e9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4889,7 +4889,8 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
 	if (adj->master)
 		sysfs_remove_link(&(dev->dev.kobj), "master");
 
-	if (netdev_adjacent_is_neigh_list(dev, dev_list))
+	if (netdev_adjacent_is_neigh_list(dev, dev_list) &&
+	    net_eq(dev_net(dev),dev_net(adj_dev)))
 		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 
 	list_del_rcu(&adj->list);
@@ -5159,11 +5160,65 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
+void netdev_adjacent_add_links(struct net_device *dev)
+{
+	struct netdev_adjacent *iter;
+
+	struct net *net = dev_net(dev);
+
+	list_for_each_entry(iter, &dev->adj_list.upper, list) {
+		if (!net_eq(net,dev_net(iter->dev)))
+			continue;
+		netdev_adjacent_sysfs_add(iter->dev, dev,
+					  &iter->dev->adj_list.lower);
+		netdev_adjacent_sysfs_add(dev, iter->dev,
+					  &dev->adj_list.upper);
+	}
+
+	list_for_each_entry(iter, &dev->adj_list.lower, list) {
+		if (!net_eq(net,dev_net(iter->dev)))
+			continue;
+		netdev_adjacent_sysfs_add(iter->dev, dev,
+					  &iter->dev->adj_list.upper);
+		netdev_adjacent_sysfs_add(dev, iter->dev,
+					  &dev->adj_list.lower);
+	}
+}
+
+void netdev_adjacent_del_links(struct net_device *dev)
+{
+	struct netdev_adjacent *iter;
+
+	struct net *net = dev_net(dev);
+
+	list_for_each_entry(iter, &dev->adj_list.upper, list) {
+		if (!net_eq(net,dev_net(iter->dev)))
+			continue;
+		netdev_adjacent_sysfs_del(iter->dev, dev->name,
+					  &iter->dev->adj_list.lower);
+		netdev_adjacent_sysfs_del(dev, iter->dev->name,
+					  &dev->adj_list.upper);
+	}
+
+	list_for_each_entry(iter, &dev->adj_list.lower, list) {
+		if (!net_eq(net,dev_net(iter->dev)))
+			continue;
+		netdev_adjacent_sysfs_del(iter->dev, dev->name,
+					  &iter->dev->adj_list.upper);
+		netdev_adjacent_sysfs_del(dev, iter->dev->name,
+					  &dev->adj_list.lower);
+	}
+}
+
 void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
 {
 	struct netdev_adjacent *iter;
 
+	struct net *net = dev_net(dev);
+
 	list_for_each_entry(iter, &dev->adj_list.upper, list) {
+		if (!net_eq(net,dev_net(iter->dev)))
+			continue;
 		netdev_adjacent_sysfs_del(iter->dev, oldname,
 					  &iter->dev->adj_list.lower);
 		netdev_adjacent_sysfs_add(iter->dev, dev,
@@ -5171,6 +5226,8 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
 	}
 
 	list_for_each_entry(iter, &dev->adj_list.lower, list) {
+		if (!net_eq(net,dev_net(iter->dev)))
+			continue;
 		netdev_adjacent_sysfs_del(iter->dev, oldname,
 					  &iter->dev->adj_list.upper);
 		netdev_adjacent_sysfs_add(iter->dev, dev,
@@ -6773,6 +6830,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	/* Send a netdev-removed uevent to the old namespace */
 	kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
+	netdev_adjacent_del_links(dev);
 
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
@@ -6787,6 +6845,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	/* Send a netdev-add uevent to the new namespace */
 	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+	netdev_adjacent_add_links(dev);
 
 	/* Fixup kobjects */
 	err = device_rename(&dev->dev, dev->name);
-- 
cgit v1.1


From db115037bb57cdfe97078b13da762213f7980e81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <mkubecek@suse.cz>
Date: Mon, 25 Aug 2014 15:16:22 +0200
Subject: net: fix checksum features handling in netif_skb_features()

This is a follow-up to

  da08143b8520 ("vlan: more careful checksum features handling")

which introduced more careful feature intersection in vlan code,
taking into account that HW_CSUM should be considered superset
of IP_CSUM/IPV6_CSUM. The same is needed in netif_skb_features()
in order to avoid offloading mismatch warning when vlan is
created on top of a bond consisting of slaves supporting IP/IPv6
checksumming but not vlan Tx offloading.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 66738e9..ab9a165 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2587,13 +2587,19 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 		return harmonize_features(skb, features);
 	}
 
-	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
-					       NETIF_F_HW_VLAN_STAG_TX);
+	features = netdev_intersect_features(features,
+					     skb->dev->vlan_features |
+					     NETIF_F_HW_VLAN_CTAG_TX |
+					     NETIF_F_HW_VLAN_STAG_TX);
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
-		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
-				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
-				NETIF_F_HW_VLAN_STAG_TX;
+		features = netdev_intersect_features(features,
+						     NETIF_F_SG |
+						     NETIF_F_HIGHDMA |
+						     NETIF_F_FRAGLIST |
+						     NETIF_F_GEN_CSUM |
+						     NETIF_F_HW_VLAN_CTAG_TX |
+						     NETIF_F_HW_VLAN_STAG_TX);
 
 	return harmonize_features(skb, features);
 }
-- 
cgit v1.1


From bb512ad0732232f1d2693bb68f31a76bed8f22ae Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 25 Aug 2014 12:08:09 +0200
Subject: Revert "mac80211: disable uAPSD if all ACs are under ACM"

This reverts commit 24aa11ab8ae03292d38ec0dbd9bc2ac49fe8a6dd.

That commit was wrong since it uses data that hasn't even been set
up yet, but might be a hold-over from a previous connection.

Additionally, it seems like a driver-specific workaround that
shouldn't have been in mac80211 to start with.

Cc: stable@vger.kernel.org
Fixes: 24aa11ab8ae0 ("mac80211: disable uAPSD if all ACs are under ACM")
Reviewed-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 31a8afa..b82a12a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4376,8 +4376,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 
 	if (bss->wmm_used && bss->uapsd_supported &&
-	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) &&
-	    sdata->wmm_acm != 0xff) {
+	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) {
 		assoc_data->uapsd = true;
 		ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
 	} else {
-- 
cgit v1.1


From 0e67c13667a72093aa3ef3cb54dd521e34e500fc Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Fri, 25 Jul 2014 16:20:22 +0200
Subject: mac80211: ignore AP_VLAN in ieee80211_recalc_chanctx_chantype

When bringing down the AP, a WARN_ON is hit because the bss config chandef
is empty here.
Since AP_VLAN channel settings do not matter for anything chanctx related
(always inherits the settings from the AP interface), let's just ignore
it here.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/chan.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 6d537f0..4206a11 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -541,6 +541,8 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
 			continue;
 		if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf)
 			continue;
+		if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+			continue;
 
 		if (!compat)
 			compat = &sdata->vif.bss_conf.chandef;
-- 
cgit v1.1


From 3918edb0e6a8b16c2866f4657d9fed41f9da562d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Fri, 25 Jul 2014 16:20:23 +0200
Subject: mac80211: fix smps mode check for AP_VLAN

In ieee80211_sta_ps_deliver_wakeup, sdata->smps_mode is checked. This is
initialized only for the base AP interface, not the individual VLANs.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index c6ee213..441875f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1094,8 +1094,11 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 	unsigned long flags;
 	struct ps_data *ps;
 
-	if (sdata->vif.type == NL80211_IFTYPE_AP ||
-	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+		sdata = container_of(sdata->bss, struct ieee80211_sub_if_data,
+				     u.ap);
+
+	if (sdata->vif.type == NL80211_IFTYPE_AP)
 		ps = &sdata->bss->ps;
 	else if (ieee80211_vif_is_mesh(&sdata->vif))
 		ps = &sdata->u.mesh.ps;
-- 
cgit v1.1


From 6c6fa49649dc826b7ef81314324fe55cf1d0d954 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Sat, 28 Jun 2014 16:35:25 -0400
Subject: mac80211: mesh_plink: handle confirm frames with new plid

The 802.11 standard says when processing a plink confirm
frame:

"If the peerLinkID in the mesh peering instance has not been
set, the Local Link ID field of the Mesh Peering Confirm
request shall be copied into the peerLinkID in the mesh
peering instance."

We were only doing this when receiving an open peering frame,
but it could happen that the open frame gets lost and so we
should handle this case rather than rejecting the confirm and
failing the whole peering process.

Reported-by: Yu Niiro <yu.niiro@gmail.com>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh_plink.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 63b8741..c47194d 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -959,7 +959,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
 		if (!matches_local)
 			event = CNF_RJCT;
 		if (!mesh_plink_free_count(sdata) ||
-		    (sta->llid != llid || sta->plid != plid))
+		    sta->llid != llid ||
+		    (sta->plid && sta->plid != plid))
 			event = CNF_IGNR;
 		else
 			event = CNF_ACPT;
@@ -1080,6 +1081,10 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
 		goto unlock_rcu;
 	}
 
+	/* 802.11-2012 13.3.7.2 - update plid on CNF if not set */
+	if (!sta->plid && event == CNF_ACPT)
+		sta->plid = plid;
+
 	changed |= mesh_plink_fsm(sdata, sta, event);
 
 unlock_rcu:
-- 
cgit v1.1


From c7dcb45facedbff84237adb77bd3ba50f75c0de4 Mon Sep 17 00:00:00 2001
From: Denton Gentry <denton.gentry@gmail.com>
Date: Mon, 28 Jul 2014 23:36:32 -0700
Subject: mac80211: fix start_seq_num in Rx reorder offload

sta->last_seq_ctrl is the seq_ctrl field from the last header
seen; we need to shift it by 4 bits to extract the sequence number.
Otherwise the ieee80211_sn_less() check at the top of
ieee80211_sta_manage_reorder_buf drops frames until the sequence
number catches up.

Cc: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Denton Gentry <denton.gentry@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 01eede7..f75e5f1 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1175,8 +1175,8 @@ static void ieee80211_iface_work(struct work_struct *work)
 			if (sta) {
 				u16 last_seq;
 
-				last_seq = le16_to_cpu(
-					sta->last_seq_ctrl[rx_agg->tid]);
+				last_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(
+					sta->last_seq_ctrl[rx_agg->tid]));
 
 				__ieee80211_start_rx_ba_session(sta,
 						0, 0,
-- 
cgit v1.1


From 14b058bbce9279ee432f0944ca14df69f4a0d170 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 5 Aug 2014 09:34:05 +0200
Subject: mac80211: fix agg_status debugfs file alignment

The "RX active" string is too long, so the columns get
shifted. Change it to just "RX" to avoid this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/debugfs_sta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 3db96648..86173c0 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -167,7 +167,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 	p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n",
 			sta->ampdu_mlme.dialog_token_allocator + 1);
 	p += scnprintf(p, sizeof(buf) + buf - p,
-		       "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
+		       "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
 		tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
-- 
cgit v1.1


From ea1d5d7755a3e556de78cc757d1895d5c7180548 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 1 Aug 2014 10:36:17 +0300
Subject: ipvs: properly declare tunnel encapsulation

The tunneling method should properly use tunnel encapsulation.
Fixes problem with CHECKSUM_PARTIAL packets when TCP/UDP csum
offload is supported.

Thanks to Alex Gartrell for reporting the problem, providing
a solution and for all suggestions.

Reported-by: Alex Gartrell <agartrell@fb.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Alex Gartrell <agartrell@fb.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_xmit.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 6f70bdd..56896a41 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -38,6 +38,7 @@
 #include <net/route.h>                  /* for ip_route_output */
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/ip_tunnels.h>
 #include <net/addrconf.h>
 #include <linux/icmpv6.h>
 #include <linux/netfilter.h>
@@ -862,11 +863,15 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		old_iph = ip_hdr(skb);
 	}
 
-	skb->transport_header = skb->network_header;
-
 	/* fix old IP header checksum */
 	ip_send_check(old_iph);
 
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+	if (IS_ERR(skb))
+		goto tx_error;
+
+	skb->transport_header = skb->network_header;
+
 	skb_push(skb, sizeof(struct iphdr));
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -900,7 +905,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	return NF_STOLEN;
 
   tx_error:
-	kfree_skb(skb);
+	if (!IS_ERR(skb))
+		kfree_skb(skb);
 	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -953,6 +959,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		old_iph = ipv6_hdr(skb);
 	}
 
+	/* GSO: we need to provide proper SKB_GSO_ value for IPv6 */
+	skb = iptunnel_handle_offloads(skb, false, 0); /* SKB_GSO_SIT/IPV6 */
+	if (IS_ERR(skb))
+		goto tx_error;
+
 	skb->transport_header = skb->network_header;
 
 	skb_push(skb, sizeof(struct ipv6hdr));
@@ -988,7 +999,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	return NF_STOLEN;
 
 tx_error:
-	kfree_skb(skb);
+	if (!IS_ERR(skb))
+		kfree_skb(skb);
 	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
-- 
cgit v1.1


From eb90b0c734ad793d5f5bf230a9e9a4dcc48df8aa Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 22 Aug 2014 17:53:41 +0300
Subject: ipvs: fix ipv6 hook registration for local replies

commit fc604767613b6d2036cdc35b660bc39451040a47
("ipvs: changes for local real server") from 2.6.37
introduced DNAT support to local real server but the
IPv6 LOCAL_OUT handler ip_vs_local_reply6() is
registered incorrectly as IPv4 hook causing any outgoing
IPv4 traffic to be dropped depending on the IP header values.

Chris tracked down the problem to CONFIG_IP_VS_IPV6=y
Bug report: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1349768

Reported-by: Chris J Arges <chris.j.arges@canonical.com>
Tested-by: Chris J Arges <chris.j.arges@canonical.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e683675..5c34e8d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1906,7 +1906,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	{
 		.hook		= ip_vs_local_reply6,
 		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
+		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_NAT_DST + 1,
 	},
-- 
cgit v1.1


From d9b2938aabf757da2d40153489b251d4fc3fdd18 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 27 Aug 2014 20:49:34 -0700
Subject: net: attempt a single high order allocation

In commit ed98df3361f0 ("net: use __GFP_NORETRY for high order
allocations") we tried to address one issue caused by order-3
allocations.

We still observe high latencies and system overhead in situations where
compaction is not successful.

Instead of trying order-3, order-2, and order-1, do a single order-3
best-effort attempt and immediately fall back to plain order-0.

This mimics the slub strategy of falling back to the slab min order if
the high order allocation used for performance failed.

Order-3 allocations give a performance boost only if they can be done
without recurring and expensive memory scans.

Quoting David :

The page allocator relies on synchronous (sync light) memory compaction
after direct reclaim for allocations that don't retry and deferred
compaction doesn't work with this strategy because the allocation order
is always decreasing from the previous failed attempt.

This means sync light compaction will always be encountered if memory
cannot be defragmented or reclaimed several times during the
skb_page_frag_refill() iteration.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 2714811..2987057 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1822,6 +1822,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 							   order);
 					if (page)
 						goto fill_page;
+					/* Do not retry other high order allocations */
+					order = 1;
+					max_page_order = 0;
 				}
 				order--;
 			}
@@ -1869,10 +1872,8 @@ EXPORT_SYMBOL(sock_alloc_send_skb);
  * no guarantee that allocations succeed. Therefore, @sz MUST be
  * less or equal than PAGE_SIZE.
  */
-bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
+bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
 {
-	int order;
-
 	if (pfrag->page) {
 		if (atomic_read(&pfrag->page->_count) == 1) {
 			pfrag->offset = 0;
@@ -1883,20 +1884,21 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
 		put_page(pfrag->page);
 	}
 
-	order = SKB_FRAG_PAGE_ORDER;
-	do {
-		gfp_t gfp = prio;
-
-		if (order)
-			gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
-		pfrag->page = alloc_pages(gfp, order);
+	pfrag->offset = 0;
+	if (SKB_FRAG_PAGE_ORDER) {
+		pfrag->page = alloc_pages(gfp | __GFP_COMP |
+					  __GFP_NOWARN | __GFP_NORETRY,
+					  SKB_FRAG_PAGE_ORDER);
 		if (likely(pfrag->page)) {
-			pfrag->offset = 0;
-			pfrag->size = PAGE_SIZE << order;
+			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
 			return true;
 		}
-	} while (--order >= 0);
-
+	}
+	pfrag->page = alloc_page(gfp);
+	if (likely(pfrag->page)) {
+		pfrag->size = PAGE_SIZE;
+		return true;
+	}
 	return false;
 }
 EXPORT_SYMBOL(skb_page_frag_refill);
-- 
cgit v1.1


From 38ab1fa981d543e1b00f4ffbce4ddb480cd2effe Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 28 Aug 2014 15:28:26 +0200
Subject: net: sctp: fix ABI mismatch through sctp_assoc_to_state helper

Since SCTP day 1, that is, 19b55a2af145 ("Initial commit") from lksctp
tree, the official <netinet/sctp.h> header carries a copy of enum
sctp_sstat_state that looks like (compared to the current in-kernel
enumeration):

  User definition:                     Kernel definition:

  enum sctp_sstat_state {              typedef enum {
    SCTP_EMPTY             = 0,          <removed>
    SCTP_CLOSED            = 1,          SCTP_STATE_CLOSED            = 0,
    SCTP_COOKIE_WAIT       = 2,          SCTP_STATE_COOKIE_WAIT       = 1,
    SCTP_COOKIE_ECHOED     = 3,          SCTP_STATE_COOKIE_ECHOED     = 2,
    SCTP_ESTABLISHED       = 4,          SCTP_STATE_ESTABLISHED       = 3,
    SCTP_SHUTDOWN_PENDING  = 5,          SCTP_STATE_SHUTDOWN_PENDING  = 4,
    SCTP_SHUTDOWN_SENT     = 6,          SCTP_STATE_SHUTDOWN_SENT     = 5,
    SCTP_SHUTDOWN_RECEIVED = 7,          SCTP_STATE_SHUTDOWN_RECEIVED = 6,
    SCTP_SHUTDOWN_ACK_SENT = 8,          SCTP_STATE_SHUTDOWN_ACK_SENT = 7,
  };                                   } sctp_state_t;

This header was later on also placed into the uapi, so that user space
programs can compile without having <netinet/sctp.h>, but with the
shipped <linux/sctp.h> instead.

While RFC6458 under 8.2.1. Association Status (SCTP_STATUS) says that
sstat_state can range from SCTP_CLOSED to SCTP_SHUTDOWN_ACK_SENT, we
nevertheless have what appears to be a dummy SCTP_EMPTY state from
the very early days.

While it seems to do just nothing, commit 0b8f9e25b0aa ("sctp: remove
completely unsed EMPTY state") did the right thing and removed this dead
code. That, however, causes an off-by-one when the user asks the SCTP
stack via the SCTP_STATUS API and checks for the current socket state,
thus yielding possibly undefined behaviour in applications as they expect
the kernel to report the right state.

The enumeration had to be changed however as based on the current socket
state, we access a function pointer lookup-table through this. Therefore,
I think the best way to deal with this is just to add a helper function
sctp_assoc_to_state() to encapsulate the off-by-one quirk.

Reported-by: Tristan Su <sooqing@gmail.com>
Fixes: 0b8f9e25b0aa ("sctp: remove completely unsed EMPTY state")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index eb71d49..634a2ab 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4243,7 +4243,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
 	transport = asoc->peer.primary_path;
 
 	status.sstat_assoc_id = sctp_assoc2id(asoc);
-	status.sstat_state = asoc->state;
+	status.sstat_state = sctp_assoc_to_state(asoc);
 	status.sstat_rwnd =  asoc->peer.rwnd;
 	status.sstat_unackdata = asoc->unack_data;
 
-- 
cgit v1.1


From d79a61d646db950b68dd79ecc627cb5f11e0d8ac Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 Aug 2014 09:34:49 +0200
Subject: netfilter: NETFILTER_XT_TARGET_LOG selects NF_LOG_*
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CONFIG_NETFILTER_XT_TARGET_LOG is not selected anymore when jumping
from 3.16 to 3.17-rc1 if you don't turn on the new NF_LOG_IPV4 and
NF_LOG_IPV6 switches.

Change this to select the three new symbols NF_LOG_COMMON, NF_LOG_IPV4
and NF_LOG_IPV6 instead, so NETFILTER_XT_TARGET_LOG remains enabled
when moving from old to new kernels.

Reported-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Kconfig | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 05eb177..4bef6eb 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -747,7 +747,9 @@ config NETFILTER_XT_TARGET_LED
 
 config NETFILTER_XT_TARGET_LOG
 	tristate "LOG target support"
-	depends on NF_LOG_IPV4 && NF_LOG_IPV6
+	select NF_LOG_COMMON
+	select NF_LOG_IPV4
+	select NF_LOG_IPV6 if IPV6
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `LOG' target, which allows you to create rules in
-- 
cgit v1.1


From 41ad82f7f8f59a472c8e8ccca56a9c6bdf3c5092 Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Tue, 2 Sep 2014 14:26:17 +0200
Subject: netfilter: fix missing dependencies in NETFILTER_XT_TARGET_LOG

make defconfig reports:

warning: (NETFILTER_XT_TARGET_LOG) selects NF_LOG_IPV6 which has unmet direct dependencies (NET && INET && IPV6 && NETFILTER && NETFILTER_ADVANCED)

Fixes: d79a61d netfilter: NETFILTER_XT_TARGET_LOG selects NF_LOG_*
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index cf0b88f..2812816 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -57,7 +57,7 @@ config NFT_REJECT_IPV6
 
 config NF_LOG_IPV6
 	tristate "IPv6 packet logging"
-	depends on NETFILTER_ADVANCED
+	default m if NETFILTER_ADVANCED=n
 	select NF_LOG_COMMON
 
 config NF_NAT_IPV6
-- 
cgit v1.1


From 4ee45ea05c8710c7ab8a5eb1a72700b874712746 Mon Sep 17 00:00:00 2001
From: Li RongQing <roy.qing.li@gmail.com>
Date: Tue, 2 Sep 2014 20:52:28 +0800
Subject: openvswitch: fix a memory leak

The user_skb may be leaked if an operation on it fails and the code
jumps to the label "out:" without calling genlmsg_unicast.

Cc: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 7228ec3..35d866f 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -404,7 +404,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 {
 	struct ovs_header *upcall;
 	struct sk_buff *nskb = NULL;
-	struct sk_buff *user_skb; /* to be queued to userspace */
+	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
 	struct nlattr *nla;
 	struct genl_info info = {
 		.dst_sk = ovs_dp_get_net(dp)->genl_sock,
@@ -494,9 +494,11 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
 
 	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
+	user_skb = NULL;
 out:
 	if (err)
 		skb_tx_error(skb);
+	kfree_skb(user_skb);
 	kfree_skb(nskb);
 	return err;
 }
-- 
cgit v1.1


From bd8c78e78d5011d8111bc2533ee73b13a3bd6c42 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 30 Jul 2014 14:55:26 +0200
Subject: nl80211: clear skb cb before passing to netlink

In testmode and vendor command reply/event SKBs we use the
skb cb data to store nl80211 parameters between allocation
and sending. This causes the code for CONFIG_NETLINK_MMAP
to get confused, because it takes ownership of the skb cb
data when the SKB is handed off to netlink, and it doesn't
explicitly clear it.

Clear the skb cb explicitly when we're done and before it
gets passed to netlink to avoid this issue.

Cc: stable@vger.kernel.org [this goes way back]
Reported-by: Assaf Azulay <assaf.azulay@intel.com>
Reported-by: David Spinadel <david.spinadel@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index df7b133..7257164 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6969,6 +6969,9 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp)
 	struct nlattr *data = ((void **)skb->cb)[2];
 	enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE;
 
+	/* clear CB data for netlink core to own from now on */
+	memset(skb->cb, 0, sizeof(skb->cb));
+
 	nla_nest_end(skb, data);
 	genlmsg_end(skb, hdr);
 
@@ -9294,6 +9297,9 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb)
 	void *hdr = ((void **)skb->cb)[1];
 	struct nlattr *data = ((void **)skb->cb)[2];
 
+	/* clear CB data for netlink core to own from now on */
+	memset(skb->cb, 0, sizeof(skb->cb));
+
 	if (WARN_ON(!rdev->cur_cmd_info)) {
 		kfree_skb(skb);
 		return -EINVAL;
-- 
cgit v1.1


From c5eba0b6f84eb4f0fdc1d8a4abc1c7d40db6e8a6 Mon Sep 17 00:00:00 2001
From: Li RongQing <roy.qing.li@gmail.com>
Date: Wed, 3 Sep 2014 17:43:45 +0800
Subject: openvswitch: distinguish between the dropped and consumed skb

Distinguish between a dropped and a consumed skb instead of assuming
that the skb is always consumed.

Cc: Thomas Graf <tgraf@noironetworks.com>
Cc: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 35d866f..91d66b7 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -265,8 +265,11 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 		upcall.key = &key;
 		upcall.userdata = NULL;
 		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
-		ovs_dp_upcall(dp, skb, &upcall);
-		consume_skb(skb);
+		error = ovs_dp_upcall(dp, skb, &upcall);
+		if (unlikely(error))
+			kfree_skb(skb);
+		else
+			consume_skb(skb);
 		stats_counter = &stats->n_missed;
 		goto out;
 	}
-- 
cgit v1.1


From 785e21a89d77923852869f83ebd2689ec4d5ce54 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 3 Sep 2014 15:25:04 +0300
Subject: mac80211: use bss_conf->dtim_period instead of conf.ps_dtim_period

sta_set_sinfo obviously takes data for a specific station.
This specific station is attached to a specific virtual
interface. Hence we should use the dtim_period from this
virtual interface rather than the system wide dtim_period.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 441875f..a1e433b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1822,7 +1822,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 		sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE;
 	if (sdata->vif.bss_conf.use_short_slot)
 		sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME;
-	sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period;
+	sinfo->bss_param.dtim_period = sdata->vif.bss_conf.dtim_period;
 	sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int;
 
 	sinfo->sta_flags.set = 0;
-- 
cgit v1.1


From a9ed4a2986e13011fcf4ed2d1a1647c53112f55b Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 2 Sep 2014 10:29:29 +0200
Subject: ipv6: fix rtnl locking in setsockopt for anycast and multicast

Calling setsockopt with IPV6_JOIN_ANYCAST or IPV6_LEAVE_ANYCAST
triggers the assertion in addrconf_join_solict()/addrconf_leave_solict().

ipv6_sock_ac_join(), ipv6_sock_ac_drop(), ipv6_sock_ac_close() need to
take RTNL before calling ipv6_dev_ac_inc/dec. Same thing with
ipv6_sock_mc_join(), ipv6_sock_mc_drop(), ipv6_sock_mc_close() before
calling ipv6_dev_mc_inc/dec.

This patch moves ASSERT_RTNL() up a level in the call stack.

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reported-by: Tommi Rantala <tt.rantala@gmail.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 15 +++++----------
 net/ipv6/anycast.c  | 12 ++++++++++++
 net/ipv6/mcast.c    | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0b239fc..aa0e135 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1690,14 +1690,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 	addrconf_mod_dad_work(ifp, 0);
 }
 
-/* Join to solicited addr multicast group. */
-
+/* Join to solicited addr multicast group.
+ * caller must hold RTNL */
 void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
-	ASSERT_RTNL();
-
 	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1705,12 +1703,11 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
 	ipv6_dev_mc_inc(dev, &maddr);
 }
 
+/* caller must hold RTNL */
 void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct in6_addr maddr;
 
-	ASSERT_RTNL();
-
 	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
 		return;
 
@@ -1718,12 +1715,11 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
 	__ipv6_dev_mc_dec(idev, &maddr);
 }
 
+/* caller must hold RTNL */
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
 
-	ASSERT_RTNL();
-
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1732,12 +1728,11 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 	ipv6_dev_ac_inc(ifp->idev->dev, &addr);
 }
 
+/* caller must hold RTNL */
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
 
-	ASSERT_RTNL();
-
 	if (ifp->prefix_len >= 127) /* RFC 6164 */
 		return;
 	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 2101832..45b9d81 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -77,6 +77,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	pac->acl_next = NULL;
 	pac->acl_addr = *addr;
 
+	rtnl_lock();
 	rcu_read_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
@@ -137,6 +138,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 error:
 	rcu_read_unlock();
+	rtnl_unlock();
 	if (pac)
 		sock_kfree_s(sk, pac, sizeof(*pac));
 	return err;
@@ -171,13 +173,17 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 	spin_unlock_bh(&ipv6_sk_ac_lock);
 
+	rtnl_lock();
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
 	if (dev)
 		ipv6_dev_ac_dec(dev, &pac->acl_addr);
 	rcu_read_unlock();
+	rtnl_unlock();
 
 	sock_kfree_s(sk, pac, sizeof(*pac));
+	if (!dev)
+		return -ENODEV;
 	return 0;
 }
 
@@ -198,6 +204,7 @@ void ipv6_sock_ac_close(struct sock *sk)
 	spin_unlock_bh(&ipv6_sk_ac_lock);
 
 	prev_index = 0;
+	rtnl_lock();
 	rcu_read_lock();
 	while (pac) {
 		struct ipv6_ac_socklist *next = pac->acl_next;
@@ -212,6 +219,7 @@ void ipv6_sock_ac_close(struct sock *sk)
 		pac = next;
 	}
 	rcu_read_unlock();
+	rtnl_unlock();
 }
 
 static void aca_put(struct ifacaddr6 *ac)
@@ -233,6 +241,8 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
 	struct rt6_info *rt;
 	int err;
 
+	ASSERT_RTNL();
+
 	idev = in6_dev_get(dev);
 
 	if (idev == NULL)
@@ -302,6 +312,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct ifacaddr6 *aca, *prev_aca;
 
+	ASSERT_RTNL();
+
 	write_lock_bh(&idev->lock);
 	prev_aca = NULL;
 	for (aca = idev->ac_list; aca; aca = aca->aca_next) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 617f095..a23b655 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -172,6 +172,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	mc_lst->next = NULL;
 	mc_lst->addr = *addr;
 
+	rtnl_lock();
 	rcu_read_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
@@ -185,6 +186,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 	if (dev == NULL) {
 		rcu_read_unlock();
+		rtnl_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 		return -ENODEV;
 	}
@@ -202,6 +204,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 	if (err) {
 		rcu_read_unlock();
+		rtnl_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 		return err;
 	}
@@ -212,6 +215,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	spin_unlock(&ipv6_sk_mc_lock);
 
 	rcu_read_unlock();
+	rtnl_unlock();
 
 	return 0;
 }
@@ -229,6 +233,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
+	rtnl_lock();
 	spin_lock(&ipv6_sk_mc_lock);
 	for (lnk = &np->ipv6_mc_list;
 	     (mc_lst = rcu_dereference_protected(*lnk,
@@ -252,12 +257,15 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			} else
 				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 			rcu_read_unlock();
+			rtnl_unlock();
+
 			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
 			kfree_rcu(mc_lst, rcu);
 			return 0;
 		}
 	}
 	spin_unlock(&ipv6_sk_mc_lock);
+	rtnl_unlock();
 
 	return -EADDRNOTAVAIL;
 }
@@ -302,6 +310,7 @@ void ipv6_sock_mc_close(struct sock *sk)
 	if (!rcu_access_pointer(np->ipv6_mc_list))
 		return;
 
+	rtnl_lock();
 	spin_lock(&ipv6_sk_mc_lock);
 	while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
 				lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
@@ -328,6 +337,7 @@ void ipv6_sock_mc_close(struct sock *sk)
 		spin_lock(&ipv6_sk_mc_lock);
 	}
 	spin_unlock(&ipv6_sk_mc_lock);
+	rtnl_unlock();
 }
 
 int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -845,6 +855,8 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 	struct ifmcaddr6 *mc;
 	struct inet6_dev *idev;
 
+	ASSERT_RTNL();
+
 	/* we need to take a reference on idev */
 	idev = in6_dev_get(dev);
 
@@ -916,6 +928,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct ifmcaddr6 *ma, **map;
 
+	ASSERT_RTNL();
+
 	write_lock_bh(&idev->lock);
 	for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) {
 		if (ipv6_addr_equal(&ma->mca_addr, addr)) {
-- 
cgit v1.1


From eed4d839b0cdf9d84b0a9bc63de90fd5e1e886fb Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 3 Sep 2014 14:12:55 +0200
Subject: l2tp: fix race while getting PMTU on PPP pseudo-wire

Use dst_entry held by sk_dst_get() to retrieve tunnel's PMTU.

The dst_mtu(__sk_dst_get(tunnel->sock)) call was racy. __sk_dst_get()
could return NULL if tunnel->sock->sk_dst_cache was reset just before the
call, thus making dst_mtu() dereference a NULL pointer:

[ 1937.661598] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
[ 1937.664005] IP: [<ffffffffa049db88>] pppol2tp_connect+0x33d/0x41e [l2tp_ppp]
[ 1937.664005] PGD daf0c067 PUD d9f93067 PMD 0
[ 1937.664005] Oops: 0000 [#1] SMP
[ 1937.664005] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6table_filter ip6_tables iptable_filter ip_tables ebtable_nat ebtables x_tables udp_tunnel pppoe pppox ppp_generic slhc deflate ctr twofish_generic twofish_x86_64_3way xts lrw gf128mul glue_helper twofish_x86_64 twofish_common blowfish_generic blowfish_x86_64 blowfish_common des_generic cbc xcbc rmd160 sha512_generic hmac crypto_null af_key xfrm_algo 8021q garp bridge stp llc tun atmtcp clip atm ext3 mbcache jbd iTCO_wdt coretemp kvm_intel iTCO_vendor_support kvm pcspkr evdev ehci_pci lpc_ich mfd_core i5400_edac edac_core i5k_amb shpchp button processor thermal_sys xfs crc32c_generic libcrc32c dm_mod usbhid sg hid sr_mod sd_mod cdrom crc_t10dif crct10dif_common ata_generic ahci ata_piix tg3 libahci libata uhci_hcd ptp ehci_hcd pps_core usbcore scsi_mod libphy usb_common [last unloaded: l2tp_core]
[ 1937.664005] CPU: 0 PID: 10022 Comm: l2tpstress Tainted: G           O   3.17.0-rc1 #1
[ 1937.664005] Hardware name: HP ProLiant DL160 G5, BIOS O12 08/22/2008
[ 1937.664005] task: ffff8800d8fda790 ti: ffff8800c43c4000 task.ti: ffff8800c43c4000
[ 1937.664005] RIP: 0010:[<ffffffffa049db88>]  [<ffffffffa049db88>] pppol2tp_connect+0x33d/0x41e [l2tp_ppp]
[ 1937.664005] RSP: 0018:ffff8800c43c7de8  EFLAGS: 00010282
[ 1937.664005] RAX: ffff8800da8a7240 RBX: ffff8800d8c64600 RCX: 000001c325a137b5
[ 1937.664005] RDX: 8c6318c6318c6320 RSI: 000000000000010c RDI: 0000000000000000
[ 1937.664005] RBP: ffff8800c43c7ea8 R08: 0000000000000000 R09: 0000000000000000
[ 1937.664005] R10: ffffffffa048e2c0 R11: ffff8800d8c64600 R12: ffff8800ca7a5000
[ 1937.664005] R13: ffff8800c439bf40 R14: 000000000000000c R15: 0000000000000009
[ 1937.664005] FS:  00007fd7f610f700(0000) GS:ffff88011a600000(0000) knlGS:0000000000000000
[ 1937.664005] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1937.664005] CR2: 0000000000000020 CR3: 00000000d9d75000 CR4: 00000000000027e0
[ 1937.664005] Stack:
[ 1937.664005]  ffffffffa049da80 ffff8800d8fda790 000000000000005b ffff880000000009
[ 1937.664005]  ffff8800daf3f200 0000000000000003 ffff8800c43c7e48 ffffffff81109b57
[ 1937.664005]  ffffffff81109b0e ffffffff8114c566 0000000000000000 0000000000000000
[ 1937.664005] Call Trace:
[ 1937.664005]  [<ffffffffa049da80>] ? pppol2tp_connect+0x235/0x41e [l2tp_ppp]
[ 1937.664005]  [<ffffffff81109b57>] ? might_fault+0x9e/0xa5
[ 1937.664005]  [<ffffffff81109b0e>] ? might_fault+0x55/0xa5
[ 1937.664005]  [<ffffffff8114c566>] ? rcu_read_unlock+0x1c/0x26
[ 1937.664005]  [<ffffffff81309196>] SYSC_connect+0x87/0xb1
[ 1937.664005]  [<ffffffff813e56f7>] ? sysret_check+0x1b/0x56
[ 1937.664005]  [<ffffffff8107590d>] ? trace_hardirqs_on_caller+0x145/0x1a1
[ 1937.664005]  [<ffffffff81213dee>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[ 1937.664005]  [<ffffffff8114c262>] ? spin_lock+0x9/0xb
[ 1937.664005]  [<ffffffff813092b4>] SyS_connect+0x9/0xb
[ 1937.664005]  [<ffffffff813e56d2>] system_call_fastpath+0x16/0x1b
[ 1937.664005] Code: 10 2a 84 81 e8 65 76 bd e0 65 ff 0c 25 10 bb 00 00 4d 85 ed 74 37 48 8b 85 60 ff ff ff 48 8b 80 88 01 00 00 48 8b b8 10 02 00 00 <48> 8b 47 20 ff 50 20 85 c0 74 0f 83 e8 28 89 83 10 01 00 00 89
[ 1937.664005] RIP  [<ffffffffa049db88>] pppol2tp_connect+0x33d/0x41e [l2tp_ppp]
[ 1937.664005]  RSP <ffff8800c43c7de8>
[ 1937.664005] CR2: 0000000000000020
[ 1939.559375] ---[ end trace 82d44500f28f8708 ]---

Fixes: f34c4a35d879 ("l2tp: take PMTU from tunnel UDP socket")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 13752d9..b704a93 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -755,7 +755,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	/* If PMTU discovery was enabled, use the MTU that was discovered */
 	dst = sk_dst_get(tunnel->sock);
 	if (dst != NULL) {
-		u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock));
+		u32 pmtu = dst_mtu(dst);
+
 		if (pmtu != 0)
 			session->mtu = session->mru = pmtu -
 				PPPOL2TP_HEADER_OVERHEAD;
-- 
cgit v1.1


From c199105d154e029cd8c94cccd35bd073e64acc45 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 3 Sep 2014 12:01:18 -0400
Subject: net-timestamp: only report sw timestamp if reporting bit is set

The timestamping API has separate bits for generating and reporting
timestamps. A software timestamp should only be reported for a packet
when the packet has the relevant generation flag (SKBTX_..) set
and the socket has reporting bit SOF_TIMESTAMPING_SOFTWARE set.

The second check was accidentally removed. Reinstitute the original
behavior.

Tested:
  Without this patch, Documentation/networking/txtimestamp reports
  timestamps regardless of whether SOF_TIMESTAMPING_SOFTWARE is set.
  After the patch, it only reports them when the flag is set.

Fixes: f24b9be5957b ("net-timestamp: extend SCM_TIMESTAMPING ancillary data struct")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 95ee7d8..4eb09b3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -734,8 +734,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	}
 
 	memset(&tss, 0, sizeof(tss));
-	if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE ||
-	     skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP) &&
+	if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
 	    ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
 		empty = 0;
 	if (shhwtstamps &&
-- 
cgit v1.1


From f24062b07dda89b0e24fa48e7bc3865a725f5ee6 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 3 Sep 2014 23:59:21 +0200
Subject: ipv6: fix a refcnt leak with peer addr

There is no reason to take a refcnt before deleting the peer address route.
It's done some lines below for the local prefix route because
inet6_ifa_finish_destroy() will release it at the end.
For the peer address route, we want to free it right now.

This bug has been introduced by commit
caeaba79009c ("ipv6: add support of peer address").

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index aa0e135..ce761c7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4772,11 +4772,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 
 			rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
 					dev->ifindex, 1);
-			if (rt) {
-				dst_hold(&rt->dst);
-				if (ip6_del_rt(rt))
-					dst_free(&rt->dst);
-			}
+			if (rt && ip6_del_rt(rt))
+				dst_free(&rt->dst);
 		}
 		dst_hold(&ifp->rt->dst);
 
-- 
cgit v1.1


From e7478dfc4656f4a739ed1b07cfd59c12f8eb112e Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 3 Sep 2014 23:59:22 +0200
Subject: ipv6: use addrconf_get_prefix_route() to remove peer addr

addrconf_get_prefix_route() ensures that we get the right route in the right table.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ce761c7..fc1fac2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4768,10 +4768,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		if (!ipv6_addr_any(&ifp->peer_addr)) {
 			struct rt6_info *rt;
-			struct net_device *dev = ifp->idev->dev;
 
-			rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
-					dev->ifindex, 1);
+			rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
+						       ifp->idev->dev, 0, 0);
 			if (rt && ip6_del_rt(rt))
 				dst_free(&rt->dst);
 		}
-- 
cgit v1.1


From 84a59ca55f699d1d1fbfffd75445bcfe0c3daf06 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 4 Sep 2014 09:47:21 +0200
Subject: netfilter: add explicit Kconfig for NETFILTER_XT_NAT

Paul Bolle reports that 'select NETFILTER_XT_NAT' from the IPV4 and IPV6
NAT tables becomes noop since there is no Kconfig switch for it. Add the
Kconfig switch to resolve this problem.

Fixes: 8993cf8 netfilter: move NAT Kconfig switches out of the iptables scope
Reported-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 4bef6eb..b5c1d3a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -766,6 +766,14 @@ config NETFILTER_XT_TARGET_MARK
 	(e.g. when running oldconfig). It selects
 	CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
 
+config NETFILTER_XT_NAT
+	tristate '"SNAT and DNAT" targets support'
+	depends on NF_NAT
+	---help---
+	This option enables the SNAT and DNAT targets.
+
+	To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_TARGET_NETMAP
 	tristate '"NETMAP" target support'
 	depends on NF_NAT
-- 
cgit v1.1


From e793c0f70e9bdf4a2e71c151a1a3cf85c4db92ad Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Thu, 4 Sep 2014 23:44:36 +0900
Subject: net: treewide: Fix typo found in DocBook/networking.xml

This patch fixes spelling typos found in DocBook/networking.xml.
Because networking.xml is generated from comments in the source,
the typos have to be fixed in the comments within the source.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c      | 2 +-
 net/core/gen_estimator.c | 2 +-
 net/core/gen_stats.c     | 2 +-
 net/core/skbuff.c        | 4 ++--
 net/core/sock.c          | 4 ++--
 net/socket.c             | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 488dd1a..fdbc9a8 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -775,7 +775,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
 EXPORT_SYMBOL(__skb_checksum_complete);
 
 /**
- *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
+ *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
  *	@skb: skbuff
  *	@hlen: hardware length
  *	@iov: io vector
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6b5b6e7..9d33dff 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -197,7 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  * as destination. A new timer with the interval specified in the
  * configuration TLV is created. Upon each interval, the latest statistics
  * will be read from &bstats and the estimated rate will be stored in
- * &rate_est with the statistics lock grabed during this period.
+ * &rate_est with the statistics lock grabbed during this period.
  *
  * Returns 0 on success or a negative error code.
  *
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 9d3d9e7..2ddbce4 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -206,7 +206,7 @@ EXPORT_SYMBOL(gnet_stats_copy_queue);
  * @st: application specific statistics data
  * @len: length of data
  *
- * Appends the application sepecific statistics to the top level TLV created by
+ * Appends the application specific statistics to the top level TLV created by
  * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping
  * handle is in backward compatibility mode.
  *
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 163b673..da1378a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2647,7 +2647,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
  * skb_seq_read() will return the remaining part of the block.
  *
  * Note 1: The size of each block of data returned can be arbitrary,
- *       this limitation is the cost for zerocopy seqeuental
+ *       this limitation is the cost for zerocopy sequential
  *       reads of potentially non linear data.
  *
  * Note 2: Fragment lists within fragments are not implemented
@@ -2781,7 +2781,7 @@ EXPORT_SYMBOL(skb_find_text);
 /**
  * skb_append_datato_frags - append the user data to a skb
  * @sk: sock  structure
- * @skb: skb structure to be appened with user data.
+ * @skb: skb structure to be appended with user data.
  * @getfrag: call back function to be used for getting the user data
  * @from: pointer to user message iov
  * @length: length of the iov message
diff --git a/net/core/sock.c b/net/core/sock.c
index 2987057..d372b4b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(sk_ns_capable);
 /**
  * sk_capable - Socket global capability test
  * @sk: Socket to use a capability on or through
- * @cap: The global capbility to use
+ * @cap: The global capability to use
  *
  * Test to see if the opener of the socket had when the socket was
  * created and the current process has the capability @cap in all user
@@ -183,7 +183,7 @@ EXPORT_SYMBOL(sk_capable);
  * @sk: Socket to use a capability on or through
  * @cap: The capability to use
  *
- * Test to see if the opener of the socket had when the socke was created
+ * Test to see if the opener of the socket had when the socket was created
  * and the current process has the capability @cap over the network namespace
  * the socket is a member of.
  */
diff --git a/net/socket.c b/net/socket.c
index 4eb09b3..2e2586e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2601,7 +2601,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
  *
  *	This function is called by a protocol handler that wants to
  *	advertise its address family, and have it linked into the
- *	socket interface. The value ops->family coresponds to the
+ *	socket interface. The value ops->family corresponds to the
  *	socket system call protocol family.
  */
 int sock_register(const struct net_proto_family *ops)
-- 
cgit v1.1


From de185ab46cb02df9738b0d898b0c3a89181c5526 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Fri, 5 Sep 2014 14:33:00 -0700
Subject: ipv6: restore the behavior of ipv6_sock_ac_drop()

It is possible that the interface is already gone after joining
the list of anycast on this interface as we don't hold a refcount
for the device, in this case we are safe to ignore the error.

What's more important, for API compatibility we should not
change this behavior for applications even if it were correct.

Fixes: commit a9ed4a2986e13011 ("ipv6: fix rtnl locking in setsockopt for anycast and multicast")
Cc: Sabrina Dubroca <sd@queasysnail.net>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/anycast.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 45b9d81..ff2de7d 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -182,8 +182,6 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	rtnl_unlock();
 
 	sock_kfree_s(sk, pac, sizeof(*pac));
-	if (!dev)
-		return -ENODEV;
 	return 0;
 }
 
-- 
cgit v1.1


From 82d5e2b8b466d5bfc7c6278a7c04a53b9b287673 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 8 Sep 2014 04:00:00 -0700
Subject: net: fix skb_page_frag_refill() kerneldoc

In commit d9b2938aabf7 ("net: attempt a single high order allocation")
I forgot to update the kerneldoc, as the @prio parameter was renamed to @gfp.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index d372b4b..9c3f823 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1866,7 +1866,7 @@ EXPORT_SYMBOL(sock_alloc_send_skb);
  * skb_page_frag_refill - check that a page_frag contains enough room
  * @sz: minimum size of the fragment we want to get
  * @pfrag: pointer to page_frag
- * @prio: priority for memory allocation
+ * @gfp: priority for memory allocation
  *
  * Note: While this allocator tries to use high order pages, there is
  * no guarantee that allocations succeed. Therefore, @sz MUST be
-- 
cgit v1.1


From 6a2a2b3ae0759843b22c929881cc184b00cc63ff Mon Sep 17 00:00:00 2001
From: Ani Sinha <ani@arista.com>
Date: Mon, 8 Sep 2014 14:49:59 -0700
Subject: net:socket: set msg_namelen to 0 if msg_name is passed as NULL in
 msghdr struct from userland.

The Linux manpage for the recvmsg and sendmsg calls does not explicitly mention setting msg_namelen to 0 when
msg_name is passed as NULL. When developers don't set the msg_namelen member in msghdr, it might contain a garbage
value which will fail the validation check, and the kernel will return EINVAL from the sendmsg and recvmsg calls. This will
break old binaries and any code for which there is no access to the source code.
To fix this, we set msg_namelen to 0 when msg_name is passed as NULL from userland.

Signed-off-by: Ani Sinha <ani@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 2e2586e..4cdbc10 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1996,6 +1996,9 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 	if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
 		return -EFAULT;
 
+	if (kmsg->msg_name == NULL)
+		kmsg->msg_namelen = 0;
+
 	if (kmsg->msg_namelen < 0)
 		return -EINVAL;
 
-- 
cgit v1.1


From ed3bfdfdced76aa7edb0b05c0d739ee3a2a6e619 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Sep 2014 15:19:44 +0100
Subject: RxRPC: Fix missing __user annotation

Fix a missing __user annotation in a cast of a user space pointer (found by
checker).

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-key.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index b45d080..1b24191 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -1143,7 +1143,7 @@ static long rxrpc_read(const struct key *key,
 		if (copy_to_user(xdr, (s), _l) != 0)			\
 			goto fault;					\
 		if (_l & 3 &&						\
-		    copy_to_user((u8 *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \
+		    copy_to_user((u8 __user *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \
 			goto fault;					\
 		xdr += (_l + 3) >> 2;					\
 	} while(0)
-- 
cgit v1.1


From 73c3d4812b4c755efeca0140f606f83772a39ce4 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Mon, 4 Aug 2014 07:01:54 -0700
Subject: libceph: gracefully handle large reply messages from the mon

We preallocate a few of the message types we get back from the mon.  If we
get a larger message than we are expecting, fall back to trying to allocate
a new one instead of blindly using the one we have.

CC: stable@vger.kernel.org
Signed-off-by: Sage Weil <sage@redhat.com>
Reviewed-by: Ilya Dryomov <ilya.dryomov@inktank.com>
---
 net/ceph/mon_client.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 067d3af..61fcfc3 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1181,7 +1181,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
 	if (!m) {
 		pr_info("alloc_msg unknown type %d\n", type);
 		*skip = 1;
+	} else if (front_len > m->front_alloc_len) {
+		pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n",
+			   front_len, m->front_alloc_len,
+			   (unsigned int)con->peer_name.type,
+			   le64_to_cpu(con->peer_name.num));
+		ceph_msg_put(m);
+		m = ceph_msg_new(type, front_len, GFP_NOFS, false);
 	}
+
 	return m;
 }
 
-- 
cgit v1.1


From 597cda357716a3cf8d994cb11927af917c8d71fa Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Mon, 8 Sep 2014 17:25:34 +0400
Subject: libceph: add process_one_ticket() helper

Add a helper for processing individual cephx auth tickets.  Needed for
the next commit, which deals with allocating ticket buffers.  (Most of
the diff here is whitespace - view with git diff -b).

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 net/ceph/auth_x.c | 228 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 124 insertions(+), 104 deletions(-)

(limited to 'net')

diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 96238ba..0eb146d 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -129,17 +129,131 @@ static void remove_ticket_handler(struct ceph_auth_client *ac,
 	kfree(th);
 }
 
+static int process_one_ticket(struct ceph_auth_client *ac,
+			      struct ceph_crypto_key *secret,
+			      void **p, void *end,
+			      void *dbuf, void *ticket_buf)
+{
+	struct ceph_x_info *xi = ac->private;
+	int type;
+	u8 tkt_struct_v, blob_struct_v;
+	struct ceph_x_ticket_handler *th;
+	void *dp, *dend;
+	int dlen;
+	char is_enc;
+	struct timespec validity;
+	struct ceph_crypto_key old_key;
+	void *tp, *tpend;
+	struct ceph_timespec new_validity;
+	struct ceph_crypto_key new_session_key;
+	struct ceph_buffer *new_ticket_blob;
+	unsigned long new_expires, new_renew_after;
+	u64 new_secret_id;
+	int ret;
+
+	ceph_decode_need(p, end, sizeof(u32) + 1, bad);
+
+	type = ceph_decode_32(p);
+	dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
+
+	tkt_struct_v = ceph_decode_8(p);
+	if (tkt_struct_v != 1)
+		goto bad;
+
+	th = get_ticket_handler(ac, type);
+	if (IS_ERR(th)) {
+		ret = PTR_ERR(th);
+		goto out;
+	}
+
+	/* blob for me */
+	dlen = ceph_x_decrypt(secret, p, end, dbuf,
+			      TEMP_TICKET_BUF_LEN);
+	if (dlen <= 0) {
+		ret = dlen;
+		goto out;
+	}
+	dout(" decrypted %d bytes\n", dlen);
+	dp = dbuf;
+	dend = dp + dlen;
+
+	tkt_struct_v = ceph_decode_8(&dp);
+	if (tkt_struct_v != 1)
+		goto bad;
+
+	memcpy(&old_key, &th->session_key, sizeof(old_key));
+	ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
+	if (ret)
+		goto out;
+
+	ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+	ceph_decode_timespec(&validity, &new_validity);
+	new_expires = get_seconds() + validity.tv_sec;
+	new_renew_after = new_expires - (validity.tv_sec / 4);
+	dout(" expires=%lu renew_after=%lu\n", new_expires,
+	     new_renew_after);
+
+	/* ticket blob for service */
+	ceph_decode_8_safe(p, end, is_enc, bad);
+	tp = ticket_buf;
+	if (is_enc) {
+		/* encrypted */
+		dout(" encrypted ticket\n");
+		dlen = ceph_x_decrypt(&old_key, p, end, ticket_buf,
+				      TEMP_TICKET_BUF_LEN);
+		if (dlen < 0) {
+			ret = dlen;
+			goto out;
+		}
+		dlen = ceph_decode_32(&tp);
+	} else {
+		/* unencrypted */
+		ceph_decode_32_safe(p, end, dlen, bad);
+		ceph_decode_need(p, end, dlen, bad);
+		ceph_decode_copy(p, ticket_buf, dlen);
+	}
+	tpend = tp + dlen;
+	dout(" ticket blob is %d bytes\n", dlen);
+	ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
+	blob_struct_v = ceph_decode_8(&tp);
+	new_secret_id = ceph_decode_64(&tp);
+	ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
+	if (ret)
+		goto out;
+
+	/* all is well, update our ticket */
+	ceph_crypto_key_destroy(&th->session_key);
+	if (th->ticket_blob)
+		ceph_buffer_put(th->ticket_blob);
+	th->session_key = new_session_key;
+	th->ticket_blob = new_ticket_blob;
+	th->validity = new_validity;
+	th->secret_id = new_secret_id;
+	th->expires = new_expires;
+	th->renew_after = new_renew_after;
+	dout(" got ticket service %d (%s) secret_id %lld len %d\n",
+	     type, ceph_entity_type_name(type), th->secret_id,
+	     (int)th->ticket_blob->vec.iov_len);
+	xi->have_keys |= th->service;
+
+out:
+	return ret;
+
+bad:
+	ret = -EINVAL;
+	goto out;
+}
+
 static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 				    struct ceph_crypto_key *secret,
 				    void *buf, void *end)
 {
-	struct ceph_x_info *xi = ac->private;
-	int num;
 	void *p = buf;
-	int ret;
 	char *dbuf;
 	char *ticket_buf;
 	u8 reply_struct_v;
+	u32 num;
+	int ret;
 
 	dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
 	if (!dbuf)
@@ -150,112 +264,18 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	if (!ticket_buf)
 		goto out_dbuf;
 
-	ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
-	reply_struct_v = ceph_decode_8(&p);
+	ceph_decode_8_safe(&p, end, reply_struct_v, bad);
 	if (reply_struct_v != 1)
-		goto bad;
-	num = ceph_decode_32(&p);
-	dout("%d tickets\n", num);
-	while (num--) {
-		int type;
-		u8 tkt_struct_v, blob_struct_v;
-		struct ceph_x_ticket_handler *th;
-		void *dp, *dend;
-		int dlen;
-		char is_enc;
-		struct timespec validity;
-		struct ceph_crypto_key old_key;
-		void *tp, *tpend;
-		struct ceph_timespec new_validity;
-		struct ceph_crypto_key new_session_key;
-		struct ceph_buffer *new_ticket_blob;
-		unsigned long new_expires, new_renew_after;
-		u64 new_secret_id;
-
-		ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
-
-		type = ceph_decode_32(&p);
-		dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
-
-		tkt_struct_v = ceph_decode_8(&p);
-		if (tkt_struct_v != 1)
-			goto bad;
-
-		th = get_ticket_handler(ac, type);
-		if (IS_ERR(th)) {
-			ret = PTR_ERR(th);
-			goto out;
-		}
-
-		/* blob for me */
-		dlen = ceph_x_decrypt(secret, &p, end, dbuf,
-				      TEMP_TICKET_BUF_LEN);
-		if (dlen <= 0) {
-			ret = dlen;
-			goto out;
-		}
-		dout(" decrypted %d bytes\n", dlen);
-		dend = dbuf + dlen;
-		dp = dbuf;
-
-		tkt_struct_v = ceph_decode_8(&dp);
-		if (tkt_struct_v != 1)
-			goto bad;
+		return -EINVAL;
 
-		memcpy(&old_key, &th->session_key, sizeof(old_key));
-		ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
-		if (ret)
-			goto out;
+	ceph_decode_32_safe(&p, end, num, bad);
+	dout("%d tickets\n", num);
 
-		ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
-		ceph_decode_timespec(&validity, &new_validity);
-		new_expires = get_seconds() + validity.tv_sec;
-		new_renew_after = new_expires - (validity.tv_sec / 4);
-		dout(" expires=%lu renew_after=%lu\n", new_expires,
-		     new_renew_after);
-
-		/* ticket blob for service */
-		ceph_decode_8_safe(&p, end, is_enc, bad);
-		tp = ticket_buf;
-		if (is_enc) {
-			/* encrypted */
-			dout(" encrypted ticket\n");
-			dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf,
-					      TEMP_TICKET_BUF_LEN);
-			if (dlen < 0) {
-				ret = dlen;
-				goto out;
-			}
-			dlen = ceph_decode_32(&tp);
-		} else {
-			/* unencrypted */
-			ceph_decode_32_safe(&p, end, dlen, bad);
-			ceph_decode_need(&p, end, dlen, bad);
-			ceph_decode_copy(&p, ticket_buf, dlen);
-		}
-		tpend = tp + dlen;
-		dout(" ticket blob is %d bytes\n", dlen);
-		ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
-		blob_struct_v = ceph_decode_8(&tp);
-		new_secret_id = ceph_decode_64(&tp);
-		ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
+	while (num--) {
+		ret = process_one_ticket(ac, secret, &p, end,
+					 dbuf, ticket_buf);
 		if (ret)
 			goto out;
-
-		/* all is well, update our ticket */
-		ceph_crypto_key_destroy(&th->session_key);
-		if (th->ticket_blob)
-			ceph_buffer_put(th->ticket_blob);
-		th->session_key = new_session_key;
-		th->ticket_blob = new_ticket_blob;
-		th->validity = new_validity;
-		th->secret_id = new_secret_id;
-		th->expires = new_expires;
-		th->renew_after = new_renew_after;
-		dout(" got ticket service %d (%s) secret_id %lld len %d\n",
-		     type, ceph_entity_type_name(type), th->secret_id,
-		     (int)th->ticket_blob->vec.iov_len);
-		xi->have_keys |= th->service;
 	}
 
 	ret = 0;
-- 
cgit v1.1


From c27a3e4d667fdcad3db7b104f75659478e0c68d8 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Tue, 9 Sep 2014 19:39:15 +0400
Subject: libceph: do not hard code max auth ticket len

We hard code cephx auth ticket buffer size to 256 bytes.  This isn't
enough for any moderate setups and, in case tickets themselves are not
encrypted, leads to buffer overflows (ceph_x_decrypt() errors out, but
ceph_decode_copy() doesn't - it's just a memcpy() wrapper).  Since the
buffer is allocated dynamically anyway, allocate it a bit later, at
the point where we know how much is going to be needed.

Fixes: http://tracker.ceph.com/issues/8979

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 net/ceph/auth_x.c | 64 +++++++++++++++++++++++++------------------------------
 1 file changed, 29 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 0eb146d..de6662b 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -13,8 +13,6 @@
 #include "auth_x.h"
 #include "auth_x_protocol.h"
 
-#define TEMP_TICKET_BUF_LEN	256
-
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
 
 static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
@@ -64,7 +62,7 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret,
 }
 
 static int ceph_x_decrypt(struct ceph_crypto_key *secret,
-			  void **p, void *end, void *obuf, size_t olen)
+			  void **p, void *end, void **obuf, size_t olen)
 {
 	struct ceph_x_encrypt_header head;
 	size_t head_len = sizeof(head);
@@ -75,8 +73,14 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret,
 		return -EINVAL;
 
 	dout("ceph_x_decrypt len %d\n", len);
-	ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen,
-			    *p, len);
+	if (*obuf == NULL) {
+		*obuf = kmalloc(len, GFP_NOFS);
+		if (!*obuf)
+			return -ENOMEM;
+		olen = len;
+	}
+
+	ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len);
 	if (ret)
 		return ret;
 	if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
@@ -131,18 +135,19 @@ static void remove_ticket_handler(struct ceph_auth_client *ac,
 
 static int process_one_ticket(struct ceph_auth_client *ac,
 			      struct ceph_crypto_key *secret,
-			      void **p, void *end,
-			      void *dbuf, void *ticket_buf)
+			      void **p, void *end)
 {
 	struct ceph_x_info *xi = ac->private;
 	int type;
 	u8 tkt_struct_v, blob_struct_v;
 	struct ceph_x_ticket_handler *th;
+	void *dbuf = NULL;
 	void *dp, *dend;
 	int dlen;
 	char is_enc;
 	struct timespec validity;
 	struct ceph_crypto_key old_key;
+	void *ticket_buf = NULL;
 	void *tp, *tpend;
 	struct ceph_timespec new_validity;
 	struct ceph_crypto_key new_session_key;
@@ -167,8 +172,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	}
 
 	/* blob for me */
-	dlen = ceph_x_decrypt(secret, p, end, dbuf,
-			      TEMP_TICKET_BUF_LEN);
+	dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0);
 	if (dlen <= 0) {
 		ret = dlen;
 		goto out;
@@ -195,20 +199,25 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 
 	/* ticket blob for service */
 	ceph_decode_8_safe(p, end, is_enc, bad);
-	tp = ticket_buf;
 	if (is_enc) {
 		/* encrypted */
 		dout(" encrypted ticket\n");
-		dlen = ceph_x_decrypt(&old_key, p, end, ticket_buf,
-				      TEMP_TICKET_BUF_LEN);
+		dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0);
 		if (dlen < 0) {
 			ret = dlen;
 			goto out;
 		}
+		tp = ticket_buf;
 		dlen = ceph_decode_32(&tp);
 	} else {
 		/* unencrypted */
 		ceph_decode_32_safe(p, end, dlen, bad);
+		ticket_buf = kmalloc(dlen, GFP_NOFS);
+		if (!ticket_buf) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		tp = ticket_buf;
 		ceph_decode_need(p, end, dlen, bad);
 		ceph_decode_copy(p, ticket_buf, dlen);
 	}
@@ -237,6 +246,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	xi->have_keys |= th->service;
 
 out:
+	kfree(ticket_buf);
+	kfree(dbuf);
 	return ret;
 
 bad:
@@ -249,21 +260,10 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 				    void *buf, void *end)
 {
 	void *p = buf;
-	char *dbuf;
-	char *ticket_buf;
 	u8 reply_struct_v;
 	u32 num;
 	int ret;
 
-	dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
-	if (!dbuf)
-		return -ENOMEM;
-
-	ret = -ENOMEM;
-	ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
-	if (!ticket_buf)
-		goto out_dbuf;
-
 	ceph_decode_8_safe(&p, end, reply_struct_v, bad);
 	if (reply_struct_v != 1)
 		return -EINVAL;
@@ -272,22 +272,15 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	dout("%d tickets\n", num);
 
 	while (num--) {
-		ret = process_one_ticket(ac, secret, &p, end,
-					 dbuf, ticket_buf);
+		ret = process_one_ticket(ac, secret, &p, end);
 		if (ret)
-			goto out;
+			return ret;
 	}
 
-	ret = 0;
-out:
-	kfree(ticket_buf);
-out_dbuf:
-	kfree(dbuf);
-	return ret;
+	return 0;
 
 bad:
-	ret = -EINVAL;
-	goto out;
+	return -EINVAL;
 }
 
 static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
@@ -603,13 +596,14 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
 	struct ceph_x_ticket_handler *th;
 	int ret = 0;
 	struct ceph_x_authorize_reply reply;
+	void *preply = &reply;
 	void *p = au->reply_buf;
 	void *end = p + sizeof(au->reply_buf);
 
 	th = get_ticket_handler(ac, au->service);
 	if (IS_ERR(th))
 		return PTR_ERR(th);
-	ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
+	ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply));
 	if (ret < 0)
 		return ret;
 	if (ret != sizeof(reply))
-- 
cgit v1.1


From 381f4dca48d23e155b936b86ccd3ff12f073cf0f Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 10 Sep 2014 23:23:02 +0200
Subject: ipv6: clean up anycast when an interface is destroyed

If we try to rmmod the driver for an interface while sockets with
setsockopt(JOIN_ANYCAST) are alive, some refcounts aren't cleaned up
and we get stuck on:

  unregister_netdevice: waiting for ens3 to become free. Usage count = 1

If we LEAVE_ANYCAST/close everything before rmmod'ing, there is no
problem.

We need to perform a cleanup similar to the one for multicast in
addrconf_ifdown(how == 1).

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c |  8 +++++---
 net/ipv6/anycast.c  | 21 +++++++++++++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fc1fac2..3342ee6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3094,11 +3094,13 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	write_unlock_bh(&idev->lock);
 
-	/* Step 5: Discard multicast list */
-	if (how)
+	/* Step 5: Discard anycast and multicast list */
+	if (how) {
+		ipv6_ac_destroy_dev(idev);
 		ipv6_mc_destroy_dev(idev);
-	else
+	} else {
 		ipv6_mc_down(idev);
+	}
 
 	idev->tstamp = jiffies;
 
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index ff2de7d..9a38684 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -351,6 +351,27 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
 	return __ipv6_dev_ac_dec(idev, addr);
 }
 
+void ipv6_ac_destroy_dev(struct inet6_dev *idev)
+{
+	struct ifacaddr6 *aca;
+
+	write_lock_bh(&idev->lock);
+	while ((aca = idev->ac_list) != NULL) {
+		idev->ac_list = aca->aca_next;
+		write_unlock_bh(&idev->lock);
+
+		addrconf_leave_solict(idev, &aca->aca_addr);
+
+		dst_hold(&aca->aca_rt->dst);
+		ip6_del_rt(aca->aca_rt);
+
+		aca_put(aca);
+
+		write_lock_bh(&idev->lock);
+	}
+	write_unlock_bh(&idev->lock);
+}
+
 /*
  *	check if the interface has this anycast address
  *	called with rcu_read_lock()
-- 
cgit v1.1


From 20adfa1a81af00bf2027644507ad4fa9cd2849cf Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Fri, 12 Sep 2014 16:26:16 -0400
Subject: bridge: Check if vlan filtering is enabled only once.

The bridge code checks if vlan filtering is enabled on both
ingress and egress.   When the state flip happens, it
is possible for the bridge to currently be forwarding packets
and forwarding behavior becomes non-deterministic.  Bridge
may drop packets on some interfaces, but not others.

This patch solves this by caching the filtered state of the
packet into skb_cb on ingress.  The skb_cb is guaranteed to
not be over-written between the time the packet enters the bridge
forwarding path and the time it leaves it.  On egress, we
can then check the cached state to see if we need to
apply filtering information.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_private.h |  3 +++
 net/bridge/br_vlan.c    | 14 ++++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 62a7fa2..b6c04cb 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -309,6 +309,9 @@ struct br_input_skb_cb {
 	int igmp;
 	int mrouters_only;
 #endif
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	bool vlan_filtered;
+#endif
 };
 
 #define BR_INPUT_SKB_CB(__skb)	((struct br_input_skb_cb *)(__skb)->cb)
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index e1bcd65..f645197 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -125,7 +125,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
 {
 	u16 vid;
 
-	if (!br->vlan_enabled)
+	/* If this packet was not filtered at input, let it pass */
+	if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
 		goto out;
 
 	/* Vlan filter table must be configured at this point.  The
@@ -164,8 +165,10 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
 	/* If VLAN filtering is disabled on the bridge, all packets are
 	 * permitted.
 	 */
-	if (!br->vlan_enabled)
+	if (!br->vlan_enabled) {
+		BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
 		return true;
+	}
 
 	/* If there are no vlan in the permitted list, all packets are
 	 * rejected.
@@ -173,6 +176,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
 	if (!v)
 		goto drop;
 
+	BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
 	proto = br->vlan_proto;
 
 	/* If vlan tx offload is disabled on bridge device and frame was
@@ -251,7 +255,8 @@ bool br_allowed_egress(struct net_bridge *br,
 {
 	u16 vid;
 
-	if (!br->vlan_enabled)
+	/* If this packet was not filtered at input, let it pass */
+	if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
 		return true;
 
 	if (!v)
@@ -270,7 +275,8 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
 	struct net_bridge *br = p->br;
 	struct net_port_vlans *v;
 
-	if (!br->vlan_enabled)
+	/* If filtering was disabled at input, let it pass. */
+	if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
 		return true;
 
 	v = rcu_dereference(p->vlan_info);
-- 
cgit v1.1


From 635126b7ca13a01d6322fbf7bdc5d1c738d26807 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Fri, 12 Sep 2014 16:26:17 -0400
Subject: bridge: Allow clearing of pvid and untagged bitmap

Currently, it is possible to modify the vlan filter
configuration to add pvid or untagged support.
For example:
  bridge vlan add vid 10 dev eth0
  bridge vlan add vid 10 dev eth0 untagged pvid

The second statement will modify vlan 10 to
include untagged and pvid configuration.
However, it is currently impossible to go backwards
  bridge vlan add vid 10 dev eth0 untagged pvid
  bridge vlan add vid 10 dev eth0

Here nothing happens.  This patch corrects this so
that any modifiers not supplied are removed from
the configuration.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index f645197..4b86738 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -27,9 +27,13 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
 {
 	if (flags & BRIDGE_VLAN_INFO_PVID)
 		__vlan_add_pvid(v, vid);
+	else
+		__vlan_delete_pvid(v, vid);
 
 	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
 		set_bit(vid, v->untagged_bitmap);
+	else
+		clear_bit(vid, v->untagged_bitmap);
 }
 
 static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
-- 
cgit v1.1


From 7ce64c79c4decdeb1afe0bf2f6ef834b382871d1 Mon Sep 17 00:00:00 2001
From: "Alexander Y. Fomichev" <git.user@gmail.com>
Date: Mon, 15 Sep 2014 14:22:35 +0400
Subject: net: fix creation adjacent device symlinks

__netdev_adjacent_dev_insert may add an adjacent device from a different
net namespace; without a proper check this leads to broken sysfs links
from/to devices in another namespace.
Fix: rewrite netdev_adjacent_is_neigh_list macro as a function,
     move net_eq check into netdev_adjacent_is_neigh_list.
     (thanks David)
     related to: 4c75431ac3520631f1d9e74aa88407e6374dbbc4

Signed-off-by: Alexander Fomichev <git.user@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index ab9a165..cf8a95f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4809,9 +4809,14 @@ static void netdev_adjacent_sysfs_del(struct net_device *dev,
 	sysfs_remove_link(&(dev->dev.kobj), linkname);
 }
 
-#define netdev_adjacent_is_neigh_list(dev, dev_list) \
-		(dev_list == &dev->adj_list.upper || \
-		 dev_list == &dev->adj_list.lower)
+static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
+						 struct net_device *adj_dev,
+						 struct list_head *dev_list)
+{
+	return (dev_list == &dev->adj_list.upper ||
+		dev_list == &dev->adj_list.lower) &&
+		net_eq(dev_net(dev), dev_net(adj_dev));
+}
 
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
@@ -4841,7 +4846,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	pr_debug("dev_hold for %s, because of link added from %s to %s\n",
 		 adj_dev->name, dev->name, adj_dev->name);
 
-	if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
+	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
 		ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
 		if (ret)
 			goto free_adj;
@@ -4862,7 +4867,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	return 0;
 
 remove_symlinks:
-	if (netdev_adjacent_is_neigh_list(dev, dev_list))
+	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
 		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 free_adj:
 	kfree(adj);
@@ -4895,8 +4900,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
 	if (adj->master)
 		sysfs_remove_link(&(dev->dev.kobj), "master");
 
-	if (netdev_adjacent_is_neigh_list(dev, dev_list) &&
-	    net_eq(dev_net(dev),dev_net(adj_dev)))
+	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
 		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 
 	list_del_rcu(&adj->list);
-- 
cgit v1.1


From c095f248e63ada504dd90c90baae673ae10ee3fe Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Mon, 15 Sep 2014 15:24:26 -0400
Subject: bridge: Fix br_should_learn to check vlan_enabled

As Toshiaki Makita pointed out, the BR_INPUT_SKB_CB will
not be initialized in br_should_learn() as that function
is called only from br_handle_local_finish().  That is
an input handler for link-local ethernet traffic so it is perfectly
correct to check br->vlan_enabled here.

Reported-by: Toshiaki Makita<toshiaki.makita1@gmail.com>
Fixes: 20adfa1 bridge: Check if vlan filtering is enabled only once.
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 4b86738..3ba57fc 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -280,7 +280,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
 	struct net_port_vlans *v;
 
 	/* If filtering was disabled at input, let it pass. */
-	if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
+	if (!br->vlan_enabled)
 		return true;
 
 	v = rcu_dereference(p->vlan_info);
-- 
cgit v1.1


From f92ee61982d6da15a9e49664ecd6405a15a2ee56 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 16 Sep 2014 10:08:40 +0200
Subject: xfrm: Generate blackhole routes only from route lookup functions

Currently we generate a blackhole route whenever we have
matching policies but can not resolve the states. Here we assume
that dst_output() is called to kill the blackholed packets.
Unfortunately this assumption is not true in all cases, so
it is possible that these packets leave the system unwanted.

We fix this by generating blackhole routes only from the
route lookup functions, here we can guarantee a call to
dst_output() afterwards.

Fixes: 2774c131b1d ("xfrm: Handle blackhole route creation via afinfo.")
Reported-by: Konstantinos Kolelis <k.kolelis@sirrix.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/route.c       |  6 +++---
 net/ipv6/ip6_output.c  |  4 ++--
 net/xfrm/xfrm_policy.c | 18 +++++++++++++++++-
 3 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index eaa4b00..173e7ea 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2265,9 +2265,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 		return rt;
 
 	if (flp4->flowi4_proto)
-		rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
-						   flowi4_to_flowi(flp4),
-						   sk, 0);
+		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
+							flowi4_to_flowi(flp4),
+							sk, 0);
 
 	return rt;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 315a55d..0a3448b2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1009,7 +1009,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 	if (final_dst)
 		fl6->daddr = *final_dst;
 
-	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 }
 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
 
@@ -1041,7 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 	if (final_dst)
 		fl6->daddr = *final_dst;
 
-	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 }
 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index beeed60..7505674 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2138,7 +2138,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			xfrm_pols_put(pols, drop_pols);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
 
-			return make_blackhole(net, family, dst_orig);
+			return ERR_PTR(-EREMOTE);
 		}
 
 		err = -EAGAIN;
@@ -2195,6 +2195,22 @@ dropdst:
 }
 EXPORT_SYMBOL(xfrm_lookup);
 
+/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
+ * Otherwise we may send out blackholed packets.
+ */
+struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
+				    const struct flowi *fl,
+				    struct sock *sk, int flags)
+{
+	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags);
+
+	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
+		return make_blackhole(net, dst_orig->ops->family, dst_orig);
+
+	return dst;
+}
+EXPORT_SYMBOL(xfrm_lookup_route);
+
 static inline int
 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
 {
-- 
cgit v1.1


From b8c203b2d2fc961bafd53b41d5396bbcdec55998 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 16 Sep 2014 10:08:49 +0200
Subject: xfrm: Generate queueing routes only from route lookup functions

Currently we generate a queueing route if we have matching policies
but can not resolve the states and the sysctl xfrm_larval_drop is
disabled. Here we assume that dst_output() is called to kill the
queued packets. Unfortunately this assumption is not true in all
cases, so it is possible that these packets leave the system unwanted.

We fix this by generating queueing routes only from the
route lookup functions, here we can guarantee a call to
dst_output() afterwards.

Fixes: a0073fe18e71 ("xfrm: Add a state resolution packet queue")
Reported-by: Konstantinos Kolelis <k.kolelis@sirrix.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7505674..fdde51f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,6 +39,11 @@
 #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
 #define XFRM_MAX_QUEUE_LEN	100
 
+struct xfrm_flo {
+	struct dst_entry *dst_orig;
+	u8 flags;
+};
+
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
 						__read_mostly;
@@ -1877,13 +1882,14 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
 }
 
 static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
-						 struct dst_entry *dst,
+						 struct xfrm_flo *xflo,
 						 const struct flowi *fl,
 						 int num_xfrms,
 						 u16 family)
 {
 	int err;
 	struct net_device *dev;
+	struct dst_entry *dst;
 	struct dst_entry *dst1;
 	struct xfrm_dst *xdst;
 
@@ -1891,9 +1897,12 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
 	if (IS_ERR(xdst))
 		return xdst;
 
-	if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0)
+	if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
+	    net->xfrm.sysctl_larval_drop ||
+	    num_xfrms <= 0)
 		return xdst;
 
+	dst = xflo->dst_orig;
 	dst1 = &xdst->u.dst;
 	dst_hold(dst);
 	xdst->route = dst;
@@ -1935,7 +1944,7 @@ static struct flow_cache_object *
 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 		   struct flow_cache_object *oldflo, void *ctx)
 {
-	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	struct xfrm_dst *xdst, *new_xdst;
 	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
@@ -1976,7 +1985,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 			goto make_dummy_bundle;
 	}
 
-	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+						  xflo->dst_orig);
 	if (IS_ERR(new_xdst)) {
 		err = PTR_ERR(new_xdst);
 		if (err != -EAGAIN)
@@ -2010,7 +2020,7 @@ make_dummy_bundle:
 	/* We found policies, but there's no bundles to instantiate:
 	 * either because the policy blocks, has no transformations or
 	 * we could not build template (no xfrm_states).*/
-	xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
+	xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
 	if (IS_ERR(xdst)) {
 		xfrm_pols_put(pols, num_pols);
 		return ERR_CAST(xdst);
@@ -2104,13 +2114,18 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 	}
 
 	if (xdst == NULL) {
+		struct xfrm_flo xflo;
+
+		xflo.dst_orig = dst_orig;
+		xflo.flags = flags;
+
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
 		flo = flow_cache_lookup(net, fl, family, dir,
-					xfrm_bundle_lookup, dst_orig);
+					xfrm_bundle_lookup, &xflo);
 		if (flo == NULL)
 			goto nopol;
 		if (IS_ERR(flo)) {
@@ -2202,7 +2217,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 				    const struct flowi *fl,
 				    struct sock *sk, int flags)
 {
-	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags);
+	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
+					    flags | XFRM_LOOKUP_QUEUE);
 
 	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
 		return make_blackhole(net, dst_orig->ops->family, dst_orig);
@@ -2476,7 +2492,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 
 	skb_dst_force(skb);
 
-	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
+	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
 	if (IS_ERR(dst)) {
 		res = 0;
 		dst = NULL;
-- 
cgit v1.1


From dda3b191eb6c5a56d443723dcb71ade60d97c04f Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 12 Sep 2014 21:49:28 +0200
Subject: net: rfkill: gpio: Enable module auto-loading for ACPI based switches

For the ACPI based switches the MODULE_DEVICE_TABLE is missing to
export the entries for module auto-loading.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill-gpio.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 02a86a2..5fa54dd 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -163,6 +163,7 @@ static const struct acpi_device_id rfkill_acpi_match[] = {
 	{ "LNV4752", RFKILL_TYPE_GPS },
 	{ },
 };
+MODULE_DEVICE_TABLE(acpi, rfkill_acpi_match);
 #endif
 
 static struct platform_driver rfkill_gpio_driver = {
-- 
cgit v1.1


From 9b67aa4a82492f128adfccc63e61ab57c1ce1dfd Mon Sep 17 00:00:00 2001
From: Samuel Gauthier <samuel.gauthier@6wind.com>
Date: Thu, 18 Sep 2014 10:31:04 +0200
Subject: openvswitch: restore OVS_FLOW_CMD_NEW notifications

Since commit fb5d1e9e127a ("openvswitch: Build flow cmd netlink reply only if needed."),
the new flows are not notified to the listeners of OVS_FLOW_MCGROUP.

This commit fixes the problem by using the genl function, i.e.
genl_has_listeners() instead of netlink_has_listeners().

Signed-off-by: Samuel Gauthier <samuel.gauthier@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 91d66b7..64dc864 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -78,11 +78,12 @@ static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
 
 /* Check if need to build a reply message.
  * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
-static bool ovs_must_notify(struct genl_info *info,
-			    const struct genl_multicast_group *grp)
+static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
+			    unsigned int group)
 {
 	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
-		netlink_has_listeners(genl_info_net(info)->genl_sock, 0);
+	       genl_has_listeners(family, genl_info_net(info)->genl_sock,
+				  group);
 }
 
 static void ovs_notify(struct genl_family *family,
@@ -763,7 +764,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
 {
 	struct sk_buff *skb;
 
-	if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
+	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
 		return NULL;
 
 	skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
-- 
cgit v1.1


From 257117862634d89de33fec74858b1a0ba5ab444b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 18 Sep 2014 08:02:05 -0700
Subject: net: sched: shrink struct qdisc_skb_cb to 28 bytes

We cannot make struct qdisc_skb_cb bigger without impacting IPoIB,
or increasing skb->cb[] size.

Commit e0f31d849867 ("flow_keys: Record IP layer protocol in
skb_flow_dissect()") broke IPoIB.

The only current offender is sch_choke, and this one does not need an
absolutely precise flow key.

If we store 17 bytes of flow key, it's more than enough. (It's the actual
size of flow_keys if it was a packed structure, but we might add new
fields at the end of it later)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: e0f31d849867 ("flow_keys: Record IP layer protocol in skb_flow_dissect()")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_choke.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ed30e43..fb666d1 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -133,10 +133,16 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
 	--sch->q.qlen;
 }
 
+/* private part of skb->cb[] that a qdisc is allowed to use
+ * is limited to QDISC_CB_PRIV_LEN bytes.
+ * As a flow key might be too large, we store a part of it only.
+ */
+#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
+
 struct choke_skb_cb {
 	u16			classid;
 	u8			keys_valid;
-	struct flow_keys	keys;
+	u8			keys[QDISC_CB_PRIV_LEN - 3];
 };
 
 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -163,22 +169,26 @@ static u16 choke_get_classid(const struct sk_buff *skb)
 static bool choke_match_flow(struct sk_buff *skb1,
 			     struct sk_buff *skb2)
 {
+	struct flow_keys temp;
+
 	if (skb1->protocol != skb2->protocol)
 		return false;
 
 	if (!choke_skb_cb(skb1)->keys_valid) {
 		choke_skb_cb(skb1)->keys_valid = 1;
-		skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
+		skb_flow_dissect(skb1, &temp);
+		memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
 	}
 
 	if (!choke_skb_cb(skb2)->keys_valid) {
 		choke_skb_cb(skb2)->keys_valid = 1;
-		skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
+		skb_flow_dissect(skb2, &temp);
+		memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
 	}
 
 	return !memcmp(&choke_skb_cb(skb1)->keys,
 		       &choke_skb_cb(skb2)->keys,
-		       sizeof(struct flow_keys));
+		       CHOKE_K_LEN);
 }
 
 /*
-- 
cgit v1.1


From a35165ca101695aa2cc5a6300ef69ae60be39a49 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 22 Sep 2014 10:38:16 -0700
Subject: ipv4: do not use this_cpu_ptr() in preemptible context

this_cpu_ptr() in preemptible context is generally bad

Sep 22 05:05:55 br kernel: [   94.608310] BUG: using smp_processor_id()
in
preemptible [00000000] code: ip/2261
Sep 22 05:05:55 br kernel: [   94.608316] caller is
tunnel_dst_set.isra.28+0x20/0x60 [ip_tunnel]
Sep 22 05:05:55 br kernel: [   94.608319] CPU: 3 PID: 2261 Comm: ip Not
tainted
3.17.0-rc5 #82

We can simply use raw_cpu_ptr(), as preemption is safe in these
contexts.

Should fix https://bugzilla.kernel.org/show_bug.cgi?id=84991

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Joe <joe9mail@gmail.com>
Fixes: 9a4aa9af447f ("ipv4: Use percpu Cache route in IP tunnels")
Acked-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index afed1aa..bd41dd1 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -79,10 +79,10 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
 	idst->saddr = saddr;
 }
 
-static void tunnel_dst_set(struct ip_tunnel *t,
+static noinline void tunnel_dst_set(struct ip_tunnel *t,
 			   struct dst_entry *dst, __be32 saddr)
 {
-	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst, saddr);
+	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
 }
 
 static void tunnel_dst_reset(struct ip_tunnel *t)
@@ -106,7 +106,7 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
 	struct dst_entry *dst;
 
 	rcu_read_lock();
-	idst = this_cpu_ptr(t->dst_cache);
+	idst = raw_cpu_ptr(t->dst_cache);
 	dst = rcu_dereference(idst->dst);
 	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
 		dst = NULL;
-- 
cgit v1.1