From b4ace4f1ae07691b4f6ea9f3e92efbec083df058 Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Tue, 19 Jan 2016 14:27:08 -0500
Subject: soreuseport: fix NULL ptr dereference SO_REUSEPORT after bind

Marc Dionne discovered a NULL pointer dereference when setting
SO_REUSEPORT on a socket after it is bound.
This patch removes the assumption that at least one socket in the
reuseport group is bound with the SO_REUSEPORT option before other
bind calls occur.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Reported-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: Craig Gallek <kraig@google.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock_reuseport.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 7dda3d7..aecd303 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,7 +16,7 @@ struct sock_reuseport {
 };
 
 extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
-- 
cgit v1.1


From b16c29191dc89bd877af99a7b04ce4866728a3e0 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Mon, 18 Jan 2016 19:23:51 -0500
Subject: netfilter: nf_conntrack: use safer way to lock all buckets

When we need to lock all buckets in the connection hashtable we'd attempt to
lock 1024 spinlocks, which is way more preemption levels than supported by
the kernel. Furthermore, this behavior was hidden by checking if lockdep is
enabled, and if it was - use only 8 buckets(!).

Fix this by using a global lock and synchronize all buckets on it when we
need to lock them all. This is pretty heavyweight, but is only done when we
need to resize the hashtable, and that doesn't happen often enough (or at all).

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 788ef58..62e17d1 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -79,12 +79,10 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *proto);
 
-#ifdef CONFIG_LOCKDEP
-# define CONNTRACK_LOCKS 8
-#else
-# define CONNTRACK_LOCKS 1024
-#endif
+#define CONNTRACK_LOCKS 1024
+
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
-- 
cgit v1.1


From ce87fc6ce3f9f4488546187e3757cf666d9d4a2a Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@kernel.org>
Date: Wed, 20 Jan 2016 17:59:49 -0800
Subject: gro: Make GRO aware of lightweight tunnels.

GRO is currently not aware of tunnel metadata generated by lightweight
tunnels and stored in the dst. This leads to two possible problems:
 * Incorrectly merging two frames that have different metadata.
 * Leaking of allocated metadata from merged frames.

This avoids those problems by comparing the tunnel information before
merging, similar to how we handle other metadata (such as vlan tags),
and releasing any state when we are done.

Reported-by: John <john.phillips5@hpe.com>
Fixes: 2e15ea39 ("ip_gre: Add support to collect tunnel metadata.")
Signed-off-by: Jesse Gross <jesse@kernel.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 6816f0f..30a56ab 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -44,6 +44,24 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
 	return dst && !(dst->flags & DST_METADATA);
 }
 
+static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
+				       const struct sk_buff *skb_b)
+{
+	const struct metadata_dst *a, *b;
+
+	if (!(skb_a->_skb_refdst | skb_b->_skb_refdst))
+		return 0;
+
+	a = (const struct metadata_dst *) skb_dst(skb_a);
+	b = (const struct metadata_dst *) skb_dst(skb_b);
+
+	if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len)
+		return 1;
+
+	return memcmp(&a->u.tun_info, &b->u.tun_info,
+		      sizeof(a->u.tun_info) + a->u.tun_info.options_len);
+}
+
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
-- 
cgit v1.1


From 4877be9019baaf1432f9117bff4873e4ad518d91 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 21 Jan 2016 15:56:28 -0500
Subject: net: sock: remove dead cgroup methods from struct proto

The cgroup methods are no longer used after baac50bbc3cd ("net:
tcp_memcontrol: simplify linkage between socket and page counter").
The hunk to delete them was included in the original patch but must
have gotten lost during conflict resolution on the way upstream.

Fixes: baac50bbc3cd ("net: tcp_memcontrol: simplify linkage between socket and page counter")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index b9e7b3d..f5ea148 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1036,18 +1036,6 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
-#ifdef CONFIG_MEMCG_KMEM
-	/*
-	 * cgroup specific init/deinit functions. Called once for all
-	 * protocols that implement it, from cgroups populate function.
-	 * This function has to setup any files the protocol want to
-	 * appear in the kmem cgroup filesystem.
-	 */
-	int			(*init_cgroup)(struct mem_cgroup *memcg,
-					       struct cgroup_subsys *ss);
-	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
-	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
-#endif
 	int			(*diag_destroy)(struct sock *sk, int err);
 };
 
-- 
cgit v1.1


From 6a84f57241e1fb9fb6772256f538d1073359a32d Mon Sep 17 00:00:00 2001
From: Gayatri Kammela <gayatri.kammela@intel.com>
Date: Thu, 21 Jan 2016 14:02:39 -0800
Subject: raid6/algos.c : bug fix : Add the missing definitions to the pq.h
 file

Adding these pr_info and pr_err definitions so as to allow code to be
compiled successfully for testing in userspace, since the printk has
been replaced by pr_info and pr_err in algos.c

Absence of these definitions result in the compilation errors
such as ' undefined reference to `pr_info' ' ' undefined reference to
`pr_err' '

Cc: NeilBrown <neilb@suse.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 include/linux/raid/pq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index a7a06d1..a0118d5 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -152,6 +152,8 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 
 # define jiffies	raid6_jiffies()
 # define printk 	printf
+# define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+# define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
 # define GFP_KERNEL	0
 # define __get_free_pages(x, y)	((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
 						     PROT_READ|PROT_WRITE,   \
-- 
cgit v1.1


From facc432faa59414bd7c60c307ff1645154a66c98 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 22 Jan 2016 11:43:44 +0100
Subject: net: simplify napi_synchronize() to avoid warnings

The napi_synchronize() function is defined twice: The definition
for SMP builds waits for other CPUs to be done, while the uniprocessor
variant just contains a barrier and ignores its argument.

In the mvneta driver, this leads to a warning about an unused variable
when we lookup the NAPI struct of another CPU and then don't use it:

ethernet/marvell/mvneta.c: In function 'mvneta_percpu_notifier':
ethernet/marvell/mvneta.c:2910:30: error: unused variable 'other_port' [-Werror=unused-variable]

There are no other CPUs on a UP build, so that code never runs, but
gcc does not know this.

The nicest solution seems to be to turn the napi_synchronize() helper
into an inline function for the UP case as well, as that leads gcc to
not complain about the argument being unused. Once we do that, we can
also combine the two cases into a single function definition and use
if(IS_ENABLED()) rather than #ifdef to make it look a bit nicer.

The warning first came up in linux-4.4, but I failed to catch it
earlier.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: f86428854480 ("net: mvneta: Statically assign queues to CPUs")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5ac140d..289c231 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -512,7 +512,6 @@ static inline void napi_enable(struct napi_struct *n)
 	clear_bit(NAPI_STATE_NPSVC, &n->state);
 }
 
-#ifdef CONFIG_SMP
 /**
  *	napi_synchronize - wait until NAPI is not running
  *	@n: napi context
@@ -523,12 +522,12 @@ static inline void napi_enable(struct napi_struct *n)
  */
 static inline void napi_synchronize(const struct napi_struct *n)
 {
-	while (test_bit(NAPI_STATE_SCHED, &n->state))
-		msleep(1);
+	if (IS_ENABLED(CONFIG_SMP))
+		while (test_bit(NAPI_STATE_SCHED, &n->state))
+			msleep(1);
+	else
+		barrier();
 }
-#else
-# define napi_synchronize(n)	barrier()
-#endif
 
 enum netdev_queue_state_t {
 	__QUEUE_STATE_DRV_XOFF,
-- 
cgit v1.1


From 71f50c6d9a2276f3ec85384bffe2aee1962f4669 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 22 Jan 2016 01:33:51 +0900
Subject: of: drop symbols declared by _OF_DECLARE() from modules

The users of this macro (OF_EARLYCON_DECLARE, CLK_OF_DECLARE,
IRQCHIP_DECLARE, etc.) are only parsed in the early boot stage.
Such symbols contained in modules are never used.

This commit fixes the link error introduced by commit b8d20e06eaad
("serial: 8250_uniphier: add earlycon support"); the combination
of CONFIG_SERIAL_8250_UNIPHIER=m and CONFIG_SERIAL_8250_CONSOLE=y
fails to link:

ERROR: "early_serial8250_setup" [drivers/tty/serial/8250/8250_uniphier.ko] undefined!

Fixes: b8d20e06eaad ("serial: 8250_uniphier: add earlycon support")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index dd10626..dc6e396 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -929,7 +929,7 @@ static inline int of_get_available_child_count(const struct device_node *np)
 	return num;
 }
 
-#ifdef CONFIG_OF
+#if defined(CONFIG_OF) && !defined(MODULE)
 #define _OF_DECLARE(table, name, compat, fn, fn_type)			\
 	static const struct of_device_id __of_table_##name		\
 		__used __section(__##table##_of_table)			\
-- 
cgit v1.1


From 1eed677933b816978abc4e3e18ecae5f254cb9be Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 22 Jan 2016 01:49:07 +0800
Subject: sctp: fix the transport dead race check by using atomic_add_unless on
 refcnt

Now when __sctp_lookup_association is running in BH, it will try to
check if t->dead is set, but meanwhile other CPUs may be freeing this
transport and this assoc and if it happens that
__sctp_lookup_association checked t->dead a bit too early, it may think
that the association is still good while it was already freed.

So we fix this race by using atomic_add_unless in sctp_transport_hold.
After we get one transport from hashtable, we will hold it only when
this transport's refcnt is not 0, so that we can make sure t->asoc
cannot be freed before we hold the asoc again.

Note that sctp association is not freed using RCU so we can't use
atomic_add_unless() with it as it may just be too late for that either.

Fixes: 4f0087812648 ("sctp: apply rhashtable api to send/recv path")
Reported-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 20e7212..344da04 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -955,7 +955,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
 void sctp_transport_free(struct sctp_transport *);
 void sctp_transport_reset_timers(struct sctp_transport *);
-void sctp_transport_hold(struct sctp_transport *);
+int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
 void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
-- 
cgit v1.1


From 47faa1e4c50ec26e6e75dcd1ce53f064bd45f729 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 22 Jan 2016 01:49:09 +0800
Subject: sctp: remove the dead field of sctp_transport

After we use refcnt to check if transport is alive, the dead can be
removed from sctp_transport.

The traversal of transport_addr_list in procfs dump is using
list_for_each_entry_rcu, no need to check if it has been freed.

sctp_generate_t3_rtx_event and sctp_generate_heartbeat_event is
protected by sock lock, it's not necessary to check dead, either.
also, the timers are cancelled when sctp_transport_free() is
called, that it doesn't wait for refcnt to reach 0 to cancel them.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 344da04..205630b 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -756,7 +756,6 @@ struct sctp_transport {
 
 	/* Reference counting. */
 	atomic_t refcnt;
-	__u32	 dead:1,
 		/* RTO-Pending : A flag used to track if one of the DATA
 		 *		chunks sent to this address is currently being
 		 *		used to compute a RTT. If this flag is 0,
@@ -766,7 +765,7 @@ struct sctp_transport {
 		 *		calculation completes (i.e. the DATA chunk
 		 *		is SACK'd) clear this flag.
 		 */
-		 rto_pending:1,
+	__u32	rto_pending:1,
 
 		/*
 		 * hb_sent : a flag that signals that we have a pending
-- 
cgit v1.1


From 114f9f1e038eb23935c20fb54f49f07caaa0546d Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 26 Jan 2016 17:19:09 -0500
Subject: Bluetooth: L2CAP: Introduce proper defines for PSM ranges

Having proper defines makes the code a bit readable, it also avoids
duplicating hard-coded values since these are also needed when
auto-allocating PSM values (in a subsequent patch).

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 5289929..5ee3c68 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -252,6 +252,12 @@ struct l2cap_conn_rsp {
 #define L2CAP_PSM_3DSP		0x0021
 #define L2CAP_PSM_IPSP		0x0023 /* 6LoWPAN */
 
+#define L2CAP_PSM_DYN_START	0x1001
+#define L2CAP_PSM_DYN_END	0xffff
+#define L2CAP_PSM_AUTO_END	0x10ff
+#define L2CAP_PSM_LE_DYN_START  0x0080
+#define L2CAP_PSM_LE_DYN_END	0x00ff
+
 /* channel identifier */
 #define L2CAP_CID_SIGNALING	0x0001
 #define L2CAP_CID_CONN_LESS	0x0002
-- 
cgit v1.1


From 21603fc45bdef3696f45d80a1c9d730e06b925c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@higgsboson.tk>
Date: Wed, 27 Jan 2016 22:54:06 +0100
Subject: tcp: Change reference to experimental CWND RFC.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jörg Thalheim <joerg@higgsboson.tk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8ea1997..f6f8f03 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -216,7 +216,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* TCP thin-stream limits */
 #define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */
 
-/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
+/* TCP initial congestion window as per rfc6928 */
 #define TCP_INIT_CWND		10
 
 /* Bit Flags for sysctl_tcp_fastopen */
-- 
cgit v1.1


From 6f21c96a78b835259546d8f3fb4edff0f651d478 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 29 Jan 2016 12:30:19 +0100
Subject: ipv6: enforce flowi6_oif usage in ip6_dst_lookup_tail()

The current implementation of ip6_dst_lookup_tail basically
ignore the egress ifindex match: if the saddr is set,
ip6_route_output() purposefully ignores flowi6_oif, due
to the commit d46a9d678e4c ("net: ipv6: Dont add RT6_LOOKUP_F_IFACE
flag if saddr set"), if the saddr is 'any' the first route lookup
in ip6_dst_lookup_tail fails, but upon failure a second lookup will
be performed with saddr set, thus ignoring the ifindex constraint.

This commit adds an output route lookup function variant, which
allows the caller to specify lookup flags, and modify
ip6_dst_lookup_tail() to enforce the ifindex match on the second
lookup via said helper.

ip6_route_output() becames now a static inline function build on
top of ip6_route_output_flags(); as a side effect, out-of-tree
modules need now a GPL license to access the output route lookup
functionality.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 877f682..295d291 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 
 void ip6_route_input(struct sk_buff *skb);
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-				   struct flowi6 *fl6);
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+					 struct flowi6 *fl6, int flags);
+
+static inline struct dst_entry *ip6_route_output(struct net *net,
+						 const struct sock *sk,
+						 struct flowi6 *fl6)
+{
+	return ip6_route_output_flags(net, sk, fl6, 0);
+}
+
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags);
 
-- 
cgit v1.1


From 65f87ee71852a754f7981d0653e7136039b8798a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 25 Jan 2016 17:23:18 -0800
Subject: fs, block: force direct-I/O for dax-enabled block devices

Similar to the file I/O path, re-direct all I/O to the DAX path for I/O
to a block-device special file.  Both regular files and device special
files can use the common filp->f_mapping->host lookup to determing is
DAX is enabled.

Otherwise, we confuse the DAX code that does not expect to find live
data in the page cache:

    ------------[ cut here ]------------
    WARNING: CPU: 0 PID: 7676 at mm/filemap.c:217
    __delete_from_page_cache+0x9f6/0xb60()
    Modules linked in:
    CPU: 0 PID: 7676 Comm: a.out Not tainted 4.4.0+ #276
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
     00000000ffffffff ffff88006d3f7738 ffffffff82999e2d 0000000000000000
     ffff8800620a0000 ffffffff86473d20 ffff88006d3f7778 ffffffff81352089
     ffffffff81658d36 ffffffff86473d20 00000000000000d9 ffffea0000009d60
    Call Trace:
     [<     inline     >] __dump_stack lib/dump_stack.c:15
     [<ffffffff82999e2d>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
     [<ffffffff81352089>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482
     [<ffffffff813522b9>] warn_slowpath_null+0x29/0x30 kernel/panic.c:515
     [<ffffffff81658d36>] __delete_from_page_cache+0x9f6/0xb60 mm/filemap.c:217
     [<ffffffff81658fb2>] delete_from_page_cache+0x112/0x200 mm/filemap.c:244
     [<ffffffff818af369>] __dax_fault+0x859/0x1800 fs/dax.c:487
     [<ffffffff8186f4f6>] blkdev_dax_fault+0x26/0x30 fs/block_dev.c:1730
     [<     inline     >] wp_pfn_shared mm/memory.c:2208
     [<ffffffff816e9145>] do_wp_page+0xc85/0x14f0 mm/memory.c:2307
     [<     inline     >] handle_pte_fault mm/memory.c:3323
     [<     inline     >] __handle_mm_fault mm/memory.c:3417
     [<ffffffff816ecec3>] handle_mm_fault+0x2483/0x4640 mm/memory.c:3446
     [<ffffffff8127eff6>] __do_page_fault+0x376/0x960 arch/x86/mm/fault.c:1238
     [<ffffffff8127f738>] trace_do_page_fault+0xe8/0x420 arch/x86/mm/fault.c:1331
     [<ffffffff812705c4>] do_async_page_fault+0x14/0xd0 arch/x86/kernel/kvm.c:264
     [<ffffffff86338f78>] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:986
     [<ffffffff86336c36>] entry_SYSCALL_64_fastpath+0x16/0x7a
    arch/x86/entry/entry_64.S:185
    ---[ end trace dae21e0f85f1f98c ]---

Fixes: 5a023cdba50c ("block: enable dax for raw block devices")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reported-by: Kirill A. Shutemov <kirill@shutemov.name>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1a20462..b10002d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2907,7 +2907,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
 
 static inline bool io_is_direct(struct file *filp)
 {
-	return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
+	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
 static inline int iocb_flags(struct file *file)
-- 
cgit v1.1


From 9f4736fe7ca804aa79b5916221bb13dfc6221a0f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 28 Jan 2016 20:13:39 -0800
Subject: block: revert runtime dax control of the raw block device

Dynamically enabling DAX requires that the page cache first be flushed
and invalidated.  This must occur atomically with the change of DAX mode
otherwise we confuse the fsync/msync tracking and violate data
durability guarantees.  Eliminate the possibilty of DAX-disabled to
DAX-enabled transitions for now and revisit this for the next cycle.

Cc: Jan Kara <jack@suse.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/fs.h      | 3 ---
 include/uapi/linux/fs.h | 1 -
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b10002d..ae68100 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -484,9 +484,6 @@ struct block_device {
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
-#ifdef CONFIG_FS_DAX
-	int			bd_map_count;
-#endif
 };
 
 /*
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 41e0433..149bec8 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -222,7 +222,6 @@ struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
-#define BLKDAXSET _IO(0x12,128)
 #define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
-- 
cgit v1.1


From d1a5f2b4d8a125943dcb6b032fc7eaefc2c78296 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 28 Jan 2016 20:25:31 -0800
Subject: block: use DAX for partition table reads

Avoid populating pagecache when the block device is in DAX mode.
Otherwise these page cache entries collide with the fsync/msync
implementation and break data durability guarantees.

Cc: Jan Kara <jack@suse.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8204c3d..818e450 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
+
+#ifdef CONFIG_FS_DAX
+struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+#else
+static inline struct page *read_dax_sector(struct block_device *bdev,
+		sector_t n)
+{
+	return ERR_PTR(-ENXIO);
+}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
 				unsigned int flags, get_block_t, dax_iodone_t);
-- 
cgit v1.1


From 76e9f0ee52b0be5761e29847e0ef01f23f24f1df Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 22 Jan 2016 09:43:28 -0800
Subject: phys_to_pfn_t: use phys_addr_t

A dma_addr_t is potentially smaller than a phys_addr_t on some archs.
Don't truncate the address when doing the pfn conversion.

Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reported-by: Matthew Wilcox <willy@linux.intel.com>
[willy: fix pfn_t_to_phys as well]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/pfn_t.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 0703b53..37448ab 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -29,7 +29,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
 	return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -48,7 +48,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
 	return NULL;
 }
 
-static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
 {
 	return PFN_PHYS(pfn_t_to_pfn(pfn));
 }
-- 
cgit v1.1


From 4b0e4e4af6c6dc8354dcb72182d52c1bc55f12fc Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sat, 30 Jan 2016 07:59:32 +0200
Subject: drm: add helper to check for wc memory support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 include/drm/drm_cache.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index 7bfb063..461a055 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -35,4 +35,13 @@
 
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
 
+static inline bool drm_arch_can_wc_memory(void)
+{
+#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
+	return false;
+#else
+	return true;
+#endif
+}
+
 #endif
-- 
cgit v1.1


From 2db8f9a1d86aa32a9cbf1a975882b4fa162f040f Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Tue, 26 Jan 2016 12:43:00 -0600
Subject: ACPI / CPPC: remove redundant mbox_send_message() declaration

Remove a redundant function declaration in cppc_acpi.h for
mbox_send_message().  That function is defined in mailbox_client.h,
which is already included.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/cppc_acpi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 717a298..dad8af3 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -133,6 +133,5 @@ extern int acpi_get_psd_map(struct cpudata **);
 /* Methods to interact with the PCC mailbox controller. */
 extern struct mbox_chan *
 	pcc_mbox_request_channel(struct mbox_client *, unsigned int);
-extern int mbox_send_message(struct mbox_chan *chan, void *mssg);
 
 #endif /* _CPPC_ACPI_H*/
-- 
cgit v1.1


From 06ab30034ed9c200a570ab13c017bde248ddb2a6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 31 Jan 2016 11:57:41 +0100
Subject: ALSA: rawmidi: Make snd_rawmidi_transmit() race-free

A kernel WARNING in snd_rawmidi_transmit_ack() is triggered by
syzkaller fuzzer:
  WARNING: CPU: 1 PID: 20739 at sound/core/rawmidi.c:1136
Call Trace:
 [<     inline     >] __dump_stack lib/dump_stack.c:15
 [<ffffffff82999e2d>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
 [<ffffffff81352089>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482
 [<ffffffff813522b9>] warn_slowpath_null+0x29/0x30 kernel/panic.c:515
 [<ffffffff84f80bd5>] snd_rawmidi_transmit_ack+0x275/0x400 sound/core/rawmidi.c:1136
 [<ffffffff84fdb3c1>] snd_virmidi_output_trigger+0x4b1/0x5a0 sound/core/seq/seq_virmidi.c:163
 [<     inline     >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150
 [<ffffffff84f87ed9>] snd_rawmidi_kernel_write1+0x549/0x780 sound/core/rawmidi.c:1223
 [<ffffffff84f89fd3>] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1273
 [<ffffffff817b0323>] __vfs_write+0x113/0x480 fs/read_write.c:528
 [<ffffffff817b1db7>] vfs_write+0x167/0x4a0 fs/read_write.c:577
 [<     inline     >] SYSC_write fs/read_write.c:624
 [<ffffffff817b50a1>] SyS_write+0x111/0x220 fs/read_write.c:616
 [<ffffffff86336c36>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185

Also a similar warning is found but in another path:
Call Trace:
 [<     inline     >] __dump_stack lib/dump_stack.c:15
 [<ffffffff82be2c0d>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
 [<ffffffff81355139>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482
 [<ffffffff81355369>] warn_slowpath_null+0x29/0x30 kernel/panic.c:515
 [<ffffffff8527e69a>] rawmidi_transmit_ack+0x24a/0x3b0 sound/core/rawmidi.c:1133
 [<ffffffff8527e851>] snd_rawmidi_transmit_ack+0x51/0x80 sound/core/rawmidi.c:1163
 [<ffffffff852d9046>] snd_virmidi_output_trigger+0x2b6/0x570 sound/core/seq/seq_virmidi.c:185
 [<     inline     >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150
 [<ffffffff85285a0b>] snd_rawmidi_kernel_write1+0x4bb/0x760 sound/core/rawmidi.c:1252
 [<ffffffff85287b73>] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1302
 [<ffffffff817ba5f3>] __vfs_write+0x113/0x480 fs/read_write.c:528
 [<ffffffff817bc087>] vfs_write+0x167/0x4a0 fs/read_write.c:577
 [<     inline     >] SYSC_write fs/read_write.c:624
 [<ffffffff817bf371>] SyS_write+0x111/0x220 fs/read_write.c:616
 [<ffffffff86660276>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185

In the former case, the reason is that virmidi has an open code
calling snd_rawmidi_transmit_ack() with the value calculated outside
the spinlock.   We may use snd_rawmidi_transmit() in a loop just for
consuming the input data, but even there, there is a race between
snd_rawmidi_transmit_peek() and snd_rawmidi_tranmit_ack().

Similarly in the latter case, it calls snd_rawmidi_transmit_peek() and
snd_rawmidi_tranmit_ack() separately without protection, so they are
racy as well.

The patch tries to address these issues by the following ways:
- Introduce the unlocked versions of snd_rawmidi_transmit_peek() and
  snd_rawmidi_transmit_ack() to be called inside the explicit lock.
- Rewrite snd_rawmidi_transmit() to be race-free (the former case).
- Make the split calls (the latter case) protected in the rawmidi spin
  lock.

BugLink: http://lkml.kernel.org/r/CACT4Y+YPq1+cYLkadwjWa5XjzF1_Vki1eHnVn-Lm0hzhSpu5PA@mail.gmail.com
BugLink: http://lkml.kernel.org/r/CACT4Y+acG4iyphdOZx47Nyq_VHGbpJQK-6xNpiqUjaZYqsXOGw@mail.gmail.com
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/rawmidi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index fdabbb4..f730b91 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
 int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
 			 unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+			      unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream,
+			       int count);
 
 /* main midi functions */
 
-- 
cgit v1.1


From a3d0a918502cc73af4f60da2cc4c5cac5573f183 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Tue, 2 Feb 2016 16:57:08 -0800
Subject: thp: make split_queue per-node

Andrea Arcangeli suggested to make split queue per-node to improve
scalability.  Let's do it.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 33bb1b1..7b6c2cf 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -682,6 +682,12 @@ typedef struct pglist_data {
 	 */
 	unsigned long first_deferred_pfn;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	spinlock_t split_queue_lock;
+	struct list_head split_queue;
+	unsigned long split_queue_len;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
-- 
cgit v1.1


From 65376df582174ffcec9e6471bf5b0dd79ba05e4a Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 2 Feb 2016 16:57:29 -0800
Subject: proc: revert /proc/<pid>/maps [stack:TID] annotation

Commit b76437579d13 ("procfs: mark thread stack correctly in
proc/<pid>/maps") added [stack:TID] annotation to /proc/<pid>/maps.

Finding the task of a stack VMA requires walking the entire thread list,
turning this into quadratic behavior: a thousand threads means a
thousand stacks, so the rendering of /proc/<pid>/maps needs to look at a
million combinations.

The cost is not in proportion to the usefulness as described in the
patch.

Drop the [stack:TID] annotation to make /proc/<pid>/maps (and
/proc/<pid>/numa_maps) usable again for higher thread counts.

The [stack] annotation inside /proc/<pid>/task/<tid>/maps is retained, as
identifying the stack VMA there is an O(1) operation.

Siddesh said:
 "The end users needed a way to identify thread stacks programmatically and
  there wasn't a way to do that.  I'm afraid I no longer remember (or have
  access to the resources that would aid my memory since I changed
  employers) the details of their requirement.  However, I did do this on my
  own time because I thought it was an interesting project for me and nobody
  really gave any feedback then as to its utility, so as far as I am
  concerned you could roll back the main thread maps information since the
  information is available in the thread-specific files"

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Siddhesh Poyarekar <siddhesh.poyarekar@gmail.com>
Cc: Shaohua Li <shli@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1cd22f..0b50d78 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1341,8 +1341,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
 		!vma_growsup(vma->vm_next, addr);
 }
 
-extern struct task_struct *task_of_stack(struct task_struct *task,
-				struct vm_area_struct *vma, bool in_group);
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
-- 
cgit v1.1


From c792e8240338e41eda4d06a3a71a7bb7af4e6156 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 2 Feb 2016 16:57:38 -0800
Subject: mm: memcontrol: drop superfluous entry in the per-memcg stats array

MEM_CGROUP_STAT_NSTATS is just a delimiter for cgroup1 statistics, not
an actual array entry.  Reuse it for the first cgroup2 stat entry, like
in the event array.

Fixes: b2807f07f4f8 ("mm: memcontrol: add "sock" to cgroup2 memory.stat")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9ae48d4..792c898 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -51,7 +51,7 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_SWAP,		/* # of pages, swapped out */
 	MEM_CGROUP_STAT_NSTATS,
 	/* default hierarchy stats */
-	MEMCG_SOCK,
+	MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS,
 	MEMCG_NR_STAT,
 };
 
-- 
cgit v1.1


From 30bdbb78009e67767983085e302bec6d97afc679 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Tue, 2 Feb 2016 16:57:46 -0800
Subject: mm: polish virtual memory accounting

* add VM_STACK as alias for VM_GROWSUP/DOWN depending on architecture
* always account VMAs with flag VM_STACK as stack (as it was before)
* cleanup classifying helpers
* update comments and documentation

Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Tested-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h       | 6 ++++--
 include/linux/mm_types.h | 6 +++---
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0b50d78..516e149 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -201,11 +201,13 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 
 #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK_FLAGS	(VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK	VM_GROWSUP
 #else
-#define VM_STACK_FLAGS	(VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK	VM_GROWSDOWN
 #endif
 
+#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d3ebb9d..624b78b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -424,9 +424,9 @@ struct mm_struct {
 	unsigned long total_vm;		/* Total pages mapped */
 	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
 	unsigned long pinned_vm;	/* Refcount permanently increased */
-	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED/GROWSDOWN */
-	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
-	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
+	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+	unsigned long stack_vm;		/* VM_STACK */
 	unsigned long def_flags;
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long start_brk, brk, start_stack;
-- 
cgit v1.1


From 46437f9a554fbe3e110580ca08ab703b59f2f95a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Tue, 2 Feb 2016 16:57:52 -0800
Subject: radix-tree: fix race in gang lookup

If the indirect_ptr bit is set on a slot, that indicates we need to redo
the lookup.  Introduce a new function radix_tree_iter_retry() which
forces the loop to retry the lookup by setting 'slot' to NULL and
turning the iterator back to point at the problematic entry.

This is a pretty rare problem to hit at the moment; the lookup has to
race with a grow of the radix tree from a height of 0.  The consequences
of hitting this race are that gang lookup could return a pointer to a
radix_tree_node instead of a pointer to whatever the user had inserted
in the tree.

Fixes: cebbd29e1c2f ("radix-tree: rewrite gang lookup using iterator")
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ohad Ben-Cohen <ohad@wizery.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7c88ad1..00b17c5 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -379,6 +379,22 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 			     struct radix_tree_iter *iter, unsigned flags);
 
 /**
+ * radix_tree_iter_retry - retry this chunk of the iteration
+ * @iter:	iterator state
+ *
+ * If we iterate over a tree protected only by the RCU lock, a race
+ * against deletion or creation may result in seeing a slot for which
+ * radix_tree_deref_retry() returns true.  If so, call this function
+ * and continue the iteration.
+ */
+static inline __must_check
+void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+{
+	iter->next_index = iter->index;
+	return NULL;
+}
+
+/**
  * radix_tree_chunk_size - get current chunk size
  *
  * @iter:	pointer to radix tree iterator
-- 
cgit v1.1


From fac710e45d0b12443acd4d905c9acec27ab4ca14 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 27 Jan 2016 10:08:42 -0200
Subject: [media] vb2: fix nasty vb2_thread regression

The vb2_thread implementation was made generic and was moved from
videobuf2-v4l2.c to videobuf2-core.c in commit af3bac1a. Unfortunately
that clearly was never tested since it broke read() causing NULL address
references.

The root cause was confused handling of vb2_buffer vs v4l2_buffer (the pb
pointer in various core functions).

The v4l2_buffer no longer exists after moving the code into the core and
it is no longer needed. However, the vb2_thread code passed a pointer to
a vb2_buffer to the core functions were a v4l2_buffer pointer was expected
and vb2_thread expected that the vb2_buffer fields would be filled in
correctly.

This is obviously wrong since v4l2_buffer != vb2_buffer. Note that the
pb pointer is a void pointer, so no type-checking took place.

This patch fixes this problem:

1) allow pb to be NULL for vb2_core_(d)qbuf. The vb2_thread code will use
   a NULL pointer here since they don't care about v4l2_buffer anyway.
2) let vb2_core_dqbuf pass back the index of the received buffer. This is
   all vb2_thread needs: this index is the index into the q->bufs array
   and vb2_thread just gets the vb2_buffer from there.
3) the fileio->b pointer (that originally contained a v4l2_buffer) is
   removed altogether since it is no longer needed.

Tested with vivid and the cobalt driver.

Cc: stable@vger.kernel.org # Kernel >= 4.3
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reported-by: Matthias Schwarzott <zzam@gentoo.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/media/videobuf2-core.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index ef03ae5..8a0f55b 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -533,7 +533,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
 		const unsigned int requested_sizes[]);
 int vb2_core_prepare_buf(struct vb2_queue *q, unsigned int index, void *pb);
 int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb);
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking);
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+		   bool nonblocking);
 
 int vb2_core_streamon(struct vb2_queue *q, unsigned int type);
 int vb2_core_streamoff(struct vb2_queue *q, unsigned int type);
-- 
cgit v1.1


From dc6ae6d8e7726bad4f1c87244b49cac851746c65 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Sun, 31 Jan 2016 14:36:07 +0100
Subject: crush: add chooseleaf_stable tunable

Add a tunable to fix the bug that chooseleaf may cause unnecessary pg
migrations when some device fails.

Reflects ceph.git commit fdb3f664448e80d984470f32f04e2e6f03ab52ec.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/crush/crush.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 48b4930..be8f12b 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -59,7 +59,8 @@ enum {
 	CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
 	CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
 	CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
-	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
+	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
+	CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
 };
 
 /*
@@ -205,6 +206,11 @@ struct crush_map {
 	 * mappings line up a bit better with previous mappings. */
 	__u8 chooseleaf_vary_r;
 
+	/* if true, it makes chooseleaf firstn to return stable results (if
+	 * no local retry) so that data migrations would be optimal when some
+	 * device fails. */
+	__u8 chooseleaf_stable;
+
 #ifndef __KERNEL__
 	/*
 	 * version 0 (original) of straw_calc has various flaws.  version 1
-- 
cgit v1.1


From 97db9a88186e3a7d3a1942370c836bf221d3ab90 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 1 Feb 2016 17:08:33 +0100
Subject: libceph: advertise support for TUNABLES5

Add TUNABLES5 feature (chooseleaf_stable tunable) to a set of features
supported by default.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index f89b31d..c3b211c 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -63,6 +63,16 @@
 #define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
 // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
 #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
+#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
+#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
+#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
+#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
+#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
+#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
+#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
+#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
+#define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -108,7 +118,8 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
-	 CEPH_FEATURE_CRUSH_V4)
+	 CEPH_FEATURE_CRUSH_V4 |		\
+	 CEPH_FEATURE_CRUSH_TUNABLES5)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
-- 
cgit v1.1


From b0b31a8ffe54abf0a455bcaee54dd92f08817164 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 3 Feb 2016 15:25:48 +0100
Subject: libceph: MOSDOpReply v7 encoding

Empty request_redirect_t (struct ceph_request_redirect in the kernel
client) is now encoded with a bool.  NEW_OSDOPREPLY_ENCODING feature
bit overlaps with already supported CRUSH_TUNABLES5.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index c3b211c..c1ef6f1 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -73,6 +73,8 @@
 #define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
 #define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
 #define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
+// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
+#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -119,7 +121,8 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
-	 CEPH_FEATURE_CRUSH_TUNABLES5)
+	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
+	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
-- 
cgit v1.1


From 64566b5e767f9bc3161055ca1b443a51afb52aad Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 22 Jan 2016 17:07:25 -0500
Subject: drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil

drm_fixp_from_fraction allows us to create a fixed point directly
from a fraction, rather than creating fixed point values and dividing
later. This avoids overflow of our 64 bit value for large numbers.

drm_fixp2int_ceil allows us to return the ceiling of our fixed point
value.

[airlied: squash Jordan's fix]
32-bit-build-fix: Jordan Lazare <Jordan.Lazare@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Cc: stable@vger.kernel.org
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_fixed.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index d639049..553210c 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B)
 #define DRM_FIXED_ONE		(1ULL << DRM_FIXED_POINT)
 #define DRM_FIXED_DECIMAL_MASK	(DRM_FIXED_ONE - 1)
 #define DRM_FIXED_DIGITS_MASK	(~DRM_FIXED_DECIMAL_MASK)
+#define DRM_FIXED_EPSILON	1LL
+#define DRM_FIXED_ALMOST_ONE	(DRM_FIXED_ONE - DRM_FIXED_EPSILON)
 
 static inline s64 drm_int2fixp(int a)
 {
 	return ((s64)a) << DRM_FIXED_POINT;
 }
 
-static inline int drm_fixp2int(int64_t a)
+static inline int drm_fixp2int(s64 a)
 {
 	return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline unsigned drm_fixp_msbset(int64_t a)
+static inline int drm_fixp2int_ceil(s64 a)
+{
+	if (a > 0)
+		return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE);
+	else
+		return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE);
+}
+
+static inline unsigned drm_fixp_msbset(s64 a)
 {
 	unsigned shift, sign = (a >> 63) & 1;
 
@@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b)
 	return result;
 }
 
+static inline s64 drm_fixp_from_fraction(s64 a, s64 b)
+{
+	s64 res;
+	bool a_neg = a < 0;
+	bool b_neg = b < 0;
+	u64 a_abs = a_neg ? -a : a;
+	u64 b_abs = b_neg ? -b : b;
+	u64 rem;
+
+	/* determine integer part */
+	u64 res_abs  = div64_u64_rem(a_abs, b_abs, &rem);
+
+	/* determine fractional part */
+	{
+		u32 i = DRM_FIXED_POINT;
+
+		do {
+			rem <<= 1;
+			res_abs <<= 1;
+			if (rem >= b_abs) {
+				res_abs |= 1;
+				rem -= b_abs;
+			}
+		} while (--i != 0);
+	}
+
+	/* round up LSB */
+	{
+		u64 summand = (rem << 1) >= b_abs;
+
+		res_abs += summand;
+	}
+
+	res = (s64) res_abs;
+	if (a_neg ^ b_neg)
+		res = -res;
+	return res;
+}
+
 static inline s64 drm_fixp_exp(s64 x)
 {
 	s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000);
-- 
cgit v1.1


From 5e93b8208d3c419b515fb75e2601931c027e12ab Mon Sep 17 00:00:00 2001
From: Hersen Wu <hersenxs.wu@amd.com>
Date: Fri, 22 Jan 2016 17:07:28 -0500
Subject: drm/dp/mst: move GUID storage from mgr, port to only mst branch

Previous implementation does not handle case below: boot up one MST branch
to DP connector of ASIC. After boot up, hot plug 2nd MST branch to DP output
of 1st MST, GUID is not created for 2nd MST branch. When downstream port of
2nd MST branch send upstream request, it fails because 2nd MST branch GUID
is not available.

New Implementation: only create GUID for MST branch and save it within Branch.

Signed-off-by: Hersen Wu <hersenxs.wu@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Cc: stable@vger.kernel.org
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_dp_mst_helper.h | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index 24ab178..fdb4705 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -44,8 +44,6 @@ struct drm_dp_vcpi {
 /**
  * struct drm_dp_mst_port - MST port
  * @kref: reference count for this port.
- * @guid_valid: for DP 1.2 devices if we have validated the GUID.
- * @guid: guid for DP 1.2 device on this port.
  * @port_num: port number
  * @input: if this port is an input port.
  * @mcs: message capability status - DP 1.2 spec.
@@ -70,10 +68,6 @@ struct drm_dp_vcpi {
 struct drm_dp_mst_port {
 	struct kref kref;
 
-	/* if dpcd 1.2 device is on this port - its GUID info */
-	bool guid_valid;
-	u8 guid[16];
-
 	u8 port_num;
 	bool input;
 	bool mcs;
@@ -110,10 +104,12 @@ struct drm_dp_mst_port {
  * @tx_slots: transmission slots for this device.
  * @last_seqno: last sequence number used to talk to this.
  * @link_address_sent: if a link address message has been sent to this device yet.
+ * @guid: guid for DP 1.2 branch device. port under this branch can be
+ * identified by port #.
  *
  * This structure represents an MST branch device, there is one
- * primary branch device at the root, along with any others connected
- * to downstream ports
+ * primary branch device at the root, along with any other branches connected
+ * to downstream port of parent branches.
  */
 struct drm_dp_mst_branch {
 	struct kref kref;
@@ -132,6 +128,9 @@ struct drm_dp_mst_branch {
 	struct drm_dp_sideband_msg_tx *tx_slots[2];
 	int last_seqno;
 	bool link_address_sent;
+
+	/* global unique identifier to identify branch devices */
+	u8 guid[16];
 };
 
 
@@ -406,11 +405,9 @@ struct drm_dp_payload {
  * @conn_base_id: DRM connector ID this mgr is connected to.
  * @down_rep_recv: msg receiver state for down replies.
  * @up_req_recv: msg receiver state for up requests.
- * @lock: protects mst state, primary, guid, dpcd.
+ * @lock: protects mst state, primary, dpcd.
  * @mst_state: if this manager is enabled for an MST capable port.
  * @mst_primary: pointer to the primary branch device.
- * @guid_valid: GUID valid for the primary branch device.
- * @guid: GUID for primary port.
  * @dpcd: cache of DPCD for primary port.
  * @pbn_div: PBN to slots divisor.
  *
@@ -432,13 +429,11 @@ struct drm_dp_mst_topology_mgr {
 	struct drm_dp_sideband_msg_rx up_req_recv;
 
 	/* pointer to info about the initial MST device */
-	struct mutex lock; /* protects mst_state + primary + guid + dpcd */
+	struct mutex lock; /* protects mst_state + primary + dpcd */
 
 	bool mst_state;
 	struct drm_dp_mst_branch *mst_primary;
-	/* primary MST device GUID */
-	bool guid_valid;
-	u8 guid[16];
+
 	u8 dpcd[DP_RECEIVER_CAP_SIZE];
 	u8 sink_count;
 	int pbn_div;
-- 
cgit v1.1


From 080fe2068e1c7f19f565b30b78baf78edf16a980 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 5 Feb 2016 15:36:41 -0800
Subject: mm, hugetlb: don't require CMA for runtime gigantic pages

Commit 944d9fec8d7a ("hugetlb: add support for gigantic page allocation
at runtime") has added the runtime gigantic page allocation via
alloc_contig_range(), making this support available only when CONFIG_CMA
is enabled.  Because it doesn't depend on MIGRATE_CMA pageblocks and the
associated infrastructure, it is possible with few simple adjustments to
require only CONFIG_MEMORY_ISOLATION instead of full CONFIG_CMA.

After this patch, alloc_contig_range() and related functions are
available and used for gigantic pages with just CONFIG_MEMORY_ISOLATION
enabled.  Note CONFIG_CMA selects CONFIG_MEMORY_ISOLATION.  This allows
supporting runtime gigantic pages without the CMA-specific checks in
page allocator fastpaths.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 28ad5f6..af1f2b2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -547,16 +547,16 @@ static inline bool pm_suspended_storage(void)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_CMA
-
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
 			      unsigned migratetype);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
+#endif
 
+#ifdef CONFIG_CMA
 /* CMA stuff */
 extern void init_cma_reserved_pageblock(struct page *page);
-
 #endif
 
 #endif /* __LINUX_GFP_H */
-- 
cgit v1.1


From 12352d3cae2cebe18805a91fab34b534d7444231 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Fri, 5 Feb 2016 15:36:50 -0800
Subject: mm: replace vma_lock_anon_vma with anon_vma_lock_read/write

Sequence vma_lock_anon_vma() - vma_unlock_anon_vma() isn't safe if
anon_vma appeared between lock and unlock.  We have to check anon_vma
first or call anon_vma_prepare() to be sure that it's here.  There are
only few users of these legacy helpers.  Let's get rid of them.

This patch fixes anon_vma lock imbalance in validate_mm().  Write lock
isn't required here, read lock is enough.

And reorders expand_downwards/expand_upwards: security_mmap_addr() and
wrapping-around check don't have to be under anon vma lock.

Link: https://lkml.kernel.org/r/CACT4Y+Y908EjM2z=706dv4rV6dWtxTLK9nFg9_7DhRMLppBo2g@mail.gmail.com
Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bdf597c..a07f42b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -109,20 +109,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma)
 		__put_anon_vma(anon_vma);
 }
 
-static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
-{
-	struct anon_vma *anon_vma = vma->anon_vma;
-	if (anon_vma)
-		down_write(&anon_vma->root->rwsem);
-}
-
-static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
-{
-	struct anon_vma *anon_vma = vma->anon_vma;
-	if (anon_vma)
-		up_write(&anon_vma->root->rwsem);
-}
-
 static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
 	down_write(&anon_vma->root->rwsem);
-- 
cgit v1.1


From 732042821cfa106b3c20b9780e4c60fee9d68900 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Fri, 5 Feb 2016 15:37:01 -0800
Subject: radix-tree: fix oops after radix_tree_iter_retry

Helper radix_tree_iter_retry() resets next_index to the current index.
In following radix_tree_next_slot current chunk size becomes zero.  This
isn't checked and it tries to dereference null pointer in slot.

Tagged iterator is fine because retry happens only at slot 0 where tag
bitmask in iter->tags is filled with single bit.

Fixes: 46437f9a554f ("radix-tree: fix race in gang lookup")
Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ohad Ben-Cohen <ohad@wizery.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 00b17c5..f54be70 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -400,7 +400,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter)
  * @iter:	pointer to radix tree iterator
  * Returns:	current chunk size
  */
-static __always_inline unsigned
+static __always_inline long
 radix_tree_chunk_size(struct radix_tree_iter *iter)
 {
 	return iter->next_index - iter->index;
@@ -434,9 +434,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 			return slot + offset + 1;
 		}
 	} else {
-		unsigned size = radix_tree_chunk_size(iter) - 1;
+		long size = radix_tree_chunk_size(iter);
 
-		while (size--) {
+		while (--size > 0) {
 			slot++;
 			iter->index++;
 			if (likely(*slot))
-- 
cgit v1.1