From d0164adc89f6bb374d304ffcc375c6d2652fe67d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:21 -0800
Subject: mm, page_alloc: distinguish between being unable to sleep, unwilling
 to sleep and avoiding waking kswapd

__GFP_WAIT has been used to identify atomic context in callers that hold
spinlocks or are in interrupts.  They are expected to be high priority and
have access one of two watermarks lower than "min" which can be referred
to as the "atomic reserve".  __GFP_HIGH users get access to the first
lower watermark and can be called the "high priority reserve".

Over time, callers had a requirement to not block when fallback options
were available.  Some have abused __GFP_WAIT leading to a situation where
an optimisitic allocation with a fallback option can access atomic
reserves.

This patch uses __GFP_ATOMIC to identify callers that are truely atomic,
cannot sleep and have no alternative.  High priority users continue to use
__GFP_HIGH.  __GFP_DIRECT_RECLAIM identifies callers that can sleep and
are willing to enter direct reclaim.  __GFP_KSWAPD_RECLAIM to identify
callers that want to wake kswapd for background reclaim.  __GFP_WAIT is
redefined as a caller that is willing to enter direct reclaim and wake
kswapd for background reclaim.

This patch then converts a number of sites

o __GFP_ATOMIC is used by callers that are high priority and have memory
  pools for those requests. GFP_ATOMIC uses this flag.

o Callers that have a limited mempool to guarantee forward progress clear
  __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall
  into this category where kswapd will still be woken but atomic reserves
  are not used as there is a one-entry mempool to guarantee progress.

o Callers that are checking if they are non-blocking should use the
  helper gfpflags_allow_blocking() where possible. This is because
  checking for __GFP_WAIT as was done historically now can trigger false
  positives. Some exceptions like dm-crypt.c exist where the code intent
  is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to
  flag manipulations.

o Callers that built their own GFP flags instead of starting with GFP_KERNEL
  and friends now also need to specify __GFP_KSWAPD_RECLAIM.

The first key hazard to watch out for is callers that removed __GFP_WAIT
and was depending on access to atomic reserves for inconspicuous reasons.
In some cases it may be appropriate for them to use __GFP_HIGH.

The second key hazard is callers that assembled their own combination of
GFP flags instead of starting with something like GFP_KERNEL.  They may
now wish to specify __GFP_KSWAPD_RECLAIM.  It's almost certainly harmless
if it's missed in most cases as other activity will wake kswapd.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/audit.c           | 6 +++---
 kernel/cgroup.c          | 2 +-
 kernel/locking/lockdep.c | 2 +-
 kernel/power/snapshot.c  | 2 +-
 kernel/smp.c             | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index 8a056a3..5ffcbd3 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1371,16 +1371,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 	if (unlikely(audit_filter_type(type)))
 		return NULL;
 
-	if (gfp_mask & __GFP_WAIT) {
+	if (gfp_mask & __GFP_DIRECT_RECLAIM) {
 		if (audit_pid && audit_pid == current->pid)
-			gfp_mask &= ~__GFP_WAIT;
+			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 		else
 			reserve = 0;
 	}
 
 	while (audit_backlog_limit
 	       && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
-		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
+		if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
 			long sleep_time;
 
 			sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b9d0cce..f1603c1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -299,7 +299,7 @@ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
 
 	idr_preload(gfp_mask);
 	spin_lock_bh(&cgroup_idr_lock);
-	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_WAIT);
+	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
 	spin_unlock_bh(&cgroup_idr_lock);
 	idr_preload_end();
 	return ret;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4e49cc4..deae390 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2738,7 +2738,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
 		return;
 
 	/* no reclaim without waiting on it */
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return;
 
 	/* this guy won't enter reclaim */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5235dd4..3a97060 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1779,7 +1779,7 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
 	while (to_alloc-- > 0) {
 		struct page *page;
 
-		page = alloc_image_page(__GFP_HIGHMEM);
+		page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
 		memory_bm_set_bit(bm, page_to_pfn(page));
 	}
 	return nr_highmem;
diff --git a/kernel/smp.c b/kernel/smp.c
index 0785447..d903c02 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -669,7 +669,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 	cpumask_var_t cpus;
 	int cpu, ret;
 
-	might_sleep_if(gfp_flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_flags));
 
 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
 		preempt_disable();
-- 
cgit v1.1


From 71baba4b92dc1fa1bc461742c6ab1942ec6034e9 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:28 -0800
Subject: mm, page_alloc: rename __GFP_WAIT to __GFP_RECLAIM

__GFP_WAIT was used to signal that the caller was in atomic context and
could not sleep.  Now it is possible to distinguish between true atomic
context and callers that are not willing to sleep.  The latter should
clear __GFP_DIRECT_RECLAIM so kswapd will still wake.  As clearing
__GFP_WAIT behaves differently, there is a risk that people will clear the
wrong flags.  This patch renames __GFP_WAIT to __GFP_RECLAIM to clearly
indicate what it does -- setting it allows all reclaim activity, clearing
them prevents it.

[akpm@linux-foundation.org: fix build]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/swap.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b2066fb5..12cd989 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -257,7 +257,7 @@ static int hib_submit_io(int rw, pgoff_t page_off, void *addr,
 	struct bio *bio;
 	int error = 0;
 
-	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+	bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1);
 	bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
 	bio->bi_bdev = hib_resume_bdev;
 
@@ -356,7 +356,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
 		return -ENOSPC;
 
 	if (hb) {
-		src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
+		src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN |
 		                              __GFP_NORETRY);
 		if (src) {
 			copy_page(src, buf);
@@ -364,7 +364,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
 			ret = hib_wait_io(hb); /* Free pages */
 			if (ret)
 				return ret;
-			src = (void *)__get_free_page(__GFP_WAIT |
+			src = (void *)__get_free_page(__GFP_RECLAIM |
 			                              __GFP_NOWARN |
 			                              __GFP_NORETRY);
 			if (src) {
@@ -672,7 +672,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
 	nr_threads = num_online_cpus() - 1;
 	nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
 
-	page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+	page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH);
 	if (!page) {
 		printk(KERN_ERR "PM: Failed to allocate LZO page\n");
 		ret = -ENOMEM;
@@ -975,7 +975,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
 		last = tmp;
 
 		tmp->map = (struct swap_map_page *)
-		           __get_free_page(__GFP_WAIT | __GFP_HIGH);
+			   __get_free_page(__GFP_RECLAIM | __GFP_HIGH);
 		if (!tmp->map) {
 			release_swap_reader(handle);
 			return -ENOMEM;
@@ -1242,9 +1242,9 @@ static int load_image_lzo(struct swap_map_handle *handle,
 
 	for (i = 0; i < read_pages; i++) {
 		page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
-		                                  __GFP_WAIT | __GFP_HIGH :
-		                                  __GFP_WAIT | __GFP_NOWARN |
-		                                  __GFP_NORETRY);
+						  __GFP_RECLAIM | __GFP_HIGH :
+						  __GFP_RECLAIM | __GFP_NOWARN |
+						  __GFP_NORETRY);
 
 		if (!page[i]) {
 			if (i < LZO_CMP_PAGES) {
-- 
cgit v1.1


From 3d9c637f4ae74b45d95bb6cbd793fbffad0a709c Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Fri, 6 Nov 2015 16:29:12 -0800
Subject: module: export param_free_charp()

Change the param_free_charp() function from static to exported.

It is used by zswap in the next patch ("zswap: use charp for zswap param
strings").

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Seth Jennings <sjennings@variantweb.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/params.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/params.c b/kernel/params.c
index b6554aa..93a380a 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -325,10 +325,11 @@ int param_get_charp(char *buffer, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL(param_get_charp);
 
-static void param_free_charp(void *arg)
+void param_free_charp(void *arg)
 {
 	maybe_kfree_parameter(*((char **)arg));
 }
+EXPORT_SYMBOL(param_free_charp);
 
 const struct kernel_param_ops param_ops_charp = {
 	.set = param_set_charp,
-- 
cgit v1.1


From 3824657c522f19f85a76bd932821174a5557a382 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Fri, 6 Nov 2015 16:30:38 -0800
Subject: printk: prevent userland from spoofing kernel messages

The following statement of ABI/testing/dev-kmsg is not quite right:

   It is not possible to inject messages from userspace with the
   facility number LOG_KERN (0), to make sure that the origin of the
   messages can always be reliably determined.

Userland actually can inject messages with a facility of 0 by abusing the
fact that the facility is stored in a u8 data type.  By using a facility
which is a multiple of 256 the assignment of msg->facility in log_store()
implicitly truncates it to 0, i.e.  LOG_KERN, allowing users of /dev/kmsg
to spoof kernel messages as shown below:

The following call...
   # printf '<%d>Kernel panic - not syncing: beer empty\n' 0 >/dev/kmsg
...leads to the following log entry (dmesg -x | tail -n 1):
   user  :emerg : [   66.137758] Kernel panic - not syncing: beer empty

However, this call...
   # printf '<%d>Kernel panic - not syncing: beer empty\n' 0x800 >/dev/kmsg
...leads to the slightly different log entry (note the kernel facility):
   kern  :emerg : [   74.177343] Kernel panic - not syncing: beer empty

Fix that by limiting the user provided facility to 8 bit right from the
beginning and catch the truncation early.

Fixes: 7ff9554bb578 ("printk: convert byte-buffer to variable-length...")
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Petr Mladek <pmladek@suse.cz>
Cc: Alex Elder <elder@linaro.org>
Cc: Joe Perches <joe@perches.com>
Cc: Kay Sievers <kay@vrfy.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk/printk.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b16f354..2ce8826 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -269,6 +269,9 @@ static u32 clear_idx;
 #define PREFIX_MAX		32
 #define LOG_LINE_MAX		(1024 - PREFIX_MAX)
 
+#define LOG_LEVEL(v)		((v) & 0x07)
+#define LOG_FACILITY(v)		((v) >> 3 & 0xff)
+
 /* record buffer */
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 #define LOG_ALIGN 4
@@ -612,7 +615,6 @@ struct devkmsg_user {
 static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	char *buf, *line;
-	int i;
 	int level = default_message_loglevel;
 	int facility = 1;	/* LOG_USER */
 	size_t len = iov_iter_count(from);
@@ -642,12 +644,13 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 	line = buf;
 	if (line[0] == '<') {
 		char *endp = NULL;
+		unsigned int u;
 
-		i = simple_strtoul(line+1, &endp, 10);
+		u = simple_strtoul(line + 1, &endp, 10);
 		if (endp && endp[0] == '>') {
-			level = i & 7;
-			if (i >> 3)
-				facility = i >> 3;
+			level = LOG_LEVEL(u);
+			if (LOG_FACILITY(u) != 0)
+				facility = LOG_FACILITY(u);
 			endp++;
 			len -= endp - line;
 			line = endp;
-- 
cgit v1.1


From 2e01fabe67ccaff1d59bda01e60a61f5fb0aa7b6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 6 Nov 2015 16:32:19 -0800
Subject: signals: kill block_all_signals() and unblock_all_signals()

It is hardly possible to enumerate all problems with block_all_signals()
and unblock_all_signals().  Just for example,

1. block_all_signals(SIGSTOP/etc) simply can't help if the caller is
   multithreaded. Another thread can dequeue the signal and force the
   group stop.

2. Even is the caller is single-threaded, it will "stop" anyway. It
   will not sleep, but it will spin in kernel space until SIGCONT or
   SIGKILL.

And a lot more. In short, this interface doesn't work at all, at least
the last 10+ years.

Daniel said:

  Yeah the only times I played around with the DRM_LOCK stuff was when
  old drivers accidentally deadlocked - my impression is that the entire
  DRM_LOCK thing was never really tested properly ;-) Hence I'm all for
  purging where this leaks out of the drm subsystem.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Dave Airlie <airlied@redhat.com>
Cc: Richard Weinberger <richard@nod.at>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 51 +--------------------------------------------------
 1 file changed, 1 insertion(+), 50 deletions(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 0f6bbbe..f2cbd4e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -503,41 +503,6 @@ int unhandled_signal(struct task_struct *tsk, int sig)
 	return !tsk->ptrace;
 }
 
-/*
- * Notify the system that a driver wants to block all signals for this
- * process, and wants to be notified if any signals at all were to be
- * sent/acted upon.  If the notifier routine returns non-zero, then the
- * signal will be acted upon after all.  If the notifier routine returns 0,
- * then then signal will be blocked.  Only one block per process is
- * allowed.  priv is a pointer to private data that the notifier routine
- * can use to determine if the signal should be blocked or not.
- */
-void
-block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&current->sighand->siglock, flags);
-	current->notifier_mask = mask;
-	current->notifier_data = priv;
-	current->notifier = notifier;
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
-}
-
-/* Notify the system that blocking has ended. */
-
-void
-unblock_all_signals(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&current->sighand->siglock, flags);
-	current->notifier = NULL;
-	current->notifier_data = NULL;
-	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
-}
-
 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 {
 	struct sigqueue *q, *first = NULL;
@@ -580,19 +545,8 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
 {
 	int sig = next_signal(pending, mask);
 
-	if (sig) {
-		if (current->notifier) {
-			if (sigismember(current->notifier_mask, sig)) {
-				if (!(current->notifier)(current->notifier_data)) {
-					clear_thread_flag(TIF_SIGPENDING);
-					return 0;
-				}
-			}
-		}
-
+	if (sig)
 		collect_signal(sig, pending, info);
-	}
-
 	return sig;
 }
 
@@ -2483,9 +2437,6 @@ EXPORT_SYMBOL(force_sig);
 EXPORT_SYMBOL(send_sig);
 EXPORT_SYMBOL(send_sig_info);
 EXPORT_SYMBOL(sigprocmask);
-EXPORT_SYMBOL(block_all_signals);
-EXPORT_SYMBOL(unblock_all_signals);
-
 
 /*
  * System call entry points.
-- 
cgit v1.1


From 5fa534c987784c4811757a34c425aff3ce3b5037 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 6 Nov 2015 16:32:31 -0800
Subject: coredump: ensure all coredumping tasks have SIGNAL_GROUP_COREDUMP

task_will_free_mem() is wrong in many ways, and in particular the
SIGNAL_GROUP_COREDUMP check is not reliable: a task can participate in the
coredumping without SIGNAL_GROUP_COREDUMP bit set.

change zap_threads() paths to always set SIGNAL_GROUP_COREDUMP even if
other CLONE_VM processes can't react to SIGKILL.  Fortunately, at least
oom-kill case if fine; it kills all tasks sharing the same mm, so it
should also kill the process which actually dumps the core.

The change in prepare_signal() is not strictly necessary, it just ensures
that the patch does not bring another subtle behavioural change.  But it
reminds us that this SIGNAL_GROUP_EXIT/COREDUMP case needs more changes.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Kyle Walker <kwalker@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Stanislav Kozina <skozina@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index f2cbd4e..c0b01fe 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -788,7 +788,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
 	sigset_t flush;
 
 	if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
-		if (signal->flags & SIGNAL_GROUP_COREDUMP)
+		if (!(signal->flags & SIGNAL_GROUP_EXIT))
 			return sig == SIGKILL;
 		/*
 		 * The process is in the middle of dying, nothing to do.
-- 
cgit v1.1


From de90a6bcaede81f35e8caf4566d1006267230377 Mon Sep 17 00:00:00 2001
From: Minfei Huang <mnfhuang@gmail.com>
Date: Fri, 6 Nov 2015 16:32:45 -0800
Subject: kexec: use file name as the output message prefix

kexec output message misses the prefix "kexec", when Dave Young split the
kexec code.  Now, we use file name as the output message prefix.

Currently, the format of output message:
[  140.290795] SYSC_kexec_load: hello, world
[  140.291534] kexec: sanity_check_segment_list: hello, world

Ideally, the format of output message:
[   30.791503] kexec: SYSC_kexec_load, Hello, world
[   79.182752] kexec_core: sanity_check_segment_list, Hello, world

Remove the custom prefix "kexec" in output message.

Signed-off-by: Minfei Huang <mnfhuang@gmail.com>
Acked-by: Dave Young <dyoung@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kexec.c      | 2 ++
 kernel/kexec_core.c | 4 ++--
 kernel/kexec_file.c | 2 ++
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4c5edc3..d873b64 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index bd9f8a0..11b64a6 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -6,7 +6,7 @@
  * Version 2.  See the file COPYING for more details.
  */
 
-#define pr_fmt(fmt)	"kexec: " fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/capability.h>
 #include <linux/mm.h>
@@ -1027,7 +1027,7 @@ static int __init crash_notes_memory_init(void)
 
 	crash_notes = __alloc_percpu(size, align);
 	if (!crash_notes) {
-		pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
+		pr_warn("Memory allocation for saving cpu register states failed\n");
 		return -ENOMEM;
 	}
 	return 0;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 6a9a3f2..b70ada0 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -9,6 +9,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
-- 
cgit v1.1


From 8639b46139b0e4ea3b1ab1c274e410ee327f1d89 Mon Sep 17 00:00:00 2001
From: Ben Segall <bsegall@google.com>
Date: Fri, 6 Nov 2015 16:32:48 -0800
Subject: pidns: fix set/getpriority and ioprio_set/get in PRIO_USER mode

setpriority(PRIO_USER, 0, x) will change the priority of tasks outside of
the current pid namespace.  This is in contrast to both the other modes of
setpriority and the example of kill(-1).  Fix this.  getpriority and
ioprio have the same failure mode, fix them too.

Eric said:

: After some more thinking about it this patch sounds justifiable.
:
: My goal with namespaces is not to build perfect isolation mechanisms
: as that can get into ill defined territory, but to build well defined
: mechanisms.  And to handle the corner cases so you can use only
: a single namespace with well defined results.
:
: In this case you have found the two interfaces I am aware of that
: identify processes by uid instead of by pid.  Which quite frankly is
: weird.  Unfortunately the weird unexpected cases are hard to handle
: in the usual way.
:
: I was hoping for a little more information.  Changes like this one we
: have to be careful of because someone might be depending on the current
: behavior.  I don't think they are and I do think this make sense as part
: of the pid namespace.

Signed-off-by: Ben Segall <bsegall@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Ambrose Feinstein <ambrose@google.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index fa2f2f6..6af9212 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -222,7 +222,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
 				goto out_unlock;	/* No processes for this user */
 		}
 		do_each_thread(g, p) {
-			if (uid_eq(task_uid(p), uid))
+			if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
 				error = set_one_prio(p, niceval, error);
 		} while_each_thread(g, p);
 		if (!uid_eq(uid, cred->uid))
@@ -290,7 +290,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
 				goto out_unlock;	/* No processes for this user */
 		}
 		do_each_thread(g, p) {
-			if (uid_eq(task_uid(p), uid)) {
+			if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
 				niceval = nice_to_rlimit(task_nice(p));
 				if (niceval > retval)
 					retval = niceval;
-- 
cgit v1.1


From 08d78658f393fefaa2e6507ea052c6f8ef4002a2 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 6 Nov 2015 16:32:58 -0800
Subject: panic: release stale console lock to always get the logbuf printed
 out

In some cases we may end up killing the CPU holding the console lock
while still having valuable data in logbuf. E.g. I'm observing the
following:

- A crash is happening on one CPU and console_unlock() is being called on
  some other.

- console_unlock() tries to print out the buffer before releasing the lock
  and on slow console it takes time.

- in the meanwhile crashing CPU does lots of printk()-s with valuable data
  (which go to the logbuf) and sends IPIs to all other CPUs.

- console_unlock() finishes printing previous chunk and enables interrupts
  before trying to print out the rest, the CPU catches the IPI and never
  releases console lock.

This is not the only possible case: in VT/fb subsystems we have many other
console_lock()/console_unlock() users.  Non-masked interrupts (or
receiving NMI in case of extreme slowness) will have the same result.
Getting the whole console buffer printed out on crash should be top
priority.

[akpm@linux-foundation.org: tweak comment text]
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Baoquan He <bhe@redhat.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Xie XiuQi <xiexiuqi@huawei.com>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/panic.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'kernel')

diff --git a/kernel/panic.c b/kernel/panic.c
index 04e91ff..4579dbb 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -23,6 +23,7 @@
 #include <linux/sysrq.h>
 #include <linux/init.h>
 #include <linux/nmi.h>
+#include <linux/console.h>
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
@@ -147,6 +148,15 @@ void panic(const char *fmt, ...)
 
 	bust_spinlocks(0);
 
+	/*
+	 * We may have ended up stopping the CPU holding the lock (in
+	 * smp_send_stop()) while still having some valuable data in the console
+	 * buffer.  Try to acquire the lock then release it regardless of the
+	 * result.  The release will also print the buffers out.
+	 */
+	console_trylock();
+	console_unlock();
+
 	if (!panic_blink)
 		panic_blink = no_blink;
 
-- 
cgit v1.1