From 7d669f50847481c52faf0656aea7b4be63113210 Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Date: Wed, 15 Jun 2016 17:23:45 -0500
Subject: kvm: svm: Fix implicit declaration for
 __default_cpu_present_to_apicid()

The commit 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC")
introduces a build error due to implicit function declaration
when #ifdef CONFIG_X86_32 and #ifndef CONFIG_X86_LOCAL_APIC
(as reported by Kbuild test robot i386-randconfig-x0-06121009).

So, this patch introduces kvm_cpu_get_apicid() wrapper
around __default_cpu_present_to_apicid() with additional
handling if CONFIG_X86_LOCAL_APIC is not defined.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Fixes: commit 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e0fbe7e..69e62862 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #include <linux/irqbypass.h>
 #include <linux/hyperv.h>
 
+#include <asm/apic.h>
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
@@ -1368,4 +1369,14 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+static inline int kvm_cpu_get_apicid(int mps_cpu)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	return __default_cpu_present_to_apicid(mps_cpu);
+#else
+	WARN_ON_ONCE(1);
+	return BAD_APICID;
+#endif
+}
+
 #endif /* _ASM_X86_KVM_HOST_H */
-- 
cgit v1.1


From da01e18a37a57f360222d3a123b8f6994aa1ad14 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 23 Jun 2016 12:20:01 -0700
Subject: x86: avoid avoid passing around 'thread_info' in stack dumping code

None of the code actually wants a thread_info, it all wants a
task_struct, and it's just converting to a thread_info pointer much too
early.

No semantic change.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/stacktrace.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 7c247e7..0944218 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -14,7 +14,7 @@ extern int kstack_depth_to_print;
 struct thread_info;
 struct stacktrace_ops;
 
-typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
+typedef unsigned long (*walk_stack_t)(struct task_struct *task,
 				      unsigned long *stack,
 				      unsigned long bp,
 				      const struct stacktrace_ops *ops,
@@ -23,13 +23,13 @@ typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
 				      int *graph);
 
 extern unsigned long
-print_context_stack(struct thread_info *tinfo,
+print_context_stack(struct task_struct *task,
 		    unsigned long *stack, unsigned long bp,
 		    const struct stacktrace_ops *ops, void *data,
 		    unsigned long *end, int *graph);
 
 extern unsigned long
-print_context_stack_bp(struct thread_info *tinfo,
+print_context_stack_bp(struct task_struct *task,
 		       unsigned long *stack, unsigned long bp,
 		       const struct stacktrace_ops *ops, void *data,
 		       unsigned long *end, int *graph);
-- 
cgit v1.1


From aca9c293d098292579e345b2b39b394778d41526 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 24 Jun 2016 16:55:53 -0700
Subject: x86: fix up a few misc stack pointer vs thread_info confusions

As the actual pointer value is the same for the thread stack allocation
and the thread_info, code that confused the two worked fine, but will
break when the thread info is moved away from the stack allocation.  It
also looks very confusing.

For example, the kprobe code wanted to know the current top of stack.
To do that, it used this:

	(unsigned long)current_thread_info() + THREAD_SIZE

which did indeed give the correct value.  But it's not only a fairly
nonsensical expression, it's also rather complex, especially since we
actually have this:

	static inline unsigned long current_top_of_stack(void)

which not only gives us the value we are interested in, but happens to
be how "current_thread_info()" is currently defined as:

	(struct thread_info *)(current_top_of_stack() - THREAD_SIZE);

so using current_thread_info() to figure out the top of the stack really
is a very round-about thing to do.

The other cases are just simpler confusion about task_thread_info() vs
task_stack_page(), which currently return the same pointer - but if you
want the stack page, you really should be using the latter one.

And there was one entirely unused assignment of the current stack to a
thread_info pointer.

All cleaned up to make more sense today, and make it easier to move the
thread_info away from the stack in the future.

No semantic changes.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/kprobes.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4421b5d..d1d1e50 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -38,12 +38,11 @@ typedef u8 kprobe_opcode_t;
 #define RELATIVECALL_OPCODE 0xe8
 #define RELATIVE_ADDR_SIZE 4
 #define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR)					       \
-	(((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \
-			      THREAD_SIZE - (unsigned long)(ADDR)))    \
-	 ? (MAX_STACK_SIZE)					       \
-	 : (((unsigned long)current_thread_info()) +		       \
-	    THREAD_SIZE - (unsigned long)(ADDR)))
+#define CUR_STACK_SIZE(ADDR) \
+	(current_top_of_stack() - (unsigned long)(ADDR))
+#define MIN_STACK_SIZE(ADDR)				\
+	(MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ?	\
+	 MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR))
 
 #define flush_insn_slot(p)	do { } while (0)
 
-- 
cgit v1.1


From 32d6bd9059f265f617f6502c68dfbcae7e515add Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:48:47 -0700
Subject: tree wide: get rid of __GFP_REPEAT for order-0 allocations part I

This is the third version of the patchset previously sent [1].  I have
basically only rebased it on top of 4.7-rc1 tree and dropped "dm: get
rid of superfluous gfp flags" which went through dm tree.  I am sending
it now because it is tree wide and chances for conflicts are reduced
considerably when we want to target rc2.  I plan to send the next step
and rename the flag and move to a better semantic later during this
release cycle so we will have a new semantic ready for 4.8 merge window
hopefully.

Motivation:

While working on something unrelated I've checked the current usage of
__GFP_REPEAT in the tree.  It seems that a majority of the usage is and
always has been bogus because __GFP_REPEAT has always been about costly
high order allocations while we are using it for order-0 or very small
orders very often.  It seems that a big pile of them is just a
copy&paste when a code has been adopted from one arch to another.

I think it makes some sense to get rid of them because they are just
making the semantic more unclear.  Please note that GFP_REPEAT is
documented as

* __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt

* _might_ fail.  This depends upon the particular VM implementation.
  while !costly requests have basically nofail semantic.  So one could
  reasonably expect that order-0 request with __GFP_REPEAT will not loop
  for ever.  This is not implemented right now though.

I would like to move on with __GFP_REPEAT and define a better semantic
for it.

  $ git grep __GFP_REPEAT origin/master | wc -l
  111
  $ git grep __GFP_REPEAT | wc -l
  36

So we are down to the third after this patch series.  The remaining
places really seem to be relying on __GFP_REPEAT due to large allocation
requests.  This still needs some double checking which I will do later
after all the simple ones are sorted out.

I am touching a lot of arch specific code here and I hope I got it right
but as a matter of fact I even didn't compile test for some archs as I
do not have cross compiler for them.  Patches should be quite trivial to
review for stupid compile mistakes though.  The tricky parts are usually
hidden by macro definitions and thats where I would appreciate help from
arch maintainers.

[1] http://lkml.kernel.org/r/1461849846-27209-1-git-send-email-mhocko@kernel.org

This patch (of 19):

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.  Yet we
have the full kernel tree with its usage for apparently order-0
allocations.  This is really confusing because __GFP_REPEAT is
explicitly documented to allow allocation failures which is a weaker
semantic than the current order-0 has (basically nofail).

Let's simply drop __GFP_REPEAT from those places.  This would allow to
identify place which really need allocator to retry harder and formulate
a more specific semantic for what the flag is supposed to do actually.

Link: http://lkml.kernel.org/r/1464599699-30131-2-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Liqin <liqin.linux@gmail.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com> [for tile]
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: John Crispin <blogic@openwrt.org>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/pgalloc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index bf7f8b5..574c23c 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	struct page *page;
-	page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+	page = alloc_pages(GFP_KERNEL |  __GFP_ZERO, 0);
 	if (!page)
 		return NULL;
 	if (!pgtable_pmd_page_ctor(page)) {
@@ -125,7 +125,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pud_t *)get_zeroed_page(GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-- 
cgit v1.1


From 749d088b8e7f4b9826ede02b9a043e417fa84aa1 Mon Sep 17 00:00:00 2001
From: Minfei Huang <mnghuan@gmail.com>
Date: Fri, 27 May 2016 14:17:10 +0800
Subject: pvclock: Add CPU barriers to get correct version value

Protocol for the "version" fields is: hypervisor raises it (making it
uneven) before it starts updating the fields and raises it again (making
it even) when it is done.  Thus the guest can make sure the time values
it got are consistent by checking the version before and after reading
them.

Add CPU barries after getting version value just like what function
vread_pvclock does, because all of callees in this function is inline.

Fixes: 502dfeff239e8313bfbe906ca0a1a6827ac8481b
Cc: stable@vger.kernel.org
Signed-off-by: Minfei Huang <mnghuan@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/pvclock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index fdcc040..538ae94 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -85,6 +85,8 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 	u8 ret_flags;
 
 	version = src->version;
+	/* Make the latest version visible */
+	smp_rmb();
 
 	offset = pvclock_get_nsec_offset(src);
 	ret = src->system_time + offset;
-- 
cgit v1.1


From f7550d076d55c1b5f02f95012e3a5a84d264c47d Mon Sep 17 00:00:00 2001
From: Minfei Huang <mnghuan@gmail.com>
Date: Fri, 27 May 2016 14:17:11 +0800
Subject: pvclock: Cleanup to remove function pvclock_get_nsec_offset

Function __pvclock_read_cycles is short enough, so there is no need to
have another function pvclock_get_nsec_offset to calculate tsc delta.
It's better to combine it into function __pvclock_read_cycles.

Remove useless variables in function __pvclock_read_cycles.

Signed-off-by: Minfei Huang <mnghuan@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/pvclock.h | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 538ae94..7c1c895 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -69,31 +69,22 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 }
 
 static __always_inline
-u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
-{
-	u64 delta = rdtsc_ordered() - src->tsc_timestamp;
-	return pvclock_scale_delta(delta, src->tsc_to_system_mul,
-				   src->tsc_shift);
-}
-
-static __always_inline
 unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 			       cycle_t *cycles, u8 *flags)
 {
 	unsigned version;
-	cycle_t ret, offset;
-	u8 ret_flags;
+	cycle_t offset;
+	u64 delta;
 
 	version = src->version;
 	/* Make the latest version visible */
 	smp_rmb();
 
-	offset = pvclock_get_nsec_offset(src);
-	ret = src->system_time + offset;
-	ret_flags = src->flags;
-
-	*cycles = ret;
-	*flags = ret_flags;
+	delta = rdtsc_ordered() - src->tsc_timestamp;
+	offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+				   src->tsc_shift);
+	*cycles = src->system_time + offset;
+	*flags = src->flags;
 	return version;
 }
 
-- 
cgit v1.1