From 354dbaa7ff5b53a0ed1c0f7a9773d5953b3a1bb9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 8 Oct 2015 18:56:26 +0300
Subject: x86/cpu/intel: Enable X86_FEATURE_NONSTOP_TSC_S3 for Merrifield

The Intel Merrifield SoC is a successor of the Intel MID line of
SoCs. Let's set the neccessary capability for that chip. See commit
c54fdbb2823d (x86: Add cpu capability flag X86_FEATURE_NONSTOP_TSC_S3)
for the details.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: http://lkml.kernel.org/r/1444319786-36125-1-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 98a13db..209ac1e 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -97,6 +97,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		switch (c->x86_model) {
 		case 0x27:	/* Penwell */
 		case 0x35:	/* Cloverview */
+		case 0x4a:	/* Merrifield */
 			set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
 			break;
 		default:
-- 
cgit v1.1


From 8c058b0b9c34d8c8d7912880956543769323e2d8 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 3 Nov 2015 10:40:14 +0100
Subject: x86/irq: Probe for PIC presence before allocating descs for legacy
 IRQs

Commit d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain
interfaces") brought a regression for Hyper-V Gen2 instances. These
instances don't have i8259 legacy PIC but they use legacy IRQs for serial
port, rtc, and acpi. With this commit included we end up with these IRQs
not initialized. Earlier, there was a special workaround for legacy IRQs
in mp_map_pin_to_irq() doing mp_irqdomain_map() without looking at
nr_legacy_irqs() and now we fail in __irq_domain_alloc_irqs() when
irq_domain_alloc_descs() returns -EEXIST.

The essence of the issue seems to be that early_irq_init() calls
arch_probe_nr_irqs() to figure out the number of legacy IRQs before
we probe for i8259 and gets 16. Later when init_8259A() is called we switch
to NULL legacy PIC and nr_legacy_irqs() starts to return 0 but we already
have 16 descs allocated.

Solve the issue by separating i8259 probe from init and calling it in
arch_probe_nr_irqs() before we actually use nr_legacy_irqs() information.

Fixes: d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1446543614-3621-1-git-send-email-vkuznets@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/i8259.h  |  1 +
 arch/x86/kernel/apic/vector.c |  6 +++++-
 arch/x86/kernel/i8259.c       | 29 +++++++++++++++++++++--------
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index ccffa53..39bcefc 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,6 +60,7 @@ struct legacy_pic {
 	void (*mask_all)(void);
 	void (*restore_mask)(void);
 	void (*init)(int auto_eoi);
+	int (*probe)(void);
 	int (*irq_pending)(unsigned int irq);
 	void (*make_irq)(unsigned int irq);
 };
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 836d11b..861bc59 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -361,7 +361,11 @@ int __init arch_probe_nr_irqs(void)
 	if (nr < nr_irqs)
 		nr_irqs = nr;
 
-	return nr_legacy_irqs();
+	/*
+	 * We don't know if PIC is present at this point so we need to do
+	 * probe() to get the right number of legacy IRQs.
+	 */
+	return legacy_pic->probe();
 }
 
 #ifdef	CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 16cb827..be22f5a 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -295,16 +295,11 @@ static void unmask_8259A(void)
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
-static void init_8259A(int auto_eoi)
+static int probe_8259A(void)
 {
 	unsigned long flags;
 	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
 	unsigned char new_val;
-
-	i8259A_auto_eoi = auto_eoi;
-
-	raw_spin_lock_irqsave(&i8259A_lock, flags);
-
 	/*
 	 * Check to see if we have a PIC.
 	 * Mask all except the cascade and read
@@ -312,16 +307,28 @@ static void init_8259A(int auto_eoi)
 	 * have a PIC, we will read 0xff as opposed to the
 	 * value we wrote.
 	 */
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
 	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
 	outb(probe_val, PIC_MASTER_IMR);
 	new_val = inb(PIC_MASTER_IMR);
 	if (new_val != probe_val) {
 		printk(KERN_INFO "Using NULL legacy PIC\n");
 		legacy_pic = &null_legacy_pic;
-		raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-		return;
 	}
 
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+	return nr_legacy_irqs();
+}
+
+static void init_8259A(int auto_eoi)
+{
+	unsigned long flags;
+
+	i8259A_auto_eoi = auto_eoi;
+
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
 	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
 
 	/*
@@ -379,6 +386,10 @@ static int legacy_pic_irq_pending_noop(unsigned int irq)
 {
 	return 0;
 }
+static int legacy_pic_probe(void)
+{
+	return 0;
+}
 
 struct legacy_pic null_legacy_pic = {
 	.nr_legacy_irqs = 0,
@@ -388,6 +399,7 @@ struct legacy_pic null_legacy_pic = {
 	.mask_all = legacy_pic_noop,
 	.restore_mask = legacy_pic_noop,
 	.init = legacy_pic_int_noop,
+	.probe = legacy_pic_probe,
 	.irq_pending = legacy_pic_irq_pending_noop,
 	.make_irq = legacy_pic_uint_noop,
 };
@@ -400,6 +412,7 @@ struct legacy_pic default_legacy_pic = {
 	.mask_all = mask_8259A,
 	.restore_mask = unmask_8259A,
 	.init = init_8259A,
+	.probe = probe_8259A,
 	.irq_pending = i8259A_irq_pending,
 	.make_irq = make_8259A_irq,
 };
-- 
cgit v1.1


From 3849e91f571dcb48cf2c8143480c59137d44d6bc Mon Sep 17 00:00:00 2001
From: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
Date: Wed, 4 Nov 2015 12:49:42 +0100
Subject: x86/AMD: Fix last level cache topology for AMD Fam17h systems

On AMD Fam17h systems, the last level cache is not resident in the
northbridge. Therefore, we cannot assign cpu_llc_id to the same value as
Node ID as we have been doing until now.

We should rather look at the ApicID bits of the core to provide us the
last level cache ID info.

Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jacob Shin <jacob.w.shin@gmail.com>
Link: http://lkml.kernel.org/r/1446582899-9378-1-git-send-email-Aravind.Gopalakrishnan@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/amd.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4a70fc6..a8816b3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -352,6 +352,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
 	unsigned bits;
 	int cpu = smp_processor_id();
+	unsigned int socket_id, core_complex_id;
 
 	bits = c->x86_coreid_bits;
 	/* Low order bits define the core id (index of core in socket) */
@@ -361,6 +362,18 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 	/* use socket ID also for last level cache */
 	per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 	amd_get_topology(c);
+
+	/*
+	 * Fix percpu cpu_llc_id here as LLC topology is different
+	 * for Fam17h systems.
+	 */
+	 if (c->x86 != 0x17 || !cpuid_edx(0x80000006))
+		return;
+
+	socket_id	= (c->apicid >> bits) - 1;
+	core_complex_id	= (c->apicid & ((1 << bits) - 1)) >> 3;
+
+	per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id;
 #endif
 }
 
-- 
cgit v1.1


From f4e342c87776884f0309942a3880ca7e835239f9 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Thu, 5 Nov 2015 13:56:35 -0500
Subject: x86/mm: Skip the hypervisor range when walking PGD

The range between 0xffff800000000000 and 0xffff87ffffffffff is reserved
for hypervisor and therefore we should not try to follow PGD's indexes
corresponding to those addresses.

While this has always been a problem, with the new W+X warning
mechanism ptdump_walk_pgd_level_core() can now be called during boot,
causing a PV Xen guest to crash.

[ tglx: Replaced the macro with a readable inline ]

Fixes: e1a58320a38d "x86/mm: Warn on W^X mappings"
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: xen-devel@lists.xen.org
Link: http://lkml.kernel.org/r/1446749795-27764-1-git-send-email-boris.ostrovsky@oracle.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/dump_pagetables.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 1bf417e..a035c2a 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -358,6 +358,21 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 #define pgd_none(a)  pud_none(__pud(pgd_val(a)))
 #endif
 
+#ifdef CONFIG_X86_64
+static inline bool is_hypervisor_range(int idx)
+{
+	/*
+	 * ffff800000000000 - ffff87ffffffffff is reserved for
+	 * the hypervisor.
+	 */
+	return paravirt_enabled() &&
+		(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
+		(idx < pgd_index(__PAGE_OFFSET));
+}
+#else
+static inline bool is_hypervisor_range(int idx) { return false; }
+#endif
+
 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 				       bool checkwx)
 {
@@ -381,7 +396,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
 	for (i = 0; i < PTRS_PER_PGD; i++) {
 		st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
-		if (!pgd_none(*start)) {
+		if (!pgd_none(*start) && !is_hypervisor_range(i)) {
 			if (pgd_large(*start) || !pgd_present(*start)) {
 				prot = pgd_flags(*start);
 				note_page(m, &st, __pgprot(prot), 1);
-- 
cgit v1.1


From 68accac392d859d24adcf1be3a90e41f978bd54c Mon Sep 17 00:00:00 2001
From: Krzysztof Mazur <krzysiek@podlesie.net>
Date: Fri, 6 Nov 2015 14:18:36 +0100
Subject: x86/setup: Fix low identity map for >= 2GB kernel range

The commit f5f3497cad8c extended the low identity mapping. However, if
the kernel uses more than 2 GB (VMSPLIT_2G_OPT or VMSPLIT_1G memory
split), the normal memory mapping is overwritten by the low identity
mapping causing a crash. To avoid overwritting, limit the low identity
map to cover only memory before kernel range (PAGE_OFFSET).

Fixes: f5f3497cad8c "x86/setup: Extend low identity map to cover whole kernel range
Signed-off-by: Krzysztof Mazur <krzysiek@podlesie.net>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Matt Fleming <matt.fleming@intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1446815916-22105-1-git-send-email-krzysiek@podlesie.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a1e4da9..29db25f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1188,7 +1188,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	clone_pgd_range(initial_page_table,
 			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-			KERNEL_PGD_PTRS);
+			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 #endif
 
 	tboot_probe();
-- 
cgit v1.1


From 04633df0c43d710e5f696b06539c100898678235 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 5 Nov 2015 16:57:56 +0100
Subject: x86/cpu: Call verify_cpu() after having entered long mode too

When we get loaded by a 64-bit bootloader, kernel entry point is
startup_64 in head_64.S. We don't trust any and all bootloaders because
some will fiddle with CPU configuration so we go ahead and massage each
CPU into sanity again.

For example, some dell BIOSes have this XD disable feature which set
IA32_MISC_ENABLE[34] and disable NX. This might be some dumb workaround
for other OSes but Linux sure doesn't need it.

A similar thing is present in the Surface 3 firmware - see
https://bugzilla.kernel.org/show_bug.cgi?id=106051 - which sets this bit
only on the BSP:

  # rdmsr -a 0x1a0
  400850089
  850089
  850089
  850089

I know, right?!

There's not even an off switch in there.

So fix all those cases by sanitizing the 64-bit entry point too. For
that, make verify_cpu() callable in 64-bit mode also.

Requested-and-debugged-by: "H. Peter Anvin" <hpa@zytor.com>
Reported-and-tested-by: Bastien Nocera <bugzilla@hadess.net>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1446739076-21303-1-git-send-email-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head_64.S    |  8 ++++++++
 arch/x86/kernel/verify_cpu.S | 12 +++++++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d40ca8..ffdc0e8 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -65,6 +65,9 @@ startup_64:
 	 * tables and then reload them.
 	 */
 
+	/* Sanitize CPU configuration */
+	call verify_cpu
+
 	/*
 	 * Compute the delta between the address I am compiled to run at and the
 	 * address I am actually running at.
@@ -174,6 +177,9 @@ ENTRY(secondary_startup_64)
 	 * after the boot processor executes this code.
 	 */
 
+	/* Sanitize CPU configuration */
+	call verify_cpu
+
 	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
 1:
 
@@ -288,6 +294,8 @@ ENTRY(secondary_startup_64)
 	pushq	%rax		# target address in negative space
 	lretq
 
+#include "verify_cpu.S"
+
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index b9242ba..4cf401f 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -34,10 +34,11 @@
 #include <asm/msr-index.h>
 
 verify_cpu:
-	pushfl				# Save caller passed flags
-	pushl	$0			# Kill any dangerous flags
-	popfl
+	pushf				# Save caller passed flags
+	push	$0			# Kill any dangerous flags
+	popf
 
+#ifndef __x86_64__
 	pushfl				# standard way to check for cpuid
 	popl	%eax
 	movl	%eax,%ebx
@@ -48,6 +49,7 @@ verify_cpu:
 	popl	%eax
 	cmpl	%eax,%ebx
 	jz	verify_cpu_no_longmode	# cpu has no cpuid
+#endif
 
 	movl	$0x0,%eax		# See if cpuid 1 is implemented
 	cpuid
@@ -130,10 +132,10 @@ verify_cpu_sse_test:
 	jmp	verify_cpu_sse_test	# try again
 
 verify_cpu_no_longmode:
-	popfl				# Restore caller passed flags
+	popf				# Restore caller passed flags
 	movl $1,%eax
 	ret
 verify_cpu_sse_ok:
-	popfl				# Restore caller passed flags
+	popf				# Restore caller passed flags
 	xorl %eax, %eax
 	ret
-- 
cgit v1.1