From b568b8601f05a591a7ff09d8ee1cedb5b2e815fe Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Tue, 20 Jan 2015 10:21:05 +0800
Subject: x86/xen: Treat SCI interrupt as normal GSI interrupt

Currently Xen Domain0 has special treatment for ACPI SCI interrupt,
that is initialize irq for ACPI SCI at early stage in a special way as:
xen_init_IRQ()
	->pci_xen_initial_domain()
		->xen_setup_acpi_sci()
			Allocate and initialize irq for ACPI SCI

Function xen_setup_acpi_sci() calls acpi_gsi_to_irq() to get an irq
number for ACPI SCI. But unfortunately acpi_gsi_to_irq() depends on
IOAPIC irqdomains through following path
acpi_gsi_to_irq()
	->mp_map_gsi_to_irq()
		->mp_map_pin_to_irq()
			->check IOAPIC irqdomain

For PV domains, it uses Xen event based interrupt manangement and
doesn't make uses of native IOAPIC, so no irqdomains created for IOAPIC.
This causes Xen domain0 fail to install interrupt handler for ACPI SCI
and all ACPI events will be lost. Please refer to:
https://lkml.org/lkml/2014/12/19/178

So the fix is to get rid of special treatment for ACPI SCI, just treat
ACPI SCI as normal GSI interrupt as:
acpi_gsi_to_irq()
	->acpi_register_gsi()
		->acpi_register_gsi_xen()
			->xen_register_gsi()

With above change, there's no need for xen_setup_acpi_sci() anymore.
The above change also works with bare metal kernel too.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Cc: Tony Luck <tony.luck@intel.com>
Cc: xen-devel@lists.xenproject.org
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Len Brown <len.brown@intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Link: http://lkml.kernel.org/r/1421720467-7709-2-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/boot.c | 26 ++++++++++++-------------
 arch/x86/pci/xen.c          | 47 ---------------------------------------------
 2 files changed, 13 insertions(+), 60 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d162636..b9e30da 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -611,20 +611,20 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
 
 int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
-	int irq;
-
-	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		*irqp = gsi;
-	} else {
-		mutex_lock(&acpi_ioapic_lock);
-		irq = mp_map_gsi_to_irq(gsi,
-					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
-		mutex_unlock(&acpi_ioapic_lock);
-		if (irq < 0)
-			return -1;
-		*irqp = irq;
+	int rc, irq, trigger, polarity;
+
+	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
+	if (rc == 0) {
+		trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+		polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
+		irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
+		if (irq >= 0) {
+			*irqp = irq;
+			return 0;
+		}
 	}
-	return 0;
+
+	return -1;
 }
 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
 
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index c489ef2..6e5e89c 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -471,52 +471,6 @@ int __init pci_xen_hvm_init(void)
 }
 
 #ifdef CONFIG_XEN_DOM0
-static __init void xen_setup_acpi_sci(void)
-{
-	int rc;
-	int trigger, polarity;
-	int gsi = acpi_sci_override_gsi;
-	int irq = -1;
-	int gsi_override = -1;
-
-	if (!gsi)
-		return;
-
-	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
-	if (rc) {
-		printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
-				" sci, rc=%d\n", rc);
-		return;
-	}
-	trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
-	polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
-
-	printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
-			"polarity=%d\n", gsi, trigger, polarity);
-
-	/* Before we bind the GSI to a Linux IRQ, check whether
-	 * we need to override it with bus_irq (IRQ) value. Usually for
-	 * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
-	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
-	 * but there are oddballs where the IRQ != GSI:
-	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
-	 * which ends up being: gsi_to_irq[9] == 20
-	 * (which is what acpi_gsi_to_irq ends up calling when starting the
-	 * the ACPI interpreter and keels over since IRQ 9 has not been
-	 * setup as we had setup IRQ 20 for it).
-	 */
-	if (acpi_gsi_to_irq(gsi, &irq) == 0) {
-		/* Use the provided value if it's valid. */
-		if (irq >= 0)
-			gsi_override = irq;
-	}
-
-	gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);
-	printk(KERN_INFO "xen: acpi sci %d\n", gsi);
-
-	return;
-}
-
 int __init pci_xen_initial_domain(void)
 {
 	int irq;
@@ -527,7 +481,6 @@ int __init pci_xen_initial_domain(void)
 	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
 	pci_msi_ignore_mask = 1;
 #endif
-	xen_setup_acpi_sci();
 	__acpi_register_gsi = acpi_register_gsi_xen;
 	/* Pre-allocate legacy irqs */
 	for (irq = 0; irq < nr_legacy_irqs(); irq++) {
-- 
cgit v1.1


From 9889eaeb7c999cae64006bb98c47f40f412ec875 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Tue, 20 Jan 2015 10:21:06 +0800
Subject: ACPI: pci: Do not clear pci_dev->irq in acpi_pci_irq_disable()

Xen pciback driver assumes that pci_dev->irq won't change after calling
pci_disable_device(). But commit cffe0a2b5a34c95a4dadc9ec7132690a5b0f6687
("x86, irq: Keep balance of IOAPIC pin reference count") frees irq
resources and resets pci_dev->irq to zero when pci_disable_device() is
called.

So this is a hotfix for 3.19 to avoid resetting pci_dev->irq, and
another proper fix will be prepared for next merging window.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Link: http://lkml.kernel.org/r/1421720467-7709-3-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/pci_irq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 5277a0e..b1def41 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -512,7 +512,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
 	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
-		dev->irq = 0;
 		dev->irq_managed = 0;
 	}
 }
-- 
cgit v1.1


From 8abb850a03a3a8b11a0e92949e5b99d9cc178e35 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Tue, 20 Jan 2015 10:21:07 +0800
Subject: x86/xen: Override ACPI IRQ management callback __acpi_unregister_gsi

Xen overrides __acpi_register_gsi and leaves __acpi_unregister_gsi as is.
That means, an IRQ allocated by acpi_register_gsi_xen_hvm() or
acpi_register_gsi_xen() will be freed by acpi_unregister_gsi_ioapic(),
which may cause undesired effects. So override __acpi_unregister_gsi to
NULL for safety.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Cc: Tony Luck <tony.luck@intel.com>
Cc: xen-devel@lists.xenproject.org
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Graeme Gregory <graeme.gregory@linaro.org>
Cc: Lv Zheng <lv.zheng@intel.com>
Link: http://lkml.kernel.org/r/1421720467-7709-4-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/acpi.h | 1 +
 arch/x86/pci/xen.c          | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 0ab4f9f..3a45668 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -50,6 +50,7 @@ void acpi_pic_sci_set_trigger(unsigned int, u16);
 
 extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
 				  int trigger, int polarity);
+extern void (*__acpi_unregister_gsi)(u32 gsi);
 
 static inline void disable_acpi(void)
 {
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 6e5e89c..9098d88 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -458,6 +458,7 @@ int __init pci_xen_hvm_init(void)
 	 * just how GSIs get registered.
 	 */
 	__acpi_register_gsi = acpi_register_gsi_xen_hvm;
+	__acpi_unregister_gsi = NULL;
 #endif
 
 #ifdef CONFIG_PCI_MSI
@@ -482,6 +483,7 @@ int __init pci_xen_initial_domain(void)
 	pci_msi_ignore_mask = 1;
 #endif
 	__acpi_register_gsi = acpi_register_gsi_xen;
+	__acpi_unregister_gsi = NULL;
 	/* Pre-allocate legacy irqs */
 	for (irq = 0; irq < nr_legacy_irqs(); irq++) {
 		int trigger, polarity;
-- 
cgit v1.1


From f285f4a21c3253887caceed493089ece17579d59 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 15 Jan 2015 16:51:46 -0800
Subject: x86, boot: Skip relocs when load address unchanged

On 64-bit, relocation is not required unless the load address gets
changed. Without this, relocations do unexpected things when the kernel
is above 4G.

Reported-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Thomas D. <whissi@whissi.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Jan Beulich <JBeulich@suse.com>
Cc: Junjie Mao <eternal.n08@gmail.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20150116005146.GA4212@www.outflux.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/boot/compressed/misc.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index dcc1c53..a950864 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -373,6 +373,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 				  unsigned long output_len,
 				  unsigned long run_size)
 {
+	unsigned char *output_orig = output;
+
 	real_mode = rmode;
 
 	sanitize_boot_params(real_mode);
@@ -421,7 +423,12 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	debug_putstr("\nDecompressing Linux... ");
 	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
 	parse_elf(output);
-	handle_relocations(output, output_len);
+	/*
+	 * 32-bit always performs relocations. 64-bit relocations are only
+	 * needed if kASLR has chosen a different load address.
+	 */
+	if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
+		handle_relocations(output, output_len);
 	debug_putstr("done.\nBooting the kernel.\n");
 	return output;
 }
-- 
cgit v1.1


From 4a0d3107d6b19125f21172c2b7d95f9c30ecaf6f Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Fri, 16 Jan 2015 15:47:07 +0000
Subject: x86, irq: Properly tag virtualization entry in /proc/interrupts

The mis-naming likely was a copy-and-paste effect.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/54B9408B0200007800055E8B@mail.emea.novell.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 6307a0f..705ef8d 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -127,7 +127,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_puts(p, "  Machine check polls\n");
 #endif
 #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
-	seq_printf(p, "%*s: ", prec, "THR");
+	seq_printf(p, "%*s: ", prec, "HYP");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
 	seq_puts(p, "  Hypervisor callback interrupts\n");
-- 
cgit v1.1


From 9d34cfdf47963905d792ae9c000efa522739abe4 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 12 Jan 2015 06:15:45 +0100
Subject: x86: Don't rely on VMWare emulating PAT MSR correctly

VMWare seems not to emulate the PAT MSR correctly: reaeding
MSR_IA32_CR_PAT returns 0 even after writing another value to it.

Commit bd809af16e3ab triggers this VMWare bug when the kernel is
booted as a VMWare guest.

Detect this bug and don't use the read value if it is 0.

Fixes: bd809af16e3ab "x86: Enable PAT to use cache mode translation tables"
Reported-and-tested-by: Jongman Heo <jongman.heo@samsung.com>
Acked-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Link: http://lkml.kernel.org/r/1421039745-14335-1-git-send-email-jgross@suse.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pat.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index edf299c..7ac6869 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -234,8 +234,13 @@ void pat_init(void)
 	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
 
 	/* Boot CPU check */
-	if (!boot_pat_state)
+	if (!boot_pat_state) {
 		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
+		if (!boot_pat_state) {
+			pat_disable("PAT read returns always zero, disabled.");
+			return;
+		}
+	}
 
 	wrmsrl(MSR_IA32_CR_PAT, pat);
 
-- 
cgit v1.1


From 32c6590d126836a062b3140ed52d898507987017 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Mon, 12 Jan 2015 16:26:02 -0800
Subject: x86, hyperv: Mark the Hyper-V clocksource as being continuous

The Hyper-V clocksource is continuous; mark it accordingly.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Acked-by: jasowang@redhat.com
Cc: gregkh@linuxfoundation.org
Cc: devel@linuxdriverproject.org
Cc: olaf@aepfle.de
Cc: apw@canonical.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1421108762-3331-1-git-send-email-kys@microsoft.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mshyperv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index a450373..939155f 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -107,6 +107,7 @@ static struct clocksource hyperv_cs = {
 	.rating		= 400, /* use this when running on Hyperv*/
 	.read		= read_hv_clock,
 	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static void __init ms_hyperv_init_platform(void)
-- 
cgit v1.1


From 814564a0a1d2faee11ff9de43245d78cb79c85ac Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 8 Jan 2015 14:30:20 -0800
Subject: x86, mpx: Explicitly disable 32-bit MPX support on 64-bit kernels

We had originally planned on submitting MPX support in one patch
set.  We eventually broke it up in to two pieces for easier
review.  One of the features that didn't make the first round
was supporting 32-bit binaries on 64-bit kernels.

Once we split the set up, we never added code to restrict 32-bit
binaries from _using_ MPX on 64-bit kernels.

The 32-bit bounds tables are a different format than the 64-bit
ones.  Without this patch, the kernel will try to read a 32-bit
binary's tables as if they were the 64-bit version.  They will
likely be noticed as being invalid rather quickly and the app
will get killed, but that's kinda mean.

This patch adds an explicit check, and will make a 64-bit kernel
essentially behave as if it has no MPX support when called from
a 32-bit binary.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Hansen <dave@sr71.net>
Link: http://lkml.kernel.org/r/20150108223020.9E9AA511@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/mpx.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 67ebf57..c439ec4 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -349,6 +349,12 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk)
 		return MPX_INVALID_BOUNDS_DIR;
 
 	/*
+	 * 32-bit binaries on 64-bit kernels are currently
+	 * unsupported.
+	 */
+	if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32))
+		return MPX_INVALID_BOUNDS_DIR;
+	/*
 	 * The bounds directory pointer is stored in a register
 	 * only accessible if we first do an xsave.
 	 */
-- 
cgit v1.1


From c922228efeeefa32e57f875764bfa6ca8053a68a Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 8 Jan 2015 14:30:21 -0800
Subject: x86, mpx: Fix potential performance issue on unmaps

The 3.19 merge window saw some TLB modifications merged which caused a
performance regression. They were fixed in commit 045bbb9fa.

Once that fix was applied, I also noticed that there was a small
but intermittent regression still present.  It was not present
consistently enough to bisect reliably, but I'm fairly confident
that it came from (my own) MPX patches.  The source was reading
a relatively unused field in the mm_struct via arch_unmap.

I also noted that this code was in the main instruction flow of
do_munmap() and probably had more icache impact than we want.

This patch does two things:
1. Adds a static (via Kconfig) and dynamic (via cpuid) check
   for MPX with cpu_feature_enabled().  This keeps us from
   reading that cacheline in the mm and trades it for a check
   of the global CPUID variables at least on CPUs without MPX.
2. Adds an unlikely() to ensure that the MPX call ends up out
   of the main instruction flow in do_munmap().  I've added
   a detailed comment about why this was done and why we want
   it even on systems where MPX is present.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: luto@amacapital.net
Cc: Dave Hansen <dave@sr71.net>
Link: http://lkml.kernel.org/r/20150108223021.AEEAB987@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/mmu_context.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 40269a2..4b75d59 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -130,7 +130,25 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
 static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
 			      unsigned long start, unsigned long end)
 {
-	mpx_notify_unmap(mm, vma, start, end);
+	/*
+	 * mpx_notify_unmap() goes and reads a rarely-hot
+	 * cacheline in the mm_struct.  That can be expensive
+	 * enough to be seen in profiles.
+	 *
+	 * The mpx_notify_unmap() call and its contents have been
+	 * observed to affect munmap() performance on hardware
+	 * where MPX is not present.
+	 *
+	 * The unlikely() optimizes for the fast case: no MPX
+	 * in the CPU, or no MPX use in the process.  Even if
+	 * we get this wrong (in the unlikely event that MPX
+	 * is widely enabled on some system) the overhead of
+	 * MPX itself (reading bounds tables) is expected to
+	 * overwhelm the overhead of getting this unlikely()
+	 * consistently wrong.
+	 */
+	if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
+		mpx_notify_unmap(mm, vma, start, end);
 }
 
 #endif /* _ASM_X86_MMU_CONTEXT_H */
-- 
cgit v1.1


From e9d1b4f3c60997fe197bf0243cb4a41a44387a88 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 8 Jan 2015 14:30:22 -0800
Subject: x86, mpx: Strictly enforce empty prctl() args

Description from Michael Kerrisk.  He suggested an identical patch
to one I had already coded up and tested.

commit fe3d197f8431 "x86, mpx: On-demand kernel allocation of bounds
tables" added two new prctl() operations, PR_MPX_ENABLE_MANAGEMENT and
PR_MPX_DISABLE_MANAGEMENT.  However, no checks were included to ensure
that unused arguments are zero, as is done in many existing prctl()s
and as should be done for all new prctl()s. This patch adds the
required checks.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Suggested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Hansen <dave@sr71.net>
Link: http://lkml.kernel.org/r/20150108223022.7F56FD13@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sys.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/sys.c b/kernel/sys.c
index a8c9f5a..ea9c881 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2210,9 +2210,13 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		up_write(&me->mm->mmap_sem);
 		break;
 	case PR_MPX_ENABLE_MANAGEMENT:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
 		error = MPX_ENABLE_MANAGEMENT(me);
 		break;
 	case PR_MPX_DISABLE_MANAGEMENT:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
 		error = MPX_DISABLE_MANAGEMENT(me);
 		break;
 	default:
-- 
cgit v1.1


From e30ab185c490e9a9381385529e0fd32f0a399495 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Thu, 22 Jan 2015 11:27:58 -0800
Subject: x86, tls, ldt: Stop checking lm in LDT_empty

32-bit programs don't have an lm bit in their ABI, so they can't
reliably cause LDT_empty to return true without resorting to memset.
They shouldn't need to do this.

This should fix a longstanding, if minor, issue in all 64-bit kernels
as well as a potential regression in the TLS hardening code.

Fixes: 41bdc78544b8 x86/tls: Validate TLS entries to protect espfix
Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/72a059de55e86ad5e2935c80aa91880ddf19d07c.1421954363.git.luto@amacapital.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/desc.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 50d033a..fc237fd 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
 }
 
-#define _LDT_empty(info)				\
+/* This intentionally ignores lm, since 32-bit apps don't have that field. */
+#define LDT_empty(info)					\
 	((info)->base_addr		== 0	&&	\
 	 (info)->limit			== 0	&&	\
 	 (info)->contents		== 0	&&	\
@@ -261,12 +262,6 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 	 (info)->seg_not_present	== 1	&&	\
 	 (info)->useable		== 0)
 
-#ifdef CONFIG_X86_64
-#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
-#else
-#define LDT_empty(info) (_LDT_empty(info))
-#endif
-
 static inline void clear_LDT(void)
 {
 	set_ldt(NULL, 0);
-- 
cgit v1.1


From 3669ef9fa7d35f573ec9c0e0341b29251c2734a7 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Thu, 22 Jan 2015 11:27:59 -0800
Subject: x86, tls: Interpret an all-zero struct user_desc as "no segment"

The Witcher 2 did something like this to allocate a TLS segment index:

        struct user_desc u_info;
        bzero(&u_info, sizeof(u_info));
        u_info.entry_number = (uint32_t)-1;

        syscall(SYS_set_thread_area, &u_info);

Strictly speaking, this code was never correct.  It should have set
read_exec_only and seg_not_present to 1 to indicate that it wanted
to find a free slot without putting anything there, or it should
have put something sensible in the TLS slot if it wanted to allocate
a TLS entry for real.  The actual effect of this code was to
allocate a bogus segment that could be used to exploit espfix.

The set_thread_area hardening patches changed the behavior, causing
set_thread_area to return -EINVAL and crashing the game.

This changes set_thread_area to interpret this as a request to find
a free slot and to leave it empty, which isn't *quite* what the game
expects but should be close enough to keep it working.  In
particular, using the code above to allocate two segments will
allocate the same segment both times.

According to FrostbittenKing on Github, this fixes The Witcher 2.

If this somehow still causes problems, we could instead allocate
a limit==0 32-bit data segment, but that seems rather ugly to me.

Fixes: 41bdc78544b8 x86/tls: Validate TLS entries to protect espfix
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: stable@vger.kernel.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/0cb251abe1ff0958b8e468a9a9a905b80ae3a746.1421954363.git.luto@amacapital.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/desc.h | 13 +++++++++++++
 arch/x86/kernel/tls.c       | 25 +++++++++++++++++++++++--
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index fc237fd..a94b82e 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -262,6 +262,19 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 	 (info)->seg_not_present	== 1	&&	\
 	 (info)->useable		== 0)
 
+/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
+static inline bool LDT_zero(const struct user_desc *info)
+{
+	return (info->base_addr		== 0 &&
+		info->limit		== 0 &&
+		info->contents		== 0 &&
+		info->read_exec_only	== 0 &&
+		info->seg_32bit		== 0 &&
+		info->limit_in_pages	== 0 &&
+		info->seg_not_present	== 0 &&
+		info->useable		== 0);
+}
+
 static inline void clear_LDT(void)
 {
 	set_ldt(NULL, 0);
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 4e942f3..7fc5e84 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -29,7 +29,28 @@ static int get_free_idx(void)
 
 static bool tls_desc_okay(const struct user_desc *info)
 {
-	if (LDT_empty(info))
+	/*
+	 * For historical reasons (i.e. no one ever documented how any
+	 * of the segmentation APIs work), user programs can and do
+	 * assume that a struct user_desc that's all zeros except for
+	 * entry_number means "no segment at all".  This never actually
+	 * worked.  In fact, up to Linux 3.19, a struct user_desc like
+	 * this would create a 16-bit read-write segment with base and
+	 * limit both equal to zero.
+	 *
+	 * That was close enough to "no segment at all" until we
+	 * hardened this function to disallow 16-bit TLS segments.  Fix
+	 * it up by interpreting these zeroed segments the way that they
+	 * were almost certainly intended to be interpreted.
+	 *
+	 * The correct way to ask for "no segment at all" is to specify
+	 * a user_desc that satisfies LDT_empty.  To keep everything
+	 * working, we accept both.
+	 *
+	 * Note that there's a similar kludge in modify_ldt -- look at
+	 * the distinction between modes 1 and 0x11.
+	 */
+	if (LDT_empty(info) || LDT_zero(info))
 		return true;
 
 	/*
@@ -71,7 +92,7 @@ static void set_tls_desc(struct task_struct *p, int idx,
 	cpu = get_cpu();
 
 	while (n-- > 0) {
-		if (LDT_empty(info))
+		if (LDT_empty(info) || LDT_zero(info))
 			desc->a = desc->b = 0;
 		else
 			fill_ldt(desc, info);
-- 
cgit v1.1


From 31bb7723706ba9660504a6c3903ea46198f98fd1 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 22 Jan 2015 12:43:17 +0100
Subject: x86, mm: Change cachemode exports to non-gpl

Commit 281d4078bec3 ("x86: Make page cache mode a real type")
introduced the symbols __cachemode2pte_tbl and __pte2cachemode_tbl and
exported them via EXPORT_SYMBOL_GPL.  The exports are part of a
replacement of code which has been EXPORT_SYMBOL before these changes
resulting in build breakage of out-of-tree non-gpl modules.

Change EXPORT_SYMBOL_GPL to EXPORT-SYMBOL for these two symbols.

Fixes: 281d4078bec3 "x86: Make page cache mode a real type"
Reported-and-tested-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Toshi Kani <toshi.kani@hp.com>
Link: http://lkml.kernel.org/r/1421926997-28615-1-git-send-email-jgross@suse.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 08a7d31..079c3b6 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -43,7 +43,7 @@ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
 	[_PAGE_CACHE_MODE_WT]		= _PAGE_PCD,
 	[_PAGE_CACHE_MODE_WP]		= _PAGE_PCD,
 };
-EXPORT_SYMBOL_GPL(__cachemode2pte_tbl);
+EXPORT_SYMBOL(__cachemode2pte_tbl);
 uint8_t __pte2cachemode_tbl[8] = {
 	[__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
 	[__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
@@ -54,7 +54,7 @@ uint8_t __pte2cachemode_tbl[8] = {
 	[__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
-EXPORT_SYMBOL_GPL(__pte2cachemode_tbl);
+EXPORT_SYMBOL(__pte2cachemode_tbl);
 
 static unsigned long __initdata pgt_buf_start;
 static unsigned long __initdata pgt_buf_end;
-- 
cgit v1.1


From 38a1dfda8e77d7ba74c94d06d8bc41ba98a4bc8c Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Date: Thu, 22 Jan 2015 22:58:49 +0000
Subject: x86/apic: Re-enable PCI_MSI support for non-SMP X86_32

Commit 0dbc6078c06bc0 ('x86, build, pci: Fix PCI_MSI build on !SMP')
introduced the dependency that X86_UP_APIC is only available when
PCI_MSI is false. This effectively prevents PCI_MSI support on 32bit
UP systems because it disables both APIC and IO-APIC. But APIC support
is architecturally required for PCI_MSI.

The intention of the patch was to enforce APIC support when PCI_MSI is
enabled, but failed to do so.

Remove the !PCI_MSI dependency from X86_UP_APIC and enforce
X86_UP_APIC when PCI_MSI support is enabled on 32bit UP systems.

[ tglx: Massaged changelog ]

Fixes 0dbc6078c06bc0 'x86, build, pci: Fix PCI_MSI build on !SMP'
Signed-off-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1421967529-9037-1-git-send-email-pure.logic@nexus-software.ie
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ba397bd..0dc9d01 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -857,7 +857,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI
+	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
 	---help---
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -868,6 +868,10 @@ config X86_UP_APIC
 	  performance counters), and the NMI watchdog which detects hard
 	  lockups.
 
+config X86_UP_APIC_MSI
+	def_bool y
+	select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
+
 config X86_UP_IOAPIC
 	bool "IO-APIC support on uniprocessors"
 	depends on X86_UP_APIC
-- 
cgit v1.1


From 520452172e6b318f3a8bd9d4fe1e25066393de25 Mon Sep 17 00:00:00 2001
From: Alexandre Demers <alexandre.f.demers@gmail.com>
Date: Tue, 9 Dec 2014 01:27:50 -0500
Subject: x86/tsc: Change Fast TSC calibration failed from error to info

Many users see this message when booting without knowning that it is
of no importance and that TSC calibration may have succeeded by
another way.

As explained by Paul Bolle in
http://lkml.kernel.org/r/1348488259.1436.22.camel@x61.thuisdomein

  "Fast TSC calibration failed" should not be considered as an error
  since other calibration methods are being tried afterward. At most,
  those send a warning if they fail (not an error). So let's change
  the message from error to warning.

[ tglx: Make if pr_info. It's really not important at all ]

Fixes: c767a54ba065 x86/debug: Add KERN_<LEVEL> to bare printks, convert printks to pr_<level>
Signed-off-by: Alexandre Demers <alexandre.f.demers@gmail.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1418106470-6906-1-git-send-email-alexandre.f.demers@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b7e50bb..5054497 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -617,7 +617,7 @@ static unsigned long quick_pit_calibrate(void)
 			goto success;
 		}
 	}
-	pr_err("Fast TSC calibration failed\n");
+	pr_info("Fast TSC calibration failed\n");
 	return 0;
 
 success:
-- 
cgit v1.1