From b568b8601f05a591a7ff09d8ee1cedb5b2e815fe Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 20 Jan 2015 10:21:05 +0800 Subject: x86/xen: Treat SCI interrupt as normal GSI interrupt Currently Xen Domain0 has special treatment for ACPI SCI interrupt, that is initialize irq for ACPI SCI at early stage in a special way as: xen_init_IRQ() ->pci_xen_initial_domain() ->xen_setup_acpi_sci() Allocate and initialize irq for ACPI SCI Function xen_setup_acpi_sci() calls acpi_gsi_to_irq() to get an irq number for ACPI SCI. But unfortunately acpi_gsi_to_irq() depends on IOAPIC irqdomains through following path acpi_gsi_to_irq() ->mp_map_gsi_to_irq() ->mp_map_pin_to_irq() ->check IOAPIC irqdomain For PV domains, it uses Xen event based interrupt manangement and doesn't make uses of native IOAPIC, so no irqdomains created for IOAPIC. This causes Xen domain0 fail to install interrupt handler for ACPI SCI and all ACPI events will be lost. Please refer to: https://lkml.org/lkml/2014/12/19/178 So the fix is to get rid of special treatment for ACPI SCI, just treat ACPI SCI as normal GSI interrupt as: acpi_gsi_to_irq() ->acpi_register_gsi() ->acpi_register_gsi_xen() ->xen_register_gsi() With above change, there's no need for xen_setup_acpi_sci() anymore. The above change also works with bare metal kernel too. Signed-off-by: Jiang Liu Tested-by: Sander Eikelenboom Cc: Tony Luck Cc: xen-devel@lists.xenproject.org Cc: Konrad Rzeszutek Wilk Cc: David Vrabel Cc: Rafael J. Wysocki Cc: Len Brown Cc: Pavel Machek Cc: Bjorn Helgaas Link: http://lkml.kernel.org/r/1421720467-7709-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 26 ++++++++++++------------- arch/x86/pci/xen.c | 47 --------------------------------------------- 2 files changed, 13 insertions(+), 60 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d162636..b9e30da 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -611,20 +611,20 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { - int irq; - - if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - *irqp = gsi; - } else { - mutex_lock(&acpi_ioapic_lock); - irq = mp_map_gsi_to_irq(gsi, - IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); - mutex_unlock(&acpi_ioapic_lock); - if (irq < 0) - return -1; - *irqp = irq; + int rc, irq, trigger, polarity; + + rc = acpi_get_override_irq(gsi, &trigger, &polarity); + if (rc == 0) { + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; + irq = acpi_register_gsi(NULL, gsi, trigger, polarity); + if (irq >= 0) { + *irqp = irq; + return 0; + } } - return 0; + + return -1; } EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index c489ef2..6e5e89c 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -471,52 +471,6 @@ int __init pci_xen_hvm_init(void) } #ifdef CONFIG_XEN_DOM0 -static __init void xen_setup_acpi_sci(void) -{ - int rc; - int trigger, polarity; - int gsi = acpi_sci_override_gsi; - int irq = -1; - int gsi_override = -1; - - if (!gsi) - return; - - rc = acpi_get_override_irq(gsi, &trigger, &polarity); - if (rc) { - printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi" - " sci, rc=%d\n", rc); - return; - } - trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; - polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; - - printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " - "polarity=%d\n", gsi, trigger, polarity); - - /* Before we bind the GSI to a Linux IRQ, check whether - * we need to override it with bus_irq (IRQ) value. Usually for - * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) - * but there are oddballs where the IRQ != GSI: - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) - * which ends up being: gsi_to_irq[9] == 20 - * (which is what acpi_gsi_to_irq ends up calling when starting the - * the ACPI interpreter and keels over since IRQ 9 has not been - * setup as we had setup IRQ 20 for it). - */ - if (acpi_gsi_to_irq(gsi, &irq) == 0) { - /* Use the provided value if it's valid. */ - if (irq >= 0) - gsi_override = irq; - } - - gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity); - printk(KERN_INFO "xen: acpi sci %d\n", gsi); - - return; -} - int __init pci_xen_initial_domain(void) { int irq; @@ -527,7 +481,6 @@ int __init pci_xen_initial_domain(void) x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs; pci_msi_ignore_mask = 1; #endif - xen_setup_acpi_sci(); __acpi_register_gsi = acpi_register_gsi_xen; /* Pre-allocate legacy irqs */ for (irq = 0; irq < nr_legacy_irqs(); irq++) { -- cgit v1.1 From 9889eaeb7c999cae64006bb98c47f40f412ec875 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 20 Jan 2015 10:21:06 +0800 Subject: ACPI: pci: Do not clear pci_dev->irq in acpi_pci_irq_disable() Xen pciback driver assumes that pci_dev->irq won't change after calling pci_disable_device(). But commit cffe0a2b5a34c95a4dadc9ec7132690a5b0f6687 ("x86, irq: Keep balance of IOAPIC pin reference count") frees irq resources and resets pci_dev->irq to zero when pci_disable_device() is called. So this is a hotfix for 3.19 to avoid resetting pci_dev->irq, and another proper fix will be prepared for next merging window. Signed-off-by: Jiang Liu Tested-by: Sander Eikelenboom Cc: Tony Luck Cc: Konrad Rzeszutek Wilk Cc: David Vrabel Cc: Rafael J. Wysocki Cc: Len Brown Link: http://lkml.kernel.org/r/1421720467-7709-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- drivers/acpi/pci_irq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 5277a0e..b1def41 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -512,7 +512,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev) dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin)); if (gsi >= 0) { acpi_unregister_gsi(gsi); - dev->irq = 0; dev->irq_managed = 0; } } -- cgit v1.1 From 8abb850a03a3a8b11a0e92949e5b99d9cc178e35 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 20 Jan 2015 10:21:07 +0800 Subject: x86/xen: Override ACPI IRQ management callback __acpi_unregister_gsi Xen overrides __acpi_register_gsi and leaves __acpi_unregister_gsi as is. That means, an IRQ allocated by acpi_register_gsi_xen_hvm() or acpi_register_gsi_xen() will be freed by acpi_unregister_gsi_ioapic(), which may cause undesired effects. So override __acpi_unregister_gsi to NULL for safety. Signed-off-by: Jiang Liu Tested-by: Sander Eikelenboom Cc: Tony Luck Cc: xen-devel@lists.xenproject.org Cc: Konrad Rzeszutek Wilk Cc: David Vrabel Cc: Bjorn Helgaas Cc: Graeme Gregory Cc: Lv Zheng Link: http://lkml.kernel.org/r/1421720467-7709-4-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/acpi.h | 1 + arch/x86/pci/xen.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 0ab4f9f..3a45668 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -50,6 +50,7 @@ void acpi_pic_sci_set_trigger(unsigned int, u16); extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity); +extern void (*__acpi_unregister_gsi)(u32 gsi); static inline void disable_acpi(void) { diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 6e5e89c..9098d88 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -458,6 +458,7 @@ int __init pci_xen_hvm_init(void) * just how GSIs get registered. */ __acpi_register_gsi = acpi_register_gsi_xen_hvm; + __acpi_unregister_gsi = NULL; #endif #ifdef CONFIG_PCI_MSI @@ -482,6 +483,7 @@ int __init pci_xen_initial_domain(void) pci_msi_ignore_mask = 1; #endif __acpi_register_gsi = acpi_register_gsi_xen; + __acpi_unregister_gsi = NULL; /* Pre-allocate legacy irqs */ for (irq = 0; irq < nr_legacy_irqs(); irq++) { int trigger, polarity; -- cgit v1.1 From f285f4a21c3253887caceed493089ece17579d59 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 15 Jan 2015 16:51:46 -0800 Subject: x86, boot: Skip relocs when load address unchanged On 64-bit, relocation is not required unless the load address gets changed. Without this, relocations do unexpected things when the kernel is above 4G. Reported-by: Baoquan He Signed-off-by: Kees Cook Tested-by: Thomas D. Cc: Vivek Goyal Cc: Jan Beulich Cc: Junjie Mao Cc: Andi Kleen Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20150116005146.GA4212@www.outflux.net Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/misc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index dcc1c53..a950864 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -373,6 +373,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, unsigned long output_len, unsigned long run_size) { + unsigned char *output_orig = output; + real_mode = rmode; sanitize_boot_params(real_mode); @@ -421,7 +423,12 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... "); decompress(input_data, input_len, NULL, NULL, output, NULL, error); parse_elf(output); - handle_relocations(output, output_len); + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if kASLR has chosen a different load address. + */ + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) + handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); return output; } -- cgit v1.1 From 4a0d3107d6b19125f21172c2b7d95f9c30ecaf6f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 16 Jan 2015 15:47:07 +0000 Subject: x86, irq: Properly tag virtualization entry in /proc/interrupts The mis-naming likely was a copy-and-paste effect. Signed-off-by: Jan Beulich Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/54B9408B0200007800055E8B@mail.emea.novell.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 6307a0f..705ef8d 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -127,7 +127,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Machine check polls\n"); #endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) - seq_printf(p, "%*s: ", prec, "THR"); + seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); seq_puts(p, " Hypervisor callback interrupts\n"); -- cgit v1.1 From 9d34cfdf47963905d792ae9c000efa522739abe4 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:15:45 +0100 Subject: x86: Don't rely on VMWare emulating PAT MSR correctly VMWare seems not to emulate the PAT MSR correctly: reaeding MSR_IA32_CR_PAT returns 0 even after writing another value to it. Commit bd809af16e3ab triggers this VMWare bug when the kernel is booted as a VMWare guest. Detect this bug and don't use the read value if it is 0. Fixes: bd809af16e3ab "x86: Enable PAT to use cache mode translation tables" Reported-and-tested-by: Jongman Heo Acked-by: Alok N Kataria Signed-off-by: Juergen Gross Link: http://lkml.kernel.org/r/1421039745-14335-1-git-send-email-jgross@suse.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/pat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index edf299c..7ac6869 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -234,8 +234,13 @@ void pat_init(void) PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); /* Boot CPU check */ - if (!boot_pat_state) + if (!boot_pat_state) { rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); + if (!boot_pat_state) { + pat_disable("PAT read returns always zero, disabled."); + return; + } + } wrmsrl(MSR_IA32_CR_PAT, pat); -- cgit v1.1 From 32c6590d126836a062b3140ed52d898507987017 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 12 Jan 2015 16:26:02 -0800 Subject: x86, hyperv: Mark the Hyper-V clocksource as being continuous The Hyper-V clocksource is continuous; mark it accordingly. Signed-off-by: K. Y. Srinivasan Acked-by: jasowang@redhat.com Cc: gregkh@linuxfoundation.org Cc: devel@linuxdriverproject.org Cc: olaf@aepfle.de Cc: apw@canonical.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1421108762-3331-1-git-send-email-kys@microsoft.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mshyperv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index a450373..939155f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -107,6 +107,7 @@ static struct clocksource hyperv_cs = { .rating = 400, /* use this when running on Hyperv*/ .read = read_hv_clock, .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static void __init ms_hyperv_init_platform(void) -- cgit v1.1 From 814564a0a1d2faee11ff9de43245d78cb79c85ac Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 8 Jan 2015 14:30:20 -0800 Subject: x86, mpx: Explicitly disable 32-bit MPX support on 64-bit kernels We had originally planned on submitting MPX support in one patch set. We eventually broke it up in to two pieces for easier review. One of the features that didn't make the first round was supporting 32-bit binaries on 64-bit kernels. Once we split the set up, we never added code to restrict 32-bit binaries from _using_ MPX on 64-bit kernels. The 32-bit bounds tables are a different format than the 64-bit ones. Without this patch, the kernel will try to read a 32-bit binary's tables as if they were the 64-bit version. They will likely be noticed as being invalid rather quickly and the app will get killed, but that's kinda mean. This patch adds an explicit check, and will make a 64-bit kernel essentially behave as if it has no MPX support when called from a 32-bit binary. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Dave Hansen Link: http://lkml.kernel.org/r/20150108223020.9E9AA511@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/mpx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 67ebf57..c439ec4 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -349,6 +349,12 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) return MPX_INVALID_BOUNDS_DIR; /* + * 32-bit binaries on 64-bit kernels are currently + * unsupported. + */ + if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32)) + return MPX_INVALID_BOUNDS_DIR; + /* * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ -- cgit v1.1 From c922228efeeefa32e57f875764bfa6ca8053a68a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 8 Jan 2015 14:30:21 -0800 Subject: x86, mpx: Fix potential performance issue on unmaps The 3.19 merge window saw some TLB modifications merged which caused a performance regression. They were fixed in commit 045bbb9fa. Once that fix was applied, I also noticed that there was a small but intermittent regression still present. It was not present consistently enough to bisect reliably, but I'm fairly confident that it came from (my own) MPX patches. The source was reading a relatively unused field in the mm_struct via arch_unmap. I also noted that this code was in the main instruction flow of do_munmap() and probably had more icache impact than we want. This patch does two things: 1. Adds a static (via Kconfig) and dynamic (via cpuid) check for MPX with cpu_feature_enabled(). This keeps us from reading that cacheline in the mm and trades it for a check of the global CPUID variables at least on CPUs without MPX. 2. Adds an unlikely() to ensure that the MPX call ends up out of the main instruction flow in do_munmap(). I've added a detailed comment about why this was done and why we want it even on systems where MPX is present. Signed-off-by: Dave Hansen Cc: luto@amacapital.net Cc: Dave Hansen Link: http://lkml.kernel.org/r/20150108223021.AEEAB987@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mmu_context.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 40269a2..4b75d59 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -130,7 +130,25 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long end) { - mpx_notify_unmap(mm, vma, start, end); + /* + * mpx_notify_unmap() goes and reads a rarely-hot + * cacheline in the mm_struct. That can be expensive + * enough to be seen in profiles. + * + * The mpx_notify_unmap() call and its contents have been + * observed to affect munmap() performance on hardware + * where MPX is not present. + * + * The unlikely() optimizes for the fast case: no MPX + * in the CPU, or no MPX use in the process. Even if + * we get this wrong (in the unlikely event that MPX + * is widely enabled on some system) the overhead of + * MPX itself (reading bounds tables) is expected to + * overwhelm the overhead of getting this unlikely() + * consistently wrong. + */ + if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) + mpx_notify_unmap(mm, vma, start, end); } #endif /* _ASM_X86_MMU_CONTEXT_H */ -- cgit v1.1 From e9d1b4f3c60997fe197bf0243cb4a41a44387a88 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 8 Jan 2015 14:30:22 -0800 Subject: x86, mpx: Strictly enforce empty prctl() args Description from Michael Kerrisk. He suggested an identical patch to one I had already coded up and tested. commit fe3d197f8431 "x86, mpx: On-demand kernel allocation of bounds tables" added two new prctl() operations, PR_MPX_ENABLE_MANAGEMENT and PR_MPX_DISABLE_MANAGEMENT. However, no checks were included to ensure that unused arguments are zero, as is done in many existing prctl()s and as should be done for all new prctl()s. This patch adds the required checks. Suggested-by: Andy Lutomirski Suggested-by: Michael Kerrisk Signed-off-by: Dave Hansen Cc: Dave Hansen Link: http://lkml.kernel.org/r/20150108223022.7F56FD13@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- kernel/sys.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index a8c9f5a..ea9c881 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2210,9 +2210,13 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, up_write(&me->mm->mmap_sem); break; case PR_MPX_ENABLE_MANAGEMENT: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; error = MPX_ENABLE_MANAGEMENT(me); break; case PR_MPX_DISABLE_MANAGEMENT: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; error = MPX_DISABLE_MANAGEMENT(me); break; default: -- cgit v1.1 From e30ab185c490e9a9381385529e0fd32f0a399495 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Jan 2015 11:27:58 -0800 Subject: x86, tls, ldt: Stop checking lm in LDT_empty 32-bit programs don't have an lm bit in their ABI, so they can't reliably cause LDT_empty to return true without resorting to memset. They shouldn't need to do this. This should fix a longstanding, if minor, issue in all 64-bit kernels as well as a potential regression in the TLS hardening code. Fixes: 41bdc78544b8 x86/tls: Validate TLS entries to protect espfix Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/72a059de55e86ad5e2935c80aa91880ddf19d07c.1421954363.git.luto@amacapital.net Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 50d033a..fc237fd 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; } -#define _LDT_empty(info) \ +/* This intentionally ignores lm, since 32-bit apps don't have that field. */ +#define LDT_empty(info) \ ((info)->base_addr == 0 && \ (info)->limit == 0 && \ (info)->contents == 0 && \ @@ -261,12 +262,6 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) (info)->seg_not_present == 1 && \ (info)->useable == 0) -#ifdef CONFIG_X86_64 -#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) -#else -#define LDT_empty(info) (_LDT_empty(info)) -#endif - static inline void clear_LDT(void) { set_ldt(NULL, 0); -- cgit v1.1 From 3669ef9fa7d35f573ec9c0e0341b29251c2734a7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Jan 2015 11:27:59 -0800 Subject: x86, tls: Interpret an all-zero struct user_desc as "no segment" The Witcher 2 did something like this to allocate a TLS segment index: struct user_desc u_info; bzero(&u_info, sizeof(u_info)); u_info.entry_number = (uint32_t)-1; syscall(SYS_set_thread_area, &u_info); Strictly speaking, this code was never correct. It should have set read_exec_only and seg_not_present to 1 to indicate that it wanted to find a free slot without putting anything there, or it should have put something sensible in the TLS slot if it wanted to allocate a TLS entry for real. The actual effect of this code was to allocate a bogus segment that could be used to exploit espfix. The set_thread_area hardening patches changed the behavior, causing set_thread_area to return -EINVAL and crashing the game. This changes set_thread_area to interpret this as a request to find a free slot and to leave it empty, which isn't *quite* what the game expects but should be close enough to keep it working. In particular, using the code above to allocate two segments will allocate the same segment both times. According to FrostbittenKing on Github, this fixes The Witcher 2. If this somehow still causes problems, we could instead allocate a limit==0 32-bit data segment, but that seems rather ugly to me. Fixes: 41bdc78544b8 x86/tls: Validate TLS entries to protect espfix Signed-off-by: Andy Lutomirski Cc: stable@vger.kernel.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/0cb251abe1ff0958b8e468a9a9a905b80ae3a746.1421954363.git.luto@amacapital.net Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h | 13 +++++++++++++ arch/x86/kernel/tls.c | 25 +++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index fc237fd..a94b82e 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -262,6 +262,19 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) (info)->seg_not_present == 1 && \ (info)->useable == 0) +/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ +static inline bool LDT_zero(const struct user_desc *info) +{ + return (info->base_addr == 0 && + info->limit == 0 && + info->contents == 0 && + info->read_exec_only == 0 && + info->seg_32bit == 0 && + info->limit_in_pages == 0 && + info->seg_not_present == 0 && + info->useable == 0); +} + static inline void clear_LDT(void) { set_ldt(NULL, 0); diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 4e942f3..7fc5e84 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -29,7 +29,28 @@ static int get_free_idx(void) static bool tls_desc_okay(const struct user_desc *info) { - if (LDT_empty(info)) + /* + * For historical reasons (i.e. no one ever documented how any + * of the segmentation APIs work), user programs can and do + * assume that a struct user_desc that's all zeros except for + * entry_number means "no segment at all". This never actually + * worked. In fact, up to Linux 3.19, a struct user_desc like + * this would create a 16-bit read-write segment with base and + * limit both equal to zero. + * + * That was close enough to "no segment at all" until we + * hardened this function to disallow 16-bit TLS segments. Fix + * it up by interpreting these zeroed segments the way that they + * were almost certainly intended to be interpreted. + * + * The correct way to ask for "no segment at all" is to specify + * a user_desc that satisfies LDT_empty. To keep everything + * working, we accept both. + * + * Note that there's a similar kludge in modify_ldt -- look at + * the distinction between modes 1 and 0x11. + */ + if (LDT_empty(info) || LDT_zero(info)) return true; /* @@ -71,7 +92,7 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info)) + if (LDT_empty(info) || LDT_zero(info)) desc->a = desc->b = 0; else fill_ldt(desc, info); -- cgit v1.1 From 31bb7723706ba9660504a6c3903ea46198f98fd1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 22 Jan 2015 12:43:17 +0100 Subject: x86, mm: Change cachemode exports to non-gpl Commit 281d4078bec3 ("x86: Make page cache mode a real type") introduced the symbols __cachemode2pte_tbl and __pte2cachemode_tbl and exported them via EXPORT_SYMBOL_GPL. The exports are part of a replacement of code which has been EXPORT_SYMBOL before these changes resulting in build breakage of out-of-tree non-gpl modules. Change EXPORT_SYMBOL_GPL to EXPORT-SYMBOL for these two symbols. Fixes: 281d4078bec3 "x86: Make page cache mode a real type" Reported-and-tested-by: Steven Noonan Signed-off-by: Juergen Gross Reviewed-by: Toshi Kani Link: http://lkml.kernel.org/r/1421926997-28615-1-git-send-email-jgross@suse.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 08a7d31..079c3b6 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -43,7 +43,7 @@ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, }; -EXPORT_SYMBOL_GPL(__cachemode2pte_tbl); +EXPORT_SYMBOL(__cachemode2pte_tbl); uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, @@ -54,7 +54,7 @@ uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -EXPORT_SYMBOL_GPL(__pte2cachemode_tbl); +EXPORT_SYMBOL(__pte2cachemode_tbl); static unsigned long __initdata pgt_buf_start; static unsigned long __initdata pgt_buf_end; -- cgit v1.1 From 38a1dfda8e77d7ba74c94d06d8bc41ba98a4bc8c Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Thu, 22 Jan 2015 22:58:49 +0000 Subject: x86/apic: Re-enable PCI_MSI support for non-SMP X86_32 Commit 0dbc6078c06bc0 ('x86, build, pci: Fix PCI_MSI build on !SMP') introduced the dependency that X86_UP_APIC is only available when PCI_MSI is false. This effectively prevents PCI_MSI support on 32bit UP systems because it disables both APIC and IO-APIC. But APIC support is architecturally required for PCI_MSI. The intention of the patch was to enforce APIC support when PCI_MSI is enabled, but failed to do so. Remove the !PCI_MSI dependency from X86_UP_APIC and enforce X86_UP_APIC when PCI_MSI support is enabled on 32bit UP systems. [ tglx: Massaged changelog ] Fixes 0dbc6078c06bc0 'x86, build, pci: Fix PCI_MSI build on !SMP' Signed-off-by: Bryan O'Donoghue Suggested-by: Thomas Gleixner Reviewed-by: Andy Shevchenko Cc: Thomas Petazzoni Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1421967529-9037-1-git-send-email-pure.logic@nexus-software.ie Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ba397bd..0dc9d01 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -857,7 +857,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI + depends on X86_32 && !SMP && !X86_32_NON_STANDARD ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -868,6 +868,10 @@ config X86_UP_APIC performance counters), and the NMI watchdog which detects hard lockups. +config X86_UP_APIC_MSI + def_bool y + select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI + config X86_UP_IOAPIC bool "IO-APIC support on uniprocessors" depends on X86_UP_APIC -- cgit v1.1 From 520452172e6b318f3a8bd9d4fe1e25066393de25 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Tue, 9 Dec 2014 01:27:50 -0500 Subject: x86/tsc: Change Fast TSC calibration failed from error to info Many users see this message when booting without knowning that it is of no importance and that TSC calibration may have succeeded by another way. As explained by Paul Bolle in http://lkml.kernel.org/r/1348488259.1436.22.camel@x61.thuisdomein "Fast TSC calibration failed" should not be considered as an error since other calibration methods are being tried afterward. At most, those send a warning if they fail (not an error). So let's change the message from error to warning. [ tglx: Make if pr_info. It's really not important at all ] Fixes: c767a54ba065 x86/debug: Add KERN_ to bare printks, convert printks to pr_ Signed-off-by: Alexandre Demers Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1418106470-6906-1-git-send-email-alexandre.f.demers@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b7e50bb..5054497 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -617,7 +617,7 @@ static unsigned long quick_pit_calibrate(void) goto success; } } - pr_err("Fast TSC calibration failed\n"); + pr_info("Fast TSC calibration failed\n"); return 0; success: -- cgit v1.1