From de53c3786a3ce162a1c815d0c04c766c23ec9c0a Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 5 Jan 2018 22:35:41 +0100 Subject: x86/pti: Unbreak EFI old_memmap EFI_OLD_MEMMAP's efi_call_phys_prolog() calls set_pgd() with swapper PGD that has PAGE_USER set, which makes PTI set NX on it, and therefore EFI can't execute it's code. Fix that by forcefully clearing _PAGE_NX from the PGD (this can't be done by the pgprot API). _PAGE_NX will be automatically reintroduced in efi_call_phys_epilog(), as _set_pgd() will again notice that this is _PAGE_USER, and set _PAGE_NX on it. Tested-by: Dimitri Sivanich Signed-off-by: Jiri Kosina Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Acked-by: Dave Hansen Cc: Andrea Arcangeli Cc: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1801052215460.11852@cbobk.fhfr.pm --- arch/x86/platform/efi/efi_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39c4b35..61975b6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -134,7 +134,9 @@ pgd_t * __init efi_call_phys_prolog(void) pud[j] = *pud_offset(p4d_k, vaddr); } } + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; } + out: __flush_tlb_all(); -- cgit v1.1 From 99c6fa2511d8a683e61468be91b83f85452115fa Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 6 Jan 2018 11:49:23 +0000 Subject: x86/cpufeatures: Add X86_BUG_SPECTRE_V[12] Add the bug bits for spectre v1/2 and force them unconditionally for all cpus. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/common.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 21ac898..1641c2f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -342,5 +342,7 @@ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2d3bd22..372ba3f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (c->x86_vendor != X86_VENDOR_AMD) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + fpu__init_system(c); #ifdef CONFIG_X86_32 -- cgit v1.1 From 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 7 Jan 2018 22:48:01 +0100 Subject: x86/cpu: Implement CPU vulnerabilites sysfs functions Implement the CPU vulnerabilty show functions for meltdown, spectre_v1 and spectre_v2. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Will Deacon Cc: Dave Hansen Cc: Linus Torvalds Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180107214913.177414879@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cd5199d..e23d21a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -89,6 +89,7 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ba0b242..76ad6cb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include #include @@ -60,3 +61,31 @@ void __init check_bugs(void) set_memory_4k((unsigned long)__va(0), 1); #endif } + +#ifdef CONFIG_SYSFS +ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return sprintf(buf, "Not affected\n"); + if (boot_cpu_has(X86_FEATURE_PTI)) + return sprintf(buf, "Mitigation: PTI\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} +#endif -- cgit v1.1 From 262b6b30087246abf09d6275eb0c0dc421bcbe38 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 6 Jan 2018 18:41:14 +0100 Subject: x86/tboot: Unbreak tboot with PTI enabled This is another case similar to what EFI does: create a new set of page tables, map some code at a low address, and jump to it. PTI mistakes this low address for userspace and mistakenly marks it non-executable in an effort to make it unusable for userspace. Undo the poison to allow execution. Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig") Signed-off-by: Dave Hansen Signed-off-by: Andrea Arcangeli Signed-off-by: Thomas Gleixner Cc: Alan Cox Cc: Tim Chen Cc: Jon Masters Cc: Dave Hansen Cc: Andi Kleen Cc: Jeff Law Cc: Paolo Bonzini Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David" Cc: Nick Clifton Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180108102805.GK25546@redhat.com --- arch/x86/kernel/tboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index a4eb279..75869a4 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -127,6 +127,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, p4d = p4d_alloc(&tboot_mm, pgd, vaddr); if (!p4d) return -1; + pgd->pgd &= ~_PAGE_NX; pud = pud_alloc(&tboot_mm, p4d, vaddr); if (!pud) return -1; -- cgit v1.1 From 8d56eff266f3e41a6c39926269c4c3f58f881a8e Mon Sep 17 00:00:00 2001 From: Jike Song Date: Tue, 9 Jan 2018 00:03:41 +0800 Subject: x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*() The following code contains dead logic: 162 if (pgd_none(*pgd)) { 163 unsigned long new_p4d_page = __get_free_page(gfp); 164 if (!new_p4d_page) 165 return NULL; 166 167 if (pgd_none(*pgd)) { 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 169 new_p4d_page = 0; 170 } 171 if (new_p4d_page) 172 free_page(new_p4d_page); 173 } There can't be any difference between two pgd_none(*pgd) at L162 and L167, so it's always false at L171. Dave Hansen explained: Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock. The lock was only taken when we needed to *make* an entry to avoid collisions. Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check. As all these functions are only called during init, mark them __init as well. Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs") Signed-off-by: Jike Song Signed-off-by: Thomas Gleixner Cc: Alan Cox Cc: Andi Kleen Cc: Tom Lendacky Cc: Peter Zijlstra Cc: Tim Chen Cc: Jiri Koshina Cc: Dave Hansen Cc: Borislav Petkov Cc: Kees Cook Cc: Andi Lutomirski Cc: Linus Torvalds Cc: Greg KH Cc: David Woodhouse Cc: Paul Turner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com --- arch/x86/mm/pti.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 43d4a4a..ce38f16 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) * * Returns a pointer to a P4D on success, or NULL on failure. */ -static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) +static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) { pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address)); gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); @@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) if (!new_p4d_page) return NULL; - if (pgd_none(*pgd)) { - set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); - new_p4d_page = 0; - } - if (new_p4d_page) - free_page(new_p4d_page); + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); } BUILD_BUG_ON(pgd_large(*pgd) != 0); @@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) * * Returns a pointer to a PMD on success, or NULL on failure. */ -static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) +static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); p4d_t *p4d = pti_user_pagetable_walk_p4d(address); @@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) if (!new_pud_page) return NULL; - if (p4d_none(*p4d)) { - set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); - new_pud_page = 0; - } - if (new_pud_page) - free_page(new_pud_page); + set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); } pud = pud_offset(p4d, address); @@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) if (!new_pmd_page) return NULL; - if (pud_none(*pud)) { - set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); - new_pmd_page = 0; - } - if (new_pmd_page) - free_page(new_pmd_page); + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); } return pmd_offset(pud, address); @@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) if (!new_pte_page) return NULL; - if (pmd_none(*pmd)) { - set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); - new_pte_page = 0; - } - if (new_pte_page) - free_page(new_pte_page); + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); } pte = pte_offset_kernel(pmd, address); -- cgit v1.1 From e4d0e84e490790798691aaa0f2e598637f1867ec Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 8 Jan 2018 16:09:21 -0600 Subject: x86/cpu/AMD: Make LFENCE a serializing instruction To aid in speculation control, make LFENCE a serializing instruction since it has less overhead than MFENCE. This is done by setting bit 1 of MSR 0xc0011029 (DE_CFG). Some families that support LFENCE do not have this MSR. For these families, the LFENCE instruction is already serializing. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Reviewed-by: Borislav Petkov Cc: Peter Zijlstra Cc: Tim Chen Cc: Dave Hansen Cc: Borislav Petkov Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David Woodhouse Cc: Paul Turner Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/amd.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ab02261..1e7d710 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -352,6 +352,8 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bcb75dc..5b438d8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -829,6 +829,16 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { + /* + * A serializing LFENCE has less overhead than MFENCE, so + * use it for execution serialization. On families which + * don't have that MSR, LFENCE is already serializing. + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ + msr_set_bit(MSR_F10H_DECFG, + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } -- cgit v1.1 From 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 8 Jan 2018 16:09:32 -0600 Subject: x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference to MFENCE_RDTSC. However, since the kernel could be running under a hypervisor that does not support writing that MSR, read the MSR back and verify that the bit has been set successfully. If the MSR can be read and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the MFENCE_RDTSC feature. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Reviewed-by: Borislav Petkov Cc: Peter Zijlstra Cc: Tim Chen Cc: Dave Hansen Cc: Borislav Petkov Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David Woodhouse Cc: Paul Turner Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1e7d710..fa11fb1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -354,6 +354,7 @@ #define MSR_FAM10H_NODE_ID 0xc001100c #define MSR_F10H_DECFG 0xc0011029 #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5b438d8..ea831c8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -829,6 +829,9 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { + unsigned long long val; + int ret; + /* * A serializing LFENCE has less overhead than MFENCE, so * use it for execution serialization. On families which @@ -839,8 +842,19 @@ static void init_amd(struct cpuinfo_x86 *c) msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + /* + * Verify that the MSR write was successful (could be running + * under a hypervisor) and only then assume that LFENCE is + * serializing. + */ + ret = rdmsrl_safe(MSR_F10H_DECFG, &val); + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + } else { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } /* -- cgit v1.1 From 612e8e9350fd19cae6900cf36ea0c6892d1a0dca Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Jan 2018 12:28:16 +0100 Subject: x86/alternatives: Fix optimize_nops() checking The alternatives code checks only the first byte whether it is a NOP, but with NOPs in front of the payload and having actual instructions after it breaks the "optimized' test. Make sure to scan all bytes before deciding to optimize the NOPs in there. Reported-by: David Woodhouse Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Andi Kleen Cc: Tim Chen Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Dave Hansen Cc: Andi Kleen Cc: Andrew Lutomirski Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic --- arch/x86/kernel/alternative.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3344d33..e0b97e4 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -344,9 +344,12 @@ done: static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; + int i; - if (instr[0] != 0x90) - return; + for (i = 0; i < a->padlen; i++) { + if (instr[i] != 0x90) + return; + } local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); -- cgit v1.1 From 445b69e3b75e42362a5bdc13c8b8f61599e2228a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 10 Jan 2018 14:49:39 -0800 Subject: x86/pti: Make unpoison of pgd for trusted boot work for real The inital fix for trusted boot and PTI potentially misses the pgd clearing if pud_alloc() sets a PGD. It probably works in *practice* because for two adjacent calls to map_tboot_page() that share a PGD entry, the first will clear NX, *then* allocate and set the PGD (without NX clear). The second call will *not* allocate but will clear the NX bit. Defer the NX clearing to a point after it is known that all top-level allocations have occurred. Add a comment to clarify why. [ tglx: Massaged changelog ] Fixes: 262b6b30087 ("x86/tboot: Unbreak tboot with PTI enabled") Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Andrea Arcangeli Cc: Jon Masters Cc: "Tim Chen" Cc: gnomes@lxorguk.ukuu.org.uk Cc: peterz@infradead.org Cc: ning.sun@intel.com Cc: tboot-devel@lists.sourceforge.net Cc: andi@firstfloor.org Cc: luto@kernel.org Cc: law@redhat.com Cc: pbonzini@redhat.com Cc: torvalds@linux-foundation.org Cc: gregkh@linux-foundation.org Cc: dwmw@amazon.co.uk Cc: nickc@redhat.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180110224939.2695CD47@viggo.jf.intel.com --- arch/x86/kernel/tboot.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 75869a4..a2486f4 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -127,7 +127,6 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, p4d = p4d_alloc(&tboot_mm, pgd, vaddr); if (!p4d) return -1; - pgd->pgd &= ~_PAGE_NX; pud = pud_alloc(&tboot_mm, p4d, vaddr); if (!pud) return -1; @@ -139,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); pte_unmap(pte); + + /* + * PTI poisons low addresses in the kernel page tables in the + * name of making them unusable for userspace. To execute + * code at such a low address, the poison must be cleared. + * + * Note: 'pgd' actually gets set in p4d_alloc() _or_ + * pud_alloc() depending on 4/5-level paging. + */ + pgd->pgd &= ~_PAGE_NX; + return 0; } -- cgit v1.1 From 76b043848fd22dbf7f8bf3a1452f8c70d557b860 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:25 +0000 Subject: x86/retpoline: Add initial retpoline support Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide the corresponding thunks. Provide assembler macros for invoking the thunks in the same way that GCC does, from native and inline assembler. This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In some circumstances, IBRS microcode features may be used instead, and the retpoline can be disabled. On AMD CPUs if lfence is serialising, the retpoline can be dramatically simplified to a simple "lfence; jmp *\reg". A future patch, after it has been verified that lfence really is serialising in all circumstances, can enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition to X86_FEATURE_RETPOLINE. Do not align the retpoline in the altinstr section, because there is no guarantee that it stays aligned when it's copied over the oldinstr during alternative patching. [ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks] [ tglx: Put actual function CALL/JMP in front of the macros, convert to symbolic labels ] [ dwmw2: Convert back to numeric labels, merge objtool fixes ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk --- arch/x86/Kconfig | 13 ++++ arch/x86/Makefile | 10 +++ arch/x86/include/asm/asm-prototypes.h | 25 +++++++ arch/x86/include/asm/cpufeatures.h | 2 + arch/x86/include/asm/nospec-branch.h | 128 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 4 ++ arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 48 +++++++++++++ 8 files changed, 231 insertions(+) create mode 100644 arch/x86/include/asm/nospec-branch.h create mode 100644 arch/x86/lib/retpoline.S (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e23d21a..d181916 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -429,6 +429,19 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + + Without compiler support, at least indirect branches in assembler + code are eliminated. Since this includes the syscall entry path, + it is not entirely pointless. + config INTEL_RDT bool "Intel Resource Director Technology support" default n diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a20eacd..974c618 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -235,6 +235,16 @@ KBUILD_CFLAGS += -Wno-sign-compare # KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +# Avoid indirect branches in kernel to deal with Spectre +ifdef CONFIG_RETPOLINE + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE + else + $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) + endif +endif + archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index ff700d8..0927cdc 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -11,7 +11,32 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); #endif + +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_32 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); +#else +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); +INDIRECT_THUNK(8) +INDIRECT_THUNK(9) +INDIRECT_THUNK(10) +INDIRECT_THUNK(11) +INDIRECT_THUNK(12) +INDIRECT_THUNK(13) +INDIRECT_THUNK(14) +INDIRECT_THUNK(15) +#endif +INDIRECT_THUNK(ax) +INDIRECT_THUNK(bx) +INDIRECT_THUNK(cx) +INDIRECT_THUNK(dx) +INDIRECT_THUNK(si) +INDIRECT_THUNK(di) +INDIRECT_THUNK(bp) +INDIRECT_THUNK(sp) +#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1641c2f..f275447 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,6 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h new file mode 100644 index 0000000..e20e92e --- /dev/null +++ b/arch/x86/include/asm/nospec-branch.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NOSPEC_BRANCH_H__ +#define __NOSPEC_BRANCH_H__ + +#include +#include +#include + +#ifdef __ASSEMBLY__ + +/* + * This should be used immediately before a retpoline alternative. It tells + * objtool where the retpolines are so that it can make sense of the control + * flow by just reading the original instruction(s) and ignoring the + * alternatives. + */ +.macro ANNOTATE_NOSPEC_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.nospec + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * These are the bare retpoline primitives for indirect jmp and call. + * Do not use these directly; they only exist to make the ALTERNATIVE + * invocation below less ugly. + */ +.macro RETPOLINE_JMP reg:req + call .Ldo_rop_\@ +.Lspec_trap_\@: + pause + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov \reg, (%_ASM_SP) + ret +.endm + +/* + * This is a wrapper around RETPOLINE_JMP so the called function in reg + * returns to the instruction after the macro. + */ +.macro RETPOLINE_CALL reg:req + jmp .Ldo_call_\@ +.Ldo_retpoline_jmp_\@: + RETPOLINE_JMP \reg +.Ldo_call_\@: + call .Ldo_retpoline_jmp_\@ +.endm + +/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. + */ +.macro JMP_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(jmp *\reg), \ + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#else + jmp *\reg +#endif +.endm + +.macro CALL_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(call *\reg), \ + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD +#else + call *\reg +#endif +.endm + +#else /* __ASSEMBLY__ */ + +#define ANNOTATE_NOSPEC_ALTERNATIVE \ + "999:\n\t" \ + ".pushsection .discard.nospec\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" + +#if defined(CONFIG_X86_64) && defined(RETPOLINE) + +/* + * Since the inline asm uses the %V modifier which is only in newer GCC, + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + */ +# define CALL_NOSPEC \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE( \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE) +# define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +/* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. + */ +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: addl $4, %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ + X86_FEATURE_RETPOLINE) + +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#else /* No retpoline */ +# define CALL_NOSPEC "call *%[thunk_target]\n" +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 372ba3f..7a671d1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -905,6 +905,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); +#ifdef CONFIG_RETPOLINE + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); +#endif + fpu__init_system(c); #ifdef CONFIG_X86_32 diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 457f681..d435c89 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_RETPOLINE) += retpoline.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S new file mode 100644 index 0000000..cb45c6c --- /dev/null +++ b/arch/x86/lib/retpoline.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include + +.macro THUNK reg + .section .text.__x86.indirect_thunk.\reg + +ENTRY(__x86_indirect_thunk_\reg) + CFI_STARTPROC + JMP_NOSPEC %\reg + CFI_ENDPROC +ENDPROC(__x86_indirect_thunk_\reg) +.endm + +/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather + * than one per register with the correct names. So we do it + * the simple and nasty way... + */ +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) + +GENERATE_THUNK(_ASM_AX) +GENERATE_THUNK(_ASM_BX) +GENERATE_THUNK(_ASM_CX) +GENERATE_THUNK(_ASM_DX) +GENERATE_THUNK(_ASM_SI) +GENERATE_THUNK(_ASM_DI) +GENERATE_THUNK(_ASM_BP) +GENERATE_THUNK(_ASM_SP) +#ifdef CONFIG_64BIT +GENERATE_THUNK(r8) +GENERATE_THUNK(r9) +GENERATE_THUNK(r10) +GENERATE_THUNK(r11) +GENERATE_THUNK(r12) +GENERATE_THUNK(r13) +GENERATE_THUNK(r14) +GENERATE_THUNK(r15) +#endif -- cgit v1.1 From da285121560e769cc31797bba6422eea71d473e0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:26 +0000 Subject: x86/spectre: Add boot time option to select Spectre v2 mitigation Add a spectre_v2= option to select the mitigation used for the indirect branch speculation vulnerability. Currently, the only option available is retpoline, in its various forms. This will be expanded to cover the new IBRS/IBPB microcode features. The RETPOLINE_AMD feature relies on a serializing LFENCE for speculation control. For AMD hardware, only set RETPOLINE_AMD if LFENCE is a serializing instruction, which is indicated by the LFENCE_RDTSC feature. [ tglx: Folded back the LFENCE/AMD fixes and reworked it so IBRS integration becomes simple ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-5-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/nospec-branch.h | 10 +++ arch/x86/kernel/cpu/bugs.c | 158 ++++++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 4 - 3 files changed, 167 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e20e92e..ea034fa 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -124,5 +124,15 @@ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif +/* The Spectre V2 mitigation variants */ +enum spectre_v2_mitigation { + SPECTRE_V2_NONE, + SPECTRE_V2_RETPOLINE_MINIMAL, + SPECTRE_V2_RETPOLINE_MINIMAL_AMD, + SPECTRE_V2_RETPOLINE_GENERIC, + SPECTRE_V2_RETPOLINE_AMD, + SPECTRE_V2_IBRS, +}; + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 76ad6cb..e4dc261 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,9 @@ #include #include #include + +#include +#include #include #include #include @@ -21,6 +24,8 @@ #include #include +static void __init spectre_v2_select_mitigation(void); + void __init check_bugs(void) { identify_boot_cpu(); @@ -30,6 +35,9 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -62,6 +70,153 @@ void __init check_bugs(void) #endif } +/* The kernel command line selection */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", +}; + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static inline bool retp_compiler(void) +{ + return __is_defined(RETPOLINE); +} + +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); +} + +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) +{ + char arg[20]; + int ret; + + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret > 0) { + if (match_option(arg, ret, "off")) { + goto disable; + } else if (match_option(arg, ret, "on")) { + spec2_print_if_secure("force enabled on command line."); + return SPECTRE_V2_CMD_FORCE; + } else if (match_option(arg, ret, "retpoline")) { + spec2_print_if_insecure("retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE; + } else if (match_option(arg, ret, "retpoline,amd")) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + spec2_print_if_insecure("AMD retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_AMD; + } else if (match_option(arg, ret, "retpoline,generic")) { + spec2_print_if_insecure("generic retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_GENERIC; + } else if (match_option(arg, ret, "auto")) { + return SPECTRE_V2_CMD_AUTO; + } + } + + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_AUTO; +disable: + spec2_print_if_insecure("disabled on command line."); + return SPECTRE_V2_CMD_NONE; +} + +static void __init spectre_v2_select_mitigation(void) +{ + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE; + + /* + * If the CPU is not affected and the command line mode is NONE or AUTO + * then nothing to do. + */ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO)) + return; + + switch (cmd) { + case SPECTRE_V2_CMD_NONE: + return; + + case SPECTRE_V2_CMD_FORCE: + /* FALLTRHU */ + case SPECTRE_V2_CMD_AUTO: + goto retpoline_auto; + + case SPECTRE_V2_CMD_RETPOLINE_AMD: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_amd; + break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_generic; + break; + case SPECTRE_V2_CMD_RETPOLINE: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; + } + pr_err("kernel not compiled with retpoline; no mitigation available!"); + return; + +retpoline_auto: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + retpoline_amd: + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + goto retpoline_generic; + } + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : + SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } else { + retpoline_generic: + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : + SPECTRE_V2_RETPOLINE_MINIMAL; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } + + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); +} + +#undef pr_fmt + #ifdef CONFIG_SYSFS ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) @@ -86,6 +241,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7a671d1..372ba3f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -905,10 +905,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -#ifdef CONFIG_RETPOLINE - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); -#endif - fpu__init_system(c); #ifdef CONFIG_X86_32 -- cgit v1.1 From 9697fa39efd3fc3692f2949d4045f393ec58450b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:27 +0000 Subject: x86/retpoline/crypto: Convert crypto assembler indirect jumps Convert all indirect jumps in crypto assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-6-git-send-email-dwmw@amazon.co.uk --- arch/x86/crypto/aesni-intel_asm.S | 5 +++-- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 ++- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 ++- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 16627fe..3d09e3a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,7 @@ #include #include #include +#include /* * The following macros are used to move an (un)aligned 16 byte value to/from @@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x00(OUTP), INC pxor INC, STATE1 @@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8) _aesni_gf128mul_x_ble() movups IV, (IVP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x40(OUTP), INC pxor INC, STATE1 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index f7c495e..a14af6e 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -17,6 +17,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way: vpxor 14 * 16(%rax), %xmm15, %xmm14; vpxor 15 * 16(%rax), %xmm15, %xmm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 16), %rsp; diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index eee5b39..b66bbfa 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -12,6 +12,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way: vpxor 14 * 32(%rax), %ymm15, %ymm14; vpxor 15 * 32(%rax), %ymm15, %ymm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 32), %rsp; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 7a7de27..d9b734d 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -45,6 +45,7 @@ #include #include +#include ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction @@ -172,7 +173,7 @@ continue_block: movzxw (bufp, %rax, 2), len lea crc_array(%rip), bufp lea (bufp, len, 1), bufp - jmp *bufp + JMP_NOSPEC bufp ################################################################ ## 2a) PROCESS FULL BLOCKS: -- cgit v1.1 From 2641f08bb7fc63a636a2b18173221d7040a3512e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:28 +0000 Subject: x86/retpoline/entry: Convert entry assembler indirect jumps Convert indirect jumps in core 32/64bit entry assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Don't use CALL_NOSPEC in entry_SYSCALL_64_fastpath because the return address after the 'call' instruction must be *precisely* at the .Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work, and the use of alternatives will mess that up unless we play horrid games to prepend with NOPs and make the variants the same length. It's not worth it; in the case where we ALTERNATIVE out the retpoline, the first instruction at __x86.indirect_thunk.rax is going to be a bare jmp *%rax anyway. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Acked-by: Arjan van de Ven Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-7-git-send-email-dwmw@amazon.co.uk --- arch/x86/entry/entry_32.S | 5 +++-- arch/x86/entry/entry_64.S | 12 +++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ace8f32..a1f28a5 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -44,6 +44,7 @@ #include #include #include +#include .section .entry.text, "ax" @@ -290,7 +291,7 @@ ENTRY(ret_from_fork) /* kernel thread */ 1: movl %edi, %eax - call *%ebx + CALL_NOSPEC %ebx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -919,7 +920,7 @@ common_exception: movl %ecx, %es TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - call *%edi + CALL_NOSPEC %edi jmp ret_from_exception END(common_exception) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ed31d00..59874bc 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "calling.h" @@ -187,7 +188,7 @@ ENTRY(entry_SYSCALL_64_trampoline) */ pushq %rdi movq $entry_SYSCALL_64_stage2, %rdi - jmp *%rdi + JMP_NOSPEC %rdi END(entry_SYSCALL_64_trampoline) .popsection @@ -266,7 +267,12 @@ entry_SYSCALL_64_fastpath: * It might end up jumping to the slow path. If it jumps, RAX * and all argument registers are clobbered. */ +#ifdef CONFIG_RETPOLINE + movq sys_call_table(, %rax, 8), %rax + call __x86_indirect_thunk_rax +#else call *sys_call_table(, %rax, 8) +#endif .Lentry_SYSCALL_64_after_fastpath_call: movq %rax, RAX(%rsp) @@ -438,7 +444,7 @@ ENTRY(stub_ptregs_64) jmp entry_SYSCALL64_slow_path 1: - jmp *%rax /* Called from C */ + JMP_NOSPEC %rax /* Called from C */ END(stub_ptregs_64) .macro ptregs_stub func @@ -517,7 +523,7 @@ ENTRY(ret_from_fork) 1: /* kernel thread */ movq %r12, %rdi - call *%rbx + CALL_NOSPEC %rbx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() -- cgit v1.1 From 9351803bd803cdbeb9b5a7850b7b6f464806e3db Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:29 +0000 Subject: x86/retpoline/ftrace: Convert ftrace assembler indirect jumps Convert all indirect jumps in ftrace assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-8-git-send-email-dwmw@amazon.co.uk --- arch/x86/kernel/ftrace_32.S | 6 ++++-- arch/x86/kernel/ftrace_64.S | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index b6c6468..4c8440d 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CC_USING_FENTRY # define function_hook __fentry__ @@ -197,7 +198,8 @@ ftrace_stub: movl 0x4(%ebp), %edx subl $MCOUNT_INSN_SIZE, %eax - call *ftrace_trace_function + movl ftrace_trace_function, %ecx + CALL_NOSPEC %ecx popl %edx popl %ecx @@ -241,5 +243,5 @@ return_to_handler: movl %eax, %ecx popl %edx popl %eax - jmp *%ecx + JMP_NOSPEC %ecx #endif diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index c832291..7cb8ba0 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -7,7 +7,7 @@ #include #include #include - +#include .code64 .section .entry.text, "ax" @@ -286,8 +286,8 @@ trace: * ip and parent ip are used and the list function is called when * function tracing is enabled. */ - call *ftrace_trace_function - + movq ftrace_trace_function, %r8 + CALL_NOSPEC %r8 restore_mcount_regs jmp fgraph_trace @@ -329,5 +329,5 @@ GLOBAL(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp - jmp *%rdi + JMP_NOSPEC %rdi #endif -- cgit v1.1 From e70e5892b28c18f517f29ab6e83bd57705104b31 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:30 +0000 Subject: x86/retpoline/hyperv: Convert assembler indirect jumps Convert all indirect jumps in hyperv inline asm code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-9-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/mshyperv.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 581bb54..5119e4b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -7,6 +7,7 @@ #include #include #include +#include /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent @@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return U64_MAX; __asm__ __volatile__("mov %4, %%r8\n" - "call *%5" + CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) - : "r" (output_address), "m" (hv_hypercall_pg) + : "r" (output_address), + THUNK_TARGET(hv_hypercall_pg) : "cc", "memory", "r8", "r9", "r10", "r11"); #else u32 input_address_hi = upper_32_bits(input_address); @@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) if (!hv_hypercall_pg) return U64_MAX; - __asm__ __volatile__("call *%7" + __asm__ __volatile__(CALL_NOSPEC : "=A" (hv_status), "+c" (input_address_lo), ASM_CALL_CONSTRAINT : "A" (control), "b" (input_address_hi), "D"(output_address_hi), "S"(output_address_lo), - "m" (hv_hypercall_pg) + THUNK_TARGET(hv_hypercall_pg) : "cc", "memory"); #endif /* !x86_64 */ return hv_status; @@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) #ifdef CONFIG_X86_64 { - __asm__ __volatile__("call *%4" + __asm__ __volatile__(CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) - : "m" (hv_hypercall_pg) + : THUNK_TARGET(hv_hypercall_pg) : "cc", "r8", "r9", "r10", "r11"); } #else @@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) u32 input1_hi = upper_32_bits(input1); u32 input1_lo = lower_32_bits(input1); - __asm__ __volatile__ ("call *%5" + __asm__ __volatile__ (CALL_NOSPEC : "=A"(hv_status), "+c"(input1_lo), ASM_CALL_CONSTRAINT : "A" (control), "b" (input1_hi), - "m" (hv_hypercall_pg) + THUNK_TARGET(hv_hypercall_pg) : "cc", "edi", "esi"); } #endif -- cgit v1.1 From ea08816d5b185ab3d09e95e393f265af54560350 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:31 +0000 Subject: x86/retpoline/xen: Convert Xen hypercall indirect jumps Convert indirect call in Xen hypercall to use non-speculative sequence, when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Reviewed-by: Juergen Gross Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-10-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/xen/hypercall.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 7cb282e..bfd8826 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -217,9 +218,9 @@ privcmd_call(unsigned call, __HYPERCALL_5ARG(a1, a2, a3, a4, a5); stac(); - asm volatile("call *%[call]" + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM - : [call] "a" (&hypercall_page[call]) + : [thunk_target] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); clac(); -- cgit v1.1 From 5096732f6f695001fa2d6f1335a2680b37912c69 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:32 +0000 Subject: x86/retpoline/checksum32: Convert assembler indirect jumps Convert all indirect jumps in 32bit checksum assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-11-git-send-email-dwmw@amazon.co.uk --- arch/x86/lib/checksum_32.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4d34bb5..46e71a7 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -29,7 +29,8 @@ #include #include #include - +#include + /* * computes a partial checksum, e.g. for TCP/UDP fragments */ @@ -156,7 +157,7 @@ ENTRY(csum_partial) negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic) andl $-32,%edx lea 3f(%ebx,%ebx), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx 1: addl $64,%esi addl $64,%edi SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) -- cgit v1.1 From 7614e913db1f40fff819b36216484dc3808995d4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 11 Jan 2018 21:46:33 +0000 Subject: x86/retpoline/irq32: Convert assembler indirect jumps Convert all indirect jumps in 32bit irq inline asm code to use non speculative sequences. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-12-git-send-email-dwmw@amazon.co.uk --- arch/x86/kernel/irq_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a83b334..c1bdbd3 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -20,6 +20,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack); static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=b" (stack) : "0" (stack), - "D"(func) + [thunk_target] "D"(func) : "memory", "cc", "edx", "ecx", "eax"); } @@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) call_on_stack(print_stack_overflow, isp); asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=a" (arg1), "=b" (isp) : "0" (desc), "1" (isp), - "D" (desc->handle_irq) + [thunk_target] "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; } -- cgit v1.1 From 117cc7a908c83697b0b737d15ae1eb5943afe35b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jan 2018 11:11:27 +0000 Subject: x86/retpoline: Fill return stack buffer on vmexit In accordance with the Intel and AMD documentation, we need to overwrite all entries in the RSB on exiting a guest, to prevent malicious branch target predictions from affecting the host kernel. This is needed both for retpoline and for IBRS. [ak: numbers again for the RSB stuffing labels] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Tested-by: Peter Zijlstra (Intel) Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/nospec-branch.h | 78 +++++++++++++++++++++++++++++++++++- arch/x86/kvm/svm.c | 4 ++ arch/x86/kvm/vmx.c | 4 ++ 3 files changed, 85 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ea034fa..402a11c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,6 +7,48 @@ #include #include +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include ") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; + #ifdef __ASSEMBLY__ /* @@ -76,6 +118,20 @@ #endif .endm + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: +#endif +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_NOSPEC_ALTERNATIVE \ @@ -119,7 +175,7 @@ X86_FEATURE_RETPOLINE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) -#else /* No retpoline */ +#else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -134,5 +190,25 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* + * On VMEXIT we must ensure that no RSB predictions learned in the guest + * can be followed in the host, by overwriting the RSB completely. Both + * retpoline and IBRS mitigations for Spectre v2 need this; only on future + * CPUs with IBRS_ATT *might* it be avoided. + */ +static inline void vmexit_fill_RSB(void) +{ +#ifdef CONFIG_RETPOLINE + unsigned long loops = RSB_CLEAR_LOOPS / 2; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=&r" (loops), ASM_CALL_CONSTRAINT + : "r" (loops) : "memory" ); +#endif +} #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0e68f0b..2744b973 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "trace.h" @@ -4985,6 +4986,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 62ee436..d1e25db 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "trace.h" #include "pmu.h" @@ -9403,6 +9404,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (debugctlmsr) update_debugctlmsr(debugctlmsr); -- cgit v1.1 From f10ee3dcc9f0aba92a5c4c064628be5200765dc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 00:23:57 +0100 Subject: x86/pti: Fix !PCID and sanitize defines The switch to the user space page tables in the low level ASM code sets unconditionally bit 12 and bit 11 of CR3. Bit 12 is switching the base address of the page directory to the user part, bit 11 is switching the PCID to the PCID associated with the user page tables. This fails on a machine which lacks PCID support because bit 11 is set in CR3. Bit 11 is reserved when PCID is inactive. While the Intel SDM claims that the reserved bits are ignored when PCID is disabled, the AMD APM states that they should be cleared. This went unnoticed as the AMD APM was not checked when the code was developed and reviewed and test systems with Intel CPUs never failed to boot. The report is against a Centos 6 host where the guest fails to boot, so it's not yet clear whether this is a virt issue or can happen on real hardware too, but thats irrelevant as the AMD APM clearly ask for clearing the reserved bits. Make sure that on non PCID machines bit 11 is not set by the page table switching code. Andy suggested to rename the related bits and masks so they are clearly describing what they should be used for, which is done as well for clarity. That split could have been done with alternatives but the macro hell is horrible and ugly. This can be done on top if someone cares to remove the extra orq. For now it's a straight forward fix. Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches") Reported-by: Laura Abbott Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: stable Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Willy Tarreau Cc: David Woodhouse Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801140009150.2371@nanos --- arch/x86/entry/calling.h | 36 ++++++++++++++++++---------------- arch/x86/include/asm/processor-flags.h | 2 +- arch/x86/include/asm/tlbflush.h | 6 +++--- 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 45a63e0..3f48f69 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two * halves: */ -#define PTI_SWITCH_PGTABLES_MASK (1<= (1 << X86_CR3_PTI_SWITCH_BIT)); + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); /* * The ASID being passed in here should have respected the * MAX_ASID_AVAILABLE and thus never have the switch bit set. */ - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT)); + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); #endif /* * The dynamically-assigned ASIDs that get passed in are small @@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid) { u16 ret = kern_pcid(asid); #ifdef CONFIG_PAGE_TABLE_ISOLATION - ret |= 1 << X86_CR3_PTI_SWITCH_BIT; + ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; #endif return ret; } -- cgit v1.1 From 99a9dc98ba52267ce5e062b52de88ea1f1b2a7d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 14 Jan 2018 11:27:13 +0100 Subject: x86,perf: Disable intel_bts when PTI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intel_bts driver does not use the 'normal' BTS buffer which is exposed through the cpu_entry_area but instead uses the memory allocated for the perf AUX buffer. This obviously comes apart when using PTI because then the kernel mapping; which includes that AUX buffer memory; disappears. Fixing this requires to expose a mapping which is visible in all context and that's not trivial. As a quick fix disable this driver when PTI is enabled to prevent malfunction. Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig") Reported-by: Vince Weaver Reported-by: Robert Święcki Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: greg@kroah.com Cc: hughd@google.com Cc: luto@amacapital.net Cc: Vince Weaver Cc: torvalds@linux-foundation.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180114102713.GB6166@worktop.programming.kicks-ass.net --- arch/x86/events/intel/bts.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 141e07b..24ffa1e 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -582,6 +582,24 @@ static __init int bts_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) return -ENODEV; + if (boot_cpu_has(X86_FEATURE_PTI)) { + /* + * BTS hardware writes through a virtual memory map we must + * either use the kernel physical map, or the user mapping of + * the AUX buffer. + * + * However, since this driver supports per-CPU and per-task inherit + * we cannot use the user mapping since it will not be availble + * if we're not running the owning process. + * + * With PTI we can't use the kernal map either, because its not + * there when we run userspace. + * + * For now, disable this driver when using PTI. + */ + return -ENODEV; + } + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; -- cgit v1.1