From 21cdb6b568435738cc0b303b2b3b82742396310c Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:30 +0000 Subject: x86/mm: Page align the '_end' symbol to avoid pfn conversion bugs Ingo noted that if we can guarantee _end is aligned to PAGE_SIZE we can automatically avoid bugs along the lines of, size = _end - _text >> PAGE_SHIFT which is missing a call to PFN_ALIGN(). The EFI mixed mode contains this bug, for example. _text is already aligned to PAGE_SIZE through the use of LOAD_PHYSICAL_ADDR, and the BSS and BRK sections are explicitly aligned in the linker script, so it makes sense to align _end to match. Reported-by: Ingo Molnar Signed-off-by: Matt Fleming Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 74e4bf1..4f19942 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -325,6 +325,7 @@ SECTIONS __brk_limit = .; } + . = ALIGN(PAGE_SIZE); _end = .; STABS_DEBUG -- cgit v1.1 From edc3b9129cecd0f0857112136f5b8b1bc1d45918 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:31 +0000 Subject: x86/mm/pat: Ensure cpa->pfn only contains page frame numbers The x86 pageattr code is confused about the data that is stored in cpa->pfn, sometimes it's treated as a page frame number, sometimes it's treated as an unshifted physical address, and in one place it's treated as a pte. The result of this is that the mapping functions do not map the intended physical address. This isn't a problem in practice because most of the addresses we're mapping in the EFI code paths are already mapped in 'trampoline_pgd' and so the pageattr mapping functions don't actually do anything in this case. But when we move to using a separate page table for the EFI runtime this will be an issue. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 17 ++++++----------- arch/x86/platform/efi/efi_64.c | 16 ++++++++++------ 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a3137a4..c70e420 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -905,15 +905,10 @@ static void populate_pte(struct cpa_data *cpa, pte = pte_offset_kernel(pmd, start); while (num_pages-- && start < end) { - - /* deal with the NX bit */ - if (!(pgprot_val(pgprot) & _PAGE_NX)) - cpa->pfn &= ~_PAGE_NX; - - set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); + set_pte(pte, pfn_pte(cpa->pfn, pgprot)); start += PAGE_SIZE; - cpa->pfn += PAGE_SIZE; + cpa->pfn++; pte++; } } @@ -969,11 +964,11 @@ static int populate_pmd(struct cpa_data *cpa, pmd = pmd_offset(pud, start); - set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | + set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pmd_pgprot))); start += PMD_SIZE; - cpa->pfn += PMD_SIZE; + cpa->pfn += PMD_SIZE >> PAGE_SHIFT; cur_pages += PMD_SIZE >> PAGE_SHIFT; } @@ -1042,11 +1037,11 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, * Map everything starting from the Gb boundary, possibly with 1G pages */ while (end - start >= PUD_SIZE) { - set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | + set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); start += PUD_SIZE; - cpa->pfn += PUD_SIZE; + cpa->pfn += PUD_SIZE >> PAGE_SHIFT; cur_pages += PUD_SIZE >> PAGE_SHIFT; pud++; } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a0ac0f9..5aa186d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -143,7 +143,7 @@ void efi_sync_low_kernel_mappings(void) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long text; + unsigned long pfn, text; struct page *page; unsigned npages; pgd_t *pgd; @@ -160,7 +160,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * and ident-map those pages containing the map before calling * phys_efi_set_virtual_address_map(). */ - if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { + pfn = pa_memmap >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) { pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); return 1; } @@ -185,8 +186,9 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) npages = (_end - _text) >> PAGE_SHIFT; text = __pa(_text); + pfn = text >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) { pr_err("Failed to map kernel text 1:1\n"); return 1; } @@ -204,12 +206,14 @@ void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) static void __init __map_region(efi_memory_desc_t *md, u64 va) { pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - unsigned long pf = 0; + unsigned long flags = 0; + unsigned long pfn; if (!(md->attribute & EFI_MEMORY_WB)) - pf |= _PAGE_PCD; + flags |= _PAGE_PCD; - if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", md->phys_addr, va); } -- cgit v1.1 From b61a76f8850d2979550abc42d7e09154ebb8d785 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:32 +0000 Subject: x86/efi: Map RAM into the identity page table for mixed mode We are relying on the pre-existing mappings in 'trampoline_pgd' when accessing function arguments in the EFI mixed mode thunking code. Instead let's map memory explicitly so that things will continue to work when we move to a separate page table in the future. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 5aa186d..102976d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -144,6 +144,7 @@ void efi_sync_low_kernel_mappings(void) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { unsigned long pfn, text; + efi_memory_desc_t *md; struct page *page; unsigned npages; pgd_t *pgd; @@ -177,6 +178,25 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (!IS_ENABLED(CONFIG_EFI_MIXED)) return 0; + /* + * Map all of RAM so that we can access arguments in the 1:1 + * mapping when making EFI runtime calls. + */ + for_each_efi_memory_desc(&memmap, md) { + if (md->type != EFI_CONVENTIONAL_MEMORY && + md->type != EFI_LOADER_DATA && + md->type != EFI_LOADER_CODE) + continue; + + pfn = md->phys_addr >> PAGE_SHIFT; + npages = md->num_pages; + + if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, 0)) { + pr_err("Failed to map 1:1 memory\n"); + return 1; + } + } + page = alloc_page(GFP_KERNEL|__GFP_DMA32); if (!page) panic("Unable to allocate EFI runtime stack < 4GB\n"); -- cgit v1.1 From c9f2a9a65e4855b74d92cdad688f6ee4a1a323ff Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:33 +0000 Subject: x86/efi: Hoist page table switching code into efi_call_virt() This change is a prerequisite for pending patches that switch to a dedicated EFI page table, instead of using 'trampoline_pgd' which shares PGD entries with 'swapper_pg_dir'. The pending patches make it impossible to dereference the runtime service function pointer without first switching %cr3. It's true that we now have duplicated switching code in efi_call_virt() and efi_call_phys_{prolog,epilog}() but we are sacrificing code duplication for a little more clarity and the ease of writing the page table switching code in C instead of asm. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Jones Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 25 +++++++++++++++++++++ arch/x86/platform/efi/efi_64.c | 24 ++++++++++----------- arch/x86/platform/efi/efi_stub_64.S | 43 ------------------------------------- 3 files changed, 36 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0010c78..347eeac 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,6 +3,7 @@ #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -64,6 +65,17 @@ extern u64 asmlinkage efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { + u64 r15; + u64 prev_cr3; + pgd_t *efi_pgt; + bool use_pgd; + u64 phys_stack; +} __packed; + #define efi_call_virt(f, ...) \ ({ \ efi_status_t __s; \ @@ -71,7 +83,20 @@ extern u64 asmlinkage efi_call(void *fp, ...); efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + \ + if (efi_scratch.use_pgd) { \ + efi_scratch.prev_cr3 = read_cr3(); \ + write_cr3((unsigned long)efi_scratch.efi_pgt); \ + __flush_tlb_all(); \ + } \ + \ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ + \ + if (efi_scratch.use_pgd) { \ + write_cr3(efi_scratch.prev_cr3); \ + __flush_tlb_all(); \ + } \ + \ __kernel_fpu_end(); \ preempt_enable(); \ __s; \ diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 102976d..b19cdac 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -47,16 +47,7 @@ */ static u64 efi_va = EFI_VA_START; -/* - * Scratch space used for switching the pagetable in the EFI stub - */ -struct efi_scratch { - u64 r15; - u64 prev_cr3; - pgd_t *efi_pgt; - bool use_pgd; - u64 phys_stack; -} __packed; +struct efi_scratch efi_scratch; static void __init early_code_mapping_set_exec(int executable) { @@ -83,8 +74,11 @@ pgd_t * __init efi_call_phys_prolog(void) int pgd; int n_pgds; - if (!efi_enabled(EFI_OLD_MEMMAP)) - return NULL; + if (!efi_enabled(EFI_OLD_MEMMAP)) { + save_pgd = (pgd_t *)read_cr3(); + write_cr3((unsigned long)efi_scratch.efi_pgt); + goto out; + } early_code_mapping_set_exec(1); @@ -96,6 +90,7 @@ pgd_t * __init efi_call_phys_prolog(void) vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); } +out: __flush_tlb_all(); return save_pgd; @@ -109,8 +104,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) int pgd_idx; int nr_pgds; - if (!save_pgd) + if (!efi_enabled(EFI_OLD_MEMMAP)) { + write_cr3((unsigned long)save_pgd); + __flush_tlb_all(); return; + } nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 86d0f9e..32020cb 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -38,41 +38,6 @@ mov %rsi, %cr0; \ mov (%rsp), %rsp - /* stolen from gcc */ - .macro FLUSH_TLB_ALL - movq %r15, efi_scratch(%rip) - movq %r14, efi_scratch+8(%rip) - movq %cr4, %r15 - movq %r15, %r14 - andb $0x7f, %r14b - movq %r14, %cr4 - movq %r15, %cr4 - movq efi_scratch+8(%rip), %r14 - movq efi_scratch(%rip), %r15 - .endm - - .macro SWITCH_PGT - cmpb $0, efi_scratch+24(%rip) - je 1f - movq %r15, efi_scratch(%rip) # r15 - # save previous CR3 - movq %cr3, %r15 - movq %r15, efi_scratch+8(%rip) # prev_cr3 - movq efi_scratch+16(%rip), %r15 # EFI pgt - movq %r15, %cr3 - 1: - .endm - - .macro RESTORE_PGT - cmpb $0, efi_scratch+24(%rip) - je 2f - movq efi_scratch+8(%rip), %r15 - movq %r15, %cr3 - movq efi_scratch(%rip), %r15 - FLUSH_TLB_ALL - 2: - .endm - ENTRY(efi_call) SAVE_XMM mov (%rsp), %rax @@ -83,16 +48,8 @@ ENTRY(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - SWITCH_PGT call *%rdi - RESTORE_PGT addq $48, %rsp RESTORE_XMM ret ENDPROC(efi_call) - - .data -ENTRY(efi_scratch) - .fill 3,8,0 - .byte 0 - .quad 0 -- cgit v1.1 From 67a9108ed4313b85a9c53406d80dc1ae3f8c3e36 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:34 +0000 Subject: x86/efi: Build our own page table structures With commit e1a58320a38d ("x86/mm: Warn on W^X mappings") all users booting on 64-bit UEFI machines see the following warning, ------------[ cut here ]------------ WARNING: CPU: 7 PID: 1 at arch/x86/mm/dump_pagetables.c:225 note_page+0x5dc/0x780() x86/mm: Found insecure W+X mapping at address ffff88000005f000/0xffff88000005f000 ... x86/mm: Checked W+X mappings: FAILED, 165660 W+X pages found. ... This is caused by mapping EFI regions with RWX permissions. There isn't much we can do to restrict the permissions for these regions due to the way the firmware toolchains mix code and data, but we can at least isolate these mappings so that they do not appear in the regular kernel page tables. In commit d2f7cbe7b26a ("x86/efi: Runtime services virtual mapping") we started using 'trampoline_pgd' to map the EFI regions because there was an existing identity mapping there which we use during the SetVirtualAddressMap() call and for broken firmware that accesses those addresses. But 'trampoline_pgd' shares some PGD entries with 'swapper_pg_dir' and does not provide the isolation we require. Notably the virtual address for __START_KERNEL_map and MODULES_START are mapped by the same PGD entry so we need to be more careful when copying changes over in efi_sync_low_kernel_mappings(). This patch doesn't go the full mile, we still want to share some PGD entries with 'swapper_pg_dir'. Having completely separate page tables brings its own issues such as synchronising new mappings after memory hotplug and module loading. Sharing also keeps memory usage down. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Jones Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 1 + arch/x86/platform/efi/efi.c | 39 ++++++----------- arch/x86/platform/efi/efi_32.c | 5 +++ arch/x86/platform/efi/efi_64.c | 97 +++++++++++++++++++++++++++++++++++------- 4 files changed, 102 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 347eeac..8fd9e63 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -136,6 +136,7 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); +extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index ad28540..3c1f3cd 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void) * This function will switch the EFI runtime services to virtual mode. * Essentially, we look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor into the - * ->trampoline_pgd page table using a top-down VA allocation scheme. + * efi_pgd page table. * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the @@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void) * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime - * function. For function arguments passing we do copy the PGDs of the - * kernel page table into ->trampoline_pgd prior to each call. + * function. For function arguments passing we do copy the PUDs of the + * kernel page table into efi_pgd prior to each call. * * Specially for kexec boot, efi runtime maps in previous kernel should * be passed in via setup_data. In that case runtime ranges will be mapped @@ -895,6 +895,12 @@ static void __init __efi_enter_virtual_mode(void) efi.systab = NULL; + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { @@ -954,28 +960,11 @@ static void __init __efi_enter_virtual_mode(void) efi_runtime_mkexec(); /* - * We mapped the descriptor array into the EFI pagetable above but we're - * not unmapping it here. Here's why: - * - * We're copying select PGDs from the kernel page table to the EFI page - * table and when we do so and make changes to those PGDs like unmapping - * stuff from them, those changes appear in the kernel page table and we - * go boom. - * - * From setup_real_mode(): - * - * ... - * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; - * - * In this particular case, our allocation is in PGD 0 of the EFI page - * table but we've copied that PGD from PGD[272] of the EFI page table: - * - * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 - * - * where the direct memory mapping in kernel space is. - * - * new_memmap's VA comes from that direct mapping and thus clearing it, - * it would get cleared in the kernel page table too. + * We mapped the descriptor array into the EFI pagetable above + * but we're not unmapping it here because if we're running in + * EFI mixed mode we need all of memory to be accessible when + * we pass parameters to the EFI runtime services in the + * thunking code. * * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index ed5b673..58d669b 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -38,6 +38,11 @@ * say 0 - 3G. */ +int __init efi_alloc_page_tables(void) +{ + return 0; +} + void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) {} int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b19cdac..4897f51 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -40,6 +40,7 @@ #include #include #include +#include /* * We allocate runtime services regions bottom-up, starting from -4G, i.e. @@ -121,22 +122,92 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } +static pgd_t *efi_pgd; + +/* + * We need our own copy of the higher levels of the page tables + * because we want to avoid inserting EFI region mappings (EFI_VA_END + * to EFI_VA_START) into the standard kernel page tables. Everything + * else can be shared, see efi_sync_low_kernel_mappings(). + */ +int __init efi_alloc_page_tables(void) +{ + pgd_t *pgd; + pud_t *pud; + gfp_t gfp_mask; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + if (!efi_pgd) + return -ENOMEM; + + pgd = efi_pgd + pgd_index(EFI_VA_END); + + pud = pud_alloc_one(NULL, 0); + if (!pud) { + free_page((unsigned long)efi_pgd); + return -ENOMEM; + } + + pgd_populate(NULL, pgd, pud); + + return 0; +} + /* * Add low kernel mappings for passing arguments to EFI functions. */ void efi_sync_low_kernel_mappings(void) { - unsigned num_pgds; - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned num_entries; + pgd_t *pgd_k, *pgd_efi; + pud_t *pud_k, *pud_efi; if (efi_enabled(EFI_OLD_MEMMAP)) return; - num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); + /* + * We can share all PGD entries apart from the one entry that + * covers the EFI runtime mapping space. + * + * Make sure the EFI runtime region mappings are guaranteed to + * only span a single PGD entry and that the entry also maps + * other important kernel regions. + */ + BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); + BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != + (EFI_VA_END & PGDIR_MASK)); + + pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); + pgd_k = pgd_offset_k(PAGE_OFFSET); + + num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); - memcpy(pgd + pgd_index(PAGE_OFFSET), - init_mm.pgd + pgd_index(PAGE_OFFSET), - sizeof(pgd_t) * num_pgds); + /* + * We share all the PUD entries apart from those that map the + * EFI regions. Copy around them. + */ + BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); + BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); + + pgd_efi = efi_pgd + pgd_index(EFI_VA_END); + pud_efi = pud_offset(pgd_efi, 0); + + pgd_k = pgd_offset_k(EFI_VA_END); + pud_k = pud_offset(pgd_k, 0); + + num_entries = pud_index(EFI_VA_END); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + + pud_efi = pud_offset(pgd_efi, EFI_VA_START); + pud_k = pud_offset(pgd_k, EFI_VA_START); + + num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) @@ -150,8 +221,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; - pgd = __va(efi_scratch.efi_pgt); + efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); + pgd = efi_pgd; /* * It can happen that the physical address of new_memmap lands in memory @@ -216,16 +287,14 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); + kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); } static void __init __map_region(efi_memory_desc_t *md, u64 va) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); unsigned long flags = 0; unsigned long pfn; + pgd_t *pgd = efi_pgd; if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; @@ -334,9 +403,7 @@ void __init efi_runtime_mkexec(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - ptdump_walk_pgd_level(NULL, pgd); + ptdump_walk_pgd_level(NULL, efi_pgd); #endif } -- cgit v1.1 From 26d7f65fbd22168c33d2350f3e7e3021f5761256 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 25 Oct 2015 10:26:35 +0000 Subject: x86/efi: Preface all print statements with efi* tag The pr_*() calls in the x86 EFI code may or may not include a subsystem tag, which makes it difficult to grep the kernel log for all relevant EFI messages and leads users to miss important information. Recently, a bug reporter provided all the EFI print messages from the kernel log when trying to diagnose an issue but missed the following statement because it wasn't prefixed with anything indicating it was related to EFI, pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); Cc: Borislav Petkov Reviewed-by: Josh Triplett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi-bgrt.c | 3 +++ arch/x86/platform/efi/efi_64.c | 2 ++ arch/x86/platform/efi/quirks.c | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index ea48449..9a52b5c 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -10,6 +10,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a0ac0f9..d347e85 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -15,6 +15,8 @@ * */ +#define pr_fmt(fmt) "efi: " fmt + #include #include #include diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 1c7380d..6452070 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "efi: " fmt + #include #include #include @@ -256,7 +258,7 @@ void __init efi_apply_memmap_quirks(void) * services. */ if (!efi_runtime_supported()) { - pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); efi_unmap_memmap(); } -- cgit v1.1 From 50a0cb565246f20d59cdb161778531e4b19d35ac Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 9 Dec 2015 15:41:08 -0800 Subject: x86/efi-bgrt: Fix kernel panic when mapping BGRT data Starting with this commit 35eb8b81edd4 ("x86/efi: Build our own page table structures") efi regions have a separate page directory called "efi_pgd". In order to access any efi region we have to first shift %cr3 to this page table. In the bgrt code we are trying to copy bgrt_header and image, but these regions fall under "EFI_BOOT_SERVICES_DATA" and to access these regions we have to shift %cr3 to efi_pgd and not doing so will cause page fault as shown below. [ 0.251599] Last level dTLB entries: 4KB 64, 2MB 0, 4MB 0, 1GB 4 [ 0.259126] Freeing SMP alternatives memory: 32K (ffffffff8230e000 - ffffffff82316000) [ 0.271803] BUG: unable to handle kernel paging request at fffffffefce35002 [ 0.279740] IP: [] efi_bgrt_init+0x144/0x1fd [ 0.286383] PGD 300f067 PUD 0 [ 0.289879] Oops: 0000 [#1] SMP [ 0.293566] Modules linked in: [ 0.297039] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.0-rc1-eywa-eywa-built-in-47041+ #2 [ 0.306619] Hardware name: Intel Corporation Skylake Client platform/Skylake Y LPDDR3 RVP3, BIOS SKLSE2R1.R00.B104.B01.1511110114 11/11/2015 [ 0.320925] task: ffffffff820134c0 ti: ffffffff82000000 task.ti: ffffffff82000000 [ 0.329420] RIP: 0010:[] [] efi_bgrt_init+0x144/0x1fd [ 0.338821] RSP: 0000:ffffffff82003f18 EFLAGS: 00010246 [ 0.344852] RAX: fffffffefce35000 RBX: fffffffefce35000 RCX: fffffffefce2b000 [ 0.352952] RDX: 000000008a82b000 RSI: ffffffff8235bb80 RDI: 000000008a835000 [ 0.361050] RBP: ffffffff82003f30 R08: 000000008a865000 R09: ffffffffff202850 [ 0.369149] R10: ffffffff811ad62f R11: 0000000000000000 R12: 0000000000000000 [ 0.377248] R13: ffff88016dbaea40 R14: ffffffff822622c0 R15: ffffffff82003fb0 [ 0.385348] FS: 0000000000000000(0000) GS:ffff88016d800000(0000) knlGS:0000000000000000 [ 0.394533] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.401054] CR2: fffffffefce35002 CR3: 000000000300c000 CR4: 00000000003406f0 [ 0.409153] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 0.417252] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 0.425350] Stack: [ 0.427638] ffffffffffffffff ffffffff82256900 ffff88016dbaea40 ffffffff82003f40 [ 0.436086] ffffffff821bbce0 ffffffff82003f88 ffffffff8219c0c2 0000000000000000 [ 0.444533] ffffffff8219ba4a ffffffff822622c0 0000000000083000 00000000ffffffff [ 0.452978] Call Trace: [ 0.455763] [] efi_late_init+0x9/0xb [ 0.461697] [] start_kernel+0x463/0x47f [ 0.467928] [] ? set_init_arg+0x55/0x55 [ 0.474159] [] ? early_idt_handler_array+0x120/0x120 [ 0.481669] [] x86_64_start_reservations+0x2a/0x2c [ 0.488982] [] x86_64_start_kernel+0x13d/0x14c [ 0.495897] Code: 00 41 b4 01 48 8b 78 28 e8 09 36 01 00 48 85 c0 48 89 c3 75 13 48 c7 c7 f8 ac d3 81 31 c0 e8 d7 3b fb fe e9 b5 00 00 00 45 84 e4 <44> 8b 6b 02 74 0d be 06 00 00 00 48 89 df e8 ae 34 0$ [ 0.518151] RIP [] efi_bgrt_init+0x144/0x1fd [ 0.524888] RSP [ 0.528851] CR2: fffffffefce35002 [ 0.532615] ---[ end trace 7b06521e6ebf2aea ]--- [ 0.537852] Kernel panic - not syncing: Attempted to kill the idle task! As said above one way to fix this bug is to shift %cr3 to efi_pgd but we are not doing that way because it leaks inner details of how we switch to EFI page tables into a new call site and it also adds duplicate code. Instead, we remove the call to efi_lookup_mapped_addr() and always perform early_mem*() instead of early_io*() because we want to remap RAM regions and not I/O regions. We also delete efi_lookup_mapped_addr() because we are no longer using it. Signed-off-by: Sai Praneeth Prakhya Reported-by: Wendy Wang Cc: Borislav Petkov Cc: Josh Triplett Cc: Ricardo Neri Cc: Ravi Shankar Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi-bgrt.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 9a52b5c..bf51f4c 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -31,8 +31,7 @@ struct bmp_header { void __init efi_bgrt_init(void) { acpi_status status; - void __iomem *image; - bool ioremapped = false; + void *image; struct bmp_header bmp_header; if (acpi_disabled) @@ -73,20 +72,14 @@ void __init efi_bgrt_init(void) return; } - image = efi_lookup_mapped_addr(bgrt_tab->image_address); + image = early_memremap(bgrt_tab->image_address, sizeof(bmp_header)); if (!image) { - image = early_ioremap(bgrt_tab->image_address, - sizeof(bmp_header)); - ioremapped = true; - if (!image) { - pr_err("Ignoring BGRT: failed to map image header memory\n"); - return; - } + pr_err("Ignoring BGRT: failed to map image header memory\n"); + return; } - memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); - if (ioremapped) - early_iounmap(image, sizeof(bmp_header)); + memcpy(&bmp_header, image, sizeof(bmp_header)); + early_memunmap(image, sizeof(bmp_header)); bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); @@ -96,18 +89,14 @@ void __init efi_bgrt_init(void) return; } - if (ioremapped) { - image = early_ioremap(bgrt_tab->image_address, - bmp_header.size); - if (!image) { - pr_err("Ignoring BGRT: failed to map image memory\n"); - kfree(bgrt_image); - bgrt_image = NULL; - return; - } + image = early_memremap(bgrt_tab->image_address, bmp_header.size); + if (!image) { + pr_err("Ignoring BGRT: failed to map image memory\n"); + kfree(bgrt_image); + bgrt_image = NULL; + return; } - memcpy_fromio(bgrt_image, image, bgrt_image_size); - if (ioremapped) - early_iounmap(image, bmp_header.size); + memcpy(bgrt_image, image, bgrt_image_size); + early_memunmap(image, bmp_header.size); } -- cgit v1.1 From e2c90dd7e11e3025b46719a79fb4bb1e7a5cef9f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 21 Dec 2015 14:12:52 +0000 Subject: x86/efi-bgrt: Replace early_memremap() with memremap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Môshe reported the following warning triggered on his machine since commit 50a0cb565246 ("x86/efi-bgrt: Fix kernel panic when mapping BGRT data"), [ 0.026936] ------------[ cut here ]------------ [ 0.026941] WARNING: CPU: 0 PID: 0 at mm/early_ioremap.c:137 __early_ioremap+0x102/0x1bb() [ 0.026941] Modules linked in: [ 0.026944] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.0-rc1 #2 [ 0.026945] Hardware name: Dell Inc. XPS 13 9343/09K8G1, BIOS A05 07/14/2015 [ 0.026946] 0000000000000000 900f03d5a116524d ffffffff81c03e60 ffffffff813a3fff [ 0.026948] 0000000000000000 ffffffff81c03e98 ffffffff810a0852 00000000d7b76000 [ 0.026949] 0000000000000000 0000000000000001 0000000000000001 000000000000017c [ 0.026951] Call Trace: [ 0.026955] [] dump_stack+0x44/0x55 [ 0.026958] [] warn_slowpath_common+0x82/0xc0 [ 0.026959] [] warn_slowpath_null+0x1a/0x20 [ 0.026961] [] __early_ioremap+0x102/0x1bb [ 0.026962] [] early_memremap+0x13/0x15 [ 0.026964] [] efi_bgrt_init+0x162/0x1ad [ 0.026966] [] efi_late_init+0x9/0xb [ 0.026968] [] start_kernel+0x46f/0x49f [ 0.026970] [] ? early_idt_handler_array+0x120/0x120 [ 0.026972] [] x86_64_start_reservations+0x2a/0x2c [ 0.026974] [] x86_64_start_kernel+0x14a/0x16d [ 0.026977] ---[ end trace f9b3812eb8e24c58 ]--- [ 0.026978] efi_bgrt: Ignoring BGRT: failed to map image memory early_memremap() has an upper limit on the size of mapping it can handle which is ~200KB. Clearly the BGRT image on Môshe's machine is much larger than that. There's actually no reason to restrict ourselves to using the early_* version of memremap() - the ACPI BGRT driver is invoked late enough in boot that we can use the standard version, with the benefit that the late version allows mappings of arbitrary size. Reported-by: Môshe van der Sterre Tested-by: Môshe van der Sterre Signed-off-by: Matt Fleming Cc: Josh Triplett Cc: Sai Praneeth Prakhya Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1450707172-12561-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Thomas Gleixner --- arch/x86/platform/efi/efi-bgrt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index bf51f4c..b097066 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -72,14 +72,14 @@ void __init efi_bgrt_init(void) return; } - image = early_memremap(bgrt_tab->image_address, sizeof(bmp_header)); + image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB); if (!image) { pr_err("Ignoring BGRT: failed to map image header memory\n"); return; } memcpy(&bmp_header, image, sizeof(bmp_header)); - early_memunmap(image, sizeof(bmp_header)); + memunmap(image); bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); @@ -89,7 +89,7 @@ void __init efi_bgrt_init(void) return; } - image = early_memremap(bgrt_tab->image_address, bmp_header.size); + image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); if (!image) { pr_err("Ignoring BGRT: failed to map image memory\n"); kfree(bgrt_image); @@ -98,5 +98,5 @@ void __init efi_bgrt_init(void) } memcpy(bgrt_image, image, bgrt_image_size); - early_memunmap(image, bmp_header.size); + memunmap(image); } -- cgit v1.1 From 753b11ef8e92a1c1bbe97f2a5ec14bdd1ef2e6fe Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 21 Jan 2016 14:11:59 +0000 Subject: x86/efi: Setup separate EFI page tables in kexec paths The switch to using a new dedicated page table for EFI runtime calls in commit commit 67a9108ed431 ("x86/efi: Build our own page table structures") failed to take into account changes required for the kexec code paths, which are unfortunately duplicated in the EFI code. Call the allocation and setup functions in kexec_enter_virtual_mode() just like we do for __efi_enter_virtual_mode() to avoid hitting NULL-pointer dereferences when making EFI runtime calls. At the very least, the call to efi_setup_page_tables() should have existed for kexec before the following commit: 67a9108ed431 ("x86/efi: Build our own page table structures") Things just magically worked because we were actually using the kernel's page tables that contained the required mappings. Reported-by: Srikar Dronamraju Tested-by: Srikar Dronamraju Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1453385519-11477-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 3c1f3cd..bdd9477 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -815,6 +815,7 @@ static void __init kexec_enter_virtual_mode(void) { #ifdef CONFIG_KEXEC_CORE efi_memory_desc_t *md; + unsigned int num_pages; void *p; efi.systab = NULL; @@ -829,6 +830,12 @@ static void __init kexec_enter_virtual_mode(void) return; } + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + /* * Map efi regions which were passed via setup_data. The virt_addr is a * fixed addr which was used in first kernel of a kexec boot. @@ -843,6 +850,14 @@ static void __init kexec_enter_virtual_mode(void) BUG_ON(!efi.systab); + num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE); + num_pages >>= PAGE_SHIFT; + + if (efi_setup_page_tables(memmap.phys_map, num_pages)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + efi_sync_low_kernel_mappings(); /* -- cgit v1.1 From ca0e30dcaa53a3fcb2dfdf74252d30bc40603eea Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:58 +0000 Subject: efi: Add nonblocking option to efi_query_variable_store() The function efi_query_variable_store() may be invoked by efivar_entry_set_nonblocking(), which itself takes care to only call a non-blocking version of the SetVariable() runtime wrapper. However, efi_query_variable_store() may call the SetVariable() wrapper directly, as well as the wrapper for QueryVariableInfo(), both of which could deadlock in the same way we are trying to prevent by calling efivar_entry_set_nonblocking() in the first place. So instead, modify efi_query_variable_store() to use the non-blocking variants of QueryVariableInfo() (and give up rather than free up space if the available space is below EFI_MIN_RESERVE) if invoked with the 'nonblocking' argument set to true. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 4535046..2326bf5 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -57,13 +57,41 @@ void efi_delete_dummy_variable(void) } /* + * In the nonblocking case we do not attempt to perform garbage + * collection if we do not have enough free space. Rather, we do the + * bare minimum check and give up immediately if the available space + * is below EFI_MIN_RESERVE. + * + * This function is intended to be small and simple because it is + * invoked from crash handler paths. + */ +static efi_status_t +query_variable_store_nonblocking(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info_nonblocking(attributes, &storage_size, + &remaining_size, + &max_size); + if (status != EFI_SUCCESS) + return status; + + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} + +/* * Some firmware implementations refuse to boot if there's insufficient space * in the variable store. Ensure that we never use more than a safe limit. * * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable * store. */ -efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, + bool nonblocking) { efi_status_t status; u64 storage_size, remaining_size, max_size; @@ -71,6 +99,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) return 0; + if (nonblocking) + return query_variable_store_nonblocking(attributes, size); + status = efi.query_variable_info(attributes, &storage_size, &remaining_size, &max_size); if (status != EFI_SUCCESS) -- cgit v1.1 From 66dbe99cfe30e113d2e571e68b9b6a1a8985a157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=B4she=20van=20der=20Sterre?= Date: Mon, 1 Feb 2016 22:07:03 +0000 Subject: x86/efi/bgrt: Don't ignore the BGRT if the 'valid' bit is 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unintuitively, the BGRT graphic is apparently meant to be usable if the valid bit in not set. The valid bit only conveys uncertainty about the validity in relation to the screen state. Windows 10 actually uses the BGRT image for its boot screen even if not 'valid', for example when the user triggered the boot menu. Because it is unclear if all firmwares will provide a usable graphic in this case, we now look at the BMP magic number as an additional check. Reviewed-by: Josh Triplett Signed-off-by: Môshe van der Sterre Signed-off-by: Matt Fleming Cc: =?UTF-8?q?M=C3=B4she=20van=20der=20Sterre?= Link: http://lkml.kernel.org/r/1454364428-494-10-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi-bgrt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index b097066..a243381 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -57,11 +57,6 @@ void __init efi_bgrt_init(void) bgrt_tab->status); return; } - if (bgrt_tab->status != 1) { - pr_debug("Ignoring BGRT: invalid status %u (expected 1)\n", - bgrt_tab->status); - return; - } if (bgrt_tab->image_type != 0) { pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n", bgrt_tab->image_type); @@ -80,6 +75,11 @@ void __init efi_bgrt_init(void) memcpy(&bmp_header, image, sizeof(bmp_header)); memunmap(image); + if (bmp_header.id != 0x4d42) { + pr_err("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", + bmp_header.id); + return; + } bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); -- cgit v1.1 From 1e82b94790709fb2a22d16d53bb04d751fb3878d Mon Sep 17 00:00:00 2001 From: Robert Elliott Date: Mon, 1 Feb 2016 22:07:05 +0000 Subject: x86/efi: Show actual ending addresses in efi_print_memmap Adjust efi_print_memmap to print the real end address of each range, not 1 byte beyond. This matches other prints like those for SRAT and nosave memory. While investigating grub persistent memory corruption issues, it was helpful to make this table match the ending address convention used by: * the kernel's e820 table prints BIOS-e820: [mem 0x0000001680000000-0x0000001c7fffffff] reserved * the kernel's nosave memory prints PM: Registered nosave memory: [mem 0x880000000-0xc7fffffff] * the kernel's ACPI System Resource Affinity Table prints SRAT: Node 1 PXM 1 [mem 0x480000000-0x87fffffff] * grub's lsmmap and lsefimmap commands reserved 0000001680000000-0000001c7fffffff 00600000 24GiB UC WC WT WB NV * the UEFI shell's memmap command Reserved 000000007FC00000-000000007FFFFFFF 0000000000000400 0000000000000001 For example, if you grep all the various logs for c7fffffff, you won't find the kernel's line if it uses c80000000. Also, change the closing ) to ] to match the opening [. old: efi: mem61: [Persistent Memory | | | | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c80000000) (16384MB) new: efi: mem61: [Persistent Memory | | | | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c7fffffff] (16384MB) Signed-off-by: Robert Elliott Signed-off-by: Matt Fleming Reviewed-by: Laszlo Ersek Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Leif Lindholm Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-12-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index bdd9477..e80826e 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -235,10 +235,10 @@ void __init efi_print_memmap(void) char buf[64]; md = p; - pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n", + pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n", i, efi_md_typeattr_format(buf, sizeof(buf), md), md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1, (md->num_pages >> (20 - EFI_PAGE_SHIFT))); } #endif /* EFI_DEBUG */ -- cgit v1.1 From 397630150632639b3ca5b4414accd5011c45e276 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 17 Feb 2016 12:35:56 +0000 Subject: x86/mm/pat: Use _PAGE_GLOBAL bit for EFI page table mappings Since EFI page tables can be treated as kernel page tables they should be global. All the other page mapping functions in pageattr.c set the _PAGE_GLOBAL bit and we want to avoid inconsistencies when we map a page in the EFI code paths, for example when that page is split in __split_large_page(), etc. It also makes it easier to validate that the EFI region mappings have the correct attributes because there are fewer differences compared with regular kernel mappings. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 632d34d..bf312da 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -909,6 +909,20 @@ static void populate_pte(struct cpa_data *cpa, pte = pte_offset_kernel(pmd, start); + /* + * Set the GLOBAL flags only if the PRESENT flag is + * set otherwise pte_present will return true even on + * a non present pte. The canon_pgprot will clear + * _PAGE_GLOBAL for the ancient hardware that doesn't + * support it. + */ + if (pgprot_val(pgprot) & _PAGE_PRESENT) + pgprot_val(pgprot) |= _PAGE_GLOBAL; + else + pgprot_val(pgprot) &= ~_PAGE_GLOBAL; + + pgprot = canon_pgprot(pgprot); + while (num_pages-- && start < end) { set_pte(pte, pfn_pte(cpa->pfn, pgprot)); -- cgit v1.1 From 1ce99bf45306ba889faadced6baabebf7770c546 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:35:58 +0000 Subject: arm64/vmlinux.lds.S: Handle .init.rodata.xxx and .init.bss sections The EFI stub is typically built into the decompressor (x86, ARM) so none of its symbols are annotated as __init. However, on arm64, the stub is linked into the kernel proper, and the code is __init annotated at the section level by prepending all names of SHF_ALLOC sections with '.init'. This results in section names like .init.rodata.str1.8 (for string literals) and .init.bss (which is tiny), both of which can be moved into the .init.data output section. Tested-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Acked-by: Will Deacon Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/arm64/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index e3928f5..cbf4db4 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -134,6 +134,7 @@ SECTIONS CON_INITCALL SECURITY_INITCALL INIT_RAM_FS + *(.init.rodata.* .init.bss) /* from the EFI stub */ } .exit.data : { ARM_EXIT_KEEP(EXIT_DATA) -- cgit v1.1 From 15f003d20782a4079e078d16df57081ebd1fc150 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 17 Feb 2016 12:36:04 +0000 Subject: x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd() As part of the preparation for the EFI_MEMORY_RO flag added in the UEFI 2.5 specification, we need the ability to map pages in kernel page tables without _PAGE_RW being set. Modify kernel_map_pages_in_pgd() to require its callers to pass _PAGE_RW if the pages need to be mapped read/write. Otherwise, we'll map the pages as read-only. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-12-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 3 +++ arch/x86/platform/efi/efi_64.c | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bf312da..14c38ae 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1971,6 +1971,9 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, if (!(page_flags & _PAGE_NX)) cpa.mask_clr = __pgprot(_PAGE_NX); + if (!(page_flags & _PAGE_RW)) + cpa.mask_clr = __pgprot(_PAGE_RW); + cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); retval = __change_page_attr_set_clr(&cpa, 0); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b492521..b0965b2 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -233,7 +233,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * phys_efi_set_virtual_address_map(). */ pfn = pa_memmap >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) { + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) { pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); return 1; } @@ -262,7 +262,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) pfn = md->phys_addr >> PAGE_SHIFT; npages = md->num_pages; - if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, _PAGE_RW)) { pr_err("Failed to map 1:1 memory\n"); return 1; } @@ -279,7 +279,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) text = __pa(_text); pfn = text >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) { pr_err("Failed to map kernel text 1:1\n"); return 1; } @@ -294,7 +294,7 @@ void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) static void __init __map_region(efi_memory_desc_t *md, u64 va) { - unsigned long flags = 0; + unsigned long flags = _PAGE_RW; unsigned long pfn; pgd_t *pgd = efi_pgd; -- cgit v1.1 From 6d0cc887d571e96f928be83f094322451fd4bf6f Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 17 Feb 2016 12:36:05 +0000 Subject: x86/efi: Map EFI_MEMORY_{XP,RO} memory region bits to EFI page tables Now that we have EFI memory region bits that indicate which regions do not need execute permission or read/write permission in the page tables, let's use them. We also check for EFI_NX_PE_DATA and only enforce the restrictive mappings if it's present (to allow us to ignore buggy firmware that sets bits it didn't mean to and to preserve backwards compatibility). Instead of assuming that firmware would set appropriate attributes in memory descriptor like EFI_MEMORY_RO for code and EFI_MEMORY_XP for data, we can expect some firmware out there which might only set *type* in memory descriptor to be EFI_RUNTIME_SERVICES_CODE or EFI_RUNTIME_SERVICES_DATA leaving away attribute. This will lead to improper mappings of EFI runtime regions. In order to avoid it, we check attribute and type of memory descriptor to update mappings and moreover Windows works this way. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kees Cook Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-13-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 2 +- arch/x86/platform/efi/efi.c | 9 +++++++-- arch/x86/platform/efi/efi_32.c | 2 +- arch/x86/platform/efi/efi_64.c | 45 ++++++++++++++++++++++++++++++++++++++---- 4 files changed, 50 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8fd9e63..7bb206f 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -141,7 +141,7 @@ extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pa extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); -extern void __init efi_runtime_mkexec(void); +extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index e80826e..994a7df8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -934,7 +934,6 @@ static void __init __efi_enter_virtual_mode(void) } efi_sync_low_kernel_mappings(); - efi_dump_pagetable(); if (efi_is_native()) { status = phys_efi_set_virtual_address_map( @@ -972,7 +971,13 @@ static void __init __efi_enter_virtual_mode(void) efi.set_virtual_address_map = NULL; - efi_runtime_mkexec(); + /* + * Apply more restrictive page table mapping attributes now that + * SVAM() has been called and the firmware has performed all + * necessary relocation fixups for the new virtual addresses. + */ + efi_runtime_update_mappings(); + efi_dump_pagetable(); /* * We mapped the descriptor array into the EFI pagetable above diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 58d669b..338402b 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -90,7 +90,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) __flush_tlb_all(); } -void __init efi_runtime_mkexec(void) +void __init efi_runtime_update_mappings(void) { if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b0965b2..40d2f44 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -393,13 +393,50 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len) efi_setup = phys_addr + sizeof(struct setup_data); } -void __init efi_runtime_mkexec(void) +void __init efi_runtime_update_mappings(void) { - if (!efi_enabled(EFI_OLD_MEMMAP)) + unsigned long pfn; + pgd_t *pgd = efi_pgd; + efi_memory_desc_t *md; + void *p; + + if (efi_enabled(EFI_OLD_MEMMAP)) { + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); + return; + } + + if (!efi_enabled(EFI_NX_PE_DATA)) return; - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + unsigned long pf = 0; + md = p; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + + if (!(md->attribute & EFI_MEMORY_WB)) + pf |= _PAGE_PCD; + + if ((md->attribute & EFI_MEMORY_XP) || + (md->type == EFI_RUNTIME_SERVICES_DATA)) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO) && + (md->type != EFI_RUNTIME_SERVICES_CODE)) + pf |= _PAGE_RW; + + /* Update the 1:1 mapping */ + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + + if (kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } } void __init efi_dump_pagetable(void) -- cgit v1.1 From 2ad510dc372c2caac9aada9ff6dd10e787616e1d Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 17 Feb 2016 12:36:06 +0000 Subject: x86/efi: Only map kernel text for EFI mixed mode The correct symbol to use when figuring out the size of the kernel text is '_etext', not '_end' which is the symbol for the entire kernel image includes data and debug sections. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-14-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 40d2f44..49e4dd4 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -275,7 +275,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.phys_stack = virt_to_phys(page_address(page)); efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ - npages = (_end - _text) >> PAGE_SHIFT; + npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; -- cgit v1.1 From d367cef0a7f0c6ee86e997c0cb455b21b3c6b9ba Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 14 Mar 2016 10:33:01 +0000 Subject: x86/mm/pat: Fix boot crash when 1GB pages are not supported by the CPU Scott reports that with the new separate EFI page tables he's seeing the following error on boot, caused by setting reserved bits in the page table structures (fault code is PF_RSVD | PF_PROT), swapper/0: Corrupted page table at address 17b102020 PGD 17b0e5063 PUD 1400000e3 Bad pagetable: 0009 [#1] SMP On first inspection the PUD is using a 1GB page size (_PAGE_PSE) and looks fine but that's only true if support for 1GB PUD pages ("pdpe1gb") is present in the CPU. Scott's Intel Celeron N2820 does not have that feature and so the _PAGE_PSE bit is reserved. Fix this issue by making the 1GB mapping code in conditional on "cpu_has_gbpages". This issue didn't come up in the past because the required mapping for the faulting address (0x17b102020) will already have been setup by the kernel in early boot before we got to efi_map_regions(), but we no longer use the standard kernel page tables during EFI calls. Reported-by: Scott Ashcroft Tested-by: Scott Ashcroft Signed-off-by: Matt Fleming Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Ben Hutchings Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Raphael Hertzog Cc: Roger Shimizu Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1457951581-27353-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 14c38ae..fcf8e29 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1055,7 +1055,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* * Map everything starting from the Gb boundary, possibly with 1G pages */ - while (end - start >= PUD_SIZE) { + while (cpu_has_gbpages && end - start >= PUD_SIZE) { set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); -- cgit v1.1