From 7d7b4ae418728916456c6fd03a97ef35345ff30d Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 25 Mar 2016 17:30:07 +0800 Subject: arm64: cpufeature: append additional id_aa64mmfr2 fields to cpufeature There are some new cpu features which can be identified by id_aa64mmfr2, this patch appends all fields of it. Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 943f514..677f17c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -130,7 +130,11 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0), ARM64_FTR_END, }; -- cgit v1.1 From b5fda7ed5c63297d0f43e608b455d82ceabdebf0 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 25 Mar 2016 11:08:55 +0800 Subject: arm64: cpuidle: make arm_cpuidle_suspend() a bit more efficient Currently, we check two pointers: cpu_ops and cpu_suspend on every idle state entry. These pointers check can be avoided: If cpu_ops has not been registered, arm_cpuidle_init() will return -EOPNOTSUPP, so arm_cpuidle_suspend() will never have chance to run. In other word, the cpu_ops check can be avoid. Similarly, the cpu_suspend check could be avoided in this hot path by moving it into arm_cpuidle_init(). I measured the 4096 * time from arm_cpuidle_suspend entry point to the cpu_psci_cpu_suspend entry point. HW platform is Marvell BG4CT STB board. 1. only one shell, no other process, hot-unplug secondary cpus, execute the following cmd while true do sleep 0.2 done before the patch: 1581220ns after the patch: 1579630ns reduced by 0.1% 2. only one shell, no other process, hot-unplug secondary cpus, execute the following cmd while true do md5sum /tmp/testfile sleep 0.2 done NOTE: the testfile size should be larger than L1+L2 cache size before the patch: 1961960ns after the patch: 1912500ns reduced by 2.5% So the more complex the system load, the bigger the improvement. Signed-off-by: Jisheng Zhang Acked-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuidle.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 9047cab6..e11857f 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -19,7 +19,8 @@ int __init arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; - if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle) + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend && + cpu_ops[cpu]->cpu_init_idle) ret = cpu_ops[cpu]->cpu_init_idle(cpu); return ret; @@ -36,11 +37,5 @@ int arm_cpuidle_suspend(int index) { int cpu = smp_processor_id(); - /* - * If cpu_ops have not been registered or suspend - * has not been initialized, cpu_suspend call fails early. - */ - if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) - return -EOPNOTSUPP; return cpu_ops[cpu]->cpu_suspend(index); } -- cgit v1.1 From 8923a16686569375bfa25b877769f9a3093e9f22 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 30 Mar 2016 15:18:43 +0200 Subject: arm64: remove the now unneeded relocate_initrd() This removes the relocate_initrd() implementation and invocation, which are no longer needed now that the placement of the initrd is guaranteed to be covered by the linear mapping. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 64 ----------------------------------------------- 1 file changed, 64 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 9dc6776..7b85b1d 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -224,69 +224,6 @@ static void __init request_standard_resources(void) } } -#ifdef CONFIG_BLK_DEV_INITRD -/* - * Relocate initrd if it is not completely within the linear mapping. - * This would be the case if mem= cuts out all or part of it. - */ -static void __init relocate_initrd(void) -{ - phys_addr_t orig_start = __virt_to_phys(initrd_start); - phys_addr_t orig_end = __virt_to_phys(initrd_end); - phys_addr_t ram_end = memblock_end_of_DRAM(); - phys_addr_t new_start; - unsigned long size, to_free = 0; - void *dest; - - if (orig_end <= ram_end) - return; - - /* - * Any of the original initrd which overlaps the linear map should - * be freed after relocating. - */ - if (orig_start < ram_end) - to_free = ram_end - orig_start; - - size = orig_end - orig_start; - if (!size) - return; - - /* initrd needs to be relocated completely inside linear mapping */ - new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn), - size, PAGE_SIZE); - if (!new_start) - panic("Cannot relocate initrd of size %ld\n", size); - memblock_reserve(new_start, size); - - initrd_start = __phys_to_virt(new_start); - initrd_end = initrd_start + size; - - pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n", - orig_start, orig_start + size - 1, - new_start, new_start + size - 1); - - dest = (void *)initrd_start; - - if (to_free) { - memcpy(dest, (void *)__phys_to_virt(orig_start), to_free); - dest += to_free; - } - - copy_from_early_mem(dest, orig_start + to_free, size - to_free); - - if (to_free) { - pr_info("Freeing original RAMDISK from [%llx-%llx]\n", - orig_start, orig_start + to_free - 1); - memblock_free(orig_start, to_free); - } -} -#else -static inline void __init relocate_initrd(void) -{ -} -#endif - u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) @@ -327,7 +264,6 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); paging_init(); - relocate_initrd(); kasan_init(); -- cgit v1.1 From 97bbb54e4f27f4c63645f603bf767183ee0dd549 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 30 Mar 2016 16:45:56 +0200 Subject: arm64: vdso: avoid virt_to_page() translations on kernel symbols The translation performed by virt_to_page() is only valid for linear addresses, and kernel symbols are no longer in the linear mapping. So perform the __pa() translation explicitly, which does the right thing in either case, and only then translate to a struct page offset. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 97bc68f..64fc030 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -131,11 +131,11 @@ static int __init vdso_init(void) return -ENOMEM; /* Grab the vDSO data page. */ - vdso_pagelist[0] = virt_to_page(vdso_data); + vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data))); /* Grab the vDSO code pages. */ for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i); /* Populate the special mapping structures */ vdso_spec[0] = (struct vm_special_mapping) { -- cgit v1.1 From e44308e62e19b42810207780a2a32148af0cb5d9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 30 Mar 2016 16:45:59 +0200 Subject: arm64: insn: avoid virt_to_page() translations on core kernel symbols Before restricting virt_to_page() to the linear mapping, ensure that the text patching code does not use it to resolve references into the core kernel text, which is mapped in the vmalloc area. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7371455..368c082 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -96,7 +96,7 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) page = vmalloc_to_page(addr); else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) - page = virt_to_page(addr); + page = pfn_to_page(PHYS_PFN(__pa(addr))); else return addr; -- cgit v1.1 From 546c8c44f092b2f23291fe499c221efc8cabbb67 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 30 Mar 2016 17:43:07 +0200 Subject: arm64: move early boot code to the .init segment Apart from the arm64/linux and EFI header data structures, there is nothing in the .head.text section that must reside at the beginning of the Image. So let's move it to the .init section where it belongs. Note that this involves some minor tweaking of the EFI header, primarily because the address of 'stext' no longer coincides with the start of the .text section. It also requires a couple of relocated symbol references to be slightly rewritten or their definition moved to the linker script. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/efi-entry.S | 2 +- arch/arm64/kernel/head.S | 32 +++++++++++++++----------------- arch/arm64/kernel/image.h | 4 ++++ 3 files changed, 20 insertions(+), 18 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index cae3112..e88c064 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -62,7 +62,7 @@ ENTRY(entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - movz x21, #:abs_g0:stext_offset + ldr w21, =stext_offset add x21, x0, x21 /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4203d5f..b434176 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -102,8 +102,6 @@ _head: #endif #ifdef CONFIG_EFI - .globl __efistub_stext_offset - .set __efistub_stext_offset, stext - _head .align 3 pe_header: .ascii "PE" @@ -123,11 +121,11 @@ optional_header: .short 0x20b // PE32+ format .byte 0x02 // MajorLinkerVersion .byte 0x14 // MinorLinkerVersion - .long _end - stext // SizeOfCode + .long _end - efi_header_end // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long __efistub_entry - _head // AddressOfEntryPoint - .long __efistub_stext_offset // BaseOfCode + .long efi_header_end - _head // BaseOfCode extra_header_fields: .quad 0 // ImageBase @@ -144,7 +142,7 @@ extra_header_fields: .long _end - _head // SizeOfImage // Everything before the kernel image is considered part of the header - .long __efistub_stext_offset // SizeOfHeaders + .long efi_header_end - _head // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -188,10 +186,10 @@ section_table: .byte 0 .byte 0 .byte 0 // end of 0 padding of section name - .long _end - stext // VirtualSize - .long __efistub_stext_offset // VirtualAddress - .long _edata - stext // SizeOfRawData - .long __efistub_stext_offset // PointerToRawData + .long _end - efi_header_end // VirtualSize + .long efi_header_end - _head // VirtualAddress + .long _edata - efi_header_end // SizeOfRawData + .long efi_header_end - _head // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) @@ -200,15 +198,18 @@ section_table: .long 0xe0500020 // Characteristics (section flags) /* - * EFI will load stext onwards at the 4k section alignment + * EFI will load .text onwards at the 4k section alignment * described in the PE/COFF header. To ensure that instruction * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that stext is + * correctly at this alignment, we must ensure that .text is * placed at a 4k boundary in the Image to begin with. */ .align 12 +efi_header_end: #endif + __INIT + ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode @@ -223,12 +224,12 @@ ENTRY(stext) * the TCR will have been set. */ ldr x27, 0f // address to jump to after - // MMU has been enabled + neg x27, x27 // MMU has been enabled adr_l lr, __enable_mmu // return (PIC) address b __cpu_setup // initialise processor ENDPROC(stext) .align 3 -0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR +0: .quad (_text - TEXT_OFFSET) - __mmap_switched - KIMAGE_VADDR /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -397,7 +398,7 @@ __create_page_tables: ldr x5, =KIMAGE_VADDR add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 - ldr w6, kernel_img_size + ldr w6, =kernel_img_size add x6, x6, x5 mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -414,9 +415,6 @@ __create_page_tables: ret x28 ENDPROC(__create_page_tables) - -kernel_img_size: - .long _end - (_head - TEXT_OFFSET) .ltorg /* diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 5e360ce..4fd72da 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -71,8 +71,12 @@ DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); +kernel_img_size = _end - (_text - TEXT_OFFSET); + #ifdef CONFIG_EFI +__efistub_stext_offset = stext - _text; + /* * Prevent the symbol aliases below from being emitted into the kallsyms * table, by forcing them to be absolute symbols (which are conveniently -- cgit v1.1 From 7eb90f2ff7e3ee814ff12f3cd909b965cdd4a869 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 30 Mar 2016 17:43:08 +0200 Subject: arm64: cover the .head.text section in the .text segment mapping Keeping .head.text out of the .text mapping buys us very little: its actual payload is only 4 KB, most of which is padding, but the page alignment may add up to 2 MB (in case of CONFIG_DEBUG_ALIGN_RODATA=y) of additional padding to the uncompressed kernel Image. Also, on 4 KB granule kernels, the 4 KB misalignment of .text forces us to map the adjacent 56 KB of code without the PTE_CONT attribute, and since this region contains things like the vector table and the GIC interrupt handling entry point, this region is likely to benefit from the reduced TLB pressure that results from PTE_CONT mappings. So remove the alignment between the .head.text and .text sections, and use the [_text, _etext) rather than the [_stext, _etext) interval for mapping the .text segment. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/vmlinux.lds.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 5a1939a..61a1075 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -96,7 +96,6 @@ SECTIONS _text = .; HEAD_TEXT } - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; -- cgit v1.1 From 97740051dd31d200a0efaa84544fe5e4713aac40 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 30 Mar 2016 17:43:09 +0200 Subject: arm64: simplify kernel segment mapping granularity The mapping of the kernel consist of four segments, each of which is mapped with different permission attributes and/or lifetimes. To optimize the TLB and translation table footprint, we define various opaque constants in the linker script that resolve to different aligment values depending on the page size and whether CONFIG_DEBUG_ALIGN_RODATA is set. Considering that - a 4 KB granule kernel benefits from a 64 KB segment alignment (due to the fact that it allows the use of the contiguous bit), - the minimum alignment of the .data segment is THREAD_SIZE already, not PAGE_SIZE (i.e., we already have padding between _data and the start of the .data payload in many cases), - 2 MB is a suitable alignment value on all granule sizes, either for mapping directly (level 2 on 4 KB), or via the contiguous bit (level 3 on 16 KB and 64 KB), - anything beyond 2 MB exceeds the minimum alignment mandated by the boot protocol, and can only be mapped efficiently if the physical alignment happens to be the same, we can simplify this by standardizing on 64 KB (or 2 MB) explicitly, i.e., regardless of granule size, all segments are aligned either to 64 KB, or to 2 MB if CONFIG_DEBUG_ALIGN_RODATA=y. This also means we can drop the Kconfig dependency of CONFIG_DEBUG_ALIGN_RODATA on CONFIG_ARM64_4K_PAGES. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/vmlinux.lds.S | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 61a1075..77d86c9 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -63,14 +63,19 @@ PECOFF_FILE_ALIGNMENT = 0x200; #endif #if defined(CONFIG_DEBUG_ALIGN_RODATA) -#define ALIGN_DEBUG_RO . = ALIGN(1< Date: Wed, 6 Apr 2016 10:42:28 +0200 Subject: arm64/debug: Remove superfluous SMP function call Since commit 1cf4f629d9d2 ("cpu/hotplug: Move online calls to hotplugged cpu") it is ensured that callbacks of CPU_ONLINE and CPU_DOWN_PREPARE are processed on the hotplugged CPU. Due to this SMP function calls are no longer required. Replace smp_call_function_single() with a direct call to clear_os_lock(). The function writes the OSLAR register to clear OS locking. This does not require to be called with interrupts disabled, therefore the smp_call_function_single() calling convention is not preserved. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Anna-Maria Gleixner Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index c45f296..4fbf3c5 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -135,9 +135,8 @@ static void clear_os_lock(void *unused) static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { - int cpu = (unsigned long)data; if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - smp_call_function_single(cpu, clear_os_lock, NULL, 1); + clear_os_lock(NULL); return NOTIFY_OK; } -- cgit v1.1 From 4bc49274403395db41dc4ae7d2080346b2319d34 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Wed, 6 Apr 2016 10:42:49 +0200 Subject: arm64: hw-breakpoint: Remove superfluous SMP function call Since commit 1cf4f629d9d2 ("cpu/hotplug: Move online calls to hotplugged cpu") it is ensured that callbacks of CPU_ONLINE and CPU_DOWN_PREPARE are processed on the hotplugged CPU. Due to this SMP function calls are no longer required. Replace smp_call_function_single() with a direct call of hw_breakpoint_reset(). To keep the calling convention, interrupts are explicitly disabled around the call. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Anna-Maria Gleixner Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index b45c95d..11dc50a 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -886,9 +886,11 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - int cpu = (long)hcpu; - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) { + local_irq_disable(); + hw_breakpoint_reset(NULL); + local_irq_enable(); + } return NOTIFY_OK; } -- cgit v1.1 From ac1ad20f9ed73a22b0a72eb83206302f5fbff55c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 13 Apr 2016 14:41:33 +0100 Subject: arm64: vhe: Verify CPU Exception Levels With a VHE capable CPU, kernel can run at EL2 and is a decided at early boot. If some of the CPUs didn't start it EL2 or doesn't have VHE, we could have CPUs running at different exception levels, all in the same kernel! This patch adds an early check for the secondary CPUs to detect such situations. For each non-boot CPU add a sanity check to make sure we don't have different run levels w.r.t the boot CPU. We save the information on whether the boot CPU is running in hyp mode or not and ensure the remaining CPUs match it. Cc: Marc Zyngier Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose [will: made boot_cpu_hyp_mode static] Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/smp.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 677f17c..f586343 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -912,6 +912,7 @@ static u64 __raw_read_system_reg(u32 sys_id) */ static void check_early_cpu_features(void) { + verify_cpu_run_el(); verify_cpu_asid_bits(); } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b2d5f4e..5fbab1c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -75,6 +75,43 @@ enum ipi_msg_type { IPI_WAKEUP }; +#ifdef CONFIG_ARM64_VHE + +/* Whether the boot CPU is running in HYP mode or not*/ +static bool boot_cpu_hyp_mode; + +static inline void save_boot_cpu_run_el(void) +{ + boot_cpu_hyp_mode = is_kernel_in_hyp_mode(); +} + +static inline bool is_boot_cpu_in_hyp_mode(void) +{ + return boot_cpu_hyp_mode; +} + +/* + * Verify that a secondary CPU is running the kernel at the same + * EL as that of the boot CPU. + */ +void verify_cpu_run_el(void) +{ + bool in_el2 = is_kernel_in_hyp_mode(); + bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode(); + + if (in_el2 ^ boot_cpu_el2) { + pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n", + smp_processor_id(), + in_el2 ? 2 : 1, + boot_cpu_el2 ? 2 : 1); + cpu_panic_kernel(); + } +} + +#else +static inline void save_boot_cpu_run_el(void) {} +#endif + #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_kill(unsigned int cpu); #else @@ -401,6 +438,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { cpuinfo_store_boot_cpu(); + save_boot_cpu_run_el(); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } -- cgit v1.1 From 3194ac6e66cc7a00c1fa9fecf33a7c376b489497 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 8 Apr 2016 15:50:26 -0700 Subject: arm64: Move unflatten_device_tree() call earlier. In order to extract NUMA information from the device tree, we need to have the tree in its unflattened form. Move the call to bootmem_init() in the tail of paging_init() into setup_arch, and adjust header files so that its declaration is visible. Move the unflatten_device_tree() call between the calls to paging_init() and bootmem_init(). Follow on patches add NUMA handling to bootmem_init(). Signed-off-by: David Daney Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7b85b1d..432bc7f 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -265,18 +265,22 @@ void __init setup_arch(char **cmdline_p) paging_init(); + if (acpi_disabled) + unflatten_device_tree(); + + bootmem_init(); + kasan_init(); request_standard_resources(); early_ioremap_reset(); - if (acpi_disabled) { - unflatten_device_tree(); + if (acpi_disabled) psci_dt_init(); - } else { + else psci_acpi_init(); - } + xen_early_init(); cpu_read_bootcpu_ops(); -- cgit v1.1 From 1a2db300348b799479d2d22b84d51b27ad0458c7 Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Fri, 8 Apr 2016 15:50:27 -0700 Subject: arm64, numa: Add NUMA support for arm64 platforms. Attempt to get the memory and CPU NUMA node via of_numa. If that fails, default the dummy NUMA node and map all memory and CPUs to node 0. Tested-by: Shannon Zhao Reviewed-by: Robert Richter Signed-off-by: Ganapatrao Kulkarni Signed-off-by: David Daney Signed-off-by: Will Deacon --- arch/arm64/kernel/pci.c | 10 ++++++++++ arch/arm64/kernel/setup.c | 4 ++++ arch/arm64/kernel/smp.c | 4 ++++ 3 files changed, 18 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index c72de66..3c4e308 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -74,6 +74,16 @@ int raw_pci_write(unsigned int domain, unsigned int bus, return -ENXIO; } +#ifdef CONFIG_NUMA + +int pcibus_to_node(struct pci_bus *bus) +{ + return dev_to_node(&bus->dev); +} +EXPORT_SYMBOL(pcibus_to_node); + +#endif + #ifdef CONFIG_ACPI /* Root bridge scanning */ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 432bc7f..65f5159 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -319,6 +320,9 @@ static int __init topology_init(void) { int i; + for_each_online_node(i) + register_one_node(i); + for_each_possible_cpu(i) { struct cpu *cpu = &per_cpu(cpu_data.cpu, i); cpu->hotpluggable = 1; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5fbab1c..622bd6c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -203,6 +204,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) static void smp_store_cpu_info(unsigned int cpuid) { store_cpu_topology(cpuid); + numa_store_cpu_info(cpuid); } /* @@ -633,6 +635,8 @@ static void __init of_parse_and_init_cpus(void) pr_debug("cpu logical map 0x%llx\n", hwid); cpu_logical_map(cpu_count) = hwid; + + early_map_cpu_to_node(cpu_count, of_node_to_nid(dn)); next: cpu_count++; } -- cgit v1.1 From 82611c14c4e479715c071fb0f44ddd72dc632037 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 18 Apr 2016 09:43:33 +0200 Subject: arm64: Reduce verbosity on SMP CPU stop When CPUs are stopped during an abnormal operation like panic for each CPU a line is printed and the stack trace is dumped. This information is only interesting for the aborting CPU and on systems with many CPUs it only makes it harder to debug if after the aborting CPU the log is flooded with data about all other CPUs too. Therefore remove the stack dump and printk of other CPUs and only print a single line that the other CPUs are going to be stopped and, in case any CPUs remain online list them. Signed-off-by: Jan Glauber Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 622bd6c..fef2e73 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -805,21 +805,11 @@ void arch_irq_work_raise(void) } #endif -static DEFINE_RAW_SPINLOCK(stop_lock); - /* * ipi_cpu_stop - handle IPI from smp_send_stop() */ static void ipi_cpu_stop(unsigned int cpu) { - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) { - raw_spin_lock(&stop_lock); - pr_crit("CPU%u: stopping\n", cpu); - dump_stack(); - raw_spin_unlock(&stop_lock); - } - set_cpu_online(cpu, false); local_irq_disable(); @@ -914,6 +904,9 @@ void smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); + if (system_state == SYSTEM_BOOTING || + system_state == SYSTEM_RUNNING) + pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_STOP); } @@ -923,7 +916,8 @@ void smp_send_stop(void) udelay(1); if (num_online_cpus() > 1) - pr_warning("SMP: failed to stop secondary CPUs\n"); + pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(cpu_online_mask)); } /* -- cgit v1.1 From f3efb67590e8f68918a7a63ab0d2debee04fba9e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 18 Apr 2016 10:28:32 +0100 Subject: arm64: hwcaps: Cleanup naming We use hwcaps for referring to ELF hwcaps capability information. However this can be confusing with 'cpu_hwcaps' which stands for the CPU capability bit field. This patch cleans up the names to make it a bit more readable. Tested-by: Yury Norov Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f586343..e3576d9 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -744,7 +744,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .hwcap = cap, \ } -static const struct arm64_cpu_capabilities arm64_hwcaps[] = { +static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), @@ -765,7 +765,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { {}, }; -static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -786,7 +786,7 @@ static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) } /* Check if we have a particular HWCAP enabled */ -static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) +static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) { bool rc; @@ -810,14 +810,14 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities * return rc; } -static void __init setup_cpu_hwcaps(void) +static void __init setup_elf_hwcaps(void) { int i; - const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; + const struct arm64_cpu_capabilities *hwcaps = arm64_elf_hwcaps; for (i = 0; hwcaps[i].matches; i++) if (hwcaps[i].matches(&hwcaps[i])) - cap_set_hwcap(&hwcaps[i]); + cap_set_elf_hwcap(&hwcaps[i]); } void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, @@ -955,8 +955,8 @@ void verify_local_cpu_capabilities(void) caps[i].enable(NULL); } - for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) { - if (!cpus_have_hwcap(&caps[i])) + for (i = 0, caps = arm64_elf_hwcaps; caps[i].matches; i++) { + if (!cpus_have_elf_hwcap(&caps[i])) continue; if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { pr_crit("CPU%d: missing HWCAP: %s\n", @@ -979,7 +979,7 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); - setup_cpu_hwcaps(); + setup_elf_hwcaps(); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); -- cgit v1.1 From 752835019c15f720a51433e6bc1c5c515c01d1a2 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 18 Apr 2016 10:28:33 +0100 Subject: arm64: HWCAP: Split COMPAT HWCAP table entries In order to handle systems which do not support 32bit at EL0, split the COMPAT HWCAP entries into a separate table which can be processed, only if the support is available. Tested-by: Yury Norov Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 104 ++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 48 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e3576d9..cac7c41 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -755,6 +755,10 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + {}, +}; + +static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), @@ -810,28 +814,23 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) return rc; } -static void __init setup_elf_hwcaps(void) +static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { - int i; - const struct arm64_cpu_capabilities *hwcaps = arm64_elf_hwcaps; - - for (i = 0; hwcaps[i].matches; i++) - if (hwcaps[i].matches(&hwcaps[i])) - cap_set_elf_hwcap(&hwcaps[i]); + for (; hwcaps->matches; hwcaps++) + if (hwcaps->matches(hwcaps)) + cap_set_elf_hwcap(hwcaps); } void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { - int i; - - for (i = 0; caps[i].matches; i++) { - if (!caps[i].matches(&caps[i])) + for (; caps->matches; caps++) { + if (!caps->matches(caps)) continue; - if (!cpus_have_cap(caps[i].capability) && caps[i].desc) - pr_info("%s %s\n", info, caps[i].desc); - cpus_set_cap(caps[i].capability); + if (!cpus_have_cap(caps->capability) && caps->desc) + pr_info("%s %s\n", info, caps->desc); + cpus_set_cap(caps->capability); } } @@ -842,11 +841,9 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { - int i; - - for (i = 0; caps[i].matches; i++) - if (caps[i].enable && cpus_have_cap(caps[i].capability)) - on_each_cpu(caps[i].enable, NULL, true); + for (; caps->matches; caps++) + if (caps->enable && cpus_have_cap(caps->capability)) + on_each_cpu(caps->enable, NULL, true); } /* @@ -916,6 +913,41 @@ static void check_early_cpu_features(void) verify_cpu_asid_bits(); } +static void +verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) +{ + + for (; caps->matches; caps++) { + if (!cpus_have_elf_hwcap(caps)) + continue; + if (!feature_matches(__raw_read_system_reg(caps->sys_reg), caps)) { + pr_crit("CPU%d: missing HWCAP: %s\n", + smp_processor_id(), caps->desc); + cpu_die_early(); + } + } +} + +static void +verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) +{ + for (; caps->matches; caps++) { + if (!cpus_have_cap(caps->capability) || !caps->sys_reg) + continue; + /* + * If the new CPU misses an advertised feature, we cannot proceed + * further, park the cpu. + */ + if (!feature_matches(__raw_read_system_reg(caps->sys_reg), caps)) { + pr_crit("CPU%d: missing feature: %s\n", + smp_processor_id(), caps->desc); + cpu_die_early(); + } + if (caps->enable) + caps->enable(NULL); + } +} + /* * Run through the enabled system capabilities and enable() it on this CPU. * The capabilities were decided based on the available CPUs at the boot time. @@ -926,8 +958,6 @@ static void check_early_cpu_features(void) */ void verify_local_cpu_capabilities(void) { - int i; - const struct arm64_cpu_capabilities *caps; check_early_cpu_features(); @@ -938,32 +968,9 @@ void verify_local_cpu_capabilities(void) if (!sys_caps_initialised) return; - caps = arm64_features; - for (i = 0; caps[i].matches; i++) { - if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) - continue; - /* - * If the new CPU misses an advertised feature, we cannot proceed - * further, park the cpu. - */ - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { - pr_crit("CPU%d: missing feature: %s\n", - smp_processor_id(), caps[i].desc); - cpu_die_early(); - } - if (caps[i].enable) - caps[i].enable(NULL); - } - - for (i = 0, caps = arm64_elf_hwcaps; caps[i].matches; i++) { - if (!cpus_have_elf_hwcap(&caps[i])) - continue; - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { - pr_crit("CPU%d: missing HWCAP: %s\n", - smp_processor_id(), caps[i].desc); - cpu_die_early(); - } - } + verify_local_cpu_features(arm64_features); + verify_local_elf_hwcaps(arm64_elf_hwcaps); + verify_local_elf_hwcaps(compat_elf_hwcaps); } static void __init setup_feature_capabilities(void) @@ -979,7 +986,8 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); - setup_elf_hwcaps(); + setup_elf_hwcaps(arm64_elf_hwcaps); + setup_elf_hwcaps(compat_elf_hwcaps); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); -- cgit v1.1 From a6dc3cd718ec2a19b8ca812087666899852af78f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 18 Apr 2016 10:28:35 +0100 Subject: arm64: cpufeature: Check availability of AArch32 On ARMv8 support for AArch32 state is optional. Hence it is not safe to check the AArch32 ID registers for sanity, which could lead to false warnings. This patch makes sure that the AArch32 state is implemented before we keep track of the 32bit ID registers. As per ARM ARM (D.1.21.2 - Support for Exception Levels and Execution States, DDI0487A.h), checking the support for AArch32 at EL0 is good enough to check the support for AArch32 (i.e, AArch32 at EL1 => AArch32 at EL0, but not vice versa). Tested-by: Yury Norov Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 86 +++++++++++++++++++++++------------------- arch/arm64/kernel/cpuinfo.c | 37 +++++++++--------- 2 files changed, 67 insertions(+), 56 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cac7c41..9097ce9 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -439,22 +439,26 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); - init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); - init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); - init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); - init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); - init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); - init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); - init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); - init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); - init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); - init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); - init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); - init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); - init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); - init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); - init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); - init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); + init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); + init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); + init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); + init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); + init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); + init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); + init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); + init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); + init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); + init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); + init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + } + } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -559,47 +563,51 @@ void update_cpu_features(int cpu, info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); /* - * If we have AArch32, we care about 32-bit features for compat. These - * registers should be RES0 otherwise. + * If we have AArch32, we care about 32-bit features for compat. + * If the system doesn't support AArch32, don't update them. */ - taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, + if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) && + id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, info->reg_id_dfr0, boot->reg_id_dfr0); - taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, info->reg_id_isar0, boot->reg_id_isar0); - taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, info->reg_id_isar1, boot->reg_id_isar1); - taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, info->reg_id_isar2, boot->reg_id_isar2); - taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, info->reg_id_isar3, boot->reg_id_isar3); - taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, info->reg_id_isar4, boot->reg_id_isar4); - taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, info->reg_id_isar5, boot->reg_id_isar5); - /* - * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and - * ACTLR formats could differ across CPUs and therefore would have to - * be trapped for virtualization anyway. - */ - taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. + */ + taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, info->reg_id_mmfr0, boot->reg_id_mmfr0); - taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, info->reg_id_mmfr1, boot->reg_id_mmfr1); - taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, info->reg_id_mmfr2, boot->reg_id_mmfr2); - taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, info->reg_id_mmfr3, boot->reg_id_mmfr3); - taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, info->reg_id_pfr0, boot->reg_id_pfr0); - taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, info->reg_id_pfr1, boot->reg_id_pfr1); - taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, info->reg_mvfr0, boot->reg_mvfr0); - taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, info->reg_mvfr1, boot->reg_mvfr1); - taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, info->reg_mvfr2, boot->reg_mvfr2); + } /* * Mismatched CPU features are a recipe for disaster. Don't even diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 84c8684..92189e2 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -216,23 +216,26 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); - info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); - info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); - info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); - info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); - info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); - info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); - info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); - info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); - info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); - info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); - info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); - info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); - info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); - - info->reg_mvfr0 = read_cpuid(MVFR0_EL1); - info->reg_mvfr1 = read_cpuid(MVFR1_EL1); - info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + /* Update the 32bit ID registers only if AArch32 is implemented */ + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); + info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + + info->reg_mvfr0 = read_cpuid(MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + } cpuinfo_detect_icache_policy(info); -- cgit v1.1 From 042446a31e3803d81c7e618dd80928dc3dce70c5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 18 Apr 2016 10:28:36 +0100 Subject: arm64: cpufeature: Track 32bit EL0 support Add cpu_hwcap bit for keeping track of the support for 32bit EL0. Tested-by: Yury Norov Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9097ce9..40a23f2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -737,6 +737,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_VIRT_HOST_EXTN, .matches = runs_at_el2, }, + { + .desc = "32-bit EL0 Support", + .capability = ARM64_HAS_32BIT_EL0, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR0_EL0_SHIFT, + .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, + }, {}, }; -- cgit v1.1 From 643d703d2d2dbf8e2f16efa0a6a32b1eca101d02 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 18 Apr 2016 10:28:37 +0100 Subject: arm64: compat: Check for AArch32 state Make sure we have AArch32 state available for running COMPAT binaries and also for switching the personality to PER_LINUX32. Signed-off-by: Yury Norov [ Added cap bit, checks for HWCAP, personality ] Signed-off-by: Suzuki K Poulose Tested-by: Yury Norov Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 7 +++++-- arch/arm64/kernel/sys.c | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 40a23f2..8e62d61 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -987,7 +987,8 @@ void verify_local_cpu_capabilities(void) verify_local_cpu_features(arm64_features); verify_local_elf_hwcaps(arm64_elf_hwcaps); - verify_local_elf_hwcaps(compat_elf_hwcaps); + if (system_supports_32bit_el0()) + verify_local_elf_hwcaps(compat_elf_hwcaps); } static void __init setup_feature_capabilities(void) @@ -1004,7 +1005,9 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); setup_elf_hwcaps(arm64_elf_hwcaps); - setup_elf_hwcaps(compat_elf_hwcaps); + + if (system_supports_32bit_el0()) + setup_elf_hwcaps(compat_elf_hwcaps); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 75151aa..26fe8ea 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -25,6 +25,7 @@ #include #include #include +#include asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -36,11 +37,20 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } +SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) +{ + if (personality(personality) == PER_LINUX32 && + !system_supports_32bit_el0()) + return -EINVAL; + return sys_personality(personality); +} + /* * Wrappers to pass the pt_regs argument. */ asmlinkage long sys_rt_sigreturn_wrapper(void); #define sys_rt_sigreturn sys_rt_sigreturn_wrapper +#define sys_personality sys_arm64_personality #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, -- cgit v1.1 From 2366c7fdb59812bbd1382afed69e250582c64187 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:29 +0800 Subject: ARM64: ACPI: Check if it runs on Xen to enable or disable ACPI When it's a Xen domain0 booting with ACPI, it will supply a /chosen and a /hypervisor node in DT. So check if it needs to enable ACPI. Signed-off-by: Shannon Zhao Reviewed-by: Stefano Stabellini Acked-by: Hanjun Guo Tested-by: Julien Grall Acked-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index d1ce8e2..57ee317 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -67,10 +67,15 @@ static int __init dt_scan_depth1_nodes(unsigned long node, { /* * Return 1 as soon as we encounter a node at depth 1 that is - * not the /chosen node. + * not the /chosen node, or /hypervisor node with compatible + * string "xen,xen". */ - if (depth == 1 && (strcmp(uname, "chosen") != 0)) - return 1; + if (depth == 1 && (strcmp(uname, "chosen") != 0)) { + if (strcmp(uname, "hypervisor") != 0 || + !of_flat_dt_is_compatible(node, "xen,xen")) + return 1; + } + return 0; } @@ -184,7 +189,8 @@ void __init acpi_boot_table_init(void) /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or - * - the device tree is not empty (it has more than just a /chosen node) + * - the device tree is not empty (it has more than just a /chosen node, + * and a /hypervisor node when running on Xen) * and ACPI has not been force enabled (acpi=force) */ if (param_acpi_off || -- cgit v1.1 From 9981293fb0f02e6127d6ab00218bf091f9f6c89b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Apr 2016 11:25:11 +0100 Subject: arm64: make dt_scan_depth1_nodes more readable Improve the readability of dt_scan_depth1_nodes by removing the nested conditionals. Signed-off-by: Mark Rutland Signed-off-by: Stefano Stabellini Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 57ee317..6884c76 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -66,17 +66,24 @@ static int __init dt_scan_depth1_nodes(unsigned long node, void *data) { /* - * Return 1 as soon as we encounter a node at depth 1 that is - * not the /chosen node, or /hypervisor node with compatible - * string "xen,xen". + * Ignore anything not directly under the root node; we'll + * catch its parent instead. */ - if (depth == 1 && (strcmp(uname, "chosen") != 0)) { - if (strcmp(uname, "hypervisor") != 0 || - !of_flat_dt_is_compatible(node, "xen,xen")) - return 1; - } + if (depth != 1) + return 0; - return 0; + if (strcmp(uname, "chosen") == 0) + return 0; + + if (strcmp(uname, "hypervisor") == 0 && + of_flat_dt_is_compatible(node, "xen,xen")) + return 0; + + /* + * This node at depth 1 is neither a chosen node nor a xen node, + * which we do not expect. + */ + return 1; } /* -- cgit v1.1 From 92406f0cc9e3d5cc77bf3de6d68c9c2373dcd701 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 22 Apr 2016 12:25:31 +0100 Subject: arm64: cpufeature: Add scope for capability check Add scope parameter to the arm64_cpu_capabilities::matches(), so that this can be reused for checking the capability on a given CPU vs the system wide. The system uses the default scope associated with the capability for initialising the CPU_HWCAPs and ELF_HWCAPs. Cc: James Morse Cc: Marc Zyngier Cc: Andre Przywara Cc: Will Deacon Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 4 +- arch/arm64/kernel/cpufeature.c | 131 ++++++++++++++++++++++------------------- 2 files changed, 75 insertions(+), 60 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 06afd04..2fdecd7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -22,14 +22,16 @@ #include static bool __maybe_unused -is_affected_midr_range(const struct arm64_cpu_capabilities *entry) +is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) { + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, entry->midr_range_min, entry->midr_range_max); } #define MIDR_RANGE(model, min, max) \ + .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = min, \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8e62d61..54abd9b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -71,7 +71,8 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); /* meta feature for alternatives */ static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry); +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); + static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), @@ -626,6 +627,49 @@ u64 read_system_reg(u32 id) return regp->sys_val; } +/* + * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * Read the system register on the current CPU + */ +static u64 __raw_read_system_reg(u32 sys_id) +{ + switch (sys_id) { + case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); + case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); + case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); + case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); + case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); + + case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); + case SYS_CTR_EL0: return read_cpuid(CTR_EL0); + case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); + default: + BUG(); + return 0; + } +} + #include static bool @@ -637,19 +681,24 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) } static bool -has_cpuid_feature(const struct arm64_cpu_capabilities *entry) +has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) { u64 val; - val = read_system_reg(entry->sys_reg); + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + val = read_system_reg(entry->sys_reg); + else + val = __raw_read_system_reg(entry->sys_reg); + return feature_matches(val, entry); } -static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) +static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope) { bool has_sre; - if (!has_cpuid_feature(entry)) + if (!has_cpuid_feature(entry, scope)) return false; has_sre = gic_enable_sre(); @@ -660,7 +709,7 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) return has_sre; } -static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) +static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused) { u32 midr = read_cpuid_id(); u32 rv_min, rv_max; @@ -672,7 +721,7 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); } -static bool runs_at_el2(const struct arm64_cpu_capabilities *entry) +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { return is_kernel_in_hyp_mode(); } @@ -681,6 +730,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, + .def_scope = SCOPE_SYSTEM, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -691,6 +741,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, @@ -703,6 +754,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, @@ -713,12 +765,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, + .def_scope = SCOPE_SYSTEM, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -729,17 +783,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, + .def_scope = SCOPE_SYSTEM, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, + .def_scope = SCOPE_SYSTEM, .matches = runs_at_el2, }, { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -752,6 +809,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #define HWCAP_CAP(reg, field, s, min_value, type, cap) \ { \ .desc = #cap, \ + .def_scope = SCOPE_SYSTEM, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ @@ -834,7 +892,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { for (; hwcaps->matches; hwcaps++) - if (hwcaps->matches(hwcaps)) + if (hwcaps->matches(hwcaps, hwcaps->def_scope)) cap_set_elf_hwcap(hwcaps); } @@ -842,7 +900,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { for (; caps->matches; caps++) { - if (!caps->matches(caps)) + if (!caps->matches(caps, caps->def_scope)) continue; if (!cpus_have_cap(caps->capability) && caps->desc) @@ -879,48 +937,6 @@ static inline void set_sys_caps_initialised(void) } /* - * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. - */ -static u64 __raw_read_system_reg(u32 sys_id) -{ - switch (sys_id) { - case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); - case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); - case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); - case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); - case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); - case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); - case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); - case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); - case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); - case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); - case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); - case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); - case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); - case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); - - case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); - case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); - case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); - case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); - case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); - - case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); - case SYS_CTR_EL0: return read_cpuid(CTR_EL0); - case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); - default: - BUG(); - return 0; - } -} - -/* * Check for CPU features that are used in early boot * based on the Boot CPU value. */ @@ -934,28 +950,25 @@ static void verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) { - for (; caps->matches; caps++) { - if (!cpus_have_elf_hwcap(caps)) - continue; - if (!feature_matches(__raw_read_system_reg(caps->sys_reg), caps)) { + for (; caps->matches; caps++) + if (cpus_have_elf_hwcap(caps) && !caps->matches(caps, SCOPE_LOCAL_CPU)) { pr_crit("CPU%d: missing HWCAP: %s\n", smp_processor_id(), caps->desc); cpu_die_early(); } - } } static void verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) { - if (!cpus_have_cap(caps->capability) || !caps->sys_reg) + if (!cpus_have_cap(caps->capability)) continue; /* * If the new CPU misses an advertised feature, we cannot proceed * further, park the cpu. */ - if (!feature_matches(__raw_read_system_reg(caps->sys_reg), caps)) { + if (!caps->matches(caps, SCOPE_LOCAL_CPU)) { pr_crit("CPU%d: missing feature: %s\n", smp_processor_id(), caps->desc); cpu_die_early(); @@ -1026,7 +1039,7 @@ void __init setup_cpu_features(void) } static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry) +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) { return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); } -- cgit v1.1 From e3661b128e53ee281e1e7c589a5b647890bd6d7c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Apr 2016 12:25:32 +0100 Subject: arm64: Allow a capability to be checked on a single CPU Now that the capabilities are only available once all the CPUs have booted, we're unable to check for a particular feature in any subsystem that gets initialized before then. In order to support this, introduce a local_cpu_has_cap() function that tests for the presence of a given capability independently of the whole framework. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier [ Added preemptible() check ] Signed-off-by: Suzuki K Poulose [will: remove duplicate initialisation of caps in this_cpu_has_cap] Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 54abd9b..2b09dc4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1010,6 +1010,24 @@ static void __init setup_feature_capabilities(void) enable_cpu_capabilities(arm64_features); } +/* + * Check if the current CPU has a given feature capability. + * Should be called from non-preemptible context. + */ +bool this_cpu_has_cap(unsigned int cap) +{ + const struct arm64_cpu_capabilities *caps; + + if (WARN_ON(preemptible())) + return false; + + for (caps = arm64_features; caps->desc; caps++) + if (caps->capability == cap && caps->matches) + return caps->matches(caps, SCOPE_LOCAL_CPU); + + return false; +} + void __init setup_cpu_features(void) { u32 cwg; -- cgit v1.1 From 6a6efbb45b7d95c84840010095367eb06a64f342 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 22 Apr 2016 12:25:34 +0100 Subject: arm64: Verify CPU errata work arounds on hotplugged CPU CPU Errata work arounds are detected and applied to the kernel code at boot time and the data is then freed up. If a new hotplugged CPU requires a work around which was not applied at boot time, there is nothing we can do but simply fail the booting. Cc: Will Deacon Cc: Mark Rutland Cc: Andre Przywara Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 20 ++++++++++++++++++++ arch/arm64/kernel/cpufeature.c | 1 + 2 files changed, 21 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2fdecd7..d427894 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -103,6 +103,26 @@ const struct arm64_cpu_capabilities arm64_errata[] = { } }; +/* + * The CPU Errata work arounds are detected and applied at boot time + * and the related information is freed soon after. If the new CPU requires + * an errata not detected at boot, fail this CPU. + */ +void verify_local_cpu_errata(void) +{ + const struct arm64_cpu_capabilities *caps = arm64_errata; + + for (; caps->matches; caps++) + if (!cpus_have_cap(caps->capability) && + caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: Requires work around for %s, not detected" + " at boot time\n", + smp_processor_id(), + caps->desc ? : "an erratum"); + cpu_die_early(); + } +} + void check_local_cpu_errata(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2b09dc4..811773d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -998,6 +998,7 @@ void verify_local_cpu_capabilities(void) if (!sys_caps_initialised) return; + verify_local_cpu_errata(); verify_local_cpu_features(arm64_features); verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) -- cgit v1.1 From 44dbcc93ab67145bf8ebe3e91123303443c4a3bf Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 22 Apr 2016 12:25:35 +0100 Subject: arm64: Fix behavior of maxcpus=N maxcpu=n sets the number of CPUs activated at boot time to a max of n, but allowing the remaining CPUs to be brought up later if the user decides to do so. However, on arm64 due to various reasons, we disallowed hotplugging CPUs beyond n, by marking them not present. Now that we have checks in place to make sure the hotplugged CPUs have compatible features with system and requires no new errata, relax the restriction. Cc: Will Deacon Cc: Mark Rutland Cc: James Morse Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index fef2e73..dc96475 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -689,33 +689,18 @@ void __init smp_init_cpus(void) void __init smp_prepare_cpus(unsigned int max_cpus) { int err; - unsigned int cpu, ncores = num_possible_cpus(); + unsigned int cpu; init_cpu_topology(); smp_store_cpu_info(smp_processor_id()); /* - * are we trying to boot more cores than exist? - */ - if (max_cpus > ncores) - max_cpus = ncores; - - /* Don't bother if we're effectively UP */ - if (max_cpus <= 1) - return; - - /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the * secondaries from the bootloader. - * - * Make sure we online at most (max_cpus - 1) additional CPUs. */ - max_cpus--; for_each_possible_cpu(cpu) { - if (max_cpus == 0) - break; if (cpu == smp_processor_id()) continue; @@ -728,7 +713,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) continue; set_cpu_present(cpu, true); - max_cpus--; } } -- cgit v1.1 From 190c056fc32a528979807cb5d5a0d68285933073 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 18 Apr 2016 17:09:41 +0200 Subject: arm64: kernel: don't export local symbols from head.S This unexports some symbols from head.S that are only used locally. Acked-by: Catalin Marinas Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b434176..ac27d8d 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -638,7 +638,7 @@ ENDPROC(el2_setup) * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed * in x20. See arch/arm64/include/asm/virt.h for more info. */ -ENTRY(set_cpu_boot_mode_flag) +set_cpu_boot_mode_flag: adr_l x1, __boot_cpu_mode cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f @@ -691,7 +691,7 @@ ENTRY(secondary_entry) b secondary_startup ENDPROC(secondary_entry) -ENTRY(secondary_startup) +secondary_startup: /* * Common entry point for secondary CPUs. */ @@ -706,7 +706,7 @@ ENTRY(secondary_startup) ENDPROC(secondary_startup) 0: .long (_text - TEXT_OFFSET) - __secondary_switched -ENTRY(__secondary_switched) +__secondary_switched: adr_l x5, vectors msr vbar_el1, x5 isb -- cgit v1.1 From e5ebeec879b726c755af0c1c15f3699b53268cd5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 18 Apr 2016 17:09:42 +0200 Subject: arm64: kernel: use literal for relocated address of __secondary_switched We can simply use a relocated 64-bit literal to store the address of __secondary_switched(), and the relocation code will ensure that it holds the correct value at secondary entry time, as long as we make sure that the literal is not dereferenced until after we have enabled the MMU. So jump via a small __secondary_switch() function covered by the ID map that performs the literal load and branch-to-register. Acked-by: Catalin Marinas Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ac27d8d..f13276d 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -468,9 +468,7 @@ __mmap_switched: str x15, [x11, x23] b 0b -2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr - dc cvac, x8 // value visible to secondaries - dsb sy // with MMU off +2: #endif adr_l sp, initial_sp, x4 @@ -699,12 +697,9 @@ secondary_startup: adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x8, kimage_vaddr - ldr w9, 0f - sub x27, x8, w9, sxtw // address to jump to after enabling the MMU + adr_l x27, __secondary_switch // address to jump to after enabling the MMU b __enable_mmu ENDPROC(secondary_startup) -0: .long (_text - TEXT_OFFSET) - __secondary_switched __secondary_switched: adr_l x5, vectors @@ -806,3 +801,8 @@ __no_granule_support: wfi b 1b ENDPROC(__no_granule_support) + +__secondary_switch: + ldr x8, =__secondary_switched + br x8 +ENDPROC(__secondary_switch) -- cgit v1.1 From 0cd3defe0af4153ffc5fe39bcfa4abfc301984e9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 18 Apr 2016 17:09:43 +0200 Subject: arm64: kernel: perform relocation processing from ID map Refactor the relocation processing so that the code executes from the ID map while accessing the relocation tables via the virtual mapping. This way, we can use literals containing virtual addresses as before, instead of having to use convoluted absolute expressions. For symmetry with the secondary code path, the relocation code and the subsequent jump to the virtual entry point are implemented in a function called __primary_switch(), and __mmap_switched() is renamed to __primary_switched(). Also, the call sequence in stext() is aligned with the one in secondary_startup(), by replacing the awkward 'adr_l lr' and 'b cpu_setup' sequence with a simple branch and link. Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 96 ++++++++++++++++++++++------------------- arch/arm64/kernel/vmlinux.lds.S | 7 +-- 2 files changed, 55 insertions(+), 48 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index f13276d..0d487d9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -223,13 +223,11 @@ ENTRY(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, 0f // address to jump to after - neg x27, x27 // MMU has been enabled - adr_l lr, __enable_mmu // return (PIC) address - b __cpu_setup // initialise processor + bl __cpu_setup // initialise processor + adr_l x27, __primary_switch // address to jump to after + // MMU has been enabled + b __enable_mmu ENDPROC(stext) - .align 3 -0: .quad (_text - TEXT_OFFSET) - __mmap_switched - KIMAGE_VADDR /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -421,7 +419,7 @@ ENDPROC(__create_page_tables) * The following fragment of code is executed with the MMU enabled. */ .set initial_sp, init_thread_union + THREAD_START_SP -__mmap_switched: +__primary_switched: mov x28, lr // preserve LR adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address @@ -435,42 +433,6 @@ __mmap_switched: bl __pi_memset dsb ishst // Make zero page visible to PTW -#ifdef CONFIG_RELOCATABLE - - /* - * Iterate over each entry in the relocation table, and apply the - * relocations in place. - */ - adr_l x8, __dynsym_start // start of symbol table - adr_l x9, __reloc_start // start of reloc table - adr_l x10, __reloc_end // end of reloc table - -0: cmp x9, x10 - b.hs 2f - ldp x11, x12, [x9], #24 - ldr x13, [x9, #-8] - cmp w12, #R_AARCH64_RELATIVE - b.ne 1f - add x13, x13, x23 // relocate - str x13, [x11, x23] - b 0b - -1: cmp w12, #R_AARCH64_ABS64 - b.ne 0b - add x12, x12, x12, lsl #1 // symtab offset: 24x top word - add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word - ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx - ldr x15, [x12, #8] // Elf64_Sym::st_value - cmp w14, #-0xf // SHN_ABS (0xfff1) ? - add x14, x15, x23 // relocate - csel x15, x14, x15, ne - add x15, x13, x15 - str x15, [x11, x23] - b 0b - -2: -#endif - adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) @@ -496,7 +458,7 @@ __mmap_switched: 0: #endif b start_kernel -ENDPROC(__mmap_switched) +ENDPROC(__primary_switched) /* * end early head section, begin head code that is also used for @@ -788,7 +750,6 @@ __enable_mmu: ic iallu // flush instructions fetched dsb nsh // via old mapping isb - add x27, x27, x23 // relocated __mmap_switched #endif br x27 ENDPROC(__enable_mmu) @@ -802,6 +763,51 @@ __no_granule_support: b 1b ENDPROC(__no_granule_support) +__primary_switch: +#ifdef CONFIG_RELOCATABLE + /* + * Iterate over each entry in the relocation table, and apply the + * relocations in place. + */ + ldr w8, =__dynsym_offset // offset to symbol table + ldr w9, =__rela_offset // offset to reloc table + ldr w10, =__rela_size // size of reloc table + + ldr x11, =KIMAGE_VADDR // default virtual offset + add x11, x11, x23 // actual virtual offset + add x8, x8, x11 // __va(.dynsym) + add x9, x9, x11 // __va(.rela) + add x10, x9, x10 // __va(.rela) + sizeof(.rela) + +0: cmp x9, x10 + b.hs 2f + ldp x11, x12, [x9], #24 + ldr x13, [x9, #-8] + cmp w12, #R_AARCH64_RELATIVE + b.ne 1f + add x13, x13, x23 // relocate + str x13, [x11, x23] + b 0b + +1: cmp w12, #R_AARCH64_ABS64 + b.ne 0b + add x12, x12, x12, lsl #1 // symtab offset: 24x top word + add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx + ldr x15, [x12, #8] // Elf64_Sym::st_value + cmp w14, #-0xf // SHN_ABS (0xfff1) ? + add x14, x15, x23 // relocate + csel x15, x14, x15, ne + add x15, x13, x15 + str x15, [x11, x23] + b 0b + +2: +#endif + ldr x8, =__primary_switched + br x8 +ENDPROC(__primary_switch) + __secondary_switch: ldr x8, =__secondary_switched br x8 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 77d86c9..8918b30 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -158,12 +158,9 @@ SECTIONS *(.altinstr_replacement) } .rela : ALIGN(8) { - __reloc_start = .; *(.rela .rela*) - __reloc_end = .; } .dynsym : ALIGN(8) { - __dynsym_start = .; *(.dynsym) } .dynstr : { @@ -173,6 +170,10 @@ SECTIONS *(.hash) } + __rela_offset = ADDR(.rela) - KIMAGE_VADDR; + __rela_size = SIZEOF(.rela); + __dynsym_offset = ADDR(.dynsym) - KIMAGE_VADDR; + . = ALIGN(SEGMENT_ALIGN); __init_end = .; -- cgit v1.1 From b03cc885328e3c0de61843737d42eb0a0f112aab Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 18 Apr 2016 17:09:45 +0200 Subject: arm64: kernel: replace early 64-bit literal loads with move-immediates When building a relocatable kernel, we currently rely on the fact that early 64-bit literal loads need to be deferred to after the relocation has been performed only if they involve symbol references, and not if they involve assemble time constants. While this is not an unreasonable assumption to make, it is better to switch to movk/movz sequences, since these are guaranteed to be resolved at link time, simply because there are no dynamic relocation types to describe them. Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0d487d9..dae9cab 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -337,7 +337,7 @@ __create_page_tables: cmp x0, x6 b.lo 1b - ldr x7, =SWAPPER_MM_MMUFLAGS + mov x7, SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. @@ -393,7 +393,7 @@ __create_page_tables: * Map the kernel image (starting with PHYS_OFFSET). */ mov x0, x26 // swapper_pg_dir - ldr x5, =KIMAGE_VADDR + mov_q x5, KIMAGE_VADDR add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 ldr w6, =kernel_img_size @@ -631,7 +631,7 @@ ENTRY(secondary_holding_pen) bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 - ldr x1, =MPIDR_HWID_BITMASK + mov_q x1, MPIDR_HWID_BITMASK and x0, x0, x1 adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] @@ -773,7 +773,7 @@ __primary_switch: ldr w9, =__rela_offset // offset to reloc table ldr w10, =__rela_size // size of reloc table - ldr x11, =KIMAGE_VADDR // default virtual offset + mov_q x11, KIMAGE_VADDR // default virtual offset add x11, x11, x23 // actual virtual offset add x8, x8, x11 // __va(.dynsym) add x9, x9, x11 // __va(.rela) -- cgit v1.1 From 18b9c0d641938242d8bcdba3c14a8f2beec2a97e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 18 Apr 2016 17:09:46 +0200 Subject: arm64: don't map TEXT_OFFSET bytes below the kernel if we can avoid it For historical reasons, the kernel Image must be loaded into physical memory at a 512 KB offset above a 2 MB aligned base address. The region between the base address and the start of the kernel Image has no significance to the kernel itself, but it is currently mapped explicitly into the early kernel VMA range for all translation granules. In some cases (i.e., 4 KB granule), this is unavoidable, due to the 2 MB granularity of the early kernel mappings. However, in other cases, e.g., when running with larger page sizes, or in the future, with more granular KASLR, there is no reason to map it explicitly like we do currently. So update the logic so that the region is mapped only if that happens as a side effect of rounding the start address of the kernel to swapper block size, and leave it unmapped otherwise. Since the symbol kernel_img_size now simply resolves to the memory footprint of the kernel Image, we can drop its definition from image.h and opencode its calculation. Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 9 +++++---- arch/arm64/kernel/image.h | 2 -- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index dae9cab..c5e5edc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -393,12 +393,13 @@ __create_page_tables: * Map the kernel image (starting with PHYS_OFFSET). */ mov x0, x26 // swapper_pg_dir - mov_q x5, KIMAGE_VADDR + mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 - ldr w6, =kernel_img_size - add x6, x6, x5 - mov x3, x24 // phys offset + adrp x6, _end // runtime __pa(_end) + adrp x3, _text // runtime __pa(_text) + sub x6, x6, x3 // _end - _text + add x6, x6, x5 // runtime __va(_end) create_block_map x0, x7, x3, x5, x6 /* diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 4fd72da..86d444f 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -71,8 +71,6 @@ DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); -kernel_img_size = _end - (_text - TEXT_OFFSET); - #ifdef CONFIG_EFI __efistub_stext_offset = stext - _text; -- cgit v1.1 From 08cdac619c81b3fa8cd73aeed2330ffe0a0b73ca Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 18 Apr 2016 17:09:47 +0200 Subject: arm64: relocatable: deal with physically misaligned kernel images When booting a relocatable kernel image, there is no practical reason to refuse an image whose load address is not exactly TEXT_OFFSET bytes above a 2 MB aligned base address, as long as the physical and virtual misalignment with respect to the swapper block size are equal, and are both aligned to THREAD_SIZE. Since the virtual misalignment is under our control when we first enter the kernel proper, we can simply choose its value to be equal to the physical misalignment. So treat the misalignment of the physical load address as the initial KASLR offset, and fix up the remaining code to deal with that. Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 9 ++++++--- arch/arm64/kernel/kaslr.c | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c5e5edc..00a3210 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -213,8 +214,8 @@ efi_header_end: ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode - mov x23, xzr // KASLR offset, defaults to 0 adrp x24, __PHYS_OFFSET + and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* @@ -449,11 +450,13 @@ __primary_switched: bl kasan_early_init #endif #ifdef CONFIG_RANDOMIZE_BASE - cbnz x23, 0f // already running randomized? + tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? + b.ne 0f mov x0, x21 // pass FDT address in x0 + mov x1, x23 // pass modulo offset in x1 bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed - mov x23, x0 // record KASLR offset + orr x23, x23, x0 // record KASLR offset ret x28 // we must enable KASLR, return // to __enable_mmu() 0: diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 5829839..b054691 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -74,7 +74,7 @@ extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, * containing function pointers) to be reinitialized, and zero-initialized * .bss variables will be reset to 0. */ -u64 __init kaslr_early_init(u64 dt_phys) +u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) { void *fdt; u64 seed, offset, mask, module_range; @@ -132,8 +132,8 @@ u64 __init kaslr_early_init(u64 dt_phys) * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this * happens, increase the KASLR offset by the size of the kernel image. */ - if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) offset = (offset + (u64)(_end - _text)) & mask; if (IS_ENABLED(CONFIG_KASAN)) -- cgit v1.1 From 6a1f5471144754f165427a93f35c897f85680594 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 12 Apr 2016 16:09:11 +0200 Subject: arm64: acpi: add acpi=on cmdline option to prefer ACPI boot over DT If both ACPI and DT platform descriptions are available, and the kernel was configured at build time to support both flavours, the default policy is to prefer DT over ACPI, and preferring ACPI over DT while still allowing DT as a fallback is not possible. Since some enterprise features (such as RAS) depend on ACPI, it may be desirable for, e.g., distro installers to prefer ACPI boot but fall back to DT rather than failing completely if no ACPI tables are available. So introduce the 'acpi=on' kernel command line parameter for arm64, which signifies that ACPI should be used if available, and DT should only be used as a fallback. Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 6884c76..3e4f1a4 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -42,6 +42,7 @@ int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); static bool param_acpi_off __initdata; +static bool param_acpi_on __initdata; static bool param_acpi_force __initdata; static int __init parse_acpi(char *arg) @@ -52,6 +53,8 @@ static int __init parse_acpi(char *arg) /* "acpi=off" disables both ACPI table parsing and interpreter */ if (strcmp(arg, "off") == 0) param_acpi_off = true; + else if (strcmp(arg, "on") == 0) /* prefer ACPI over DT */ + param_acpi_on = true; else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */ param_acpi_force = true; else @@ -198,10 +201,11 @@ void __init acpi_boot_table_init(void) * - ACPI has been disabled explicitly (acpi=off), or * - the device tree is not empty (it has more than just a /chosen node, * and a /hypervisor node when running on Xen) - * and ACPI has not been force enabled (acpi=force) + * and ACPI has not been [force] enabled (acpi=on|force) */ if (param_acpi_off || - (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL))) + (!param_acpi_on && !param_acpi_force && + of_scan_flat_dt(dt_scan_depth1_nodes, NULL))) return; /* -- cgit v1.1 From 00a44cdaba0900c63a003e0c431f506f49376a90 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 27 Apr 2016 17:47:02 +0100 Subject: arm64: kvm: Move lr save/restore from do_el2_call into EL1 Today the 'hvc' calling KVM or the hyp-stub is expected to preserve all registers. KVM saves/restores the registers it needs on the EL2 stack using do_el2_call(). The hyp-stub has no stack, later patches need to be able to be able to clobber the link register. Move the link register save/restore to the the call sites. Signed-off-by: James Morse Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/hyp-stub.S | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index a272f33..7eab8ac 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -101,10 +101,16 @@ ENDPROC(\label) */ ENTRY(__hyp_get_vectors) + str lr, [sp, #-16]! mov x0, xzr - // fall through -ENTRY(__hyp_set_vectors) hvc #0 + ldr lr, [sp], #16 ret ENDPROC(__hyp_get_vectors) + +ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + hvc #0 + ldr lr, [sp], #16 + ret ENDPROC(__hyp_set_vectors) -- cgit v1.1 From ad72e59ff2bad55f6b9e7ac1fe5d824831ea2550 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Wed, 27 Apr 2016 17:47:03 +0100 Subject: arm64: hyp/kvm: Make hyp-stub extensible The existing arm64 hcall implementations are limited in that they only allow for two distinct hcalls; with the x0 register either zero or not zero. Also, the API of the hyp-stub exception vector routines and the KVM exception vector routines differ; hyp-stub uses a non-zero value in x0 to implement __hyp_set_vectors, whereas KVM uses it to implement kvm_call_hyp. To allow for additional hcalls to be defined and to make the arm64 hcall API more consistent across exception vector routines, change the hcall implementations to reserve all x0 values below 0xfff for hcalls such as {s,g}et_vectors(). Define two new preprocessor macros HVC_GET_VECTORS, and HVC_SET_VECTORS to be used as hcall type specifiers and convert the existing __hyp_get_vectors() and __hyp_set_vectors() routines to use these new macros when executing an HVC call. Also, change the corresponding hyp-stub and KVM el1_sync exception vector routines to use these new macros. Signed-off-by: Geoff Levand [Merged two hcall patches, moved immediate value from esr to x0, use lr as a scratch register, changed limit to 0xfff] Signed-off-by: James Morse Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/hyp-stub.S | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 7eab8ac..894fb40 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -53,15 +54,26 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x1, esr_el2 - lsr x1, x1, #26 - cmp x1, #0x16 - b.ne 2f // Not an HVC trap - cbz x0, 1f - msr vbar_el2, x0 // Set vbar_el2 - b 2f -1: mrs x0, vbar_el2 // Return vbar_el2 -2: eret + mrs x30, esr_el2 + lsr x30, x30, #ESR_ELx_EC_SHIFT + + cmp x30, #ESR_ELx_EC_HVC64 + b.ne 9f // Not an HVC trap + + cmp x0, #HVC_GET_VECTORS + b.ne 1f + mrs x0, vbar_el2 + b 9f + +1: cmp x0, #HVC_SET_VECTORS + b.ne 2f + msr vbar_el2, x1 + b 9f + + /* Unrecognised call type */ +2: mov x0, xzr + +9: eret ENDPROC(el1_sync) .macro invalid_vector label @@ -102,7 +114,7 @@ ENDPROC(\label) ENTRY(__hyp_get_vectors) str lr, [sp, #-16]! - mov x0, xzr + mov x0, #HVC_GET_VECTORS hvc #0 ldr lr, [sp], #16 ret @@ -110,6 +122,8 @@ ENDPROC(__hyp_get_vectors) ENTRY(__hyp_set_vectors) str lr, [sp, #-16]! + mov x1, x0 + mov x0, #HVC_SET_VECTORS hvc #0 ldr lr, [sp], #16 ret -- cgit v1.1 From c94b0cf28281d483c8b43b4874fcb7ab14ade1b1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 27 Apr 2016 17:47:04 +0100 Subject: arm64: hyp/kvm: Make hyp-stub reject kvm_call_hyp() A later patch implements kvm_arch_hardware_disable(), to remove kvm from el2, and re-instate the hyp-stub. This can happen while guests are running, particularly when kvm_reboot() calls kvm_arch_hardware_disable() on each cpu. This can interrupt a guest, remove kvm, then allow the guest to be scheduled again. This causes kvm_call_hyp() to be run against the hyp-stub. Change the hyp-stub to return a new exception type when this happens, and add code to kvm's handle_exit() to tell userspace we failed to enter the guest. Signed-off-by: James Morse Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/hyp-stub.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 894fb40..8727f44 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -70,8 +71,8 @@ el1_sync: msr vbar_el2, x1 b 9f - /* Unrecognised call type */ -2: mov x0, xzr + /* Someone called kvm_call_hyp() against the hyp-stub... */ +2: mov x0, #ARM_EXCEPTION_HYP_GONE 9: eret ENDPROC(el1_sync) -- cgit v1.1 From adc9b2dfd00924e9e9b98613f36a6cb8c51f0dc6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 27 Apr 2016 17:47:06 +0100 Subject: arm64: kernel: Rework finisher callback out of __cpu_suspend_enter() Hibernate could make use of the cpu_suspend() code to save/restore cpu state, however it needs to be able to return '0' from the 'finisher'. Rework cpu_suspend() so that the finisher is called from C code, independently from the save/restore of cpu state. Space to save the context in is allocated in the caller's stack frame, and passed into __cpu_suspend_enter(). Hibernate's use of this API will look like a copy of the cpu_suspend() function. Signed-off-by: James Morse Acked-by: Lorenzo Pieralisi Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/asm-offsets.c | 2 + arch/arm64/kernel/sleep.S | 93 +++++++++++++++-------------------------- arch/arm64/kernel/suspend.c | 72 ++++++++++++++++++------------- 3 files changed, 77 insertions(+), 90 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 3ae6b31..0902acb 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -122,6 +122,8 @@ int main(void) DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); + DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs)); + DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs)); #endif DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index fd10eb6..b7c4b68 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -49,37 +49,30 @@ orr \dst, \dst, \mask // dst|=(aff3>>rs3) .endm /* - * Save CPU state for a suspend and execute the suspend finisher. - * On success it will return 0 through cpu_resume - ie through a CPU - * soft/hard reboot from the reset vector. - * On failure it returns the suspend finisher return value or force - * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher - * is not allowed to return, if it does this must be considered failure). - * It saves callee registers, and allocates space on the kernel stack - * to save the CPU specific registers + some other data for resume. + * Save CPU state in the provided sleep_stack_data area, and publish its + * location for cpu_resume()'s use in sleep_save_stash. * - * x0 = suspend finisher argument - * x1 = suspend finisher function pointer + * cpu_resume() will restore this saved state, and return. Because the + * link-register is saved and restored, it will appear to return from this + * function. So that the caller can tell the suspend/resume paths apart, + * __cpu_suspend_enter() will always return a non-zero value, whereas the + * path through cpu_resume() will return 0. + * + * x0 = struct sleep_stack_data area */ ENTRY(__cpu_suspend_enter) - stp x29, lr, [sp, #-96]! - stp x19, x20, [sp,#16] - stp x21, x22, [sp,#32] - stp x23, x24, [sp,#48] - stp x25, x26, [sp,#64] - stp x27, x28, [sp,#80] - /* - * Stash suspend finisher and its argument in x20 and x19 - */ - mov x19, x0 - mov x20, x1 + stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] + stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] + stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] + stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48] + stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64] + stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80] + + /* save the sp in cpu_suspend_ctx */ mov x2, sp - sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx - mov x0, sp - /* - * x0 now points to struct cpu_suspend_ctx allocated on the stack - */ - str x2, [x0, #CPU_CTX_SP] + str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] + + /* find the mpidr_hash */ ldr x1, =sleep_save_sp ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] mrs x7, mpidr_el1 @@ -93,34 +86,11 @@ ENTRY(__cpu_suspend_enter) ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 + + stp x29, lr, [sp, #-16]! bl __cpu_suspend_save - /* - * Grab suspend finisher in x20 and its argument in x19 - */ - mov x0, x19 - mov x1, x20 - /* - * We are ready for power down, fire off the suspend finisher - * in x1, with argument in x0 - */ - blr x1 - /* - * Never gets here, unless suspend finisher fails. - * Successful cpu_suspend should return from cpu_resume, returning - * through this code path is considered an error - * If the return value is set to 0 force x0 = -EOPNOTSUPP - * to make sure a proper error condition is propagated - */ - cmp x0, #0 - mov x3, #-EOPNOTSUPP - csel x0, x3, x0, eq - add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 + ldp x29, lr, [sp], #16 + mov x0, #1 ret ENDPROC(__cpu_suspend_enter) .ltorg @@ -150,12 +120,6 @@ cpu_resume_after_mmu: bl kasan_unpoison_remaining_stack #endif mov x0, #0 // return zero on success - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 ret ENDPROC(cpu_resume_after_mmu) @@ -172,6 +136,8 @@ ENTRY(cpu_resume) /* x7 contains hash index, let's use it to grab context pointer */ ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS ldr x0, [x0, x7, lsl #3] + add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] /* load physical address of identity map page table in x1 */ @@ -185,5 +151,12 @@ ENTRY(cpu_resume) * pointer and x1 to contain physical address of 1:1 page tables */ bl cpu_do_resume // PC relative jump, MMU off + /* Can't access these by physical address once the MMU is on */ + ldp x19, x20, [x29, #16] + ldp x21, x22, [x29, #32] + ldp x23, x24, [x29, #48] + ldp x25, x26, [x29, #64] + ldp x27, x28, [x29, #80] + ldp x29, lr, [x29] b cpu_resume_mmu // Resume MMU, never returns ENDPROC(cpu_resume) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 6605539..0d52ec9 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,22 +10,22 @@ #include #include -extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long)); + /* * This is called by __cpu_suspend_enter() to save the state, and do whatever * flushing is required to ensure that when the CPU goes to sleep we have * the necessary data available when the caches are not searched. * - * ptr: CPU context virtual address + * ptr: sleep_stack_data containing cpu state virtual address. * save_ptr: address of the location where the context physical address * must be saved */ -void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr, +void notrace __cpu_suspend_save(struct sleep_stack_data *ptr, phys_addr_t *save_ptr) { *save_ptr = virt_to_phys(ptr); - cpu_do_suspend(ptr); + cpu_do_suspend(&ptr->system_regs); /* * Only flush the context that must be retrieved with the MMU * off. VA primitives ensure the flush is applied to all @@ -51,6 +51,30 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) hw_breakpoint_restore = hw_bp_restore; } +void notrace __cpu_suspend_exit(void) +{ + /* + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. + */ + cpu_uninstall_idmap(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); +} + /* * cpu_suspend * @@ -60,8 +84,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - int ret; + int ret = 0; unsigned long flags; + struct sleep_stack_data state; /* * From this point debug exceptions are disabled to prevent @@ -77,34 +102,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ pause_graph_tracing(); - /* - * mm context saved on the stack, it will be restored when - * the cpu comes out of reset through the identity mapped - * page tables, so that the thread address space is properly - * set-up on function return. - */ - ret = __cpu_suspend_enter(arg, fn); - if (ret == 0) { - /* - * We are resuming from reset with the idmap active in TTBR0_EL1. - * We must uninstall the idmap and restore the expected MMU - * state before we can possibly return to userspace. - */ - cpu_uninstall_idmap(); - - /* - * Restore per-cpu offset before any kernel - * subsystem relying on it has a chance to run. - */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + if (__cpu_suspend_enter(&state)) { + /* Call the suspend finisher */ + ret = fn(arg); /* - * Restore HW breakpoint registers to sane values - * before debug exceptions are possibly reenabled - * through local_dbg_restore. + * Never gets here, unless the suspend finisher fails. + * Successful cpu_suspend() should return from cpu_resume(), + * returning through this code path is considered an error + * If the return value is set to 0 force ret = -EOPNOTSUPP + * to make sure a proper error condition is propagated */ - if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + if (!ret) + ret = -EOPNOTSUPP; + } else { + __cpu_suspend_exit(); } unpause_graph_tracing(); -- cgit v1.1 From cabe1c81ea5be983425d117912d7883e252a3b09 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 27 Apr 2016 17:47:07 +0100 Subject: arm64: Change cpu_resume() to enable mmu early then access sleep_sp by va By enabling the MMU early in cpu_resume(), the sleep_save_sp and stack can be accessed by VA, which avoids the need to convert-addresses and clean to PoC on the suspend path. MMU setup is shared with the boot path, meaning the swapper_pg_dir is restored directly: ttbr1_el1 is no longer saved/restored. struct sleep_save_sp is removed, replacing it with a single array of pointers. cpu_do_{suspend,resume} could be further reduced to not restore: cpacr_el1, mdscr_el1, tcr_el1, vbar_el1 and sctlr_el1, all of which are set by __cpu_setup(). However these values all contain res0 bits that may be used to enable future features. Signed-off-by: James Morse Reviewed-by: Lorenzo Pieralisi Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/asm-offsets.c | 3 -- arch/arm64/kernel/head.S | 2 +- arch/arm64/kernel/setup.c | 1 - arch/arm64/kernel/sleep.S | 66 ++++++++++++++++------------------------- arch/arm64/kernel/suspend.c | 38 ++++-------------------- 5 files changed, 33 insertions(+), 77 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 0902acb..ac742ef 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -119,9 +119,6 @@ int main(void) DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); - DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); - DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); - DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs)); DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs)); #endif diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 00a3210..0674384 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -716,7 +716,7 @@ ENTRY(__early_cpu_boot_status) * If it isn't, park the CPU */ .section ".idmap.text", "ax" -__enable_mmu: +ENTRY(__enable_mmu) mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 65f5159..3279def 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -176,7 +176,6 @@ static void __init smp_build_mpidr_hash(void) */ if (mpidr_hash_size() > 4 * num_possible_cpus()) pr_warn("Large number of MPIDR hash buckets detected\n"); - __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } static void __init setup_machine_fdt(phys_addr_t dt_phys) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index b7c4b68..9a3aec9 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -73,8 +73,8 @@ ENTRY(__cpu_suspend_enter) str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] /* find the mpidr_hash */ - ldr x1, =sleep_save_sp - ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] + ldr x1, =sleep_save_stash + ldr x1, [x1] mrs x7, mpidr_el1 ldr x9, =mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] @@ -87,44 +87,27 @@ ENTRY(__cpu_suspend_enter) compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 + str x0, [x1] + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS stp x29, lr, [sp, #-16]! - bl __cpu_suspend_save + bl cpu_do_suspend ldp x29, lr, [sp], #16 mov x0, #1 ret ENDPROC(__cpu_suspend_enter) .ltorg -/* - * x0 must contain the sctlr value retrieved from restored context - */ - .pushsection ".idmap.text", "ax" -ENTRY(cpu_resume_mmu) - ldr x3, =cpu_resume_after_mmu - msr sctlr_el1, x0 // restore sctlr_el1 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb - br x3 // global jump to virtual address -ENDPROC(cpu_resume_mmu) - .popsection -cpu_resume_after_mmu: -#ifdef CONFIG_KASAN - mov x0, sp - bl kasan_unpoison_remaining_stack -#endif - mov x0, #0 // return zero on success - ret -ENDPROC(cpu_resume_after_mmu) - ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly + /* enable the MMU early - so we can access sleep_save_stash by va */ + adr_l lr, __enable_mmu /* __cpu_setup will return here */ + ldr x27, =_cpu_resume /* __enable_mmu will branch here */ + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + b __cpu_setup +ENDPROC(cpu_resume) + +ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address @@ -134,29 +117,32 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ - ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS + ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] - /* load physical address of identity map page table in x1 */ - adrp x1, idmap_pg_dir mov sp, x2 /* save thread_info */ and x2, x2, #~(THREAD_SIZE - 1) msr sp_el0, x2 /* - * cpu_do_resume expects x0 to contain context physical address - * pointer and x1 to contain physical address of 1:1 page tables + * cpu_do_resume expects x0 to contain context address pointer */ - bl cpu_do_resume // PC relative jump, MMU off - /* Can't access these by physical address once the MMU is on */ + bl cpu_do_resume + +#ifdef CONFIG_KASAN + mov x0, sp + bl kasan_unpoison_remaining_stack +#endif + ldp x19, x20, [x29, #16] ldp x21, x22, [x29, #32] ldp x23, x24, [x29, #48] ldp x25, x26, [x29, #64] ldp x27, x28, [x29, #80] ldp x29, lr, [x29] - b cpu_resume_mmu // Resume MMU, never returns -ENDPROC(cpu_resume) + mov x0, #0 + ret +ENDPROC(_cpu_resume) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 0d52ec9..b616e36 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,30 +10,11 @@ #include #include - /* - * This is called by __cpu_suspend_enter() to save the state, and do whatever - * flushing is required to ensure that when the CPU goes to sleep we have - * the necessary data available when the caches are not searched. - * - * ptr: sleep_stack_data containing cpu state virtual address. - * save_ptr: address of the location where the context physical address - * must be saved + * This is allocated by cpu_suspend_init(), and used to store a pointer to + * the 'struct sleep_stack_data' the contains a particular CPUs state. */ -void notrace __cpu_suspend_save(struct sleep_stack_data *ptr, - phys_addr_t *save_ptr) -{ - *save_ptr = virt_to_phys(ptr); - - cpu_do_suspend(&ptr->system_regs); - /* - * Only flush the context that must be retrieved with the MMU - * off. VA primitives ensure the flush is applied to all - * cache levels so context is pushed to DRAM. - */ - __flush_dcache_area(ptr, sizeof(*ptr)); - __flush_dcache_area(save_ptr, sizeof(*save_ptr)); -} +unsigned long *sleep_save_stash; /* * This hook is provided so that cpu_suspend code can restore HW @@ -131,22 +112,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) return ret; } -struct sleep_save_sp sleep_save_sp; - static int __init cpu_suspend_init(void) { - void *ctx_ptr; - /* ctx_ptr is an array of physical addresses */ - ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash), + GFP_KERNEL); - if (WARN_ON(!ctx_ptr)) + if (WARN_ON(!sleep_save_stash)) return -ENOMEM; - sleep_save_sp.save_ptr_stash = ctx_ptr; - sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); - __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); - return 0; } early_initcall(cpu_suspend_init); -- cgit v1.1 From 28c7258330ee4ce701a4da7af96d6605d1a0b3bd Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 27 Apr 2016 17:47:09 +0100 Subject: arm64: Promote KERNEL_START/KERNEL_END definitions to a header file KERNEL_START and KERNEL_END are useful outside head.S, move them to a header file. Signed-off-by: James Morse Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0674384..564c340 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -52,9 +52,6 @@ #error TEXT_OFFSET must be less than 2MB #endif -#define KERNEL_START _text -#define KERNEL_END _end - /* * Kernel startup entry point. * --------------------------- -- cgit v1.1 From 82869ac57b5d3b550446932c918dbf2caf020c9e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 27 Apr 2016 17:47:12 +0100 Subject: arm64: kernel: Add support for hibernate/suspend-to-disk Add support for hibernate/suspend-to-disk. Suspend borrows code from cpu_suspend() to write cpu state onto the stack, before calling swsusp_save() to save the memory image. Restore creates a set of temporary page tables, covering only the linear map, copies the restore code to a 'safe' page, then uses the copy to restore the memory image. The copied code executes in the lower half of the address space, and once complete, restores the original kernel's page tables. It then calls into cpu_resume(), and follows the normal cpu_suspend() path back into the suspend code. To restore a kernel using KASLR, the address of the page tables, and cpu_resume() are stored in the hibernate arch-header and the el2 vectors are pivotted via the 'safe' page in low memory. Reviewed-by: Catalin Marinas Tested-by: Kevin Hilman # Tested on Juno R2 Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/asm-offsets.c | 5 + arch/arm64/kernel/hibernate-asm.S | 176 +++++++++++++++ arch/arm64/kernel/hibernate.c | 461 ++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 15 ++ 5 files changed, 658 insertions(+) create mode 100644 arch/arm64/kernel/hibernate-asm.S create mode 100644 arch/arm64/kernel/hibernate.c (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 3793003..2173149 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,6 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index ac742ef..f8e5d47 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -124,5 +125,9 @@ int main(void) #endif DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); + BLANK(); + DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); + DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); + DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); return 0; } diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S new file mode 100644 index 0000000..46f29b6 --- /dev/null +++ b/arch/arm64/kernel/hibernate-asm.S @@ -0,0 +1,176 @@ +/* + * Hibernate low-level support + * + * Copyright (C) 2016 ARM Ltd. + * Author: James Morse + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ +.macro break_before_make_ttbr_switch zero_page, page_table + msr ttbr1_el1, \zero_page + isb + tlbi vmalle1is + dsb ish + msr ttbr1_el1, \page_table + isb +.endm + + +/* + * Resume from hibernate + * + * Loads temporary page tables then restores the memory image. + * Finally branches to cpu_resume() to restore the state saved by + * swsusp_arch_suspend(). + * + * Because this code has to be copied to a 'safe' page, it can't call out to + * other functions by PC-relative address. Also remember that it may be + * mid-way through over-writing other functions. For this reason it contains + * code from flush_icache_range() and uses the copy_page() macro. + * + * This 'safe' page is mapped via ttbr0, and executed from there. This function + * switches to a copy of the linear map in ttbr1, performs the restore, then + * switches ttbr1 to the original kernel's swapper_pg_dir. + * + * All of memory gets written to, including code. We need to clean the kernel + * text to the Point of Coherence (PoC) before secondary cores can be booted. + * Because the kernel modules and executable pages mapped to user space are + * also written as data, we clean all pages we touch to the Point of + * Unification (PoU). + * + * x0: physical address of temporary page tables + * x1: physical address of swapper page tables + * x2: address of cpu_resume + * x3: linear map address of restore_pblist in the current kernel + * x4: physical address of __hyp_stub_vectors, or 0 + * x5: physical address of a zero page that remains zero after resume + */ +.pushsection ".hibernate_exit.text", "ax" +ENTRY(swsusp_arch_suspend_exit) + /* + * We execute from ttbr0, change ttbr1 to our copied linear map tables + * with a break-before-make via the zero page + */ + break_before_make_ttbr_switch x5, x0 + + mov x21, x1 + mov x30, x2 + mov x24, x4 + mov x25, x5 + + /* walk the restore_pblist and use copy_page() to over-write memory */ + mov x19, x3 + +1: ldr x10, [x19, #HIBERN_PBE_ORIG] + mov x0, x10 + ldr x1, [x19, #HIBERN_PBE_ADDR] + + copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 + + add x1, x10, #PAGE_SIZE + /* Clean the copied page to PoU - based on flush_icache_range() */ + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x4, x10, x3 +2: dc cvau, x4 /* clean D line / unified line */ + add x4, x4, x2 + cmp x4, x1 + b.lo 2b + + ldr x19, [x19, #HIBERN_PBE_NEXT] + cbnz x19, 1b + dsb ish /* wait for PoU cleaning to finish */ + + /* switch to the restored kernels page tables */ + break_before_make_ttbr_switch x25, x21 + + ic ialluis + dsb ish + isb + + cbz x24, 3f /* Do we need to re-initialise EL2? */ + hvc #0 +3: ret + + .ltorg +ENDPROC(swsusp_arch_suspend_exit) + +/* + * Restore the hyp stub. + * This must be done before the hibernate page is unmapped by _cpu_resume(), + * but happens before any of the hyp-stub's code is cleaned to PoC. + * + * x24: The physical address of __hyp_stub_vectors + */ +el1_sync: + msr vbar_el2, x24 + eret +ENDPROC(el1_sync) + +.macro invalid_vector label +\label: + b \label +ENDPROC(\label) +.endm + + invalid_vector el2_sync_invalid + invalid_vector el2_irq_invalid + invalid_vector el2_fiq_invalid + invalid_vector el2_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + +/* el2 vectors - switch el2 here while we restore the memory image. */ + .align 11 +ENTRY(hibernate_el2_vectors) + ventry el2_sync_invalid // Synchronous EL2t + ventry el2_irq_invalid // IRQ EL2t + ventry el2_fiq_invalid // FIQ EL2t + ventry el2_error_invalid // Error EL2t + + ventry el2_sync_invalid // Synchronous EL2h + ventry el2_irq_invalid // IRQ EL2h + ventry el2_fiq_invalid // FIQ EL2h + ventry el2_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq_invalid // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync_invalid // Synchronous 32-bit EL1 + ventry el1_irq_invalid // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +END(hibernate_el2_vectors) + +.popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c new file mode 100644 index 0000000..7e16fb3 --- /dev/null +++ b/arch/arm64/kernel/hibernate.c @@ -0,0 +1,461 @@ +/*: + * Hibernate support specific for ARM64 + * + * Derived from work on ARM hibernation support by: + * + * Ubuntu project, hibernation support for mach-dove + * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu) + * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.) + * https://lkml.org/lkml/2010/6/18/4 + * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html + * https://patchwork.kernel.org/patch/96442/ + * + * Copyright (C) 2006 Rafael J. Wysocki + * + * License terms: GNU General Public License (GPL) version 2 + */ +#define pr_fmt(x) "hibernate: " x +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Hibernate core relies on this value being 0 on resume, and marks it + * __nosavedata assuming it will keep the resume kernel's '0' value. This + * doesn't happen with either KASLR. + * + * defined as "__visible int in_suspend __nosavedata" in + * kernel/power/hibernate.c + */ +extern int in_suspend; + +/* Find a symbols alias in the linear map */ +#define LMADDR(x) phys_to_virt(virt_to_phys(x)) + +/* Do we need to reset el2? */ +#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) + +/* + * Start/end of the hibernate exit code, this must be copied to a 'safe' + * location in memory, and executed from there. + */ +extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; + +/* temporary el2 vectors in the __hibernate_exit_text section. */ +extern char hibernate_el2_vectors[]; + +/* hyp-stub vectors, used to restore el2 during resume from hibernate. */ +extern char __hyp_stub_vectors[]; + +/* + * Values that may not change over hibernate/resume. We put the build number + * and date in here so that we guarantee not to resume with a different + * kernel. + */ +struct arch_hibernate_hdr_invariants { + char uts_version[__NEW_UTS_LEN + 1]; +}; + +/* These values need to be know across a hibernate/restore. */ +static struct arch_hibernate_hdr { + struct arch_hibernate_hdr_invariants invariants; + + /* These are needed to find the relocated kernel if built with kaslr */ + phys_addr_t ttbr1_el1; + void (*reenter_kernel)(void); + + /* + * We need to know where the __hyp_stub_vectors are after restore to + * re-configure el2. + */ + phys_addr_t __hyp_stub_vectors; +} resume_hdr; + +static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) +{ + memset(i, 0, sizeof(*i)); + memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version)); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1); + + return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); +} + +void notrace save_processor_state(void) +{ + WARN_ON(num_online_cpus() != 1); +} + +void notrace restore_processor_state(void) +{ +} + +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct arch_hibernate_hdr *hdr = addr; + + if (max_size < sizeof(*hdr)) + return -EOVERFLOW; + + arch_hdr_invariants(&hdr->invariants); + hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir); + hdr->reenter_kernel = _cpu_resume; + + /* We can't use __hyp_get_vectors() because kvm may still be loaded */ + if (el2_reset_needed()) + hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors); + else + hdr->__hyp_stub_vectors = 0; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_save); + +int arch_hibernation_header_restore(void *addr) +{ + struct arch_hibernate_hdr_invariants invariants; + struct arch_hibernate_hdr *hdr = addr; + + arch_hdr_invariants(&invariants); + if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) { + pr_crit("Hibernate image not generated by this kernel!\n"); + return -EINVAL; + } + + resume_hdr = *hdr; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_restore); + +/* + * Copies length bytes, starting at src_start into an new page, + * perform cache maintentance, then maps it at the specified address low + * address as executable. + * + * This is used by hibernate to copy the code it needs to execute when + * overwriting the kernel text. This function generates a new set of page + * tables, which it loads into ttbr0. + * + * Length is provided as we probably only want 4K of data, even on a 64K + * page system. + */ +static int create_safe_exec_page(void *src_start, size_t length, + unsigned long dst_addr, + phys_addr_t *phys_dst_addr, + void *(*allocator)(gfp_t mask), + gfp_t mask) +{ + int rc = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long dst = (unsigned long)allocator(mask); + + if (!dst) { + rc = -ENOMEM; + goto out; + } + + memcpy((void *)dst, src_start, length); + flush_icache_range(dst, dst + length); + + pgd = pgd_offset_raw(allocator(mask), dst_addr); + if (pgd_none(*pgd)) { + pud = allocator(mask); + if (!pud) { + rc = -ENOMEM; + goto out; + } + pgd_populate(&init_mm, pgd, pud); + } + + pud = pud_offset(pgd, dst_addr); + if (pud_none(*pud)) { + pmd = allocator(mask); + if (!pmd) { + rc = -ENOMEM; + goto out; + } + pud_populate(&init_mm, pud, pmd); + } + + pmd = pmd_offset(pud, dst_addr); + if (pmd_none(*pmd)) { + pte = allocator(mask); + if (!pte) { + rc = -ENOMEM; + goto out; + } + pmd_populate_kernel(&init_mm, pmd, pte); + } + + pte = pte_offset_kernel(pmd, dst_addr); + set_pte(pte, __pte(virt_to_phys((void *)dst) | + pgprot_val(PAGE_KERNEL_EXEC))); + + /* Load our new page tables */ + asm volatile("msr ttbr0_el1, %0;" + "isb;" + "tlbi vmalle1is;" + "dsb ish;" + "isb" : : "r"(virt_to_phys(pgd))); + + *phys_dst_addr = virt_to_phys((void *)dst); + +out: + return rc; +} + + +int swsusp_arch_suspend(void) +{ + int ret = 0; + unsigned long flags; + struct sleep_stack_data state; + + local_dbg_save(flags); + + if (__cpu_suspend_enter(&state)) { + ret = swsusp_save(); + } else { + /* Clean kernel to PoC for secondary core startup */ + __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START); + + /* + * Tell the hibernation core that we've just restored + * the memory + */ + in_suspend = 0; + + __cpu_suspend_exit(); + } + + local_dbg_restore(flags); + + return ret; +} + +static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, + unsigned long end) +{ + pte_t *src_pte; + pte_t *dst_pte; + unsigned long addr = start; + + dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pte) + return -ENOMEM; + pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); + dst_pte = pte_offset_kernel(dst_pmd, start); + + src_pte = pte_offset_kernel(src_pmd, start); + do { + if (!pte_none(*src_pte)) + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY)); + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + + return 0; +} + +static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, + unsigned long end) +{ + pmd_t *src_pmd; + pmd_t *dst_pmd; + unsigned long next; + unsigned long addr = start; + + if (pud_none(*dst_pud)) { + dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmd) + return -ENOMEM; + pud_populate(&init_mm, dst_pud, dst_pmd); + } + dst_pmd = pmd_offset(dst_pud, start); + + src_pmd = pmd_offset(src_pud, start); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*src_pmd)) + continue; + if (pmd_table(*src_pmd)) { + if (copy_pte(dst_pmd, src_pmd, addr, next)) + return -ENOMEM; + } else { + set_pmd(dst_pmd, + __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); + } + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + + return 0; +} + +static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, + unsigned long end) +{ + pud_t *dst_pud; + pud_t *src_pud; + unsigned long next; + unsigned long addr = start; + + if (pgd_none(*dst_pgd)) { + dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pud) + return -ENOMEM; + pgd_populate(&init_mm, dst_pgd, dst_pud); + } + dst_pud = pud_offset(dst_pgd, start); + + src_pud = pud_offset(src_pgd, start); + do { + next = pud_addr_end(addr, end); + if (pud_none(*src_pud)) + continue; + if (pud_table(*(src_pud))) { + if (copy_pmd(dst_pud, src_pud, addr, next)) + return -ENOMEM; + } else { + set_pud(dst_pud, + __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); + } + } while (dst_pud++, src_pud++, addr = next, addr != end); + + return 0; +} + +static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, + unsigned long end) +{ + unsigned long next; + unsigned long addr = start; + pgd_t *src_pgd = pgd_offset_k(start); + + dst_pgd = pgd_offset_raw(dst_pgd, start); + do { + next = pgd_addr_end(addr, end); + if (pgd_none(*src_pgd)) + continue; + if (copy_pud(dst_pgd, src_pgd, addr, next)) + return -ENOMEM; + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + return 0; +} + +/* + * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). + * + * Memory allocated by get_safe_page() will be dealt with by the hibernate code, + * we don't need to free it here. + */ +int swsusp_arch_resume(void) +{ + int rc = 0; + void *zero_page; + size_t exit_size; + pgd_t *tmp_pg_dir; + void *lm_restore_pblist; + phys_addr_t phys_hibernate_exit; + void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, + void *, phys_addr_t, phys_addr_t); + + /* + * Locate the exit code in the bottom-but-one page, so that *NULL + * still has disastrous affects. + */ + hibernate_exit = (void *)PAGE_SIZE; + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; + /* + * Copy swsusp_arch_suspend_exit() to a safe page. This will generate + * a new set of ttbr0 page tables and load them. + */ + rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, + (unsigned long)hibernate_exit, + &phys_hibernate_exit, + (void *)get_safe_page, GFP_ATOMIC); + if (rc) { + pr_err("Failed to create safe executable page for hibernate_exit code."); + goto out; + } + + /* + * The hibernate exit text contains a set of el2 vectors, that will + * be executed at el2 with the mmu off in order to reload hyp-stub. + */ + __flush_dcache_area(hibernate_exit, exit_size); + + /* + * Restoring the memory image will overwrite the ttbr1 page tables. + * Create a second copy of just the linear map, and use this when + * restoring. + */ + tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!tmp_pg_dir) { + pr_err("Failed to allocate memory for temporary page tables."); + rc = -ENOMEM; + goto out; + } + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + if (rc) + goto out; + + /* + * Since we only copied the linear map, we need to find restore_pblist's + * linear map address. + */ + lm_restore_pblist = LMADDR(restore_pblist); + + /* + * KASLR will cause the el2 vectors to be in a different location in + * the resumed kernel. Load hibernate's temporary copy into el2. + * + * We can skip this step if we booted at EL1, or are running with VHE. + */ + if (el2_reset_needed()) { + phys_addr_t el2_vectors = phys_hibernate_exit; /* base */ + el2_vectors += hibernate_el2_vectors - + __hibernate_exit_text_start; /* offset */ + + __hyp_set_vectors(el2_vectors); + } + + /* + * We need a zero page that is zero before & after resume in order to + * to break before make on the ttbr1 page tables. + */ + zero_page = (void *)get_safe_page(GFP_ATOMIC); + + hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, + resume_hdr.reenter_kernel, lm_restore_pblist, + resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); + +out: + return rc; +} diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 8918b30..435e820 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -46,6 +46,16 @@ jiffies = jiffies_64; *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; +#ifdef CONFIG_HIBERNATION +#define HIBERNATE_TEXT \ + . = ALIGN(SZ_4K); \ + VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + *(.hibernate_exit.text) \ + VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; +#else +#define HIBERNATE_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -113,6 +123,7 @@ SECTIONS LOCK_TEXT HYPERVISOR_TEXT IDMAP_TEXT + HIBERNATE_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -206,6 +217,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "HYP init code too big or misaligned") ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "ID map text too big or misaligned") +#ifdef CONFIG_HIBERNATION +ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) + <= SZ_4K, "Hibernate exit text too big or misaligned") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. -- cgit v1.1 From 1fe492ce6482b77807b25d29690a48c46456beee Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 27 Apr 2016 17:47:13 +0100 Subject: arm64: hibernate: Refuse to hibernate if the boot cpu is offline Hibernation represents a system state save/restore through a system reboot; this implies that the logical cpus carrying out hibernation/thawing must be the same, so that the context saved in the snapshot image on hibernation is consistent with the state of the system on resume. If resume from hibernation is driven through kernel command line parameter, the cpu responsible for thawing the system will be whatever CPU firmware boots the system on upon cold-boot (ie logical cpu 0); this means that in order to keep system context consistent between the hibernate snapshot image and system state on kernel resume from hibernate, logical cpu 0 must be online on hibernation and must be the logical cpu that creates the snapshot image. This patch adds a PM notifier that enforces logical cpu 0 is online when the hibernation is started (and prevents hibernation if it is not), which is sufficient to guarantee it will be the one creating the snapshot image therefore providing the resume cpu a consistent snapshot of the system to resume to. Signed-off-by: James Morse Acked-by: Lorenzo Pieralisi Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 7e16fb3..f8df75d 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -17,6 +17,7 @@ #define pr_fmt(x) "hibernate: " x #include #include +#include #include #include #include @@ -459,3 +460,28 @@ int swsusp_arch_resume(void) out: return rc; } + +static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + if (action == PM_HIBERNATION_PREPARE && + cpumask_first(cpu_online_mask) != 0) { + pr_warn("CPU0 is offline.\n"); + return notifier_from_errno(-ENODEV); + } + + return NOTIFY_OK; +} + +static int __init check_boot_cpu_online_init(void) +{ + /* + * Set this pm_notifier callback with a lower priority than + * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be + * called earlier to disable cpu hotplug before the cpu online check. + */ + pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); + + return 0; +} +core_initcall(check_boot_cpu_online_init); -- cgit v1.1 From 99aa036241ed4a08a71b627bb903b5d7c75d78c1 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 May 2016 11:14:41 +0100 Subject: arm64: secondary_start_kernel: Remove unnecessary barrier Remove the unnecessary smp_wmb(), which was added to make sure that the update_cpu_boot_status() completes before we mark the CPU online. But update_cpu_boot_status() already has dsb() (required for the failing CPUs) to ensure the correct behavior. Cc: Catalin Marinas Acked-by: Mark Rutland Reported-by: Dennis Chen Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dc96475..678e084 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -264,8 +264,6 @@ asmlinkage void secondary_start_kernel(void) pr_info("CPU%u: Booted secondary processor [%08x]\n", cpu, read_cpuid_id()); update_cpu_boot_status(CPU_BOOT_SUCCESS); - /* Make sure the status update is visible before we complete */ - smp_wmb(); set_cpu_online(cpu, true); complete(&cpu_running); -- cgit v1.1 From f228b494e56d949be8d8ea09d4f973d1979201bf Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 10 May 2016 15:40:31 +0100 Subject: arm64: cpuinfo: Missing NULL terminator in compat_hwcap_str The loop that browses the array compat_hwcap_str will stop when a NULL is encountered, however NULL is missing at the end of array. This will lead to overrun until a NULL is found somewhere in the following memory. In reality, this works out because the compat_hwcap2_str array tends to follow immediately in memory, and that *is* terminated correctly. Furthermore, the unsigned int compat_elf_hwcap is checked before printing each capability, so we end up doing the right thing because the size of the two arrays is less than 32. Still, this is an obvious mistake and should be fixed. Note for backporting: commit 12d11817eaafa414 ("arm64: Move /proc/cpuinfo handling code") moved this code in v4.4. Prior to that commit, the same change should be made in arch/arm64/kernel/setup.c. Fixes: 44b82b7700d0 "arm64: Fix up /proc/cpuinfo" Cc: # v3.19+ (but see note above prior to v4.4) Signed-off-by: Julien Grall Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 92189e2..3808470 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -87,7 +87,8 @@ static const char *const compat_hwcap_str[] = { "idivt", "vfpd32", "lpae", - "evtstrm" + "evtstrm", + NULL }; static const char *const compat_hwcap2_str[] = { -- cgit v1.1 From 61462c8a6b140fe2f93cb911684837e05950e680 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 May 2016 10:55:49 -0700 Subject: arm64: kernel: Fix incorrect brk randomization This fixes two issues with the arm64 brk randomziation. First, the STACK_RND_MASK was being used incorrectly. The original code was: unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1; STACK_RND_MASK is 0x7ff (32-bit) or 0x3ffff (64-bit), with 4K pages where PAGE_SHIFT is 12: #define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \ 0x7ff >> (PAGE_SHIFT - 12) : \ 0x3ffff >> (PAGE_SHIFT - 12)) This means the resulting offset from base would be 0x7ff0001 or 0x3ffff0001, which is wrong since it creates an unaligned end address. It was likely intended to be: unsigned long range_end = base + ((STACK_RND_MASK + 1) << PAGE_SHIFT) Which would result in offsets of 0x800000 (32-bit) and 0x40000000 (64-bit). However, even this corrected 32-bit compat offset (0x00800000) is much smaller than native ARM's brk randomization value (0x02000000): unsigned long arch_randomize_brk(struct mm_struct *mm) { unsigned long range_end = mm->brk + 0x02000000; return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } So, instead of basing arm64's brk randomization on mistaken STACK_RND_MASK calculations, just use specific corrected values for compat (0x2000000) and native arm64 (0x40000000). Reviewed-by: Jon Medhurst Signed-off-by: Kees Cook [will: use is_compat_task() as suggested by tixy] Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 8062482..ad4a7e1 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -382,13 +382,14 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } -static unsigned long randomize_base(unsigned long base) -{ - unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1; - return randomize_range(base, range_end, 0) ? : base; -} - unsigned long arch_randomize_brk(struct mm_struct *mm) { - return randomize_base(mm->brk); + unsigned long range_end = mm->brk; + + if (is_compat_task()) + range_end += 0x02000000; + else + range_end += 0x40000000; + + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } -- cgit v1.1 From e6d9a52543338603e25e71e0e4942f05dae0dd8a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 May 2016 17:56:54 +0100 Subject: arm64: do not enforce strict 16 byte alignment to stack pointer copy_thread should not be enforcing 16 byte aligment and returning -EINVAL. Other architectures trap misaligned stack access with SIGBUS so arm64 should follow this convention, so remove the strict enforcement check. For example, currently clone(2) fails with -EINVAL when passing a misaligned stack and this gives little clue to what is wrong. Instead, it is arguable that a SIGBUS on the fist access to a misaligned stack allows one to figure out that it is a misaligned stack issue rather than trying to figure out why an unconventional (and undocumented) -EINVAL is being returned. Acked-by: Catalin Marinas Signed-off-by: Colin Ian King Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ad4a7e1..48eea68 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -265,9 +265,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (stack_start) { if (is_compat_thread(task_thread_info(p))) childregs->compat_sp = stack_start; - /* 16-byte aligned stack mandatory on AArch64 */ - else if (stack_start & 15) - return -EINVAL; else childregs->sp = stack_start; } -- cgit v1.1