diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 16:13:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 16:13:28 -0700 |
commit | 1a4a2bc460721bc8f91e4c1294d39b38e5af132f (patch) | |
tree | fe646d05f6e17f05601e0a32cc796bec718ab6e7 /arch/x86/mm | |
parent | 110a9e42b68719f584879c5c5c727bbae90d15f9 (diff) | |
parent | 1ef55be16ed69538f89e0a6508be5e62fdc9851c (diff) | |
download | op-kernel-dev-1a4a2bc460721bc8f91e4c1294d39b38e5af132f.zip op-kernel-dev-1a4a2bc460721bc8f91e4c1294d39b38e5af132f.tar.gz |
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
"In this cycle this topic tree has become one of those 'super topics'
that accumulated a lot of changes:
- Add CONFIG_VMAP_STACK=y support to the core kernel and enable it on
x86 - preceded by an array of changes. v4.8 saw preparatory changes
in this area already - this is the rest of the work. Includes the
thread stack caching performance optimization. (Andy Lutomirski)
- switch_to() cleanups and all around enhancements. (Brian Gerst)
- A large number of dumpstack infrastructure enhancements and an
unwinder abstraction. The secret long term plan is safe(r) live
patching plus maybe another attempt at debuginfo based unwinding -
but all these current bits are standalone enhancements in a frame
pointer based debug environment as well. (Josh Poimboeuf)
- More __ro_after_init and const annotations. (Kees Cook)
- Enable KASLR for the vmemmap memory region. (Thomas Garnier)"
[ The virtually mapped stack changes are pretty fundamental, and not
x86-specific per se, even if they are only used on x86 right now. ]
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
x86/asm: Get rid of __read_cr4_safe()
thread_info: Use unsigned long for flags
x86/alternatives: Add stack frame dependency to alternative_call_2()
x86/dumpstack: Fix show_stack() task pointer regression
x86/dumpstack: Remove dump_trace() and related callbacks
x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
oprofile/x86: Convert x86_backtrace() to use the new unwinder
x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
perf/x86: Convert perf_callchain_kernel() to use the new unwinder
x86/unwind: Add new unwind interface and implementations
x86/dumpstack: Remove NULL task pointer convention
fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
lib/syscall: Pin the task stack in collect_syscall()
x86/process: Pin the target stack in get_wchan()
x86/dumpstack: Pin the target stack when dumping it
kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function
sched/core: Add try_get_task_stack() and put_task_stack()
x86/entry/64: Fix a minor comment rebase error
iommu/amd: Don't put completion-wait semaphore on stack
...
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/fault.c | 32 | ||||
-rw-r--r-- | arch/x86/mm/kaslr.c | 26 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 15 |
3 files changed, 71 insertions, 2 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index dc80230..0b92fce 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code, return; } +#ifdef CONFIG_VMAP_STACK + /* + * Stack overflow? During boot, we can fault near the initial + * stack in the direct map, but that's not an overflow -- check + * that we're in vmalloc space to avoid this. + */ + if (is_vmalloc_addr((void *)address) && + (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || + address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { + register void *__sp asm("rsp"); + unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); + /* + * We're likely to be running with very little stack space + * left. It's plausible that we'd hit this condition but + * double-fault even before we get this far, in which case + * we're fine: the double-fault handler will deal with it. + * + * We don't want to make it all the way into the oops code + * and then double-fault, though, because we're likely to + * break the console driver and lose most of the stack dump. + */ + asm volatile ("movq %[stack], %%rsp\n\t" + "call handle_stack_overflow\n\t" + "1: jmp 1b" + : "+r" (__sp) + : "D" ("kernel stack overflow (page fault)"), + "S" (regs), "d" (address), + [stack] "rm" (stack)); + unreachable(); + } +#endif + /* * 32-bit: * diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index bda8d5e..ddd2661 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -40,17 +40,26 @@ * You need to add an if/def entry if you introduce a new memory region * compatible with KASLR. Your entry must be in logical order with memory * layout. For example, ESPFIX is before EFI because its virtual address is - * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to + * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to * ensure that this order is correct and won't be changed. */ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; -static const unsigned long vaddr_end = VMEMMAP_START; + +#if defined(CONFIG_X86_ESPFIX64) +static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; +#elif defined(CONFIG_EFI) +static const unsigned long vaddr_end = EFI_VA_START; +#else +static const unsigned long vaddr_end = __START_KERNEL_map; +#endif /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; EXPORT_SYMBOL(page_offset_base); unsigned long vmalloc_base = __VMALLOC_BASE; EXPORT_SYMBOL(vmalloc_base); +unsigned long vmemmap_base = __VMEMMAP_BASE; +EXPORT_SYMBOL(vmemmap_base); /* * Memory regions randomized by KASLR (except modules that use a separate logic @@ -63,6 +72,7 @@ static __initdata struct kaslr_memory_region { } kaslr_regions[] = { { &page_offset_base, 64/* Maximum */ }, { &vmalloc_base, VMALLOC_SIZE_TB }, + { &vmemmap_base, 1 }, }; /* Get size in bytes used by the memory region */ @@ -89,6 +99,18 @@ void __init kernel_randomize_memory(void) struct rnd_state rand_state; unsigned long remain_entropy; + /* + * All these BUILD_BUG_ON checks ensures the memory layout is + * consistent with the vaddr_start/vaddr_end variables. + */ + BUILD_BUG_ON(vaddr_start >= vaddr_end); + BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) && + vaddr_end >= EFI_VA_START); + BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) || + config_enabled(CONFIG_EFI)) && + vaddr_end >= __START_KERNEL_map); + BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); + if (!kaslr_memory_enabled()) return; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 4dbe656..a7655f6 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, unsigned cpu = smp_processor_id(); if (likely(prev != next)) { + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + /* + * If our current stack is in vmalloc space and isn't + * mapped in the new pgd, we'll double-fault. Forcibly + * map it. + */ + unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); + + pgd_t *pgd = next->pgd + stack_pgd_index; + + if (unlikely(pgd_none(*pgd))) + set_pgd(pgd, init_mm.pgd[stack_pgd_index]); + } + #ifdef CONFIG_SMP this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); this_cpu_write(cpu_tlbstate.active_mm, next); #endif + cpumask_set_cpu(cpu, mm_cpumask(next)); /* |