From 759496ba6407c6994d6a5ce3a5e74937d7816208 Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Thu, 12 Sep 2013 15:13:39 -0700
Subject: arch: mm: pass userspace fault flag to generic fault handler

Unlike global OOM handling, memory cgroup code will invoke the OOM killer
in any OOM situation because it has no way of telling faults occurring in
kernel context - which could be handled more gracefully - from
user-triggered faults.

Pass a flag that identifies faults originating in user space from the
architecture-specific fault handlers to generic code so that memcg OOM
handling can be improved.

Signed-off-by: Johannes Weiner
Reviewed-by: Michal Hocko
Cc: David Rientjes
Cc: KAMEZAWA Hiroyuki
Cc: azurIt
Cc: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86/mm/fault.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 654be4a..6d77c38 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1011,9 +1011,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
         unsigned long address;
         struct mm_struct *mm;
         int fault;
-        int write = error_code & PF_WRITE;
-        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                                        (write ? FAULT_FLAG_WRITE : 0);
+        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

         tsk = current;
         mm = tsk->mm;
@@ -1083,6 +1081,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
         if (user_mode_vm(regs)) {
                 local_irq_enable();
                 error_code |= PF_USER;
+                flags |= FAULT_FLAG_USER;
         } else {
                 if (regs->flags & X86_EFLAGS_IF)
                         local_irq_enable();
@@ -1109,6 +1108,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
                 return;
         }

+        if (error_code & PF_WRITE)
+                flags |= FAULT_FLAG_WRITE;
+
         /*
          * When running in the kernel we expect faults to occur only to
          * addresses in user space. All other faults represent errors in
--
cgit v1.1
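The flag introduced above is the only information generic mm code gets about whether a fault was user-triggered, so everything downstream reduces to testing FAULT_FLAG_USER in the flags word the architecture handler builds. A minimal standalone sketch of that gating idea follows; the flag values and the helper may_invoke_memcg_oom() are invented for illustration and are not the kernel's actual definitions - only the FAULT_FLAG_USER idea itself comes from the patch.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative flag values only -- not the kernel's real defines. */
#define FAULT_FLAG_WRITE        0x01    /* fault was a write access */
#define FAULT_FLAG_ALLOW_RETRY  0x02    /* retrying the fault is allowed */
#define FAULT_FLAG_KILLABLE     0x04    /* wait for the fault killably */
#define FAULT_FLAG_USER         0x08    /* fault originated in user space */

/* Hypothetical helper: the memcg OOM killer only makes sense for faults
 * triggered from user space; kernel-context faults can fail more gracefully. */
static bool may_invoke_memcg_oom(unsigned int flags)
{
        return (flags & FAULT_FLAG_USER) != 0;
}

int main(void)
{
        /* Mirrors how the x86 handler above builds its flags word. */
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
        bool user_mode = true;          /* e.g. user_mode_vm(regs) on x86 */
        bool write_access = true;       /* e.g. error_code & PF_WRITE */

        if (user_mode)
                flags |= FAULT_FLAG_USER;
        if (write_access)
                flags |= FAULT_FLAG_WRITE;

        printf("memcg OOM killer allowed: %s\n",
               may_invoke_memcg_oom(flags) ? "yes" : "no");
        return 0;
}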
From 3a13c4d761b4b979ba8767f42345fed3274991b0 Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Thu, 12 Sep 2013 15:13:40 -0700
Subject: x86: finish user fault error path with fatal signal

The x86 fault handler bails in the middle of error handling when the
task has a fatal signal pending. For a subsequent patch this is a
problem in OOM situations because it relies on pagefault_out_of_memory()
being called even when the task has been killed, to perform proper
per-task OOM state unwinding.

Shortcutting the fault like this is a rather minor optimization that
saves a few instructions in rare cases. Just remove it for
user-triggered faults.

Use the opportunity to split the fault retry handling from actual fault
errors and add locking documentation that reads surprisingly similar to
ARM's.

Signed-off-by: Johannes Weiner
Reviewed-by: Michal Hocko
Acked-by: KOSAKI Motohiro
Cc: David Rientjes
Cc: KAMEZAWA Hiroyuki
Cc: azurIt
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86/mm/fault.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6d77c38..3aaeffc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
         force_sig_info_fault(SIGBUS, code, address, tsk, fault);
 }

-static noinline int
+static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
                unsigned long address, unsigned int fault)
 {
-        /*
-         * Pagefault was interrupted by SIGKILL. We have no reason to
-         * continue pagefault.
-         */
-        if (fatal_signal_pending(current)) {
-                if (!(fault & VM_FAULT_RETRY))
-                        up_read(&current->mm->mmap_sem);
-                if (!(error_code & PF_USER))
-                        no_context(regs, error_code, address, 0, 0);
-                return 1;
+        if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+                up_read(&current->mm->mmap_sem);
+                no_context(regs, error_code, address, 0, 0);
+                return;
         }
-        if (!(fault & VM_FAULT_ERROR))
-                return 0;

         if (fault & VM_FAULT_OOM) {
                 /* Kernel mode? Handle exceptions or die: */
@@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
                         up_read(&current->mm->mmap_sem);
                         no_context(regs, error_code, address,
                                    SIGSEGV, SEGV_MAPERR);
-                        return 1;
+                        return;
                 }

                 up_read(&current->mm->mmap_sem);
@@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
                 else
                         BUG();
         }
-        return 1;
 }

 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
@@ -1189,9 +1180,17 @@ good_area:
          */
         fault = handle_mm_fault(mm, vma, address, flags);

-        if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
-                if (mm_fault_error(regs, error_code, address, fault))
-                        return;
+        /*
+         * If we need to retry but a fatal signal is pending, handle the
+         * signal first. We do not need to release the mmap_sem because it
+         * would already be released in __lock_page_or_retry in mm/filemap.c.
+         */
+        if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)))
+                return;
+
+        if (unlikely(fault & VM_FAULT_ERROR)) {
+                mm_fault_error(regs, error_code, address, fault);
+                return;
         }

         /*
--
cgit v1.1
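The restructuring in this second patch boils down to asking two separate questions after handle_mm_fault() returns: did a retry get interrupted by a fatal signal (nothing to clean up, since __lock_page_or_retry has already dropped mmap_sem), and did the fault genuinely fail (all errors funnel through mm_fault_error()). Below is a standalone sketch of that control flow; the VM_FAULT_* values and the *_stub() helpers are invented stand-ins for the real kernel symbols, only the ordering of the checks reflects the patch.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative values only -- not the kernel's VM_FAULT_* defines. */
#define VM_FAULT_ERROR  0x01
#define VM_FAULT_RETRY  0x02

/* Stand-ins for the real kernel helpers used in __do_page_fault(). */
static bool fatal_signal_pending_stub(void)
{
        return false;
}

static void mm_fault_error_stub(unsigned int fault)
{
        printf("handling fault error 0x%x (OOM/SIGBUS/SIGSEGV)\n", fault);
}

static void after_handle_mm_fault(unsigned int fault)
{
        /* Retry interrupted by a fatal signal: per the patch, mmap_sem was
         * already dropped in __lock_page_or_retry(), so just return and let
         * the signal be delivered. */
        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending_stub())
                return;

        /* Genuine errors are funnelled through one place, unconditionally. */
        if (fault & VM_FAULT_ERROR) {
                mm_fault_error_stub(fault);
                return;
        }

        printf("fault 0x%x completed or will be retried normally\n", fault);
}

int main(void)
{
        after_handle_mm_fault(0);
        after_handle_mm_fault(VM_FAULT_RETRY);
        after_handle_mm_fault(VM_FAULT_ERROR);
        return 0;
}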