From 4db0c3c2983cc6b7a08a33542af5e14de8a9258c Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 15 Apr 2015 16:14:08 -0700
Subject: mm: remove rest of ACCESS_ONCE() usages

We converted some of the usages of ACCESS_ONCE to READ_ONCE in the mm/
tree since ACCESS_ONCE doesn't work reliably on non-scalar types.  This
patch removes the rest of the ACCESS_ONCE usages and uses the new
READ_ONCE API for the read accesses.  This makes things cleaner: one
API instead of separate/multiple sets of APIs.

Signed-off-by: Jason Low
Acked-by: Michal Hocko
Acked-by: Davidlohr Bueso
Acked-by: Rik van Riel
Reviewed-by: Christian Borntraeger
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index ac20b2a..656593f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2845,7 +2845,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
 	struct vm_fault vmf;
 	int off;
 
-	nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
+	nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
 	mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
 
 	start_addr = max(address & mask, vma->vm_start);
--
cgit v1.1
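
For background on why the conversion matters: ACCESS_ONCE(x) casts the
object itself to volatile, which GCC does not reliably honour for
non-scalar (aggregate) types such as pte_t wrapper structs, whereas
READ_ONCE() copies the value out through a size-dispatched helper so
scalar loads stay single volatile loads.  The following is a minimal
userspace sketch of that idea, not the kernel's actual implementation;
the macro bodies are simplified and the fault_around_bytes stand-in is
illustrative only.

/*
 * Simplified userspace sketch of the ACCESS_ONCE() vs READ_ONCE() idea.
 * This is NOT the kernel implementation; names and values are
 * illustrative.  Requires GCC/Clang extensions (__typeof__, statement
 * expressions).
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Old style: cast the object itself to volatile.  For aggregate types,
 * GCC has been known to drop the volatile qualifier, so a single access
 * is not guaranteed. */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

/* New style: copy the value out through a helper that dispatches on
 * size, so scalars remain single volatile loads and anything else falls
 * back to a byte copy. */
static inline void __read_once_size(const volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(uint8_t  *)res = *(volatile uint8_t  *)p; break;
	case 2: *(uint16_t *)res = *(volatile uint16_t *)p; break;
	case 4: *(uint32_t *)res = *(volatile uint32_t *)p; break;
	case 8: *(uint64_t *)res = *(volatile uint64_t *)p; break;
	default: memcpy(res, (const void *)p, size); break;
	}
}

#define READ_ONCE(x)						\
({								\
	union { __typeof__(x) __val; char __c[1]; } __u;	\
	__read_once_size(&(x), __u.__c, sizeof(x));		\
	__u.__val;						\
})

static uint64_t fault_around_bytes = 65536;	/* stand-in for the mm/ variable */

int main(void)
{
	/* Same pattern as do_fault_around(): snapshot the value once. */
	uint64_t nr_pages = READ_ONCE(fault_around_bytes) >> 12;
	/* The old macro is fine here too, since uint64_t is scalar. */
	uint64_t old_way  = ACCESS_ONCE(fault_around_bytes) >> 12;

	printf("nr_pages = %" PRIu64 " (old way: %" PRIu64 ")\n",
	       nr_pages, old_way);
	return 0;
}

For a scalar like fault_around_bytes the generated load is the same
either way; the point of the conversion is that one API also covers the
non-scalar cases that ACCESS_ONCE mishandles.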
From 2682582a6ea118d974c33da64923ae8c687fdd0b Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Wed, 15 Apr 2015 16:15:08 -0700
Subject: mm/memory: also print a_ops->readpage in print_bad_pte()

A lot of filesystems use generic_file_mmap() and filemap_fault(), so
f_op->mmap and vm_ops->fault aren't enough to identify the filesystem.

This prints the file name, vm_ops->fault, f_op->mmap and a_ops->readpage
(which is almost always implemented and filesystem-specific).

Example:

[   23.676410] BUG: Bad page map in process sh pte:1b7e6025 pmd:19bbd067
[   23.676887] page:ffffea00006df980 count:4 mapcount:1 mapping:ffff8800196426c0 index:0x97
[   23.677481] flags: 0x10000000000000c(referenced|uptodate)
[   23.677896] page dumped because: bad pte
[   23.678205] addr:00007f52fcb17000 vm_flags:00000075 anon_vma: (null) mapping:ffff8800196426c0 index:97
[   23.678922] file:libc-2.19.so fault:filemap_fault mmap:generic_file_readonly_mmap readpage:v9fs_vfs_readpage

[akpm@linux-foundation.org: use pr_alert, per Kirill]
Signed-off-by: Konstantin Khlebnikov
Cc: Sasha Levin
Acked-by: Kirill A. Shutemov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index 656593f..f9628e5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -690,12 +690,11 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 	/*
 	 * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
 	 */
-	if (vma->vm_ops)
-		printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n",
-		       vma->vm_ops->fault);
-	if (vma->vm_file)
-		printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n",
-		       vma->vm_file->f_op->mmap);
+	pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n",
+		 vma->vm_file,
+		 vma->vm_ops ? vma->vm_ops->fault : NULL,
+		 vma->vm_file ? vma->vm_file->f_op->mmap : NULL,
+		 mapping ? mapping->a_ops->readpage : NULL);
 	dump_stack();
 	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
--
cgit v1.1
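
The reason a_ops->readpage is the useful discriminator: many
filesystems point f_op->mmap and vm_ops->fault at the generic helpers,
so those symbols say nothing about which filesystem backs the mapping,
while readpage is almost always a filesystem-specific function.  A
hypothetical sketch under assumed names (examplefs_* is made up; the
generic_file_* helpers and filemap_fault are real kernel functions):

/* Illustration only -- not a real filesystem. */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/* Filesystem-specific: print_bad_pte()'s "readpage:%pf" resolves to
 * this symbol, which immediately identifies the filesystem. */
static int examplefs_readpage(struct file *file, struct page *page)
{
	/* Placeholder body; a real implementation fills @page from storage. */
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

static const struct address_space_operations examplefs_aops = {
	.readpage	= examplefs_readpage,
};

/* Generic: "mmap:%pf" and "fault:%pf" typically resolve to
 * generic_file_readonly_mmap / filemap_fault, which are shared by many
 * filesystems and therefore identify none of them. */
static const struct file_operations examplefs_file_ops = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.mmap		= generic_file_readonly_mmap,
};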
From dd9061846a3ba01b0fa45423aaa087e4a69187fa Mon Sep 17 00:00:00 2001
From: Boaz Harrosh
Date: Wed, 15 Apr 2015 16:15:11 -0700
Subject: mm: new pfn_mkwrite same as page_mkwrite for VM_PFNMAP

This allows a filesystem that uses VM_PFNMAP | VM_MIXEDMAP (no page
structs) to get notified when a write access hits a read-only PFN.  This
can happen if we mmap() a file, first mmap-read from it to page-in a
read-only PFN, and then mmap-write to the same page.

We need this functionality to fix a DAX bug: in the scenario above we
fail to set ctime/mtime even though we modified the file.  An xfstest is
attached to this patchset that shows the failure and the fix.  (A DAX
patch will follow.)

This functionality is extra important for us because, upon dirtying of a
pmem page, we also want to RDMA the page to a remote cluster node.

We define a new pfn_mkwrite and do not reuse page_mkwrite because:

  1 - the name ;-)
  2 - but mainly because it would take a very long and tedious audit of
      all page_mkwrite implementations of VM_MIXEDMAP/VM_PFNMAP users to
      make sure they do not crash when called without a page.  For
      example, the current DAX code (which this is for) would crash.  To
      reuse page_mkwrite we would first have to patch all users so they
      tolerate a NULL page, and only then enable this patch -- and even
      then I would not sleep so well at night.  Adding a new vector is
      the safest thing to do, and it is not that expensive: an extra
      pointer in a static function vector per driver.  The new vector is
      also better for performance, because otherwise we would call every
      existing page_mkwrite implementation only for it to see there is
      no page, do nothing and return.

There is no need to call it from do_shared_fault() because do_wp_page()
is called to change the pte permissions anyway.

Signed-off-by: Yigal Korman
Signed-off-by: Boaz Harrosh
Acked-by: Kirill A. Shutemov
Cc: Matthew Wilcox
Cc: Jan Kara
Cc: Hugh Dickins
Cc: Mel Gorman
Cc: Dave Chinner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 43 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index f9628e5..22e037e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2180,6 +2180,42 @@ oom:
 	return VM_FAULT_OOM;
 }
 
+/*
+ * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
+ * mapping
+ */
+static int wp_pfn_shared(struct mm_struct *mm,
+			struct vm_area_struct *vma, unsigned long address,
+			pte_t *page_table, spinlock_t *ptl, pte_t orig_pte,
+			pmd_t *pmd)
+{
+	if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
+		struct vm_fault vmf = {
+			.page = NULL,
+			.pgoff = linear_page_index(vma, address),
+			.virtual_address = (void __user *)(address & PAGE_MASK),
+			.flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
+		};
+		int ret;
+
+		pte_unmap_unlock(page_table, ptl);
+		ret = vma->vm_ops->pfn_mkwrite(vma, &vmf);
+		if (ret & VM_FAULT_ERROR)
+			return ret;
+		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+		/*
+		 * We might have raced with another page fault while we
+		 * released the pte_offset_map_lock.
+		 */
+		if (!pte_same(*page_table, orig_pte)) {
+			pte_unmap_unlock(page_table, ptl);
+			return 0;
+		}
+	}
+	return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte,
+			     NULL, 0, 0);
+}
+
 static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *page_table, pmd_t *pmd,
 			spinlock_t *ptl, pte_t orig_pte,
@@ -2258,13 +2294,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * VM_PFNMAP VMA.
 		 *
 		 * We should not cow pages in a shared writeable mapping.
-		 * Just mark the pages writable as we can't do any dirty
-		 * accounting on raw pfn maps.
+		 * Just mark the pages writable and/or call ops->pfn_mkwrite.
 		 */
 		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 				     (VM_WRITE|VM_SHARED))
-			return wp_page_reuse(mm, vma, address, page_table, ptl,
-					     orig_pte, old_page, 0, 0);
+			return wp_pfn_shared(mm, vma, address, page_table, ptl,
+					     orig_pte, pmd);
 
 		pte_unmap_unlock(page_table, ptl);
 		return wp_page_copy(mm, vma, address, page_table, pmd,
--
cgit v1.1
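
To show how the new hook is meant to be consumed: a driver or DAX-style
filesystem that maps pfns without struct pages supplies .pfn_mkwrite in
its vm_operations_struct, and wp_pfn_shared() above calls it before the
pte is made writable, which is exactly the point at which the missed
ctime/mtime update can be done.  A hedged sketch with made-up
examplefs_* names follows (the real DAX handlers arrive in the
follow-up patches referenced in the changelog, and typically do more,
e.g. sb_start_pagefault/sb_end_pagefault bracketing):

/* Illustration only -- not a real driver or filesystem. */
#include <linux/fs.h>
#include <linux/mm.h>

static int examplefs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* Placeholder fault handler; a real one would insert the pfn. */
	return VM_FAULT_SIGBUS;
}

static int examplefs_pfn_mkwrite(struct vm_area_struct *vma,
				 struct vm_fault *vmf)
{
	/*
	 * First write to a read-only pfn mapping: update ctime/mtime.
	 * The pte itself is made writable by the caller (do_wp_page()
	 * via wp_pfn_shared() -> wp_page_reuse()), so returning 0 lets
	 * the generic code finish the job.
	 */
	file_update_time(vma->vm_file);
	return 0;
}

static const struct vm_operations_struct examplefs_vm_ops = {
	.fault		= examplefs_fault,
	.pfn_mkwrite	= examplefs_pfn_mkwrite,
};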