From 9a0dd42251439de635088b97533109a935864a84 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:39 +0100 Subject: dax: Allow dax_iomap_fault() to return pfn For synchronous page fault dax_iomap_fault() will need to return PFN which will then need to be inserted into page tables after fsync() completes. Add necessary parameter to dax_iomap_fault(). Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/xfs/xfs_file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 309e26c..7c6b8de 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1040,7 +1040,7 @@ __xfs_filemap_fault( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, pe_size, NULL, &xfs_iomap_ops); } else { if (write_fault) ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); @@ -1111,7 +1111,7 @@ xfs_filemap_pfn_mkwrite( if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else if (IS_DAX(inode)) - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &xfs_iomap_ops); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); sb_end_pagefault(inode->i_sb); return ret; -- cgit v1.1 From 7b565c9f965bac269cc6a1f25c819f77044abf9d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:46 +0100 Subject: xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault() xfs_filemap_pfn_mkwrite() duplicates a lot of __xfs_filemap_fault(). It will also need to handle flushing for synchronous page faults. So just make that function use __xfs_filemap_fault(). Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/xfs/xfs_file.c | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7c6b8de..4496b45 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1085,37 +1085,16 @@ xfs_filemap_page_mkwrite( } /* - * pfn_mkwrite was originally inteneded to ensure we capture time stamp - * updates on write faults. In reality, it's need to serialise against - * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED - * to ensure we serialise the fault barrier in place. + * pfn_mkwrite was originally intended to ensure we capture time stamp updates + * on write faults. In reality, it needs to serialise against truncate and + * prepare memory for writing so handle is as standard write fault. */ static int xfs_filemap_pfn_mkwrite( struct vm_fault *vmf) { - struct inode *inode = file_inode(vmf->vma->vm_file); - struct xfs_inode *ip = XFS_I(inode); - int ret = VM_FAULT_NOPAGE; - loff_t size; - - trace_xfs_filemap_pfn_mkwrite(ip); - - sb_start_pagefault(inode->i_sb); - file_update_time(vmf->vma->vm_file); - - /* check if the faulting page hasn't raced with truncate */ - xfs_ilock(ip, XFS_MMAPLOCK_SHARED); - size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (vmf->pgoff >= size) - ret = VM_FAULT_SIGBUS; - else if (IS_DAX(inode)) - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &xfs_iomap_ops); - xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); - sb_end_pagefault(inode->i_sb); - return ret; - + return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true); } static const struct vm_operations_struct xfs_file_vm_ops = { -- cgit v1.1 From a39e596baa07cb1dc19c2ead14c9fd2a30f22352 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Nov 2017 16:36:47 +0100 Subject: xfs: support for synchronous DAX faults Return IOMAP_F_DIRTY from xfs_file_iomap_begin() when asked to prepare blocks for writing and the inode is pinned, and has dirty fields other than the timestamps. In __xfs_filemap_fault() we then detect this case and call dax_finish_sync_fault() to make sure all metadata is committed, and to insert the page table entry. Note that this will also dirty corresponding radix tree entry which is what we want - fsync(2) will still provide data integrity guarantees for applications not using userspace flushing. And applications using userspace flushing can avoid calling fsync(2) and thus avoid the performance overhead. [JK: Added VM_SYNC flag handling] Reviewed-by: Ross Zwisler Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/xfs/xfs_file.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 4496b45..4827e82 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -44,6 +44,7 @@ #include #include #include +#include static const struct vm_operations_struct xfs_file_vm_ops; @@ -1040,7 +1041,11 @@ __xfs_filemap_fault( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_iomap_fault(vmf, pe_size, NULL, &xfs_iomap_ops); + pfn_t pfn; + + ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops); + if (ret & VM_FAULT_NEEDDSYNC) + ret = dax_finish_sync_fault(vmf, pe_size, pfn); } else { if (write_fault) ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); @@ -1110,6 +1115,13 @@ xfs_file_mmap( struct file *filp, struct vm_area_struct *vma) { + /* + * We don't support synchronous mappings for non-DAX files. At least + * until someone comes with a sensible use case. + */ + if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC)) + return -EOPNOTSUPP; + file_accessed(filp); vma->vm_ops = &xfs_file_vm_ops; if (IS_DAX(file_inode(filp))) @@ -1128,6 +1140,7 @@ const struct file_operations xfs_file_operations = { .compat_ioctl = xfs_file_compat_ioctl, #endif .mmap = xfs_file_mmap, + .mmap_supported_flags = MAP_SYNC, .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, -- cgit v1.1