From e7638488434415aa478e78435cac8f0365737638 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 May 2018 11:46:08 -0700 Subject: mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for fixing dax-dma-vs-unmap issues, filesystems need to be able to rely on the fact that they will get wakeups on dev_pagemap page-idle events. Introduce MEMORY_DEVICE_FS_DAX and generic_dax_page_free() as common indicator / infrastructure for dax filesytems to require. With this change there are no users of the MEMORY_DEVICE_HOST designation, so remove it. The HMM sub-system extended dev_pagemap to arrange a callback when a dev_pagemap managed page is freed. Since a dev_pagemap page is free / idle when its reference count is 1 it requires an additional branch to check the page-type at put_page() time. Given put_page() is a hot-path we do not want to incur that check if HMM is not in use, so a static branch is used to avoid that overhead when not necessary. Now, the FS_DAX implementation wants to reuse this mechanism for receiving dev_pagemap ->page_free() callbacks. Rework the HMM-specific static-key into a generic mechanism that either HMM or FS_DAX code paths can enable. For ARCH=um builds, and any other arch that lacks ZONE_DEVICE support, care must be taken to compile out the DEV_PAGEMAP_OPS infrastructure. However, we still need to support FS_DAX in the FS_DAX_LIMITED case implemented by the s390/dcssblk driver. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Michal Hocko Reported-by: kbuild test robot Reported-by: Thomas Meyer Reported-by: Dave Jiang Cc: "Jérôme Glisse" Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- mm/Kconfig | 5 +++++ mm/hmm.c | 13 ++----------- mm/swap.c | 3 ++- 3 files changed, 9 insertions(+), 12 deletions(-) (limited to 'mm') diff --git a/mm/Kconfig b/mm/Kconfig index d5004d8..bf9d636 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -692,6 +692,9 @@ config ARCH_HAS_HMM config MIGRATE_VMA_HELPER bool +config DEV_PAGEMAP_OPS + bool + config HMM bool select MIGRATE_VMA_HELPER @@ -712,6 +715,7 @@ config DEVICE_PRIVATE bool "Unaddressable device memory (GPU memory, ...)" depends on ARCH_HAS_HMM select HMM + select DEV_PAGEMAP_OPS help Allows creation of struct pages to represent unaddressable device @@ -722,6 +726,7 @@ config DEVICE_PUBLIC bool "Addressable device memory (like GPU memory)" depends on ARCH_HAS_HMM select HMM + select DEV_PAGEMAP_OPS help Allows creation of struct pages to represent addressable device diff --git a/mm/hmm.c b/mm/hmm.c index 486dc39..de7b6bf 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -35,15 +35,6 @@ #define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT) -#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) -/* - * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h - */ -DEFINE_STATIC_KEY_FALSE(device_private_key); -EXPORT_SYMBOL(device_private_key); -#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ - - #if IS_ENABLED(CONFIG_HMM_MIRROR) static const struct mmu_notifier_ops hmm_mmu_notifier_ops; @@ -1167,7 +1158,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, resource_size_t addr; int ret; - static_branch_enable(&device_private_key); + dev_pagemap_get_ops(); devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem), GFP_KERNEL, dev_to_node(device)); @@ -1261,7 +1252,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY) return ERR_PTR(-EINVAL); - static_branch_enable(&device_private_key); + dev_pagemap_get_ops(); devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem), GFP_KERNEL, dev_to_node(device)); diff --git a/mm/swap.c b/mm/swap.c index 3dd5188..26fc9b5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -743,7 +744,7 @@ void release_pages(struct page **pages, int nr) flags); locked_pgdat = NULL; } - put_zone_device_private_or_public_page(page); + put_devmap_managed_page(page); continue; } -- cgit v1.1 From a9b6de77b1a3ff729f7bfc54b2e17711776a416c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Apr 2018 21:32:19 -0700 Subject: mm: fix __gup_device_huge vs unmap get_user_pages_fast() for device pages is missing the typical validation that all page references have been taken while the mapping was valid. Without this validation truncate operations can not reliably coordinate against new page reference events like O_DIRECT. Cc: Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Reported-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- mm/gup.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/mm/gup.c b/mm/gup.c index 76af4cf..84dd206 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1456,32 +1456,48 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, return 1; } -static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, +static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, struct page **pages, int *nr) { unsigned long fault_pfn; + int nr_start = *nr; + + fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + if (!__gup_device_huge(fault_pfn, addr, end, pages, nr)) + return 0; - fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - return __gup_device_huge(fault_pfn, addr, end, pages, nr); + if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { + undo_dev_pagemap(nr, nr_start, pages); + return 0; + } + return 1; } -static int __gup_device_huge_pud(pud_t pud, unsigned long addr, +static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, unsigned long end, struct page **pages, int *nr) { unsigned long fault_pfn; + int nr_start = *nr; + + fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + if (!__gup_device_huge(fault_pfn, addr, end, pages, nr)) + return 0; - fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - return __gup_device_huge(fault_pfn, addr, end, pages, nr); + if (unlikely(pud_val(orig) != pud_val(*pudp))) { + undo_dev_pagemap(nr, nr_start, pages); + return 0; + } + return 1; } #else -static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, +static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, struct page **pages, int *nr) { BUILD_BUG(); return 0; } -static int __gup_device_huge_pud(pud_t pud, unsigned long addr, +static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr, unsigned long end, struct page **pages, int *nr) { BUILD_BUG(); @@ -1499,7 +1515,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, return 0; if (pmd_devmap(orig)) - return __gup_device_huge_pmd(orig, addr, end, pages, nr); + return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr); refs = 0; page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); @@ -1537,7 +1553,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 0; if (pud_devmap(orig)) - return __gup_device_huge_pud(orig, addr, end, pages, nr); + return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr); refs = 0; page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); -- cgit v1.1