From 167f5594b5efa20a26ff03b3424f793887e6b448 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 6 Mar 2018 15:56:47 +1100 Subject: kernel/memremap: Remove stale devres_free() call devm_memremap_pages() was re-worked in e8d513483300 "memremap: change devm_memremap_pages interface to use struct dev_pagemap" to take a caller allocated struct dev_pagemap as a function parameter. A call to devres_free() was left in the error cleanup path which results in a kernel panic if the remap fails for some reason. Remove it to fix the panic and let devm_memremap_pages() fail gracefully. Fixes: e8d513483300 ("memremap: change devm_memremap_pages interface...") Signed-off-by: Oliver O'Halloran Reviewed-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Signed-off-by: Dan Williams --- kernel/memremap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 4dd4274..895e6b7 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -427,7 +427,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) err_pfn_remap: err_radix: pgmap_radix_release(res, pgoff); - devres_free(pgmap); return ERR_PTR(error); } EXPORT_SYMBOL(devm_memremap_pages); -- cgit v1.1 From 3ffb0ba9b567a8efb9a04ed3d1ec15ff333ada22 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 5 Mar 2018 16:56:13 -0700 Subject: libnvdimm, {btt, blk}: do integrity setup before add_disk() Prior to 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") we needed to temporarily add a zero-capacity disk before registering for blk-integrity. But adding a zero-capacity disk caused the partition table scanning to bail early, and this resulted in partitions not coming up after a probe of the BTT or blk namespaces. We can now register for integrity before the disk has been added, and this fixes the rescan problems. Fixes: 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") Reported-by: Dariusz Dokupil Cc: Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 3 +-- drivers/nvdimm/btt.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 345acca..1bd7b37 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -278,8 +278,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) disk->queue = q; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name); - set_capacity(disk, 0); - device_add_disk(dev, disk); if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) return -ENOMEM; @@ -292,6 +290,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) } set_capacity(disk, available_disk_size >> SECTOR_SHIFT); + device_add_disk(dev, disk); revalidate_disk(disk); return 0; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 2ef544f..4b95ac5 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1545,8 +1545,6 @@ static int btt_blk_init(struct btt *btt) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); btt->btt_queue->queuedata = btt; - set_capacity(btt->btt_disk, 0); - device_add_disk(&btt->nd_btt->dev, btt->btt_disk); if (btt_meta_size(btt)) { int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); @@ -1558,6 +1556,7 @@ static int btt_blk_init(struct btt *btt) } } set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); + device_add_disk(&btt->nd_btt->dev, btt->btt_disk); btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; revalidate_disk(btt->btt_disk); -- cgit v1.1 From 0cbfeef230571b132d2ac2ea4346b200b311258f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Feb 2018 14:08:52 +0000 Subject: libnvdimm: remove redundant assignment to pointer 'dev' Pointer dev is being assigned a value that is never read, it is being re-assigned the same value later on, hence the initialization is redundant and can be removed. Cleans up clang warning: drivers/nvdimm/pfn_devs.c:307:17: warning: Value stored to 'dev' during its initialization is never read Signed-off-by: Colin Ian King Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index f5c4e8c..2f4d187 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -304,7 +304,7 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = { struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { - struct device *dev = &nd_pfn->dev; + struct device *dev; if (!nd_pfn) return NULL; -- cgit v1.1 From a7e6c7015bf3e0cb467a2f6c0e1de985ee1a0ecb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Mar 2018 21:36:22 -0700 Subject: x86, memremap: fix altmap accounting at free Commit 24b6d4164348 "mm: pass the vmem_altmap to vmemmap_free" converted the vmemmap_free() path to pass the altmap argument all the way through the call chain rather than looking it up based on the page. Unfortunately that ends up over freeing altmap allocated pages in some cases since free_pagetable() is used to free both memmap space and pte space, where only the memmap stored in huge pages uses altmap allocations. Given that altmap allocations for memmap space are special cased in vmemmap_populate_hugepages() add a symmetric / special case free_hugepage_table() to handle altmap freeing, and cleanup the unneeded passing of altmap to leaf functions that do not require it. Without this change the sanity check accounting in devm_memremap_pages_release() will throw a warning with the following signature. nd_pmem pfn10.1: devm_memremap_pages_release: failed to free all reserved pages WARNING: CPU: 44 PID: 3539 at kernel/memremap.c:310 devm_memremap_pages_release+0x1c7/0x220 CPU: 44 PID: 3539 Comm: ndctl Tainted: G L 4.16.0-rc1-linux-stable #7 RIP: 0010:devm_memremap_pages_release+0x1c7/0x220 [..] Call Trace: release_nodes+0x225/0x270 device_release_driver_internal+0x15d/0x210 bus_remove_device+0xe2/0x160 device_del+0x130/0x310 ? klist_release+0x56/0x100 ? nd_region_notify+0xc0/0xc0 [libnvdimm] device_unregister+0x16/0x60 This was missed in testing since not all configurations will trigger this warning. Fixes: 24b6d4164348 ("mm: pass the vmem_altmap to vmemmap_free") Reported-by: Jane Chu Cc: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/x86/mm/init_64.c | 60 ++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8b72923..af11a28 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -800,17 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order, - struct vmem_altmap *altmap) +static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; - if (altmap) { - vmem_altmap_free(altmap, nr_pages); - return; - } - /* bootmem page has reserved flag */ if (PageReserved(page)) { __ClearPageReserved(page); @@ -826,9 +820,17 @@ static void __meminit free_pagetable(struct page *page, int order, free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, +static void __meminit free_hugepage_table(struct page *page, struct vmem_altmap *altmap) { + if (altmap) + vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE); + else + free_pagetable(page, get_order(PMD_SIZE)); +} + +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ pte_t *pte; int i; @@ -839,14 +841,13 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0, altmap); + free_pagetable(pmd_page(*pmd), 0); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, - struct vmem_altmap *altmap) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) { pmd_t *pmd; int i; @@ -858,14 +859,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0, altmap); + free_pagetable(pud_page(*pud), 0); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, - struct vmem_altmap *altmap) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) { pud_t *pud; int i; @@ -877,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0, altmap); + free_pagetable(p4d_page(*p4d), 0); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -885,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - struct vmem_altmap *altmap, bool direct) + bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -916,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. */ if (!direct) - free_pagetable(pte_page(*pte), 0, altmap); + free_pagetable(pte_page(*pte), 0); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -939,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0, altmap); + free_pagetable(pte_page(*pte), 0); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -974,9 +974,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) - free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE), - altmap); + free_hugepage_table(pmd_page(*pmd), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -989,9 +988,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, page_addr = page_address(pmd_page(*pmd)); if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { - free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE), - altmap); + free_hugepage_table(pmd_page(*pmd), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -1003,8 +1001,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, altmap, direct); - free_pte_table(pte_base, pmd, altmap); + remove_pte_table(pte_base, addr, next, direct); + free_pte_table(pte_base, pmd); } /* Call free_pmd_table() in remove_pud_table(). */ @@ -1033,8 +1031,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE), - altmap); + get_order(PUD_SIZE)); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1048,8 +1045,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE), - altmap); + get_order(PUD_SIZE)); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1062,7 +1058,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, pmd_base = pmd_offset(pud, 0); remove_pmd_table(pmd_base, addr, next, direct, altmap); - free_pmd_table(pmd_base, pud, altmap); + free_pmd_table(pmd_base, pud); } if (direct) @@ -1094,7 +1090,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, * to adapt for boot-time switching between 4 and 5 level page tables. */ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d, altmap); + free_pud_table(pud_base, p4d); } if (direct) -- cgit v1.1 From dc9e0a9347e932e3fd3cd03e7ff241022ed6ea8a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 15 Mar 2018 19:49:14 -0700 Subject: acpi, numa: fix pxm to online numa node associations Commit 99759869faf1 "acpi: Add acpi_map_pxm_to_online_node()" added support for mapping a given proximity to its nearest, by SLIT distance, online node. However, it sometimes returns unexpected results due to the fact that it switches from comparing the PXM node to the last node that was closer than the current max. for_each_online_node(n) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; node = n; <---- from this point we're using the wrong node for node_distance() Fixes: 99759869faf1 ("acpi: Add acpi_map_pxm_to_online_node()") Cc: Reviewed-by: Toshi Kani Acked-by: Rafael J. Wysocki > Signed-off-by: Dan Williams --- drivers/acpi/numa.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 8ccaae3..8516760 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -103,25 +103,27 @@ int acpi_map_pxm_to_node(int pxm) */ int acpi_map_pxm_to_online_node(int pxm) { - int node, n, dist, min_dist; + int node, min_node; node = acpi_map_pxm_to_node(pxm); if (node == NUMA_NO_NODE) node = 0; + min_node = node; if (!node_online(node)) { - min_dist = INT_MAX; + int min_dist = INT_MAX, dist, n; + for_each_online_node(n) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; - node = n; + min_node = n; } } } - return node; + return min_node; } EXPORT_SYMBOL(acpi_map_pxm_to_online_node); -- cgit v1.1 From 896196dc4e419a9d0782404e0befac17d638fc01 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Mar 2018 14:06:23 -0700 Subject: libnvdimm, region: hide persistence_domain when unknown Similar to other region attributes, do not emit the persistence_domain attribute if its contents are empty. Fixes: 96c3a239054a ("libnvdimm: expose platform persistence attr...") Cc: Dave Jiang Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e6d0191..a8e9d42 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -593,6 +593,13 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) return 0; } + if (a == &dev_attr_persistence_domain.attr) { + if ((nd_region->flags & (BIT(ND_REGION_PERSIST_CACHE) + | BIT(ND_REGION_PERSIST_MEMCTRL))) == 0) + return 0; + return a->mode; + } + if (a != &dev_attr_set_cookie.attr && a != &dev_attr_available_size.attr) return a->mode; -- cgit v1.1 From fe9a552e715dfe5167d52deb74ea16335896bdaf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Mar 2018 15:12:07 -0700 Subject: libnvdimm, nfit: fix persistence domain reporting The persistence domain is a point in the platform where once writes reach that destination the platform claims it will make them persistent relative to power loss. In the ACPI NFIT this is currently communicated as 2 bits in the "NFIT - Platform Capabilities Structure". The bits comprise a hierarchy, i.e. bit0 "CPU Cache Flush to NVDIMM Durability on Power Loss Capable" implies bit1 "Memory Controller Flush to NVDIMM Durability on Power Loss Capable". Commit 96c3a239054a "libnvdimm: expose platform persistence attr..." shows the persistence domain as flags, but it's really an enumerated hierarchy. Fix this newly introduced user ABI to show the closest available persistence domain before userspace develops dependencies on seeing, or needing to develop code to tolerate, the raw NFIT flags communicated through the libnvdimm-generic region attribute. Fixes: 96c3a239054a ("libnvdimm: expose platform persistence attr...") Reviewed-by: Dave Jiang Cc: "Rafael J. Wysocki" Cc: Ross Zwisler Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 10 +++++++--- drivers/nvdimm/region_devs.c | 10 ++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index bbe48ad..eb09ef5 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2675,10 +2675,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, else ndr_desc->numa_node = NUMA_NO_NODE; - if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH) + /* + * Persistence domain bits are hierarchical, if + * ACPI_NFIT_CAPABILITY_CACHE_FLUSH is set then + * ACPI_NFIT_CAPABILITY_MEM_FLUSH is implied. + */ + if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH) set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags); - - if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH) + else if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH) set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags); list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index a8e9d42..1593e18 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -532,11 +532,13 @@ static ssize_t persistence_domain_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nd_region *nd_region = to_nd_region(dev); - unsigned long flags = nd_region->flags; - return sprintf(buf, "%s%s\n", - flags & BIT(ND_REGION_PERSIST_CACHE) ? "cpu_cache " : "", - flags & BIT(ND_REGION_PERSIST_MEMCTRL) ? "memory_controller " : ""); + if (test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags)) + return sprintf(buf, "cpu_cache\n"); + else if (test_bit(ND_REGION_PERSIST_MEMCTRL, &nd_region->flags)) + return sprintf(buf, "memory_controller\n"); + else + return sprintf(buf, "\n"); } static DEVICE_ATTR_RO(persistence_domain); -- cgit v1.1