From 1c29f25bf5d6c557017f619b638c619cbbf798c4 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:28 +0100 Subject: memremap: Change region_intersects() to take @flags and @desc Change region_intersects() to identify a target with @flags and @desc, instead of @name with strcmp(). Change the callers of region_intersects(), memremap() and devm_memremap(), to set IORESOURCE_SYSTEM_RAM in @flags and IORES_DESC_NONE in @desc when searching System RAM. Also, export region_intersects() so that the ACPI EINJ error injection driver can call this function in a later patch. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Acked-by: Dan Williams Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jakub Sitnicki Cc: Jan Kara Cc: Jiang Liu Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Cc: Toshi Kani Cc: Vlastimil Babka Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- kernel/memremap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel/memremap.c') diff --git a/kernel/memremap.c b/kernel/memremap.c index e517a16..293309c 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -47,7 +47,7 @@ static void *try_ram_remap(resource_size_t offset, size_t size) * being mapped does not have i/o side effects and the __iomem * annotation is not applicable. * - * MEMREMAP_WB - matches the default mapping for "System RAM" on + * MEMREMAP_WB - matches the default mapping for System RAM on * the architecture. This is usually a read-allocate write-back cache. * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM * memremap() will bypass establishing a new mapping and instead return @@ -56,11 +56,12 @@ static void *try_ram_remap(resource_size_t offset, size_t size) * MEMREMAP_WT - establish a mapping whereby writes either bypass the * cache or are written through to memory and never exist in a * cache-dirty state with respect to program visibility. Attempts to - * map "System RAM" with this mapping type will fail. + * map System RAM with this mapping type will fail. */ void *memremap(resource_size_t offset, size_t size, unsigned long flags) { - int is_ram = region_intersects(offset, size, "System RAM"); + int is_ram = region_intersects(offset, size, + IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); void *addr = NULL; if (is_ram == REGION_MIXED) { @@ -76,7 +77,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags) * MEMREMAP_WB is special in that it can be satisifed * from the direct map. Some archs depend on the * capability of memremap() to autodetect cases where - * the requested range is potentially in "System RAM" + * the requested range is potentially in System RAM. */ if (is_ram == REGION_INTERSECTS) addr = try_ram_remap(offset, size); @@ -88,7 +89,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags) * If we don't have a mapping yet and more request flags are * pending then we will be attempting to establish a new virtual * address mapping. Enforce that this mapping is not aliasing - * "System RAM" + * System RAM. */ if (!addr && is_ram == REGION_INTERSECTS && flags) { WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n", @@ -266,7 +267,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap) { int is_ram = region_intersects(res->start, resource_size(res), - "System RAM"); + IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); resource_size_t key, align_start, align_size; struct dev_pagemap *pgmap; struct page_map *page_map; -- cgit v1.1 From d77a117e6871ff78a06def46583d23752593de60 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 Mar 2016 14:08:10 -0800 Subject: list: kill list_force_poison() Given we have uninitialized list_heads being passed to list_add() it will always be the case that those uninitialized values randomly trigger the poison value. Especially since a list_add() operation will seed the stack with the poison value for later stack allocations to trip over. For example, see these two false positive reports: list_add attempted on force-poisoned entry WARNING: at lib/list_debug.c:34 [..] NIP [c00000000043c390] __list_add+0xb0/0x150 LR [c00000000043c38c] __list_add+0xac/0x150 Call Trace: __list_add+0xac/0x150 (unreliable) __down+0x4c/0xf8 down+0x68/0x70 xfs_buf_lock+0x4c/0x150 [xfs] list_add attempted on force-poisoned entry(0000000000000500), new->next == d0000000059ecdb0, new->prev == 0000000000000500 WARNING: at lib/list_debug.c:33 [..] NIP [c00000000042db78] __list_add+0xa8/0x140 LR [c00000000042db74] __list_add+0xa4/0x140 Call Trace: __list_add+0xa4/0x140 (unreliable) rwsem_down_read_failed+0x6c/0x1a0 down_read+0x58/0x60 xfs_log_commit_cil+0x7c/0x600 [xfs] Fixes: commit 5c2c2587b132 ("mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup") Signed-off-by: Dan Williams Reported-by: Eryu Guan Tested-by: Eryu Guan Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel/memremap.c') diff --git a/kernel/memremap.c b/kernel/memremap.c index b981a7b..778191e 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -351,8 +351,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, for_each_device_pfn(pfn, page_map) { struct page *page = pfn_to_page(pfn); - /* ZONE_DEVICE pages must never appear on a slab lru */ - list_force_poison(&page->lru); + /* + * ZONE_DEVICE pages union ->lru with a ->pgmap back + * pointer. It is a bug if a ZONE_DEVICE page is ever + * freed or placed on a driver-private list. Seed the + * storage with LIST_POISON* values. + */ + list_del(&page->lru); page->pgmap = pgmap; } devres_add(dev, page_map); -- cgit v1.1 From 5f29a77cd95771edebbf41e5800fdd557c69302a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 Mar 2016 14:08:13 -0800 Subject: mm: fix mixed zone detection in devm_memremap_pages The check for whether we overlap "System RAM" needs to be done at section granularity. For example a system with the following mapping: 100000000-37bffffff : System RAM 37c000000-837ffffff : Persistent Memory ...is unable to use devm_memremap_pages() as it would result in two zones colliding within a given section. Signed-off-by: Dan Williams Cc: Ross Zwisler Reviewed-by: Toshi Kani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/memremap.c') diff --git a/kernel/memremap.c b/kernel/memremap.c index 778191e..60baf4d3 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -270,13 +270,16 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys) void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap) { - int is_ram = region_intersects(res->start, resource_size(res), - "System RAM"); resource_size_t key, align_start, align_size, align_end; struct dev_pagemap *pgmap; struct page_map *page_map; + int error, nid, is_ram; unsigned long pfn; - int error, nid; + + align_start = res->start & ~(SECTION_SIZE - 1); + align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) + - align_start; + is_ram = region_intersects(align_start, align_size, "System RAM"); if (is_ram == REGION_MIXED) { WARN_ONCE(1, "%s attempted on mixed region %pr\n", @@ -314,8 +317,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, mutex_lock(&pgmap_lock); error = 0; - align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(resource_size(res), SECTION_SIZE); align_end = align_start + align_size - 1; for (key = align_start; key <= align_end; key += SECTION_SIZE) { struct dev_pagemap *dup; -- cgit v1.1 From ac343e882a8377caef5fa75d9093cb77e9d4bf6d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 9 Mar 2016 14:08:32 -0800 Subject: memremap: check pfn validity before passing to pfn_to_page() In memremap's helper function try_ram_remap(), we dereference a struct page pointer that was derived from a PFN that is known to be covered by a 'System RAM' iomem region, and is thus assumed to be a 'valid' PFN, i.e., a PFN that has a struct page associated with it and is covered by the kernel direct mapping. However, the assumption that there is a 1:1 relation between the System RAM iomem region and the kernel direct mapping is not universally valid on all architectures, and on ARM and arm64, 'System RAM' may include regions for which pfn_valid() returns false. Generally speaking, both __va() and pfn_to_page() should only ever be called on PFNs/physical addresses for which pfn_valid() returns true, so add that check to try_ram_remap(). Signed-off-by: Ard Biesheuvel Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/memremap.c') diff --git a/kernel/memremap.c b/kernel/memremap.c index 60baf4d3..6cf5461 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -29,10 +29,10 @@ __weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size) static void *try_ram_remap(resource_size_t offset, size_t size) { - struct page *page = pfn_to_page(offset >> PAGE_SHIFT); + unsigned long pfn = PHYS_PFN(offset); /* In the simple case just return the existing linear address */ - if (!PageHighMem(page)) + if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn))) return __va(offset); return NULL; /* fallback to ioremap_cache */ } -- cgit v1.1