summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Michal Hocko <mhocko@suse.com> 2017-07-06 15:38:18 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-07-06 16:24:32 -0700
commit: c246a213f5bad687c6c2cea27d7265eaf8f6f5d7 (patch)
tree: f3d1f51d6eed2b3b97b5d85e981aa748e9bc70f9
parent: a69578a154ee1c00b572171f5bb5da7a83f9cd77 (diff)
download: op-kernel-dev-c246a213f5bad687c6c2cea27d7265eaf8f6f5d7.zip
download: op-kernel-dev-c246a213f5bad687c6c2cea27d7265eaf8f6f5d7.tar.gz
mm, memory_hotplug: do not assume ZONE_NORMAL is default kernel zone
Heiko Carstens has noticed that he can generate overlapping zones for
ZONE_DMA and ZONE_NORMAL:

  DMA      [mem 0x0000000000000000-0x000000007fffffff]
  Normal   [mem 0x0000000080000000-0x000000017fffffff]

  $ cat /sys/devices/system/memory/block_size_bytes
  10000000
  $ cat /sys/devices/system/memory/memory5/valid_zones
  DMA
  $ echo 0 > /sys/devices/system/memory/memory5/online
  $ cat /sys/devices/system/memory/memory5/valid_zones
  Normal
  $ echo 1 > /sys/devices/system/memory/memory5/online
  Normal

  $ cat /proc/zoneinfo
  Node 0, zone      DMA
  spanned  524288        <-----
  present  458752
  managed  455078
  start_pfn:           0 <-----

  Node 0, zone   Normal
  spanned  720896
  present  589824
  managed  571648
  start_pfn:           327680 <-----

The reason is that we assume that the default zone for kernel onlining
is ZONE_NORMAL. This was a simplification introduced by the memory
hotplug rework and it is easily fixable by checking the range overlap in
the zone order and considering the first matching zone as the default
one. If there is no such zone then assume ZONE_NORMAL as we have been
doing so far.

Fixes: "mm, memory_hotplug: do not associate hotadded memory to zones until online"
Link: http://lkml.kernel.org/r/20170601083746.4924-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/base/memory.c 2
-rw-r--r-- include/linux/memory_hotplug.h 2
-rw-r--r-- mm/memory_hotplug.c 27
3 files changed, 27 insertions, 4 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index b86fda3..c7c4e03 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -419,7 +419,7 @@ static ssize_t show_valid_zones(struct device *dev,
nid = pfn_to_nid(start_pfn);
if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL)) {
- strcat(buf, NODE_DATA(nid)->node_zones[ZONE_NORMAL].name);
+ strcat(buf, default_zone_for_pfn(nid, start_pfn, nr_pages)->name);
append = true;
}
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8a07a49..4d65a2f 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -311,4 +311,6 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
int online_type);
+extern struct zone *default_zone_for_pfn(int nid, unsigned long pfn,
+ unsigned long nr_pages);
#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1a20e44..4263fa6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1028,7 +1028,7 @@ bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
{
struct pglist_data *pgdat = NODE_DATA(nid);
struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
- struct zone *normal_zone = &pgdat->node_zones[ZONE_NORMAL];
+ struct zone *default_zone = default_zone_for_pfn(nid, pfn, nr_pages);
/*
* TODO there shouldn't be any inherent reason to have ZONE_NORMAL
@@ -1042,7 +1042,7 @@ bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
return true;
return movable_zone->zone_start_pfn >= pfn + nr_pages;
} else if (online_type == MMOP_ONLINE_MOVABLE) {
- return zone_end_pfn(normal_zone) <= pfn;
+ return zone_end_pfn(default_zone) <= pfn;
}
/* MMOP_ONLINE_KEEP will always succeed and inherits the current zone */
@@ -1103,6 +1103,27 @@ void move_pfn_range_to_zone(struct zone *zone,
}
/*
+ * Returns a default kernel memory zone for the given pfn range.
+ * If no kernel zone covers this pfn range it will automatically go
+ * to the ZONE_NORMAL.
+ */
+struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ struct pglist_data *pgdat = NODE_DATA(nid);
+ int zid;
+
+ for (zid = 0; zid <= ZONE_NORMAL; zid++) {
+ struct zone *zone = &pgdat->node_zones[zid];
+
+ if (zone_intersects(zone, start_pfn, nr_pages))
+ return zone;
+ }
+
+ return &pgdat->node_zones[ZONE_NORMAL];
+}
+
+/*
* Associates the given pfn range with the given node and the zone appropriate
* for the given online type.
*/
@@ -1110,7 +1131,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
unsigned long start_pfn, unsigned long nr_pages)
{
struct pglist_data *pgdat = NODE_DATA(nid);
- struct zone *zone = &pgdat->node_zones[ZONE_NORMAL];
+ struct zone *zone = default_zone_for_pfn(nid, start_pfn, nr_pages);
if (online_type == MMOP_ONLINE_KEEP) {
struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
OpenPOWER on IntegriCloud