diff options
author | Yasunori Goto <y-goto@jp.fujitsu.com> | 2006-06-23 02:03:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-23 07:42:46 -0700 |
commit | 6811378e7d8b9aa4fca2a1ca73d24c9d67c9cb12 (patch) | |
tree | 37f2f5a2bf2e60848a571f8f43685c7406d7b238 | |
parent | cca448fe92246fb59efe55ba2e048ded0971a9af (diff) | |
download | op-kernel-dev-6811378e7d8b9aa4fca2a1ca73d24c9d67c9cb12.zip op-kernel-dev-6811378e7d8b9aa4fca2a1ca73d24c9d67c9cb12.tar.gz |
[PATCH] wait_table and zonelist initializing for memory hotadd: update zonelists
In the current code, zonelists are assumed to be built once and never modified.
But memory hotplug can add a new zone/pgdat, so the zonelists must be updated.
This patch modifies build_all_zonelists() so that it can also be used to
reconfigure the pgdats' zonelists.
To update them safely, this patch uses stop_machine_run(), so that other CPUs
don't touch the zonelists while they are being updated.
In the old version (V2 of node hotadd), the kernel updated them after zone
initialization. But present_pages of the new zone is still 0 at that point,
because online_page() has not been called yet. build_zonelists() checks
present_pages to find populated zones, so that was too early. So I moved the
update to the end of online_pages(), after the pages have been onlined.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | mm/memory_hotplug.c | 12 | ||||
-rw-r--r-- | mm/page_alloc.c | 26 |
2 files changed, 33 insertions, 5 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 71da5c9..1b1ac3d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -127,6 +127,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) unsigned long flags; unsigned long onlined_pages = 0; struct zone *zone; + int need_zonelists_rebuild = 0; /* * This doesn't need a lock to do pfn_to_page(). @@ -139,6 +140,14 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages); pgdat_resize_unlock(zone->zone_pgdat, &flags); + /* + * If this zone is not populated, then it is not in zonelist. + * This means the page allocator ignores this zone. + * So, zonelist must be updated after online. + */ + if (!populated_zone(zone)) + need_zonelists_rebuild = 1; + for (i = 0; i < nr_pages; i++) { struct page *page = pfn_to_page(pfn + i); online_page(page); @@ -149,5 +158,8 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) setup_per_zone_pages_min(); + if (need_zonelists_rebuild) + build_all_zonelists(); + return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 62564e2..9dfbe6b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -37,6 +37,7 @@ #include <linux/nodemask.h> #include <linux/vmalloc.h> #include <linux/mempolicy.h> +#include <linux/stop_machine.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -1704,14 +1705,29 @@ static void __meminit build_zonelists(pg_data_t *pgdat) #endif /* CONFIG_NUMA */ -void __init build_all_zonelists(void) +/* return values int ....just for stop_machine_run() */ +static int __meminit __build_all_zonelists(void *dummy) { - int i; + int nid; + for_each_online_node(nid) + build_zonelists(NODE_DATA(nid)); + return 0; +} + +void __meminit build_all_zonelists(void) +{ + if (system_state == SYSTEM_BOOTING) { + __build_all_zonelists(0); + cpuset_init_current_mems_allowed(); + } else { + /* we have to stop all cpus to guaranntee there is no user + of zonelist */ + 
stop_machine_run(__build_all_zonelists, NULL, NR_CPUS); + /* cpuset refresh routine should be here */ + } - for_each_online_node(i) - build_zonelists(NODE_DATA(i)); printk("Built %i zonelists\n", num_online_nodes()); - cpuset_init_current_mems_allowed(); + } /* |