diff options
author | Yinghai Lu <yhlu.kernel@gmail.com> | 2008-03-18 12:52:37 -0700 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-26 22:51:08 +0200 |
commit | 1a27fc0a42162964d758e9d36d2d1b49c082a67c (patch) | |
tree | b94c4864edd0869d8ab7b25b9c3942a14bb6f9db /arch | |
parent | 8b3cd09ed23049fcb02479c6286744b36324ac9d (diff) | |
download | op-kernel-dev-1a27fc0a42162964d758e9d36d2d1b49c082a67c.zip op-kernel-dev-1a27fc0a42162964d758e9d36d2d1b49c082a67c.tar.gz |
x86_64: fix setup_node_bootmem to support big mem excluding with memmap
typical case: four sockets system, every node has 4g ram, and we are using:
memmap=10g$4g
to mask out memory on node1 and node2
when numa is enabled, early_node_mem is used to get node_data and node_bootmap.
if it can not get memory from the same node with find_e820_area(), it will
use alloc_bootmem to get buff from previous nodes.
so check it and print out some info about it.
need to move early_res_to_bootmem into every setup_node_bootmem.
and it takes range that node has. otherwise alloc_bootmem could return addr
that reserved early.
depends on "mm: make reserve_bootmem can crossed the nodes".
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/e820_64.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/setup_64.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 42 |
3 files changed, 46 insertions, 12 deletions
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 79f0d52..645ee5e 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end) early_res[j - 1].end = 0; } -void __init early_res_to_bootmem(void) +void __init early_res_to_bootmem(unsigned long start, unsigned long end) { int i; + unsigned long final_start, final_end; for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { struct early_res *r = &early_res[i]; - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, - r->start, r->end - 1, r->name); - reserve_bootmem_generic(r->start, r->end - r->start); + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, + final_start, final_end - 1, r->name); + reserve_bootmem_generic(final_start, final_end - final_start); } } diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b04e2c0..60e64c8 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); e820_register_active_regions(0, start_pfn, end_pfn); free_bootmem_with_active_regions(0, end_pfn); + early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); } #endif @@ -421,8 +422,6 @@ void __init setup_arch(char **cmdline_p) contig_initmem_init(0, end_pfn); #endif - early_res_to_bootmem(); - dma32_reserve_bootmem(); #ifdef CONFIG_ACPI_SLEEP diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9a68922..c5066d5 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long bootmap_start, nodedata_phys; void *bootmap; const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); + int nid; start = round_up(start, ZONE_ALIGN); @@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; - /* Find a place for the bootmem map */ + /* + * Find a place for the bootmem map + * nodedata_phys could be on other nodes by alloc_bootmem, + * so need to sure bootmap_start not to be small, otherwise + * early_node_mem will get that with find_e820_area instead + * of alloc_bootmem, that could clash with reserved range + */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + nid = phys_to_nid(nodedata_phys); + if (nid == nodeid) + bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + else + bootmap_start = round_up(start, PAGE_SIZE); /* * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE @@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, free_bootmem_with_active_regions(nodeid, end); - reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, - BOOTMEM_DEFAULT); - reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, - bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); + /* + * convert early reserve to bootmem reserve earlier + * otherwise early_node_mem could use early reserved mem + * on previous node + */ + early_res_to_bootmem(start, end); + + /* + * in some case early_node_mem could use alloc_bootmem + * to get range on other node, don't reserve that again + */ + if (nid != nodeid) + printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); + else + reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, + pgdat_size, BOOTMEM_DEFAULT); + nid = phys_to_nid(bootmap_start); + if (nid != nodeid) + printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); + else + reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, + bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); + #ifdef CONFIG_ACPI_NUMA srat_reserve_add_area(nodeid); #endif |