diff options
author | Yinghai Lu <yinghai@kernel.org> | 2010-02-10 01:20:22 -0800 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-02-12 09:42:38 -0800 |
commit | 9bdac914240759457175ac0d6529a37d2820bc4d (patch) | |
tree | 8fb7d26a351d2cd526835f1494ebeb818e988abb /mm/sparse-vmemmap.c | |
parent | a4322e1bad91fbca27056fc38d2cbca3f1eae0cf (diff) | |
download | op-kernel-dev-9bdac914240759457175ac0d6529a37d2820bc4d.zip op-kernel-dev-9bdac914240759457175ac0d6529a37d2820bc4d.tar.gz |
sparsemem: Put mem map for one node together.
Add vmemmap_alloc_block_buf for mem map only.
It will fallback to the old way if it cannot get a block that big.
Before this patch, when a node have 128g ram installed, memmap are
split into two parts or more.
[ 0.000000] [ffffea0000000000-ffffea003fffffff] PMD -> [ffff880100600000-ffff88013e9fffff] on node 1
[ 0.000000] [ffffea0040000000-ffffea006fffffff] PMD -> [ffff88013ec00000-ffff88016ebfffff] on node 1
[ 0.000000] [ffffea0070000000-ffffea007fffffff] PMD -> [ffff882000600000-ffff8820105fffff] on node 0
[ 0.000000] [ffffea0080000000-ffffea00bfffffff] PMD -> [ffff882010800000-ffff8820507fffff] on node 0
[ 0.000000] [ffffea00c0000000-ffffea00dfffffff] PMD -> [ffff882050a00000-ffff8820709fffff] on node 0
[ 0.000000] [ffffea00e0000000-ffffea00ffffffff] PMD -> [ffff884000600000-ffff8840205fffff] on node 2
[ 0.000000] [ffffea0100000000-ffffea013fffffff] PMD -> [ffff884020800000-ffff8840607fffff] on node 2
[ 0.000000] [ffffea0140000000-ffffea014fffffff] PMD -> [ffff884060a00000-ffff8840709fffff] on node 2
[ 0.000000] [ffffea0150000000-ffffea017fffffff] PMD -> [ffff886000600000-ffff8860305fffff] on node 3
[ 0.000000] [ffffea0180000000-ffffea01bfffffff] PMD -> [ffff886030800000-ffff8860707fffff] on node 3
[ 0.000000] [ffffea01c0000000-ffffea01ffffffff] PMD -> [ffff888000600000-ffff8880405fffff] on node 4
[ 0.000000] [ffffea0200000000-ffffea022fffffff] PMD -> [ffff888040800000-ffff8880707fffff] on node 4
[ 0.000000] [ffffea0230000000-ffffea023fffffff] PMD -> [ffff88a000600000-ffff88a0105fffff] on node 5
[ 0.000000] [ffffea0240000000-ffffea027fffffff] PMD -> [ffff88a010800000-ffff88a0507fffff] on node 5
[ 0.000000] [ffffea0280000000-ffffea029fffffff] PMD -> [ffff88a050a00000-ffff88a0709fffff] on node 5
[ 0.000000] [ffffea02a0000000-ffffea02bfffffff] PMD -> [ffff88c000600000-ffff88c0205fffff] on node 6
[ 0.000000] [ffffea02c0000000-ffffea02ffffffff] PMD -> [ffff88c020800000-ffff88c0607fffff] on node 6
[ 0.000000] [ffffea0300000000-ffffea030fffffff] PMD -> [ffff88c060a00000-ffff88c0709fffff] on node 6
[ 0.000000] [ffffea0310000000-ffffea033fffffff] PMD -> [ffff88e000600000-ffff88e0305fffff] on node 7
[ 0.000000] [ffffea0340000000-ffffea037fffffff] PMD -> [ffff88e030800000-ffff88e0707fffff] on node 7
after patch will get
[ 0.000000] [ffffea0000000000-ffffea006fffffff] PMD -> [ffff880100200000-ffff88016e5fffff] on node 0
[ 0.000000] [ffffea0070000000-ffffea00dfffffff] PMD -> [ffff882000200000-ffff8820701fffff] on node 1
[ 0.000000] [ffffea00e0000000-ffffea014fffffff] PMD -> [ffff884000200000-ffff8840701fffff] on node 2
[ 0.000000] [ffffea0150000000-ffffea01bfffffff] PMD -> [ffff886000200000-ffff8860701fffff] on node 3
[ 0.000000] [ffffea01c0000000-ffffea022fffffff] PMD -> [ffff888000200000-ffff8880701fffff] on node 4
[ 0.000000] [ffffea0230000000-ffffea029fffffff] PMD -> [ffff88a000200000-ffff88a0701fffff] on node 5
[ 0.000000] [ffffea02a0000000-ffffea030fffffff] PMD -> [ffff88c000200000-ffff88c0701fffff] on node 6
[ 0.000000] [ffffea0310000000-ffffea037fffffff] PMD -> [ffff88e000200000-ffff88e0701fffff] on node 7
-v2: change buf to vmemmap_buf instead according to Ingo
also add CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER according to Ingo
-v3: according to Andrew, use sizeof(name) instead of hard coded 15
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-19-git-send-email-yinghai@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'mm/sparse-vmemmap.c')
-rw-r--r-- | mm/sparse-vmemmap.c | 74 |
1 files changed, 73 insertions, 1 deletions
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 9506c39..392b9bb 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -43,6 +43,8 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node, return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); } +static void *vmemmap_buf; +static void *vmemmap_buf_end; void * __meminit vmemmap_alloc_block(unsigned long size, int node) { @@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) __pa(MAX_DMA_ADDRESS)); } +/* need to make sure size is all the same during early stage */ +void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) +{ + void *ptr; + + if (!vmemmap_buf) + return vmemmap_alloc_block(size, node); + + /* take the from buf */ + ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size); + if (ptr + size > vmemmap_buf_end) + return vmemmap_alloc_block(size, node); + + vmemmap_buf = ptr + size; + + return ptr; +} + void __meminit vmemmap_verify(pte_t *pte, int node, unsigned long start, unsigned long end) { @@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) pte_t *pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { pte_t entry; - void *p = vmemmap_alloc_block(PAGE_SIZE, node); + void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); if (!p) return NULL; entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); @@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) return map; } + +void __init sparse_mem_maps_populate_node(struct page **map_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long map_count, int nodeid) +{ + unsigned long pnum; + unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; + void *vmemmap_buf_start; + + size = ALIGN(size, PMD_SIZE); + vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count, + PMD_SIZE, __pa(MAX_DMA_ADDRESS)); + + if (vmemmap_buf_start) { + vmemmap_buf = vmemmap_buf_start; + vmemmap_buf_end = vmemmap_buf_start + size * map_count; + } + + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + struct mem_section *ms; + + if (!present_section_nr(pnum)) + continue; + + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + if (map_map[pnum]) + continue; + ms = __nr_to_section(pnum); + printk(KERN_ERR "%s: sparsemem memory map backing failed " + "some memory will not be available.\n", __func__); + ms->section_mem_map = 0; + } + + if (vmemmap_buf_start) { + /* need to free left buf */ +#ifdef CONFIG_NO_BOOTMEM + free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end)); + if (vmemmap_buf_start < vmemmap_buf) { + char name[15]; + + snprintf(name, sizeof(name), "MEMMAP %d", nodeid); + reserve_early_without_check(__pa(vmemmap_buf_start), + __pa(vmemmap_buf), name); + } +#else + free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); +#endif + vmemmap_buf = NULL; + vmemmap_buf_end = NULL; + } +} |