diff options
author | Yinghai Lu <yinghai@kernel.org> | 2010-08-25 13:39:17 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-08-27 11:12:29 -0700 |
commit | 72d7c3b33c980843e756681fb4867dc1efd62a76 (patch) | |
tree | 9607345d9fa055dd501aacf0772258fb72897035 /arch/x86/kernel | |
parent | 301ff3e88ef9ff4bdb92f36a3e6170fce4c9dd34 (diff) | |
download | op-kernel-dev-72d7c3b33c980843e756681fb4867dc1efd62a76.zip op-kernel-dev-72d7c3b33c980843e756681fb4867dc1efd62a76.tar.gz |
x86: Use memblock to replace early_res
1. replace find_e820_area with memblock_find_in_range
2. replace reserve_early with memblock_x86_reserve_range
3. replace free_early with memblock_x86_free_range.
4. NO_BOOTMEM will switch to use memblock too.
5. keep the _e820 and _early wrappers in this patch; a following patch will
replace them all
6. because memblock_x86_free_range supports partial frees, we can remove some special-case handling
7. Need to make sure that memblock_find_in_range() is called after memblock_x86_fill(),
so move some calls later in setup.c::setup_arch()
-- corruption_check and mptable_update
-v2: Move reserve_brk() earlier
Do it before fill_memblock_area(), to avoid overlap between brk and memblock_find_in_range().
That could happen when we have more than 128 RAM entries in the E820 table, because
memblock_x86_fill() could use memblock_find_in_range() to find a new place for the
memblock.memory.region array.
We also don't need to use extend_brk() after fill_memblock_area(),
so move reserve_brk() to before fill_memblock_area().
-v3: Move find_smp_config earlier
To make sure memblock_find_in_range() does not pick the wrong place if the BIOS doesn't put
the mptable in the right place.
-v4: Treat RESERVED_KERN as RAM in memblock.memory; those ranges are already in
memblock.reserved.
Use __NOT_KEEP_MEMBLOCK to make sure memblock related code can be freed later.
-v5: The generic version of __memblock_find_in_range() goes from high to low, and on 32bit
the active regions do include high pages, so we
need to replace the limit with memblock.default_alloc_limit, aka get_max_mapped()
-v6: Use current_limit instead
-v7: check with MEMBLOCK_ERROR instead of -1ULL or -1L
-v8: Set memblock_can_resize early to handle EFI with more RAM entries
-v9: update after kmemleak changes in mainline
Suggested-by: David S. Miller <davem@davemloft.net>
Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/check.c | 16 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 159 | ||||
-rw-r--r-- | arch/x86/kernel/head.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/head32.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/head64.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/mpparse.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 46 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 6 |
8 files changed, 107 insertions, 137 deletions
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index fc999e6..13a3891 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -2,7 +2,8 @@ #include <linux/sched.h> #include <linux/kthread.h> #include <linux/workqueue.h> -#include <asm/e820.h> +#include <linux/memblock.h> + #include <asm/proto.h> /* @@ -18,10 +19,12 @@ static int __read_mostly memory_corruption_check = -1; static unsigned __read_mostly corruption_check_size = 64*1024; static unsigned __read_mostly corruption_check_period = 60; /* seconds */ -static struct e820entry scan_areas[MAX_SCAN_AREAS]; +static struct scan_area { + u64 addr; + u64 size; +} scan_areas[MAX_SCAN_AREAS]; static int num_scan_areas; - static __init int set_corruption_check(char *arg) { char *end; @@ -81,9 +84,9 @@ void __init setup_bios_corruption_check(void) while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { u64 size; - addr = find_e820_area_size(addr, &size, PAGE_SIZE); + addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); - if (!(addr + 1)) + if (addr == MEMBLOCK_ERROR) break; if (addr >= corruption_check_size) @@ -92,7 +95,7 @@ void __init setup_bios_corruption_check(void) if ((addr + size) > corruption_check_size) size = corruption_check_size - addr; - e820_update_range(addr, size, E820_RAM, E820_RESERVED); + memblock_x86_reserve_range(addr, addr + size, "SCAN RAM"); scan_areas[num_scan_areas].addr = addr; scan_areas[num_scan_areas].size = size; num_scan_areas++; @@ -105,7 +108,6 @@ void __init setup_bios_corruption_check(void) printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas); - update_e820(); } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 0d6fc71..a9221d1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -15,6 +15,7 @@ #include <linux/pfn.h> #include <linux/suspend.h> #include <linux/firmware-map.h> +#include <linux/memblock.h> #include <asm/e820.h> #include <asm/proto.h> @@ -742,69 +743,29 
@@ core_initcall(e820_mark_nvs_memory); */ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) { - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 addr; - u64 ei_start, ei_last; + u64 mem = memblock_find_in_range(start, end, size, align); - if (ei->type != E820_RAM) - continue; - - ei_last = ei->addr + ei->size; - ei_start = ei->addr; - addr = find_early_area(ei_start, ei_last, start, end, - size, align); - - if (addr != -1ULL) - return addr; - } - return -1ULL; -} + if (mem == MEMBLOCK_ERROR) + return -1ULL; -u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) -{ - return find_e820_area(start, end, size, align); + return mem; } -u64 __init get_max_mapped(void) -{ - u64 end = max_pfn_mapped; - - end <<= PAGE_SHIFT; - - return end; -} /* * Find next free range after *start */ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) { - int i; + u64 mem = memblock_x86_find_in_range_size(start, sizep, align); - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 addr; - u64 ei_start, ei_last; - - if (ei->type != E820_RAM) - continue; - - ei_last = ei->addr + ei->size; - ei_start = ei->addr; - addr = find_early_area_size(ei_start, ei_last, start, - sizep, align); + if (mem == MEMBLOCK_ERROR) + return -1ULL - if (addr != -1ULL) - return addr; - } - - return -1ULL; + return mem; } /* - * pre allocated 4k and reserved it in e820 + * pre allocated 4k and reserved it in memblock and e820_saved */ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) { @@ -813,8 +774,8 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) u64 start; for (start = startt; ; start += size) { - start = find_e820_area_size(start, &size, align); - if (!(start + 1)) + start = memblock_x86_find_in_range_size(start, &size, align); + if (start == MEMBLOCK_ERROR) return 0; if (size >= sizet) break; @@ -830,10 +791,9 @@ u64 __init early_reserve_e820(u64 startt, u64 
sizet, u64 align) addr = round_down(start + size - sizet, align); if (addr < start) return 0; - e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); + memblock_x86_reserve_range(addr, addr + sizet, "new next"); e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); - printk(KERN_INFO "update e820 for early_reserve_e820\n"); - update_e820(); + printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); update_e820_saved(); return addr; @@ -895,52 +855,12 @@ unsigned long __init e820_end_of_low_ram_pfn(void) { return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); } -/* - * Finds an active region in the address range from start_pfn to last_pfn and - * returns its range in ei_startpfn and ei_endpfn for the e820 entry. - */ -int __init e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn) -{ - u64 align = PAGE_SIZE; - - *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; - *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; - - /* Skip map entries smaller than a page */ - if (*ei_startpfn >= *ei_endpfn) - return 0; - - /* Skip if map is outside the node */ - if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || - *ei_startpfn >= last_pfn) - return 0; - - /* Check for overlaps */ - if (*ei_startpfn < start_pfn) - *ei_startpfn = start_pfn; - if (*ei_endpfn > last_pfn) - *ei_endpfn = last_pfn; - - return 1; -} /* Walk the e820 map and register active regions within a node */ void __init e820_register_active_regions(int nid, unsigned long start_pfn, unsigned long last_pfn) { - unsigned long ei_startpfn; - unsigned long ei_endpfn; - int i; - - for (i = 0; i < e820.nr_map; i++) - if (e820_find_active_region(&e820.map[i], - start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - add_active_range(nid, ei_startpfn, ei_endpfn); + memblock_x86_register_active_regions(nid, start_pfn, last_pfn); } /* @@ -950,18 +870,16 @@ void __init 
e820_register_active_regions(int nid, unsigned long start_pfn, */ u64 __init e820_hole_size(u64 start, u64 end) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long last_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn, ei_endpfn, ram = 0; - int i; + return memblock_x86_hole_size(start, end); +} - for (i = 0; i < e820.nr_map; i++) { - if (e820_find_active_region(&e820.map[i], - start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - ram += ei_endpfn - ei_startpfn; - } - return end - start - ((u64)ram << PAGE_SHIFT); +void reserve_early(u64 start, u64 end, char *name) +{ + memblock_x86_reserve_range(start, end, name); +} +void free_early(u64 start, u64 end) +{ + memblock_x86_free_range(start, end); } static void early_panic(char *msg) @@ -1210,3 +1128,32 @@ void __init setup_memory_map(void) printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map(who); } + +void __init memblock_x86_fill(void) +{ + int i; + u64 end; + + /* + * EFI may have more than 128 entries + * We are safe to enable resizing, beause memblock_x86_fill() + * is rather later for x86 + */ + memblock_can_resize = 1; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + end = ei->addr + ei->size; + if (end != (resource_size_t)end) + continue; + + if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) + continue; + + memblock_add(ei->addr, ei->size); + } + + memblock_analyze(); + memblock_dump_all(); +} diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 3e66bd3..af0699b 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c @@ -1,5 +1,6 @@ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/memblock.h> #include <asm/setup.h> #include <asm/bios_ebda.h> @@ -51,5 +52,5 @@ void __init reserve_ebda_region(void) lowmem = 0x9f000; /* reserve all memory between lowmem and the 1MB mark */ - reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); + memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS 
reserved"); } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index b2e2460..da60aa8 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/start_kernel.h> #include <linux/mm.h> +#include <linux/memblock.h> #include <asm/setup.h> #include <asm/sections.h> @@ -30,14 +31,15 @@ static void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { + memblock_init(); + #ifdef CONFIG_X86_TRAMPOLINE /* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */ - reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, - "EX TRAMPOLINE"); + memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); #endif reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7147143..8ee930f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -12,6 +12,7 @@ #include <linux/percpu.h> #include <linux/start_kernel.h> #include <linux/io.h> +#include <linux/memblock.h> #include <asm/processor.h> #include <asm/proto.h> @@ -98,6 +99,8 @@ void __init x86_64_start_reservations(char *real_mode_data) { copy_bootdata(__va(real_mode_data)); + memblock_init(); + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d86dbf7..8252545 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/delay.h> #include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> #include <linux/bitops.h> @@ -641,7 +642,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf) { unsigned long size = 
get_mpc_size(mpf->physptr); - reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); + memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc"); } static int __init smp_scan_config(unsigned long base, unsigned long length) @@ -670,7 +671,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) mpf, (u64)virt_to_phys(mpf)); mem = virt_to_phys(mpf); - reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); + memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); if (mpf->physptr) smp_reserve_memory(mpf); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4ae4ac..bbe0aaf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -31,6 +31,7 @@ #include <linux/apm_bios.h> #include <linux/initrd.h> #include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/seq_file.h> #include <linux/console.h> #include <linux/mca.h> @@ -614,7 +615,7 @@ static __init void reserve_ibft_region(void) addr = find_ibft_region(&size); if (size) - reserve_early_overlap_ok(addr, addr + size, "ibft"); + memblock_x86_reserve_range(addr, addr + size, "* ibft"); } #ifdef CONFIG_X86_RESERVE_LOW_64K @@ -708,6 +709,15 @@ static void __init trim_bios_range(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } +static u64 __init get_max_mapped(void) +{ + u64 end = max_pfn_mapped; + + end <<= PAGE_SHIFT; + + return end; +} + /* * Determine if we were loaded by an EFI loader. 
If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -891,8 +901,6 @@ void __init setup_arch(char **cmdline_p) */ max_pfn = e820_end_of_ram_pfn(); - /* preallocate 4k for mptable mpc */ - early_reserve_e820_mpc_new(); /* update e820 for memory not covered by WB MTRRs */ mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) @@ -917,15 +925,6 @@ void __init setup_arch(char **cmdline_p) max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; #endif -#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION - setup_bios_corruption_check(); -#endif - - printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", - max_pfn_mapped<<PAGE_SHIFT); - - reserve_brk(); - /* * Find and reserve possible boot-time SMP configuration: */ @@ -933,6 +932,26 @@ void __init setup_arch(char **cmdline_p) reserve_ibft_region(); + /* + * Need to conclude brk, before memblock_x86_fill() + * it could use memblock_find_in_range, could overlap with + * brk area. + */ + reserve_brk(); + + memblock.current_limit = get_max_mapped(); + memblock_x86_fill(); + + /* preallocate 4k for mptable mpc */ + early_reserve_e820_mpc_new(); + +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION + setup_bios_corruption_check(); +#endif + + printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", + max_pfn_mapped<<PAGE_SHIFT); + reserve_trampoline_memory(); #ifdef CONFIG_ACPI_SLEEP @@ -956,6 +975,7 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = max_pfn; } #endif + memblock.current_limit = get_max_mapped(); /* * NOTE: On x86-32, only from this point on, fixmaps are ready for use. 
@@ -995,7 +1015,7 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn, acpi, k8); #ifndef CONFIG_NO_BOOTMEM - early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); + memblock_x86_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); #endif dma32_reserve_bootmem(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a60df9a..42e2633 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -131,13 +131,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { -#ifdef CONFIG_NO_BOOTMEM - u64 start = __pa(ptr); - u64 end = start + size; - free_early_partial(start, end); -#else free_bootmem(__pa(ptr), size); -#endif } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |