diff options
Diffstat (limited to 'arch')
37 files changed, 195 insertions, 82 deletions
diff --git a/arch/alpha/include/uapi/asm/errno.h b/arch/alpha/include/uapi/asm/errno.h index e5f29ca..17f92aa 100644 --- a/arch/alpha/include/uapi/asm/errno.h +++ b/arch/alpha/include/uapi/asm/errno.h @@ -43,7 +43,7 @@ #define EUSERS 68 /* Too many users */ #define EDQUOT 69 /* Quota exceeded */ -#define ESTALE 70 /* Stale NFS file handle */ +#define ESTALE 70 /* Stale file handle */ #define EREMOTE 71 /* Object is remote */ #define ENOLCK 77 /* No record locks available */ diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 084dc88..c9dfff3 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -40,7 +40,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, -1, + GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/arm/mach-davinci/sram.c b/arch/arm/mach-davinci/sram.c index f18928b..8540ddd 100644 --- a/arch/arm/mach-davinci/sram.c +++ b/arch/arm/mach-davinci/sram.c @@ -25,7 +25,6 @@ struct gen_pool *sram_get_gen_pool(void) void *sram_alloc(size_t len, dma_addr_t *dma) { - unsigned long vaddr; dma_addr_t dma_base = davinci_soc_info.sram_dma; if (dma) @@ -33,13 +32,7 @@ void *sram_alloc(size_t len, dma_addr_t *dma) if (!sram_pool || (dma && !dma_base)) return NULL; - vaddr = gen_pool_alloc(sram_pool, len); - if (!vaddr) - return NULL; - - if (dma) - *dma = gen_pool_virt_to_phys(sram_pool, vaddr); - return (void *)vaddr; + return gen_pool_dma_alloc(sram_pool, len, dma); } EXPORT_SYMBOL(sram_alloc); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 2c28a6c..e2ad0d8 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -29,7 +29,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, -1, + GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h index 5d3047e..4353cf2 100644 --- a/arch/cris/include/asm/io.h +++ b/arch/cris/include/asm/io.h @@ -3,6 +3,7 @@ #include <asm/page.h> /* for __va, __pa */ #include <arch/io.h> +#include <asm-generic/iomap.h> #include <linux/kernel.h> struct cris_io_operations diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index e0a899a..5a84b3a 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -319,7 +319,7 @@ struct thread_struct { regs->loadrs = 0; \ regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ - if (unlikely(!get_dumpable(current->mm))) { \ + if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) { \ /* \ * Zap scratch regs to avoid leaking bits between processes with different \ * uid/privileges. \ diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b6f7f43..88504ab 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -357,9 +357,7 @@ int vmemmap_find_next_valid_pfn(int node, int i) end_address = (unsigned long) &vmem_map[pgdat->node_start_pfn + i]; end_address = PAGE_ALIGN(end_address); - - stop_address = (unsigned long) &vmem_map[ - pgdat->node_start_pfn + pgdat->node_spanned_pages]; + stop_address = (unsigned long) &vmem_map[pgdat_end_pfn(pgdat)]; do { pgd_t *pgd; diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c index 8c00ded..db589ad 100644 --- a/arch/metag/kernel/dma.c +++ b/arch/metag/kernel/dma.c @@ -305,9 +305,7 @@ void dma_free_coherent(struct device *dev, size_t size, if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); - - __free_page(page); + __free_reserved_page(page); continue; } } diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index 249fff6..3cd6288f 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -148,7 +148,7 @@ static void __init bootmem_init_one_node(unsigned int nid) if (!p->node_spanned_pages) return; - end_pfn = p->node_start_pfn + p->node_spanned_pages; + end_pfn = pgdat_end_pfn(p); #ifdef CONFIG_HIGHMEM if (end_pfn > max_low_pfn) end_pfn = max_low_pfn; diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index 5226b09..dbbf224 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -176,8 +176,7 @@ void consistent_free(size_t size, void *vaddr) page = virt_to_page(vaddr); do { - ClearPageReserved(page); - __free_page(page); + __free_reserved_page(page); page++; } while (size -= PAGE_SIZE); #else @@ -194,9 +193,7 @@ void consistent_free(size_t size, void *vaddr) pte_clear(&init_mm, (unsigned int)vaddr, ptep); if (pfn_valid(pfn)) { page = pfn_to_page(pfn); - - ClearPageReserved(page); - __free_page(page); + __free_reserved_page(page); } } vaddr += PAGE_SIZE; diff --git a/arch/mips/include/uapi/asm/errno.h b/arch/mips/include/uapi/asm/errno.h index 31575e2f..02d645d 100644 --- a/arch/mips/include/uapi/asm/errno.h +++ b/arch/mips/include/uapi/asm/errno.h @@ -102,7 +102,7 @@ #define EWOULDBLOCK EAGAIN /* Operation would block */ #define EALREADY 149 /* Operation already in progress */ #define EINPROGRESS 150 /* Operation now in progress */ -#define ESTALE 151 /* Stale NFS file handle */ +#define ESTALE 151 /* Stale file handle */ #define ECANCELED 158 /* AIO operation canceled */ /* diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index 135ad60..f3a8aa5 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -37,7 +37,7 @@ #define EBADMSG 67 /* Not a data message */ #define EUSERS 68 /* Too many users */ #define EDQUOT 69 /* Quota exceeded */ -#define ESTALE 70 /* Stale NFS file handle */ +#define ESTALE 70 /* Stale file handle */ #define EREMOTE 71 /* Object is remote */ #define EOVERFLOW 72 /* Value too large for defined data type */ diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 2a625fb..50dfafc 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -219,7 +219,7 @@ void *module_alloc(unsigned long size) * init_data correctly */ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_RWX, -1, + PAGE_KERNEL_RWX, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 6747eec..7b6c107 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -287,9 +287,7 @@ void __dma_free_coherent(size_t size, void *vaddr) pte_clear(&init_mm, addr, ptep); if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - - ClearPageReserved(page); - __free_page(page); + __free_reserved_page(page); } } addr += PAGE_SIZE; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index d67db4b..90bb6d9 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -633,8 +633,6 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, /* * This function frees user-level page tables of a process. - * - * Must be called with pagetable lock held. */ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 33d6784..078d3e0 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -938,8 +938,7 @@ static void __init mark_reserved_regions_for_nid(int nid) unsigned long start_pfn = physbase >> PAGE_SHIFT; unsigned long end_pfn = PFN_UP(physbase + size); struct node_active_region node_ar; - unsigned long node_end_pfn = node->node_start_pfn + - node->node_spanned_pages; + unsigned long node_end_pfn = pgdat_end_pfn(node); /* * Check to make sure that this memblock.reserved area is diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 7845e15..b89b591 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -50,7 +50,7 @@ void *module_alloc(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, -1, + GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 6bcb045..9b436c2 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -64,6 +64,11 @@ static unsigned long mmap_rnd(void) return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; } +static unsigned long mmap_base_legacy(void) +{ + return TASK_UNMAPPED_BASE + mmap_rnd(); +} + static inline unsigned long mmap_base(void) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -89,7 +94,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * bit is set, or if the expected stack growth is unlimited: */ if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE; + mm->mmap_base = mmap_base_legacy(); mm->get_unmapped_area = arch_get_unmapped_area; } else { mm->mmap_base = mmap_base(); @@ -164,7 +169,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * bit is set, or if the expected stack growth is unlimited: */ if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE; + mm->mmap_base = mmap_base_legacy(); mm->get_unmapped_area = s390_get_unmapped_area; } else { mm->mmap_base = mmap_base(); diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index 06c4281..09fc2bc 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -46,7 +46,7 @@ static inline void __unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs) save_fpu(tsk); release_fpu(regs); } else - tsk->fpu_counter = 0; + tsk->thread.fpu_counter = 0; } static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs) diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index e699a12..18e0377 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -111,6 +111,16 @@ struct thread_struct { /* Extended processor state */ union thread_xstate *xstate; + + /* + * fpu_counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char fpu_counter; }; #define INIT_THREAD { \ diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h index 1cc7d31..eedd4f6 100644 --- a/arch/sh/include/asm/processor_64.h +++ b/arch/sh/include/asm/processor_64.h @@ -126,6 +126,16 @@ struct thread_struct { /* floating point info */ union thread_xstate *xstate; + + /* + * fpu_counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char fpu_counter; }; #define INIT_MMAP \ diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index f8f7af5..4e33224 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -44,7 +44,7 @@ void __fpu_state_restore(void) restore_fpu(tsk); task_thread_info(tsk)->status |= TS_USEDFPU; - tsk->fpu_counter++; + tsk->thread.fpu_counter++; } void fpu_state_restore(struct pt_regs *regs) diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index ebd3933..2885fc9 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #endif ti->addr_limit = KERNEL_DS; ti->status &= ~TS_USEDFPU; - p->fpu_counter = 0; + p->thread.fpu_counter = 0; return 0; } *childregs = *current_pt_regs(); @@ -189,7 +189,7 @@ __switch_to(struct task_struct *prev, struct task_struct *next) unlazy_fpu(prev, task_pt_regs(prev)); /* we're going to use this soon, after a few expensive things */ - if (next->fpu_counter > 5) + if (next->thread.fpu_counter > 5) prefetch(next_t->xstate); #ifdef CONFIG_MMU @@ -207,7 +207,7 @@ __switch_to(struct task_struct *prev, struct task_struct *next) * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now */ - if (next->fpu_counter > 5) + if (next->thread.fpu_counter > 5) __fpu_state_restore(); return prev; diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 174d124b..e2062e6 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -374,7 +374,7 @@ asmlinkage void ret_from_kernel_thread(void); int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, struct task_struct *p) { - struct pt_regs *childregs, *regs = current_pt_regs(); + struct pt_regs *childregs; #ifdef CONFIG_SH_FPU /* can't happen for a kernel thread */ @@ -393,7 +393,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, if (unlikely(p->flags & PF_KTHREAD)) { memset(childregs, 0, sizeof(struct pt_regs)); childregs->regs[2] = (unsigned long)arg; - childregs->regs[3] = (unsigned long)fn; + childregs->regs[3] = (unsigned long)usp; childregs->sr = (1 << 30); /* not user_mode */ childregs->sr |= SR_FD; /* Invalidate FPU flag */ p->thread.pc = (unsigned long) ret_from_kernel_thread; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 33890fd..2d089fe 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -231,7 +231,7 @@ static void __init bootmem_init_one_node(unsigned int nid) if (!p->node_spanned_pages) return; - end_pfn = p->node_start_pfn + p->node_spanned_pages; + end_pfn = pgdat_end_pfn(p); total_pages = bootmem_bootmap_pages(p->node_spanned_pages); diff --git a/arch/sparc/include/uapi/asm/errno.h b/arch/sparc/include/uapi/asm/errno.h index c351aba..20423e17 100644 --- a/arch/sparc/include/uapi/asm/errno.h +++ b/arch/sparc/include/uapi/asm/errno.h @@ -40,7 +40,7 @@ #define EPROCLIM 67 /* SUNOS: Too many processes */ #define EUSERS 68 /* Too many users */ #define EDQUOT 69 /* Quota exceeded */ -#define ESTALE 70 /* Stale NFS file handle */ +#define ESTALE 70 /* Stale file handle */ #define EREMOTE 71 /* Object is remote */ #define ENOSTR 72 /* Device not a stream */ #define ETIME 73 /* Timer expired */ diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 4435488..97655e0 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -29,7 +29,7 @@ static void *module_map(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, -1, + GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, __builtin_return_address(0)); } #else diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 4d0bda7..c49a613 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -365,7 +365,7 @@ static inline void drop_fpu(struct task_struct *tsk) * Forget coprocessor state.. */ preempt_disable(); - tsk->fpu_counter = 0; + tsk->thread.fpu_counter = 0; __drop_fpu(tsk); clear_used_math(); preempt_enable(); @@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta * or if the past 5 consecutive context-switches used math. */ fpu.preload = tsk_used_math(new) && (use_eager_fpu() || - new->fpu_counter > 5); + new->thread.fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -433,16 +433,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { - new->fpu_counter++; + new->thread.fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); } else if (!use_eager_fpu()) stts(); } else { - old->fpu_counter = 0; + old->thread.fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { - new->fpu_counter++; + new->thread.fpu_counter++; if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 987c75e..7b034a4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -488,6 +488,15 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + /* + * fpu_counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char fpu_counter; }; /* diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 5d576ab..e8368c6 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -100,7 +100,7 @@ void unlazy_fpu(struct task_struct *tsk) __save_init_fpu(tsk); __thread_fpu_end(tsk); } else - tsk->fpu_counter = 0; + tsk->thread.fpu_counter = 0; preempt_enable(); } EXPORT_SYMBOL(unlazy_fpu); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 216a4d7..18be1893 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -49,7 +49,7 @@ void *module_alloc(unsigned long size) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, - -1, __builtin_return_address(0)); + NUMA_NO_NODE, __builtin_return_address(0)); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c2ec1aa..6f1236c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -153,7 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->orig_ax = -1; childregs->cs = __KERNEL_CS | get_kernel_rpl(); childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; - p->fpu_counter = 0; + p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); return 0; @@ -166,7 +166,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.ip = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); - p->fpu_counter = 0; + p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 45ab4d6..10fe4c1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.usersp = me->thread.usersp; set_tsk_thread_flag(p, TIF_FORK); - p->fpu_counter = 0; + p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 918d489..cb233bc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1121,8 +1121,6 @@ void __init setup_arch(char **cmdline_p) acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); #endif - reserve_crashkernel(); - vsmp_init(); io_delay_init(); @@ -1135,6 +1133,13 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); initmem_init(); + + /* + * Reserve memory for crash kernel after SRAT is parsed so that it + * won't consume hotpluggable memory. + */ + reserve_crashkernel(); + memblock_find_dma_reserve(); #ifdef CONFIG_KVM_GUEST diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 729aa77..996ce23 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -653,7 +653,7 @@ void math_state_restore(void) return; } - tsk->fpu_counter++; + tsk->thread.fpu_counter++; } EXPORT_SYMBOL_GPL(math_state_restore); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ce32017..f971306 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -53,12 +53,12 @@ __ref void *alloc_low_pages(unsigned int num) if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { unsigned long ret; if (min_pfn_mapped >= max_pfn_mapped) - panic("alloc_low_page: ran out of memory"); + panic("alloc_low_pages: ran out of memory"); ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, max_pfn_mapped << PAGE_SHIFT, PAGE_SIZE * num , PAGE_SIZE); if (!ret) - panic("alloc_low_page: can not alloc memory"); + panic("alloc_low_pages: can not alloc memory"); memblock_reserve(ret, PAGE_SIZE * num); pfn = ret >> PAGE_SHIFT; } else { @@ -418,27 +418,27 @@ static unsigned long __init get_new_step_size(unsigned long step_size) return step_size << 5; } -void __init init_mem_mapping(void) +/** + * memory_map_top_down - Map [map_start, map_end) top down + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in top-down. That said, the page tables + * will be allocated at the end of the memory, and we map the + * memory in top-down. + */ +static void __init memory_map_top_down(unsigned long map_start, + unsigned long map_end) { - unsigned long end, real_end, start, last_start; + unsigned long real_end, start, last_start; unsigned long step_size; unsigned long addr; unsigned long mapped_ram_size = 0; unsigned long new_mapped_ram_size; - probe_page_size_mask(); - -#ifdef CONFIG_X86_64 - end = max_pfn << PAGE_SHIFT; -#else - end = max_low_pfn << PAGE_SHIFT; -#endif - - /* the ISA range is always mapped regardless of memory holes */ - init_memory_mapping(0, ISA_END_ADDRESS); - /* xen has big range in reserved near end of ram, skip it at first.*/ - addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE); + addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ @@ -453,13 +453,13 @@ void __init init_mem_mapping(void) * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages * for page table. */ - while (last_start > ISA_END_ADDRESS) { + while (last_start > map_start) { if (last_start > step_size) { start = round_down(last_start - 1, step_size); - if (start < ISA_END_ADDRESS) - start = ISA_END_ADDRESS; + if (start < map_start) + start = map_start; } else - start = ISA_END_ADDRESS; + start = map_start; new_mapped_ram_size = init_range_memory_mapping(start, last_start); last_start = start; @@ -470,8 +470,89 @@ void __init init_mem_mapping(void) mapped_ram_size += new_mapped_ram_size; } - if (real_end < end) - init_range_memory_mapping(real_end, end); + if (real_end < map_end) + init_range_memory_mapping(real_end, map_end); +} + +/** + * memory_map_bottom_up - Map [map_start, map_end) bottom up + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in bottom-up. Since we have limited the + * bottom-up allocation above the kernel, the page tables will + * be allocated just above the kernel and we map the memory + * in [map_start, map_end) in bottom-up. + */ +static void __init memory_map_bottom_up(unsigned long map_start, + unsigned long map_end) +{ + unsigned long next, new_mapped_ram_size, start; + unsigned long mapped_ram_size = 0; + /* step_size need to be small so pgt_buf from BRK could cover it */ + unsigned long step_size = PMD_SIZE; + + start = map_start; + min_pfn_mapped = start >> PAGE_SHIFT; + + /* + * We start from the bottom (@map_start) and go to the top (@map_end). + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. + */ + while (start < map_end) { + if (map_end - start > step_size) { + next = round_up(start + 1, step_size); + if (next > map_end) + next = map_end; + } else + next = map_end; + + new_mapped_ram_size = init_range_memory_mapping(start, next); + start = next; + + if (new_mapped_ram_size > mapped_ram_size) + step_size = get_new_step_size(step_size); + mapped_ram_size += new_mapped_ram_size; + } +} + +void __init init_mem_mapping(void) +{ + unsigned long end; + + probe_page_size_mask(); + +#ifdef CONFIG_X86_64 + end = max_pfn << PAGE_SHIFT; +#else + end = max_low_pfn << PAGE_SHIFT; +#endif + + /* the ISA range is always mapped regardless of memory holes */ + init_memory_mapping(0, ISA_END_ADDRESS); + + /* + * If the allocation is in bottom-up direction, we setup direct mapping + * in bottom-up, otherwise we setup direct mapping in top-down. + */ + if (memblock_bottom_up()) { + unsigned long kernel_end = __pa_symbol(_end); + + /* + * we need two separate calls here. This is because we want to + * allocate page tables above the kernel. So we first map + * [kernel_end, end) to make memory above the kernel be mapped + * as soon as possible. And then use page tables allocated above + * the kernel to map [ISA_END_ADDRESS, kernel_end). + */ + memory_map_bottom_up(kernel_end, end); + memory_map_bottom_up(ISA_END_ADDRESS, kernel_end); + } else { + memory_map_top_down(ISA_END_ADDRESS, end); + } #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8bf93ba..24aec58 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -567,6 +567,17 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) return ret; + + /* + * We reset memblock back to the top-down direction + * here because if we configured ACPI_NUMA, we have + * parsed SRAT in init_func(). It is ok to have the + * reset here even if we did't configure ACPI_NUMA + * or acpi numa init fails and fallbacks to dummy + * numa init. + */ + memblock_set_bottom_up(false); + ret = numa_cleanup_meminfo(&numa_meminfo); if (ret < 0) return ret; |