diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-15 09:39:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-15 09:39:44 -0700 |
commit | ada3fa15057205b7d3f727bba5cd26b5912e350f (patch) | |
tree | 60962fc9e4021b92f484d1a58e72cd3906d4f3db /arch | |
parent | 2f82af08fcc7dc01a7e98a49a5995a77e32a2925 (diff) | |
parent | 5579fd7e6aed8860ea0c8e3f11897493153b10ad (diff) | |
download | op-kernel-dev-ada3fa15057205b7d3f727bba5cd26b5912e350f.zip op-kernel-dev-ada3fa15057205b7d3f727bba5cd26b5912e350f.tar.gz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (46 commits)
powerpc64: convert to dynamic percpu allocator
sparc64: use embedding percpu first chunk allocator
percpu: kill lpage first chunk allocator
x86,percpu: use embedding for 64bit NUMA and page for 32bit NUMA
percpu: update embedding first chunk allocator to handle sparse units
percpu: use group information to allocate vmap areas sparsely
vmalloc: implement pcpu_get_vm_areas()
vmalloc: separate out insert_vmalloc_vm()
percpu: add chunk->base_addr
percpu: add pcpu_unit_offsets[]
percpu: introduce pcpu_alloc_info and pcpu_group_info
percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward
percpu: add @align to pcpu_fc_alloc_fn_t
percpu: make @dyn_size mandatory for pcpu_setup_first_chunk()
percpu: drop @static_size from first chunk allocators
percpu: generalize first chunk allocator selection
percpu: build first chunk allocators selectively
percpu: rename 4k first chunk allocator to page
percpu: improve boot messages
percpu: fix pcpu_reclaim() locking
...
Fix trivial conflict as by Tejun Heo in kernel/sched.c
Diffstat (limited to 'arch')
49 files changed, 246 insertions, 745 deletions
diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h index b663f1f..2c12378 100644 --- a/arch/alpha/include/asm/percpu.h +++ b/arch/alpha/include/asm/percpu.h @@ -1,102 +1,18 @@ #ifndef __ALPHA_PERCPU_H #define __ALPHA_PERCPU_H -#include <linux/compiler.h> -#include <linux/threads.h> -#include <linux/percpu-defs.h> - -/* - * Determine the real variable name from the name visible in the - * kernel sources. - */ -#define per_cpu_var(var) per_cpu__##var - -#ifdef CONFIG_SMP - -/* - * per_cpu_offset() is the offset that has to be added to a - * percpu variable to get to the instance for a certain processor. - */ -extern unsigned long __per_cpu_offset[NR_CPUS]; - -#define per_cpu_offset(x) (__per_cpu_offset[x]) - -#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id()) -#ifdef CONFIG_DEBUG_PREEMPT -#define my_cpu_offset per_cpu_offset(smp_processor_id()) -#else -#define my_cpu_offset __my_cpu_offset -#endif - -#ifndef MODULE -#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset)) -#define PER_CPU_DEF_ATTRIBUTES -#else /* - * To calculate addresses of locally defined variables, GCC uses 32-bit - * displacement from the GP. Which doesn't work for per cpu variables in - * modules, as an offset to the kernel per cpu area is way above 4G. + * To calculate addresses of locally defined variables, GCC uses + * 32-bit displacement from the GP. Which doesn't work for per cpu + * variables in modules, as an offset to the kernel per cpu area is + * way above 4G. * - * This forces allocation of a GOT entry for per cpu variable using - * ldq instruction with a 'literal' relocation. - */ -#define SHIFT_PERCPU_PTR(var, offset) ({ \ - extern int simple_identifier_##var(void); \ - unsigned long __ptr, tmp_gp; \ - asm ( "br %1, 1f \n\ - 1: ldgp %1, 0(%1) \n\ - ldq %0, per_cpu__" #var"(%1)\t!literal" \ - : "=&r"(__ptr), "=&r"(tmp_gp)); \ - (typeof(&per_cpu_var(var)))(__ptr + (offset)); }) - -#define PER_CPU_DEF_ATTRIBUTES __used - -#endif /* MODULE */ - -/* - * A percpu variable may point to a discarded regions. The following are - * established ways to produce a usable pointer from the percpu variable - * offset. + * Always use weak definitions for percpu variables in modules. */ -#define per_cpu(var, cpu) \ - (*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu))) -#define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(var, my_cpu_offset)) -#define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(var, __my_cpu_offset)) - -#else /* ! SMP */ - -#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) -#define __get_cpu_var(var) per_cpu_var(var) -#define __raw_get_cpu_var(var) per_cpu_var(var) - -#define PER_CPU_DEF_ATTRIBUTES - -#endif /* SMP */ - -#ifdef CONFIG_SMP -#define PER_CPU_BASE_SECTION ".data.percpu" -#else -#define PER_CPU_BASE_SECTION ".data" -#endif - -#ifdef CONFIG_SMP - -#ifdef MODULE -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#else -#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" -#endif -#define PER_CPU_FIRST_SECTION ".first" - -#else - -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_FIRST_SECTION "" - +#if defined(MODULE) && defined(CONFIG_SMP) +#define ARCH_NEEDS_WEAK_PER_CPU #endif -#define PER_CPU_ATTRIBUTES +#include <asm-generic/percpu.h> #endif /* __ALPHA_PERCPU_H */ diff --git a/arch/alpha/include/asm/tlbflush.h b/arch/alpha/include/asm/tlbflush.h index 9d87aaa..e89e0c2 100644 --- a/arch/alpha/include/asm/tlbflush.h +++ b/arch/alpha/include/asm/tlbflush.h @@ -2,6 +2,7 @@ #define _ALPHA_TLBFLUSH_H #include <linux/mm.h> +#include <linux/sched.h> #include <asm/compiler.h> #include <asm/pgalloc.h> diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index b9d6568..6dc03c3 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -134,13 +134,6 @@ SECTIONS __bss_stop = .; _end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - } - .mdebug 0 : { *(.mdebug) } @@ -150,4 +143,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 6937102..5cc4812 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -83,6 +83,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) *(.ARM.exidx.exit.text) *(.ARM.extab.exit.text) #ifndef CONFIG_HOTPLUG_CPU diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 7910d41..c4b5665 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -124,14 +124,11 @@ SECTIONS _end = .; } + DWARF_DEBUG + /* When something in the kernel is NOT compiled as a module, the module * cleanup code and data are put into these segments. Both can then be * thrown away, as cleanup code is never called unless it's a module. */ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - } - - DWARF_DEBUG + DISCARDS } diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 6ac307c..d7ffe29 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -277,8 +277,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : - { - *(.exitcall.exit) - } + DISCARDS } diff --git a/arch/blackfin/mm/sram-alloc.c b/arch/blackfin/mm/sram-alloc.c index 0bc3c4e..99e4dbb 100644 --- a/arch/blackfin/mm/sram-alloc.c +++ b/arch/blackfin/mm/sram-alloc.c @@ -42,9 +42,9 @@ #include <asm/mem_map.h> #include "blackfin_sram.h" -static DEFINE_PER_CPU(spinlock_t, l1sram_lock) ____cacheline_aligned_in_smp; -static DEFINE_PER_CPU(spinlock_t, l1_data_sram_lock) ____cacheline_aligned_in_smp; -static DEFINE_PER_CPU(spinlock_t, l1_inst_sram_lock) ____cacheline_aligned_in_smp; +static DEFINE_PER_CPU_SHARED_ALIGNED(spinlock_t, l1sram_lock); +static DEFINE_PER_CPU_SHARED_ALIGNED(spinlock_t, l1_data_sram_lock); +static DEFINE_PER_CPU_SHARED_ALIGNED(spinlock_t, l1_inst_sram_lock); static spinlock_t l2_sram_lock ____cacheline_aligned_in_smp; /* the data structure for L1 scratchpad and DATA SRAM */ diff --git a/arch/cris/include/asm/mmu_context.h b/arch/cris/include/asm/mmu_context.h index 72ba08d..1d45fd6 100644 --- a/arch/cris/include/asm/mmu_context.h +++ b/arch/cris/include/asm/mmu_context.h @@ -17,7 +17,8 @@ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, * registers like cr3 on the i386 */ -extern volatile DEFINE_PER_CPU(pgd_t *,current_pgd); /* defined in arch/cris/mm/fault.c */ +/* defined in arch/cris/mm/fault.c */ +DECLARE_PER_CPU(pgd_t *, current_pgd); static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 0d2adfc..6c81836 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -140,12 +140,7 @@ SECTIONS _end = .; __end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - } - dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; + + DISCARDS } diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index f925115..4a7cdd9 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -29,7 +29,7 @@ extern void die_if_kernel(const char *, struct pt_regs *, long); /* current active page directory */ -volatile DEFINE_PER_CPU(pgd_t *,current_pgd); +DEFINE_PER_CPU(pgd_t *, current_pgd); unsigned long cris_signal_return_page; /* diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 22d9787..7dbf41f 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -177,6 +177,8 @@ SECTIONS .debug_ranges 0 : { *(.debug_ranges) } .comment 0 : { *(.comment) } + + DISCARDS } __kernel_image_size_no_bss = __bss_start - __kernel_image_start; diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 43a87b9..662b02e 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -152,9 +152,6 @@ SECTIONS __end = . ; __ramstart = .; } - /DISCARD/ : { - *(.exitcall.exit) - } .romfs : { *(.romfs*) @@ -165,4 +162,6 @@ SECTIONS COMMAND_START = . - 0x200 ; __ramend = . ; } + + DISCARDS } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index e624611..011a1cd 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 1b23ec12..1de86c9 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -855,11 +855,17 @@ identify_cpu (struct cpuinfo_ia64 *c) c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); } +/* + * In UP configuration, setup_per_cpu_areas() is defined in + * include/linux/percpu.h + */ +#ifdef CONFIG_SMP void __init setup_per_cpu_areas (void) { /* start_kernel() requires this... */ } +#endif /* * Do the following calculations: diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index f0c521b..93ebfea 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -58,7 +58,8 @@ static struct local_tlb_flush_counts { unsigned int count; } __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; -static DEFINE_PER_CPU(unsigned short, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned; +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS], + shadow_flush_counts); #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 4a95e86..eb4214d 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -24,14 +24,14 @@ PHDRS { } SECTIONS { - /* Sections to be discarded */ + /* unwind exit sections must be discarded before the rest of the + sections get included. */ /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) - } + *(.comment) + *(.note) + } v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ phys_start = _start - LOAD_OFFSET; @@ -316,7 +316,7 @@ SECTIONS .debug_funcnames 0 : { *(.debug_funcnames) } .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . */ - /DISCARD/ : { *(.comment) } - /DISCARD/ : { *(.note) } + + /* Default discards */ + DISCARDS } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index e456f06..ece1bf9 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info); -DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); +DEFINE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid); EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 4179adf..de5e21c 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -120,13 +120,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -135,4 +128,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 01d212b..47eac19 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -82,13 +82,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -97,4 +90,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index c192f77..03efaf0 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -77,13 +77,6 @@ __init_begin = .; _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - } - .crap : { /* Stabs debugging sections. */ *(.stab) @@ -96,4 +89,6 @@ __init_begin = .; *(.note) } + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index b7fe505..2736a5e 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -184,12 +184,6 @@ SECTIONS { __init_end = .; } > INIT - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - } - .bss : { . = ALIGN(4); _sbss = . ; @@ -200,5 +194,6 @@ SECTIONS { _end = . ; } > BSS + DISCARDS } diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index d34d38d..ec5fa91 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -23,8 +23,8 @@ SECTIONS { _stext = . ; *(.text .text.*) *(.fixup) - - *(.exitcall.exit) + EXIT_TEXT + EXIT_CALL SCHED_TEXT LOCK_TEXT KPROBES_TEXT @@ -162,4 +162,6 @@ SECTIONS { } . = ALIGN(4096); _end = .; + + DISCARDS } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 58738c8..1474c18 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -176,17 +176,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - - /* ABI crap starts here */ - *(.MIPS.options) - *(.options) - *(.pdr) - *(.reginfo) - } - /* These mark the ABI of the kernel for debuggers. */ .mdebug.abi32 : { KEEP(*(.mdebug.abi32)) @@ -212,4 +201,14 @@ SECTIONS *(.gptab.bss) *(.gptab.sbss) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { + /* ABI crap starts here */ + *(.MIPS.options) + *(.options) + *(.pdr) + *(.reginfo) + } } diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index f4aa079..76f41bd 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -115,12 +115,10 @@ SECTIONS . = ALIGN(PAGE_SIZE); pg0 = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_CALL - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index fd2cc4f..aea1784 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -237,9 +237,12 @@ SECTIONS /* freed after init ends here */ _end = . ; + STABS_DEBUG + .note 0 : { *(.note) } + /* Sections to be discarded */ + DISCARDS /DISCARD/ : { - *(.exitcall.exit) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries @@ -252,7 +255,4 @@ SECTIONS *(.gnu.hash) #endif } - - STABS_DEBUG - .note 0 : { *(.note) } } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d00131c..2c42e15 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -49,6 +49,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool PPC64 + config IRQ_PER_CPU bool default y diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1f68160..aa6e450 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -57,6 +57,7 @@ #include <asm/cache.h> #include <asm/page.h> #include <asm/mmu.h> +#include <asm/mmu-hash64.h> #include <asm/firmware.h> #include <asm/xmon.h> #include <asm/udbg.h> @@ -569,25 +570,53 @@ void cpu_die(void) } #ifdef CONFIG_SMP -void __init setup_per_cpu_areas(void) +#define PCPU_DYN_SIZE () + +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - int i; - unsigned long size; - char *ptr; - - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); -#ifdef CONFIG_MODULES - if (size < PERCPU_ENOUGH_ROOM) - size = PERCPU_ENOUGH_ROOM; -#endif + return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, + __pa(MAX_DMA_ADDRESS)); +} - for_each_possible_cpu(i) { - ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} - paca[i].data_offset = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - } +static int pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + if (cpu_to_node(from) == cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + +void __init setup_per_cpu_areas(void) +{ + const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + size_t atom_size; + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Linear mapping is one of 4K, 1M and 16M. For 4K, no need + * to group units. For larger mappings, use 1M atom which + * should be large enough to contain a number of units. + */ + if (mmu_linear_psize == MMU_PAGE_4K) + atom_size = PAGE_SIZE; + else + atom_size = 1 << 20; + + rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; } #endif diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8ef8a14..244e365 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -37,12 +37,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - EXIT_DATA - } - . = KERNELBASE; /* @@ -298,4 +292,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; PROVIDE32 (end = .); + + /* Sections to be discarded. */ + DISCARDS } diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index ab5fb48..687fdda 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -31,7 +31,7 @@ struct stab_entry { #define NR_STAB_CACHE_ENTRIES 8 static DEFINE_PER_CPU(long, stab_cache_ptr); -static DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); +static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache); /* * Create a segment table entry for the given esid/vsid pair. diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index f6e04bc..51ffde4 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -37,7 +37,7 @@ */ #define MSG_COUNT 4 -static DEFINE_PER_CPU(unsigned int, ps3_ipi_virqs[MSG_COUNT]); +static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs); static void do_message_pass(int target, int msg) { diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 408d60b..f7ad871 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -1,37 +1,21 @@ #ifndef __ARCH_S390_PERCPU__ #define __ARCH_S390_PERCPU__ -#include <linux/compiler.h> -#include <asm/lowcore.h> - /* * s390 uses its own implementation for per cpu data, the offset of * the cpu local data area is cached in the cpu's lowcore memory. - * For 64 bit module code s390 forces the use of a GOT slot for the - * address of the per cpu variable. This is needed because the module - * may be more than 4G above the per cpu area. */ -#if defined(__s390x__) && defined(MODULE) - -#define SHIFT_PERCPU_PTR(ptr,offset) (({ \ - extern int simple_identifier_##var(void); \ - unsigned long *__ptr; \ - asm ( "larl %0, %1@GOTENT" \ - : "=a" (__ptr) : "X" (ptr) ); \ - (typeof(ptr))((*__ptr) + (offset)); })) - -#else - -#define SHIFT_PERCPU_PTR(ptr, offset) (({ \ - extern int simple_identifier_##var(void); \ - unsigned long __ptr; \ - asm ( "" : "=a" (__ptr) : "0" (ptr) ); \ - (typeof(ptr)) (__ptr + (offset)); })) +#define __my_cpu_offset S390_lowcore.percpu_offset +/* + * For 64 bit module code, the module may be more than 4G above the + * per cpu area, use weak definitions to force the compiler to + * generate external references. + */ +#if defined(CONFIG_SMP) && defined(__s390x__) && defined(MODULE) +#define ARCH_NEEDS_WEAK_PER_CPU #endif -#define __my_cpu_offset S390_lowcore.percpu_offset - #include <asm-generic/percpu.h> #endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 7315f9e..bc15ef9 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -84,13 +84,10 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - } - /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index f53c76a..0ce254b 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -163,16 +163,14 @@ SECTIONS _end = . ; } + STABS_DEBUG + DWARF_DEBUG + /* * When something in the kernel is NOT compiled as a module, the * module cleanup code and data are put into these segments. Both * can then be thrown away, as cleanup code is never called unless * it's a module. */ - /DISCARD/ : { - *(.exitcall.exit) - } - - STABS_DEBUG - DWARF_DEBUG + DISCARDS } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 2bd5c28..86b8234 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -99,7 +99,7 @@ config AUDIT_ARCH config HAVE_SETUP_PER_CPU_AREA def_bool y if SPARC64 -config HAVE_DYNAMIC_PER_CPU_AREA +config NEED_PER_CPU_EMBED_FIRST_CHUNK def_bool y if SPARC64 config GENERIC_HARDIRQS_NO__DO_IRQ diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 3691907..ff68373 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1389,8 +1389,8 @@ void smp_send_stop(void) * RETURNS: * Pointer to the allocated area on success, NULL on failure. */ -static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, - unsigned long align) +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, + size_t align) { const unsigned long goal = __pa(MAX_DMA_ADDRESS); #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -1415,127 +1415,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } -static size_t pcpur_size __initdata; -static void **pcpur_ptrs __initdata; - -static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) +static void __init pcpu_free_bootmem(void *ptr, size_t size) { - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpur_size) - return NULL; - - return virt_to_page(pcpur_ptrs[cpu] + off); + free_bootmem(__pa(ptr), size); } -#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) - -static void __init pcpu_map_range(unsigned long start, unsigned long end, - struct page *page) +static int pcpu_cpu_distance(unsigned int from, unsigned int to) { - unsigned long pfn = page_to_pfn(page); - unsigned long pte_base; - - BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL)); - - pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | - _PAGE_CP_4U | _PAGE_CV_4U | - _PAGE_P_4U | _PAGE_W_4U); - if (tlb_type == hypervisor) - pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); - - while (start < end) { - pgd_t *pgd = pgd_offset_k(start); - unsigned long this_end; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pud = pud_offset(pgd, start); - if (pud_none(*pud)) { - pmd_t *new; - - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); - pud_populate(&init_mm, pud, new); - } - - pmd = pmd_offset(pud, start); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); - pmd_populate_kernel(&init_mm, pmd, new); - } - - pte = pte_offset_kernel(pmd, start); - this_end = (start + PMD_SIZE) & PMD_MASK; - if (this_end > end) - this_end = end; - - while (start < this_end) { - unsigned long paddr = pfn << PAGE_SHIFT; - - pte_val(*pte) = (paddr | pte_base); - - start += PAGE_SIZE; - pte++; - pfn++; - } - } + if (cpu_to_node(from) == cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; } void __init setup_per_cpu_areas(void) { - size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; - static struct vm_struct vm; - unsigned long delta, cpu; - size_t pcpu_unit_size; - size_t ptrs_size; - - pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; - + unsigned long delta; + unsigned int cpu; + int rc; - ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpur_ptrs[0])); - pcpur_ptrs = alloc_bootmem(ptrs_size); - - for_each_possible_cpu(cpu) { - pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, - PCPU_CHUNK_SIZE); - - free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), - PCPU_CHUNK_SIZE - pcpur_size); - - memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); - } - - /* allocate address and map */ - vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE; - vm_area_register_early(&vm, PCPU_CHUNK_SIZE); - - for_each_possible_cpu(cpu) { - unsigned long start = (unsigned long) vm.addr; - unsigned long end; - - start += cpu * PCPU_CHUNK_SIZE; - end = start + PCPU_CHUNK_SIZE; - pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu])); - } - - pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, - PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); - - free_bootmem(__pa(pcpur_ptrs), ptrs_size); + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, 4 << 20, + pcpu_cpu_distance, pcpu_alloc_bootmem, + pcpu_free_bootmem); + if (rc) + panic("failed to initialize first chunk (%d)", rc); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) { - __per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; - } + for_each_possible_cpu(cpu) + __per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; /* Setup %g5 for the boot cpu. */ __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index fcbbd00..866390f 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -171,12 +171,8 @@ SECTIONS } _end = . ; - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - } - STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index cb02486..37ecc55 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -123,8 +123,3 @@ __initramfs_end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - } - diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 9975e1a..715a188 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -156,4 +156,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 11b8352..2ebd397 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -100,4 +100,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a800b0f..e98e81a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -150,7 +150,10 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y -config HAVE_DYNAMIC_PER_CPU_AREA +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_PAGE_FIRST_CHUNK def_bool y config HAVE_CPUMASK_OF_CPU_MAP diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 04eacef..b65a36d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -168,15 +168,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_NEED_MULTIPLE_NODES -void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 6b2a52d..dca325c 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -30,8 +30,8 @@ #include <asm/apic.h> #include <asm/desc.h> -static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); -static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); +static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr); +static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr); static DEFINE_PER_CPU(int, cpu_priv_count); static DEFINE_MUTEX(cpu_debug_lock); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9bfe9d2..fdd51b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1101,7 +1101,7 @@ void mce_log_therm_throt_event(__u64 status) */ static int check_interval = 5 * 60; /* 5 minutes */ -static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ +static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); static void mcheck_timer(unsigned long data) @@ -1120,7 +1120,7 @@ static void mcheck_timer(unsigned long data) * Alert userspace if needed. If we logged an MCE, reduce the * polling interval, otherwise increase the polling interval. */ - n = &__get_cpu_var(next_interval); + n = &__get_cpu_var(mce_next_interval); if (mce_notify_irq()) *n = max(*n/2, HZ/100); else @@ -1335,7 +1335,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) static void mce_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); - int *n = &__get_cpu_var(next_interval); + int *n = &__get_cpu_var(mce_next_interval); if (mce_ignore_ce) return; @@ -1935,7 +1935,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: t->expires = round_jiffies(jiffies + - __get_cpu_var(next_interval)); + __get_cpu_var(mce_next_interval)); add_timer_on(t, cpu); smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); break; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 1fecba4..8cd5224 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -69,7 +69,7 @@ struct threshold_bank { struct threshold_block *blocks; cpumask_var_t cpus; }; -static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); +static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); #ifdef CONFIG_SMP static unsigned char shared_bank[NR_BANKS] = { diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index f9cd084..2732e2c 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1211,7 +1211,7 @@ amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) x86_pmu_disable_counter(hwc, idx); } -static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); +static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* * Set the next IRQ period, based on the hwc->period_left value. @@ -1253,7 +1253,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, if (left > x86_pmu.max_period) left = x86_pmu.max_period; - per_cpu(prev_left[idx], smp_processor_id()) = left; + per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; /* * The hw counter starts counting from this counter offset, @@ -1470,7 +1470,7 @@ void perf_counter_print_debug(void) rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); rdmsrl(x86_pmu.perfctr + idx, pmc_count); - prev_left = per_cpu(prev_left[idx], cpu); + prev_left = per_cpu(pmc_prev_left[idx], cpu); pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", cpu, idx, pmc_ctrl); @@ -2110,8 +2110,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) entry->ip[entry->nr++] = ip; } -static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); static DEFINE_PER_CPU(int, in_nmi_frame); @@ -2264,9 +2264,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) struct perf_callchain_entry *entry; if (in_nmi()) - entry = &__get_cpu_var(nmi_entry); + entry = &__get_cpu_var(pmc_nmi_entry); else - entry = &__get_cpu_var(irq_entry); + entry = &__get_cpu_var(pmc_irq_entry); entry->nr = 0; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 07d8191..d559af9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset); #define PERCPU_FIRST_CHUNK_RESERVE 0 #endif +#ifdef CONFIG_X86_32 /** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * @@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void) #endif return false; } +#endif /** * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu @@ -124,308 +126,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, } /* - * Large page remap allocator - * - * This allocator uses PMD page as unit. A PMD page is allocated for - * each cpu and each is remapped into vmalloc area using PMD mapping. - * As PMD page is quite large, only part of it is used for the first - * chunk. Unused part is returned to the bootmem allocator. - * - * So, the PMD pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more PMD TLB entry pressure but still is much - * better than only using 4k mappings while still being NUMA friendly. + * Helpers for first chunk memory allocation */ -#ifdef CONFIG_NEED_MULTIPLE_NODES -struct pcpul_ent { - unsigned int cpu; - void *ptr; -}; - -static size_t pcpul_size; -static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; - -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); + return pcpu_alloc_bootmem(cpu, size, align); } -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static void __init pcpu_fc_free(void *ptr, size_t size) { - size_t map_size, dyn_size; - unsigned int cpu; - int i, j; - ssize_t ret; - - if (!chosen) { - size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = nr_cpu_ids * PMD_SIZE; - - /* on non-NUMA, embedding is better */ - if (!pcpu_need_numa()) - return -EINVAL; - - /* don't consume more than 20% of vmalloc area */ - if (tot_size > vm_size / 5) { - pr_info("PERCPU: too large chunk size %zuMB for " - "large page remap\n", tot_size >> 20); - return -EINVAL; - } - } - - /* need PSE */ - if (!cpu_has_pse) { - pr_warning("PERCPU: lpage allocator requires PSE\n"); - return -EINVAL; - } - - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. - */ - pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - if (pcpul_size > PMD_SIZE) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } - dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - - /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); - pcpul_map = alloc_bootmem(map_size); - - for_each_possible_cpu(cpu) { - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, - PMD_SIZE); - if (!pcpul_map[cpu].ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The 2MB up-rounding bootmem is needed to make - * sure the partial 2MB page is still fully RAM - it's - * not well-specified to have a PAT-incompatible area - * (unmapped RAM, device memory, etc.) in that hole. - */ - free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), - PMD_SIZE - pcpul_size); - - memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); - } - - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = nr_cpu_ids * PMD_SIZE; - vm_area_register_early(&pcpul_vm, PMD_SIZE); - - for_each_possible_cpu(cpu) { - pmd_t *pmd, pmd_v; - - pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + - cpu * PMD_SIZE); - pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), - PAGE_KERNEL_LARGE); - set_pmd(pmd, pmd_v); - } - - /* we're ready, commit */ - pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); - - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < nr_cpu_ids - 1; i++) - for (j = i + 1; j < nr_cpu_ids; j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - return ret; - -enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; + free_bootmem(__pa(ptr), size); } -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. This is - * used by pageattr to detect VM aliases and break up the pcpu PMD - * mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used PMD - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) { - void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); - unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; - int left = 0, right = nr_cpu_ids - 1; - int pos; - - /* pcpul in use at all? */ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < pmd_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > pmd_addr) - right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * PMD_SIZE + offset; - } - } - - return NULL; -} +#ifdef CONFIG_NEED_MULTIPLE_NODES + if (early_cpu_to_node(from) == early_cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; #else -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) -{ - return -EINVAL; -} + return LOCAL_DISTANCE; #endif - -/* - * Embedding allocator - * - * The first chunk is sized to just contain the static area plus - * module and dynamic reserves and embedded into linear physical - * mapping so that it can use PMD mapping without additional TLB - * pressure. - */ -static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) -{ - size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; - - /* - * If large page isn't supported, there's no benefit in doing - * this. Also, embedding allocation doesn't play well with - * NUMA. - */ - if (!chosen && (!cpu_has_pse || pcpu_need_numa())) - return -EINVAL; - - return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); } -/* - * 4k page allocator - * - * This is the basic allocator. Static percpu area is allocated - * page-by-page and most of initialization is done by the generic - * setup function. - */ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_nr_static_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_nr_static_pages) - return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; - return NULL; -} - -static void __init pcpu4k_populate_pte(unsigned long addr) +static void __init pcpup_populate_pte(unsigned long addr) { populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_4k(size_t static_size) -{ - size_t pages_size; - unsigned int cpu; - int i, j; - ssize_t ret; - - pcpu4k_nr_static_pages = PFN_UP(static_size); - - /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids - * sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); - - /* allocate and copy */ - j = 0; - for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_nr_static_pages; i++) { - void *ptr; - - ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); - goto enomem; - } - - memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); - pcpu4k_pages[j++] = virt_to_page(ptr); - } - - /* we're ready, commit */ - pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", - pcpu4k_nr_static_pages, static_size); - - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, - -1, NULL, pcpu4k_populate_pte); - goto out_free_ar; - -enomem: - while (--j >= 0) - free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); - return ret; -} - -/* for explicit first chunk allocator selection */ -static char pcpu_chosen_alloc[16] __initdata; - -static int __init percpu_alloc_setup(char *str) -{ - strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); - return 0; -} -early_param("percpu_alloc", percpu_alloc_setup); - static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -441,52 +170,49 @@ static inline void setup_percpu_segment(int cpu) void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; unsigned int cpu; unsigned long delta; - size_t pcpu_unit_size; - ssize_t ret; + int rc; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); /* - * Allocate percpu area. If PSE is supported, try to make use - * of large page mappings. Please read comments on top of - * each allocator for details. + * Allocate percpu area. Embedding allocator is our favorite; + * however, on NUMA configurations, it can result in very + * sparse unit mapping and vmalloc area isn't spacious enough + * on 32bit. Use page in that case. */ - ret = -EINVAL; - if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "4k")) { - if (!strcmp(pcpu_chosen_alloc, "lpage")) - ret = setup_pcpu_lpage(static_size, true); - else if (!strcmp(pcpu_chosen_alloc, "embed")) - ret = setup_pcpu_embed(static_size, true); - else - pr_warning("PERCPU: unknown allocator %s " - "specified\n", pcpu_chosen_alloc); - if (ret < 0) - pr_warning("PERCPU: %s allocator failed (%zd), " - "falling back to 4k\n", - pcpu_chosen_alloc, ret); - } - } else { - ret = setup_pcpu_lpage(static_size, false); - if (ret < 0) - ret = setup_pcpu_embed(static_size, false); +#ifdef CONFIG_X86_32 + if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa()) + pcpu_chosen_fc = PCPU_FC_PAGE; +#endif + rc = -EINVAL; + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE; + const size_t dyn_size = PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; + + rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, + dyn_size, atom_size, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + pr_warning("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); } - if (ret < 0) - ret = setup_pcpu_4k(static_size); - if (ret < 0) - panic("cannot allocate static percpu area (%zu bytes, err=%zd)", - static_size, ret); - - pcpu_unit_size = ret; + if (rc < 0) + rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpup_populate_pte); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; + per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9fc1782..0ccb57d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -348,15 +348,12 @@ SECTIONS _end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.eh_frame) } } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e245775..24952fd 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -12,6 +12,7 @@ #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/pfn.h> +#include <linux/percpu.h> #include <asm/e820.h> #include <asm/processor.h> @@ -686,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr, remapped; + unsigned long vaddr; int ret; if (cpa->pfn >= max_pfn_mapped) @@ -744,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa) } #endif - /* - * If the PMD page was partially used for per-cpu remapping, - * the recycled area needs to be split and modified. Because - * the area is always proper subset of a PMD page - * cpa->numpages is guaranteed to be 1 for these areas, so - * there's no need to loop over and check for further remaps. - */ - remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); - if (remapped) { - WARN_ON(cpa->numpages > 1); - alias_cpa = *cpa; - alias_cpa.vaddr = &remapped; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - return 0; } diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 41c159c..921b6ff 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -280,15 +280,6 @@ SECTIONS *(.ResetVector.text) } - /* Sections to be discarded */ - /DISCARD/ : - { - *(.exit.literal) - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - } - .xt.lit : { *(.xt.lit) } .xt.prop : { *(.xt.prop) } @@ -321,4 +312,8 @@ SECTIONS *(.xt.lit) *(.gnu.linkonce.p*) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.exit.literal) } } |