diff options
Diffstat (limited to 'arch')
89 files changed, 594 insertions, 446 deletions
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 058937b..b183416 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -452,10 +452,14 @@ #define __NR_fanotify_init 494 #define __NR_fanotify_mark 495 #define __NR_prlimit64 496 +#define __NR_name_to_handle_at 497 +#define __NR_open_by_handle_at 498 +#define __NR_clock_adjtime 499 +#define __NR_syncfs 500 #ifdef __KERNEL__ -#define NR_SYSCALLS 497 +#define NR_SYSCALLS 501 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index a6a1de9..15f999d 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -498,23 +498,27 @@ sys_call_table: .quad sys_ni_syscall /* sys_timerfd */ .quad sys_eventfd .quad sys_recvmmsg - .quad sys_fallocate /* 480 */ + .quad sys_fallocate /* 480 */ .quad sys_timerfd_create .quad sys_timerfd_settime .quad sys_timerfd_gettime .quad sys_signalfd4 - .quad sys_eventfd2 /* 485 */ + .quad sys_eventfd2 /* 485 */ .quad sys_epoll_create1 .quad sys_dup3 .quad sys_pipe2 .quad sys_inotify_init1 - .quad sys_preadv /* 490 */ + .quad sys_preadv /* 490 */ .quad sys_pwritev .quad sys_rt_tgsigqueueinfo .quad sys_perf_event_open .quad sys_fanotify_init - .quad sys_fanotify_mark /* 495 */ + .quad sys_fanotify_mark /* 495 */ .quad sys_prlimit64 + .quad sys_name_to_handle_at + .quad sys_open_by_handle_at + .quad sys_clock_adjtime + .quad sys_syncfs /* 500 */ .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 918e8e0..818e74e 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = { static inline void register_rpcc_clocksource(long cycle_freq) { - clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4); - clocksource_register(&clocksource_rpcc); + clocksource_register_hz(&clocksource_rpcc, cycle_freq); } #else /* !CONFIG_SMP */ static inline void register_rpcc_clocksource(long cycle_freq) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 8ebbb51..0c6852d 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -74,7 +74,7 @@ ZTEXTADDR := $(CONFIG_ZBOOT_ROM_TEXT) ZBSSADDR := $(CONFIG_ZBOOT_ROM_BSS) else ZTEXTADDR := 0 -ZBSSADDR := ALIGN(4) +ZBSSADDR := ALIGN(8) endif SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index adf583c..49f5b2e 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -179,15 +179,14 @@ not_angel: bl cache_on restart: adr r0, LC0 - ldmia r0, {r1, r2, r3, r5, r6, r9, r11, r12} - ldr sp, [r0, #32] + ldmia r0, {r1, r2, r3, r6, r9, r11, r12} + ldr sp, [r0, #28] /* * We might be running at a different address. We need * to fix up various pointers. */ sub r0, r0, r1 @ calculate the delta offset - add r5, r5, r0 @ _start add r6, r6, r0 @ _edata #ifndef CONFIG_ZBOOT_ROM @@ -206,31 +205,40 @@ restart: adr r0, LC0 /* * Check to see if we will overwrite ourselves. * r4 = final kernel address - * r5 = start of this image * r9 = size of decompressed image * r10 = end of this image, including bss/stack/malloc space if non XIP * We basically want: - * r4 >= r10 -> OK - * r4 + image length <= r5 -> OK + * r4 - 16k page directory >= r10 -> OK + * r4 + image length <= current position (pc) -> OK */ + add r10, r10, #16384 cmp r4, r10 bhs wont_overwrite add r10, r4, r9 - cmp r10, r5 + ARM( cmp r10, pc ) + THUMB( mov lr, pc ) + THUMB( cmp r10, lr ) bls wont_overwrite /* * Relocate ourselves past the end of the decompressed kernel. - * r5 = start of this image * r6 = _edata * r10 = end of the decompressed kernel * Because we always copy ahead, we need to do it from the end and go * backward in case the source and destination overlap. */ - /* Round up to next 256-byte boundary. */ - add r10, r10, #256 + /* + * Bump to the next 256-byte boundary with the size of + * the relocation code added. This avoids overwriting + * ourself when the offset is small. + */ + add r10, r10, #((reloc_code_end - restart + 256) & ~255) bic r10, r10, #255 + /* Get start of code we want to copy and align it down. */ + adr r5, restart + bic r5, r5, #31 + sub r9, r6, r5 @ size to copy add r9, r9, #31 @ rounded up to a multiple bic r9, r9, #31 @ ... of 32 bytes @@ -245,6 +253,11 @@ restart: adr r0, LC0 /* Preserve offset to relocated code. */ sub r6, r9, r6 +#ifndef CONFIG_ZBOOT_ROM + /* cache_clean_flush may use the stack, so relocate it */ + add sp, sp, r6 +#endif + bl cache_clean_flush adr r0, BSYM(restart) @@ -333,7 +346,6 @@ not_relocated: mov r0, #0 LC0: .word LC0 @ r1 .word __bss_start @ r2 .word _end @ r3 - .word _start @ r5 .word _edata @ r6 .word _image_size @ r9 .word _got_start @ r11 @@ -1062,6 +1074,7 @@ memdump: mov r12, r0 #endif .ltorg +reloc_code_end: .align .section ".stack", "aw", %nobits diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in index 5309909..ea80abe 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.in +++ b/arch/arm/boot/compressed/vmlinux.lds.in @@ -54,6 +54,7 @@ SECTIONS .bss : { *(.bss) } _end = .; + . = ALIGN(8); /* the stack must be 64-bit aligned */ .stack : { *(.stack) } .stab 0 : { *(.stab) } diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 885be09..832888d 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -159,7 +159,7 @@ extern unsigned int user_debug; #include <mach/barriers.h> #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) #define mb() do { dsb(); outer_sync(); } while (0) -#define rmb() dmb() +#define rmb() dsb() #define wmb() mb() #else #include <asm/memory.h> diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2bf27f3..8182f45 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -767,12 +767,20 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef CONFIG_HAVE_HW_BREAKPOINT case PTRACE_GETHBPREGS: + if (ptrace_get_breakpoints(child) < 0) + return -ESRCH; + ret = ptrace_gethbpregs(child, addr, (unsigned long __user *)data); + ptrace_put_breakpoints(child); break; case PTRACE_SETHBPREGS: + if (ptrace_get_breakpoints(child) < 0) + return -ESRCH; + ret = ptrace_sethbpregs(child, addr, (unsigned long __user *)data); + ptrace_put_breakpoints(child); break; #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index cb83983..0340224 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -597,19 +597,13 @@ setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, return err; } -static inline void setup_syscall_restart(struct pt_regs *regs) -{ - regs->ARM_r0 = regs->ARM_ORIG_r0; - regs->ARM_pc -= thumb_mode(regs) ? 2 : 4; -} - /* * OK, we're invoking a handler */ static int handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, - struct pt_regs * regs, int syscall) + struct pt_regs * regs) { struct thread_info *thread = current_thread_info(); struct task_struct *tsk = current; @@ -617,26 +611,6 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, int ret; /* - * If we were from a system call, check for system call restarting... - */ - if (syscall) { - switch (regs->ARM_r0) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->ARM_r0 = -EINTR; - break; - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->ARM_r0 = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - setup_syscall_restart(regs); - } - } - - /* * translate the signal */ if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) @@ -685,6 +659,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, */ static void do_signal(struct pt_regs *regs, int syscall) { + unsigned int retval = 0, continue_addr = 0, restart_addr = 0; struct k_sigaction ka; siginfo_t info; int signr; @@ -698,18 +673,61 @@ static void do_signal(struct pt_regs *regs, int syscall) if (!user_mode(regs)) return; + /* + * If we were from a system call, check for system call restarting... + */ + if (syscall) { + continue_addr = regs->ARM_pc; + restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4); + retval = regs->ARM_r0; + + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PSW. + */ + switch (retval) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->ARM_r0 = regs->ARM_ORIG_r0; + regs->ARM_pc = restart_addr; + break; + case -ERESTART_RESTARTBLOCK: + regs->ARM_r0 = -EINTR; + break; + } + } + if (try_to_freeze()) goto no_signal; + /* + * Get the signal to deliver. When running under ptrace, at this + * point the debugger may change all our registers ... + */ signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { sigset_t *oldset; + /* + * Depending on the signal settings we may need to revert the + * decision to restart the system call. But skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->ARM_pc == restart_addr) { + if (retval == -ERESTARTNOHAND + || (retval == -ERESTARTSYS + && !(ka.sa.sa_flags & SA_RESTART))) { + regs->ARM_r0 = -EINTR; + regs->ARM_pc = continue_addr; + } + } + if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = ¤t->saved_sigmask; else oldset = ¤t->blocked; - if (handle_signal(signr, &ka, &info, oldset, regs, syscall) == 0) { + if (handle_signal(signr, &ka, &info, oldset, regs) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -723,11 +741,14 @@ static void do_signal(struct pt_regs *regs, int syscall) } no_signal: - /* - * No signal to deliver to the process - restart the syscall. - */ if (syscall) { - if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) { + /* + * Handle restarting a different system call. As above, + * if a debugger has chosen to restart at a different PC, + * ignore the restart. + */ + if (retval == -ERESTART_RESTARTBLOCK + && regs->ARM_pc == continue_addr) { if (thumb_mode(regs)) { regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE; regs->ARM_pc -= 2; @@ -750,11 +771,6 @@ static void do_signal(struct pt_regs *regs, int syscall) #endif } } - if (regs->ARM_r0 == -ERESTARTNOHAND || - regs->ARM_r0 == -ERESTARTSYS || - regs->ARM_r0 == -ERESTARTNOINTR) { - setup_syscall_restart(regs); - } /* If there's no signal to deliver, we just put the saved sigmask * back. diff --git a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c index b2b1e37..d6e34dd 100644 --- a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c +++ b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c @@ -115,6 +115,7 @@ int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate) sdrc_cs0->rfr_ctrl, sdrc_cs0->actim_ctrla, sdrc_cs0->actim_ctrlb, sdrc_cs0->mr, 0, 0, 0, 0); + clk->rate = rate; return 0; } diff --git a/arch/arm/mach-realview/include/mach/barriers.h b/arch/arm/mach-realview/include/mach/barriers.h index 0c5d749..9a73219 100644 --- a/arch/arm/mach-realview/include/mach/barriers.h +++ b/arch/arm/mach-realview/include/mach/barriers.h @@ -4,5 +4,5 @@ * operation to deadlock the system. */ #define mb() dsb() -#define rmb() dmb() +#define rmb() dsb() #define wmb() mb() diff --git a/arch/arm/mach-tegra/include/mach/barriers.h b/arch/arm/mach-tegra/include/mach/barriers.h index cc11517..425b42e 100644 --- a/arch/arm/mach-tegra/include/mach/barriers.h +++ b/arch/arm/mach-tegra/include/mach/barriers.h @@ -23,7 +23,7 @@ #include <asm/outercache.h> -#define rmb() dmb() +#define rmb() dsb() #define wmb() do { dsb(); outer_sync(); } while (0) #define mb() wmb() diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index e5f6fc4..e591513 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -392,7 +392,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * Convert start_pfn/end_pfn to a struct page pointer. */ start_pg = pfn_to_page(start_pfn - 1) + 1; - end_pg = pfn_to_page(end_pfn); + end_pg = pfn_to_page(end_pfn - 1) + 1; /* * Convert to physical addresses, and @@ -426,6 +426,14 @@ static void __init free_unused_memmap(struct meminfo *mi) bank_start = bank_pfn_start(bank); +#ifdef CONFIG_SPARSEMEM + /* + * Take care not to free memmap entries that don't exist + * due to SPARSEMEM sections which aren't present. + */ + bank_start = min(bank_start, + ALIGN(prev_bank_end, PAGES_PER_SECTION)); +#endif /* * If we had a previous bank, and there is a space * between the current bank and the previous, free it. @@ -440,6 +448,12 @@ static void __init free_unused_memmap(struct meminfo *mi) */ prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES); } + +#ifdef CONFIG_SPARSEMEM + if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION)) + free_memmap(prev_bank_end, + ALIGN(prev_bank_end, PAGES_PER_SECTION)); +#endif } static void __init free_highpages(void) diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c index 8a51fd5..34fc31e 100644 --- a/arch/arm/plat-omap/iommu.c +++ b/arch/arm/plat-omap/iommu.c @@ -793,6 +793,8 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) clk_enable(obj->clk); errs = iommu_report_fault(obj, &da); clk_disable(obj->clk); + if (errs == 0) + return IRQ_HANDLED; /* Fault callback or TLB/PTE Dynamic loading */ if (obj->isr && !obj->isr(obj, da, errs, obj->isr_priv)) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8e256cc..351c80f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -997,9 +997,6 @@ config IRQ_GT641XX config IRQ_GIC bool -config IRQ_CPU_OCTEON - bool - config MIPS_BOARDS_GEN bool @@ -1359,8 +1356,6 @@ config CPU_SB1 config CPU_CAVIUM_OCTEON bool "Cavium Octeon processor" depends on SYS_HAS_CPU_CAVIUM_OCTEON - select IRQ_CPU - select IRQ_CPU_OCTEON select CPU_HAS_PREFETCH select CPU_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_SMP diff --git a/arch/mips/alchemy/devboards/db1x00/board_setup.c b/arch/mips/alchemy/devboards/db1x00/board_setup.c index 05f120f..5c956fe 100644 --- a/arch/mips/alchemy/devboards/db1x00/board_setup.c +++ b/arch/mips/alchemy/devboards/db1x00/board_setup.c @@ -127,13 +127,10 @@ const char *get_system_type(void) void __init board_setup(void) { unsigned long bcsr1, bcsr2; - u32 pin_func; bcsr1 = DB1000_BCSR_PHYS_ADDR; bcsr2 = DB1000_BCSR_PHYS_ADDR + DB1000_BCSR_HEXLED_OFS; - pin_func = 0; - #ifdef CONFIG_MIPS_DB1000 printk(KERN_INFO "AMD Alchemy Au1000/Db1000 Board\n"); #endif @@ -164,12 +161,16 @@ void __init board_setup(void) /* Not valid for Au1550 */ #if defined(CONFIG_IRDA) && \ (defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1100)) - /* Set IRFIRSEL instead of GPIO15 */ - pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF; - au_writel(pin_func, SYS_PINFUNC); - /* Power off until the driver is in use */ - bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK, - BCSR_RESETS_IRDA_MODE_OFF); + { + u32 pin_func; + + /* Set IRFIRSEL instead of GPIO15 */ + pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF; + au_writel(pin_func, SYS_PINFUNC); + /* Power off until the driver is in use */ + bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK, + BCSR_RESETS_IRDA_MODE_OFF); + } #endif bcsr_write(BCSR_PCMCIA, 0); /* turn off PCMCIA power */ @@ -177,31 +178,35 @@ void __init board_setup(void) alchemy_gpio1_input_enable(); #ifdef CONFIG_MIPS_MIRAGE - /* GPIO[20] is output */ - alchemy_gpio_direction_output(20, 0); + { + u32 pin_func; - /* Set GPIO[210:208] instead of SSI_0 */ - pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0; + /* GPIO[20] is output */ + alchemy_gpio_direction_output(20, 0); - /* Set GPIO[215:211] for LEDs */ - pin_func |= 5 << 2; + /* Set GPIO[210:208] instead of SSI_0 */ + pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0; - /* Set GPIO[214:213] for more LEDs */ - pin_func |= 5 << 12; + /* Set GPIO[215:211] for LEDs */ + pin_func |= 5 << 2; - /* Set GPIO[207:200] instead of PCMCIA/LCD */ - pin_func |= SYS_PF_LCD | SYS_PF_PC; - au_writel(pin_func, SYS_PINFUNC); + /* Set GPIO[214:213] for more LEDs */ + pin_func |= 5 << 12; - /* - * Enable speaker amplifier. This should - * be part of the audio driver. - */ - alchemy_gpio_direction_output(209, 1); + /* Set GPIO[207:200] instead of PCMCIA/LCD */ + pin_func |= SYS_PF_LCD | SYS_PF_PC; + au_writel(pin_func, SYS_PINFUNC); - pm_power_off = mirage_power_off; - _machine_halt = mirage_power_off; - _machine_restart = (void(*)(char *))mips_softreset; + /* + * Enable speaker amplifier. This should + * be part of the audio driver. + */ + alchemy_gpio_direction_output(209, 1); + + pm_power_off = mirage_power_off; + _machine_halt = mirage_power_off; + _machine_restart = (void(*)(char *))mips_softreset; + } #endif #ifdef CONFIG_MIPS_BOSPORUS diff --git a/arch/mips/alchemy/xxs1500/init.c b/arch/mips/alchemy/xxs1500/init.c index 15125c2..34a90a4 100644 --- a/arch/mips/alchemy/xxs1500/init.c +++ b/arch/mips/alchemy/xxs1500/init.c @@ -51,10 +51,9 @@ void __init prom_init(void) prom_init_cmdline(); memsize_str = prom_getenv("memsize"); - if (!memsize_str) + if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize)) memsize = 0x04000000; - else - strict_strtoul(memsize_str, 0, &memsize); + add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c index 425dfa5..bb571bc 100644 --- a/arch/mips/ar7/gpio.c +++ b/arch/mips/ar7/gpio.c @@ -325,9 +325,7 @@ int __init ar7_gpio_init(void) size = 0x1f; } - gpch->regs = ioremap_nocache(AR7_REGS_GPIO, - AR7_REGS_GPIO + 0x10); - + gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size); if (!gpch->regs) { printk(KERN_ERR "%s: failed to ioremap regs\n", gpch->chip.label); diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 88c9d963..9a62436 100644 --- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -16,8 +16,8 @@ int main(int argc, char *argv[]) { + unsigned long long vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr; struct stat sb; - uint64_t vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr; if (argc != 3) { fprintf(stderr, "Usage: %s <pathname> <vmlinux_load_addr>\n", diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index caae228..cad555e 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -1,11 +1,7 @@ -config CAVIUM_OCTEON_SPECIFIC_OPTIONS - bool "Enable Octeon specific options" - depends on CPU_CAVIUM_OCTEON - default "y" +if CPU_CAVIUM_OCTEON config CAVIUM_CN63XXP1 bool "Enable CN63XXP1 errata worarounds" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "n" help The CN63XXP1 chip requires build time workarounds to @@ -16,7 +12,6 @@ config CAVIUM_CN63XXP1 config CAVIUM_OCTEON_2ND_KERNEL bool "Build the kernel to be used as a 2nd kernel on the same chip" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "n" help This option configures this kernel to be linked at a different @@ -26,7 +21,6 @@ config CAVIUM_OCTEON_2ND_KERNEL config CAVIUM_OCTEON_HW_FIX_UNALIGNED bool "Enable hardware fixups of unaligned loads and stores" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "y" help Configure the Octeon hardware to automatically fix unaligned loads @@ -38,7 +32,6 @@ config CAVIUM_OCTEON_HW_FIX_UNALIGNED config CAVIUM_OCTEON_CVMSEG_SIZE int "Number of L1 cache lines reserved for CVMSEG memory" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS range 0 54 default 1 help @@ -50,7 +43,6 @@ config CAVIUM_OCTEON_CVMSEG_SIZE config CAVIUM_OCTEON_LOCK_L2 bool "Lock often used kernel code in the L2" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "y" help Enable locking parts of the kernel into the L2 cache. @@ -93,7 +85,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_STATIC - depends on CPU_CAVIUM_OCTEON config CAVIUM_OCTEON_HELPER def_bool y @@ -107,6 +98,8 @@ config NEED_SG_DMA_LENGTH config SWIOTLB def_bool y - depends on CPU_CAVIUM_OCTEON select IOMMU_HELPER select NEED_SG_DMA_LENGTH + + +endif # CPU_CAVIUM_OCTEON diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h index 650ac9b..b4db69f 100644 --- a/arch/mips/include/asm/cache.h +++ b/arch/mips/include/asm/cache.h @@ -17,6 +17,6 @@ #define SMP_CACHE_SHIFT L1_CACHE_SHIFT #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) #endif /* _ASM_CACHE_H */ diff --git a/arch/mips/include/asm/cevt-r4k.h b/arch/mips/include/asm/cevt-r4k.h index fa4328f..65f9bdd 100644 --- a/arch/mips/include/asm/cevt-r4k.h +++ b/arch/mips/include/asm/cevt-r4k.h @@ -14,6 +14,9 @@ #ifndef __ASM_CEVT_R4K_H #define __ASM_CEVT_R4K_H +#include <linux/clockchips.h> +#include <asm/time.h> + DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device); void mips_event_handler(struct clock_event_device *dev); diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 655f849..7aa37dd 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -5,7 +5,9 @@ #include <asm/cache.h> #include <asm-generic/dma-coherent.h> +#ifndef CONFIG_SGI_IP27 /* Kludge to fix 2.6.39 build for IP27 */ #include <dma-coherence.h> +#endif extern struct dma_map_ops *mips_dma_map_ops; diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index f5e85601..c565b7c 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -70,6 +70,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + flush_tlb_mm(vma->vm_mm); } static inline int huge_pte_none(pte_t pte) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h index 32978d3..ed72e6a 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h @@ -88,7 +88,7 @@ struct bcm_tag { char kernel_crc[CRC_LEN]; /* 228-235: Unused at present */ char reserved1[8]; - /* 236-239: CRC32 of header excluding tagVersion */ + /* 236-239: CRC32 of header excluding last 20 bytes */ char header_crc[CRC_LEN]; /* 240-255: Unused at present */ char reserved2[16]; diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 9ce9f64..2d8e447 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -211,7 +211,7 @@ EXPORT_SYMBOL(vdma_free); */ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) { - int first, pages, npages; + int first, pages; if (laddr > 0xffffff) { if (vdma_debug) @@ -228,8 +228,7 @@ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) return -EINVAL; /* invalid physical address */ } - npages = pages = - (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; + pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; first = laddr >> 12; if (vdma_debug) printk("vdma_remap: first=%x, pages=%x\n", first, pages); diff --git a/arch/mips/jz4740/dma.c b/arch/mips/jz4740/dma.c index 5ebe75a..d7feb89 100644 --- a/arch/mips/jz4740/dma.c +++ b/arch/mips/jz4740/dma.c @@ -242,9 +242,7 @@ EXPORT_SYMBOL_GPL(jz4740_dma_get_residue); static void jz4740_dma_chan_irq(struct jz4740_dma_chan *dma) { - uint32_t status; - - status = jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id)); + (void) jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id)); jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0, JZ_DMA_STATUS_CTRL_ENABLE | JZ_DMA_STATUS_CTRL_TRANSFER_DONE); diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c index fe01678..eaa853a 100644 --- a/arch/mips/jz4740/time.c +++ b/arch/mips/jz4740/time.c @@ -89,7 +89,7 @@ static int jz4740_clockevent_set_next(unsigned long evt, static struct clock_event_device jz4740_clockevent = { .name = "jz4740-timer", - .features = CLOCK_EVT_FEAT_PERIODIC, + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_next_event = jz4740_clockevent_set_next, .set_mode = jz4740_clockevent_set_mode, .rating = 200, diff --git a/arch/mips/jz4740/timer.c b/arch/mips/jz4740/timer.c index b2c0151..654d5c3 100644 --- a/arch/mips/jz4740/timer.c +++ b/arch/mips/jz4740/timer.c @@ -27,11 +27,13 @@ void jz4740_timer_enable_watchdog(void) { writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR); } +EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog); void jz4740_timer_disable_watchdog(void) { writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET); } +EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog); void __init jz4740_timer_init(void) { diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 94ca2b0..feb8021 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -23,6 +23,7 @@ #define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ #define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ +#define JUMP_RANGE_MASK ((1UL << 28) - 1) #define INSN_NOP 0x00000000 /* nop */ #define INSN_JAL(addr) \ @@ -44,12 +45,12 @@ static inline void ftrace_dyn_arch_init_insns(void) /* jal (ftrace_caller + 8), jump over the first two instruction */ buf = (u32 *)&insn_jal_ftrace_caller; - uasm_i_jal(&buf, (FTRACE_ADDR + 8)); + uasm_i_jal(&buf, (FTRACE_ADDR + 8) & JUMP_RANGE_MASK); #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* j ftrace_graph_caller */ buf = (u32 *)&insn_j_ftrace_graph_caller; - uasm_i_j(&buf, (unsigned long)ftrace_graph_caller); + uasm_i_j(&buf, (unsigned long)ftrace_graph_caller & JUMP_RANGE_MASK); #endif } diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index d21c388..584e6b5 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -540,8 +540,8 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) secure_computing(regs->regs[2]); if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]), - regs->regs[2]); + audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]), + -regs->regs[2]); if (!(current->ptrace & PT_PTRACED)) goto out; diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 7f5468b..7f1377e 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -565,7 +565,7 @@ einval: li v0, -ENOSYS sys sys_ioprio_get 2 /* 4315 */ sys sys_utimensat 4 sys sys_signalfd 3 - sys sys_ni_syscall 0 + sys sys_ni_syscall 0 /* was timerfd */ sys sys_eventfd 1 sys sys_fallocate 6 /* 4320 */ sys sys_timerfd_create 2 diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index a2e1fcb..7c0ef7f 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -404,7 +404,7 @@ sys_call_table: PTR sys_ioprio_get PTR sys_utimensat /* 5275 */ PTR sys_signalfd - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys_fallocate PTR sys_timerfd_create /* 5280 */ diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index b2c7624..de6c556 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -403,7 +403,7 @@ EXPORT(sysn32_call_table) PTR sys_ioprio_get PTR compat_sys_utimensat PTR compat_sys_signalfd /* 6280 */ - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys_fallocate PTR sys_timerfd_create diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 049a9c8..b0541dd 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -522,7 +522,7 @@ sys_call_table: PTR sys_ioprio_get /* 4315 */ PTR compat_sys_utimensat PTR compat_sys_signalfd - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys32_fallocate /* 4320 */ PTR sys_timerfd_create diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 71350f7..e9b3af2 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs) unsigned long dvpret = dvpe(); #endif /* CONFIG_MIPS_MT_SMTC */ - notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV); + if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) + sig = 0; console_verbose(); spin_lock_irq(&die_lock); @@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs) mips_mt_regdump(dvpret); #endif /* CONFIG_MIPS_MT_SMTC */ - if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) - sig = 0; - printk("%s[#%d]:\n", str, ++die_counter); show_registers(regs); add_taint(TAINT_DIE); diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 832afbb..e4b0b0b 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -74,6 +74,7 @@ SECTIONS INIT_TASK_DATA(PAGE_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) + READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) DATA_DATA CONSTRUCTORS } diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c index 11b193f..d93830a 100644 --- a/arch/mips/loongson/common/env.c +++ b/arch/mips/loongson/common/env.c @@ -29,9 +29,10 @@ unsigned long memsize, highmemsize; #define parse_even_earlier(res, option, p) \ do { \ - int ret; \ + unsigned int tmp __maybe_unused; \ + \ if (strncmp(option, (char *)p, strlen(option)) == 0) \ - ret = strict_strtol((char *)p + strlen(option"="), 10, &res); \ + tmp = strict_strtol((char *)p + strlen(option"="), 10, &res); \ } while (0) void __init prom_init_env(void) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index b4923a7..71bddf8 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1075,7 +1075,6 @@ static int __cpuinit probe_scache(void) unsigned long flags, addr, begin, end, pow2; unsigned int config = read_c0_config(); struct cpuinfo_mips *c = ¤t_cpu_data; - int tmp; if (config & CONF_SC) return 0; @@ -1108,7 +1107,6 @@ static int __cpuinit probe_scache(void) /* Now search for the wrap around point. */ pow2 = (128 * 1024); - tmp = 0; for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) { cache_op(Index_Load_Tag_SD, addr); __asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */ diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 5ef294f..f5734c2 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1151,8 +1151,8 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) struct uasm_reloc *r = relocs; u32 *f; unsigned int final_len; - struct mips_huge_tlb_info htlb_info; - enum vmalloc64_mode vmalloc_mode; + struct mips_huge_tlb_info htlb_info __maybe_unused; + enum vmalloc64_mode vmalloc_mode __maybe_unused; memset(tlb_handler, 0, sizeof(tlb_handler)); memset(labels, 0, sizeof(labels)); diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c index 414f0c9..31180c3 100644 --- a/arch/mips/mti-malta/malta-init.c +++ b/arch/mips/mti-malta/malta-init.c @@ -193,8 +193,6 @@ extern struct plat_smp_ops msmtc_smp_ops; void __init prom_init(void) { - int result; - prom_argc = fw_arg0; _prom_argv = (int *) fw_arg1; _prom_envp = (int *) fw_arg2; @@ -360,20 +358,14 @@ void __init prom_init(void) #ifdef CONFIG_SERIAL_8250_CONSOLE console_config(); #endif - /* Early detection of CMP support */ - result = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ); - #ifdef CONFIG_MIPS_CMP - if (result) + /* Early detection of CMP support */ + if (gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ)) register_smp_ops(&cmp_smp_ops); + else #endif #ifdef CONFIG_MIPS_MT_SMP -#ifdef CONFIG_MIPS_CMP - if (!result) register_smp_ops(&vsmp_smp_ops); -#else - register_smp_ops(&vsmp_smp_ops); -#endif #endif #ifdef CONFIG_MIPS_MT_SMTC register_smp_ops(&msmtc_smp_ops); diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 9027061..e85c977 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -56,7 +56,6 @@ static DEFINE_RAW_SPINLOCK(mips_irq_lock); static inline int mips_pcibios_iack(void) { int irq; - u32 dummy; /* * Determine highest priority pending interrupt by performing @@ -83,7 +82,7 @@ static inline int mips_pcibios_iack(void) BONITO_PCIMAP_CFG = 0x20000; /* Flush Bonito register block */ - dummy = BONITO_PCIMAP_CFG; + (void) BONITO_PCIMAP_CFG; iob(); /* sync */ irq = __raw_readl((u32 *)_pcictrl_bonito_pcicfg); diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c index f9b9dcd..98fd009 100644 --- a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c +++ b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c @@ -97,7 +97,7 @@ static int msp_per_irq_set_affinity(struct irq_data *d, static struct irq_chip msp_per_irq_controller = { .name = "MSP_PER", - .irq_enable = unmask_per_irq. + .irq_enable = unmask_per_irq, .irq_disable = mask_per_irq, .irq_ack = msp_per_irq_ack, #ifdef CONFIG_SMP diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S index dbb5c7b..f8a751c 100644 --- a/arch/mips/power/hibernate.S +++ b/arch/mips/power/hibernate.S @@ -35,7 +35,7 @@ LEAF(swsusp_arch_resume) 0: PTR_L t1, PBE_ADDRESS(t0) /* source */ PTR_L t2, PBE_ORIG_ADDRESS(t0) /* destination */ - PTR_ADDIU t3, t1, PAGE_SIZE + PTR_ADDU t3, t1, PAGE_SIZE 1: REG_L t8, (t1) REG_S t8, (t2) diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index 37de05d..6c47dfe 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -185,7 +185,7 @@ int __init rb532_gpio_init(void) struct resource *r; r = rb532_gpio_reg0_res; - rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start); + rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r)); if (!rb532_gpio_chip->regbase) { printk(KERN_ERR "rb532: cannot remap GPIO register 0\n"); diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c index deddbf0..698904d 100644 --- a/arch/mips/sgi-ip22/ip22-platform.c +++ b/arch/mips/sgi-ip22/ip22-platform.c @@ -132,7 +132,7 @@ static struct platform_device eth1_device = { */ static int __init sgiseeq_devinit(void) { - unsigned int tmp; + unsigned int pbdma __maybe_unused; int res, i; eth0_pd.hpc = hpc3c0; @@ -151,7 +151,7 @@ static int __init sgiseeq_devinit(void) /* Second HPC is missing? */ if (ip22_is_fullhouse() || - get_dbe(tmp, (unsigned int *)&hpc3c1->pbdma[1])) + get_dbe(pbdma, (unsigned int *)&hpc3c1->pbdma[1])) return 0; sgimc->giopar |= SGIMC_GIOPAR_MASTEREXP1 | SGIMC_GIOPAR_EXP164 | diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c index 603fc91..1a94c98 100644 --- a/arch/mips/sgi-ip22/ip22-time.c +++ b/arch/mips/sgi-ip22/ip22-time.c @@ -32,7 +32,7 @@ static unsigned long dosample(void) { u32 ct0, ct1; - u8 msb, lsb; + u8 msb; /* Start the counter. */ sgint->tcword = (SGINT_TCWORD_CNT2 | SGINT_TCWORD_CALL | @@ -46,7 +46,7 @@ static unsigned long dosample(void) /* Latch and spin until top byte of counter2 is zero */ do { writeb(SGINT_TCWORD_CNT2 | SGINT_TCWORD_CLAT, &sgint->tcword); - lsb = readb(&sgint->tcnt2); + (void) readb(&sgint->tcnt2); msb = readb(&sgint->tcnt2); ct1 = read_c0_count(); } while (msb); diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c index a1fa4ab..cd0d5b0 100644 --- a/arch/mips/sgi-ip27/ip27-hubio.c +++ b/arch/mips/sgi-ip27/ip27-hubio.c @@ -29,7 +29,6 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, unsigned long xtalk_addr, size_t size) { nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); - volatile hubreg_t junk; unsigned i; /* use small-window mapping if possible */ @@ -64,7 +63,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, * after we write it. */ IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr); - junk = HUB_L(IIO_ITTE_GET(nasid, i)); + (void) HUB_L(IIO_ITTE_GET(nasid, i)); return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE); } diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c index c3d30a8..1d1919a 100644 --- a/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/arch/mips/sgi-ip27/ip27-klnuma.c @@ -54,11 +54,8 @@ void __init setup_replication_mask(void) static __init void set_ktext_source(nasid_t client_nasid, nasid_t server_nasid) { - cnodeid_t client_cnode; kern_vars_t *kvp; - client_cnode = NASID_TO_COMPACT_NODEID(client_nasid); - kvp = &hub_data(client_nasid)->kern_vars; KERN_VARS_ADDR(client_nasid) = (unsigned long)kvp; diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index c76151b..0904d4d 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -95,7 +95,7 @@ static void __init sni_a20r_timer_setup(void) static __init unsigned long dosample(void) { u32 ct0, ct1; - volatile u8 msb, lsb; + volatile u8 msb; /* Start the counter. */ outb_p(0x34, 0x43); @@ -108,7 +108,7 @@ static __init unsigned long dosample(void) /* Latch and spin until top byte of counter0 is zero */ do { outb(0x00, 0x43); - lsb = inb(0x40); + (void) inb(0x40); msb = inb(0x40); ct1 = read_c0_count(); } while (msb); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 55613e3..a6ae1cf 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -933,12 +933,16 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if (data && !(data & DABR_TRANSLATION)) return -EIO; #ifdef CONFIG_HAVE_HW_BREAKPOINT + if (ptrace_get_breakpoints(task) < 0) + return -ESRCH; + bp = thread->ptrace_bps[0]; if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } + ptrace_put_breakpoints(task); return 0; } if (bp) { @@ -948,9 +952,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, (DABR_DATA_WRITE | DABR_DATA_READ), &attr.bp_type); ret = modify_user_hw_breakpoint(bp, &attr); - if (ret) + if (ret) { + ptrace_put_breakpoints(task); return ret; + } thread->ptrace_bps[0] = bp; + ptrace_put_breakpoints(task); thread->dabr = data; return 0; } @@ -965,9 +972,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ptrace_triggered, task); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; + ptrace_put_breakpoints(task); return PTR_ERR(bp); } + ptrace_put_breakpoints(task); + #endif /* CONFIG_HAVE_HW_BREAKPOINT */ /* Move contents to the DABR register */ diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 1882729..104faa8 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; +static struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct resource res; struct pmc_type *type; int ret = 0; - if (!ofdev->dev.of_match) + match = of_match_device(pmc_match, &ofdev->dev); + if (!match) return -EINVAL; - type = ofdev->dev.of_match->data; + type = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index d5679dc..01cd2f0 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi, return 0; } +static const struct of_device_id fsl_of_msi_ids[]; static int __devinit fsl_of_msi_probe(struct platform_device *dev) { + const struct of_device_id *match; struct fsl_msi *msi; struct resource res; int err, i, j, irq_index, count; @@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) u32 offset; static const u32 all_avail[] = { 0, NR_MSI_IRQS }; - if (!dev->dev.of_match) + match = of_match_device(fsl_of_msi_ids, &dev->dev); + if (!match) return -EINVAL; - features = dev->dev.of_match->data; + features = match->data; printk(KERN_DEBUG "Setting up Freescale MSI support\n"); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 72b2e2f..7e91c58 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -9,9 +9,22 @@ #define _ASM_S390_DIAG_H /* - * Diagnose 10: Release pages + * Diagnose 10: Release page range */ -extern void diag10(unsigned long addr); +static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) +{ + unsigned long start_addr, end_addr; + + start_addr = start_pfn << PAGE_SHIFT; + end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT; + + asm volatile( + "0: diag %0,%1,0x10\n" + "1:\n" + EX_TABLE(0b, 1b) + EX_TABLE(1b, 1b) + : : "a" (start_addr), "a" (end_addr)); +} /* * Diagnose 14: Input spool file manipulation diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index a6f0e7c..8c277ca 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -23,7 +23,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm->context.alloc_pgste) { + if (current->mm && current->mm->context.alloc_pgste) { /* * alloc_pgste indicates, that any NEW context will be created * with extended page tables. The old context is unchanged. The diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index c032d11..8237fc0 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -9,27 +9,6 @@ #include <asm/diag.h> /* - * Diagnose 10: Release pages - */ -void diag10(unsigned long addr) -{ - if (addr >= 0x7ff00000) - return; - asm volatile( -#ifdef CONFIG_64BIT - " sam31\n" - " diag %0,%0,0x10\n" - "0: sam64\n" -#else - " diag %0,%0,0x10\n" - "0:\n" -#endif - EX_TABLE(0b, 0b) - : : "a" (addr)); -} -EXPORT_SYMBOL(diag10); - -/* * Diagnose 14: Input spool file manipulation */ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index c83726c..3d4a78f 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -672,6 +672,7 @@ static struct insn opcode_b2[] = { { "rp", 0x77, INSTR_S_RD }, { "stcke", 0x78, INSTR_S_RD }, { "sacf", 0x79, INSTR_S_RD }, + { "spp", 0x80, INSTR_S_RD }, { "stsi", 0x7d, INSTR_S_RD }, { "srnm", 0x99, INSTR_S_RD }, { "stfpc", 0x9c, INSTR_S_RD }, diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 648f642..1b67fc6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -836,7 +836,7 @@ restart_base: stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on basr %r14,0 l %r14,restart_addr-.(%r14) - br %r14 # branch to start_secondary + basr %r14,%r14 # branch to start_secondary restart_addr: .long start_secondary .align 8 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9d3603d..9fd8645 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -841,7 +841,7 @@ restart_base: mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on - jg start_secondary + brasl %r14,start_secondary .align 8 restart_vtime: .long 0x7fffffff,0xffffffff diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index c66ffd8..1f1dba9 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -91,7 +91,7 @@ static long cmm_alloc_pages(long nr, long *counter, } else free_page((unsigned long) npa); } - diag10(addr); + diag10_range(addr >> PAGE_SHIFT, 1); pa->pages[pa->index++] = addr; (*counter)++; spin_unlock(&cmm_lock); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 4952872..33cbd37 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -1021,20 +1021,14 @@ deallocate_exit: return rc; } -long hwsampler_query_min_interval(void) +unsigned long hwsampler_query_min_interval(void) { - if (min_sampler_rate) - return min_sampler_rate; - else - return -EINVAL; + return min_sampler_rate; } -long hwsampler_query_max_interval(void) +unsigned long hwsampler_query_max_interval(void) { - if (max_sampler_rate) - return max_sampler_rate; - else - return -EINVAL; + return max_sampler_rate; } unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h index 8c72b59..1912f3b 100644 --- a/arch/s390/oprofile/hwsampler.h +++ b/arch/s390/oprofile/hwsampler.h @@ -102,8 +102,8 @@ int hwsampler_setup(void); int hwsampler_shutdown(void); int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); int hwsampler_deallocate(void); -long hwsampler_query_min_interval(void); -long hwsampler_query_max_interval(void); +unsigned long hwsampler_query_min_interval(void); +unsigned long hwsampler_query_max_interval(void); int hwsampler_start_all(unsigned long interval); int hwsampler_stop_all(void); int hwsampler_deactivate(unsigned int cpu); diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index c63d7e5..5995e9b 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -145,15 +145,11 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) * create hwsampler files only if hwsampler_setup() succeeds. */ oprofile_min_interval = hwsampler_query_min_interval(); - if (oprofile_min_interval < 0) { - oprofile_min_interval = 0; + if (oprofile_min_interval == 0) return -ENODEV; - } oprofile_max_interval = hwsampler_query_max_interval(); - if (oprofile_max_interval < 0) { - oprofile_max_interval = 0; + if (oprofile_max_interval == 0) return -ENODEV; - } if (oprofile_timer_init(ops)) return -ENODEV; diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 2130ca6..3d7b209 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -117,7 +117,11 @@ void user_enable_single_step(struct task_struct *child) set_tsk_thread_flag(child, TIF_SINGLESTEP); + if (ptrace_get_breakpoints(child) < 0) + return; + set_single_step(child, pc); + ptrace_put_breakpoints(child); } void user_disable_single_step(struct task_struct *child) diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index f679c57..1e34f29 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata apc_match[] = { +static struct of_device_id apc_match[] = { { .name = APC_OBPNAME, }, diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c index 948068a..d1840db 100644 --- a/arch/sparc/kernel/pci_sabre.c +++ b/arch/sparc/kernel/pci_sabre.c @@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm, sabre_scan_bus(pbm, &op->dev); } +static const struct of_device_id sabre_match[]; static int __devinit sabre_probe(struct platform_device *op) { + const struct of_device_id *match; const struct linux_prom64_registers *pr_regs; struct device_node *dp = op->dev.of_node; struct pci_pbm_info *pbm; @@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op) const u32 *vdma; u64 clear_irq; - hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL); + match = of_match_device(sabre_match, &op->dev); + hummingbird_p = match && (match->data != NULL); if (!hummingbird_p) { struct device_node *cpu_dp; diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index fecfcb2..283fbc3 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -1458,11 +1458,15 @@ out_err: return err; } +static const struct of_device_id schizo_match[]; static int __devinit schizo_probe(struct platform_device *op) { - if (!op->dev.of_match) + const struct of_device_id *match; + + match = of_match_device(schizo_match, &op->dev); + if (!match) return -EINVAL; - return __schizo_init(op, (unsigned long) op->dev.of_match->data); + return __schizo_init(op, (unsigned long)match->data); } /* The ordering of this table is very important. Some Tomatillo diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c index 93d7b44..6a585d3 100644 --- a/arch/sparc/kernel/pmc.c +++ b/arch/sparc/kernel/pmc.c @@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata pmc_match[] = { +static struct of_device_id pmc_match[] = { { .name = PMC_OBPNAME, }, diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 91c10fb..850a136 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -53,6 +53,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE; void __cpuinit smp_store_cpu_info(int id) { int cpu_node; + int mid; cpu_data(id).udelay_val = loops_per_jiffy; @@ -60,10 +61,13 @@ void __cpuinit smp_store_cpu_info(int id) cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); cpu_data(id).prom_node = cpu_node; - cpu_data(id).mid = cpu_get_hwmid(cpu_node); + mid = cpu_get_hwmid(cpu_node); - if (cpu_data(id).mid < 0) - panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); + if (mid < 0) { + printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08d", id, cpu_node); + mid = 0; + } + cpu_data(id).mid = mid; } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 4e23639..96046a4 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata clock_match[] = { +static struct of_device_id clock_match[] = { { .name = "eeprom", }, diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 3632cb3..0084c33 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S @@ -289,10 +289,16 @@ cc_end_cruft: /* Also, handle the alignment code out of band. */ cc_dword_align: - cmp %g1, 6 - bl,a ccte + cmp %g1, 16 + bge 1f + srl %g1, 1, %o3 +2: cmp %o3, 0 + be,a ccte andcc %g1, 0xf, %o3 - andcc %o0, 0x1, %g0 + andcc %o3, %o0, %g0 ! Check %o0 only (%o1 has the same last 2 bits) + be,a 2b + srl %o3, 1, %o3 +1: andcc %o0, 0x1, %g0 bne ccslow andcc %o0, 0x2, %g0 be 1f diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 6ea7797..42827ca 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <stdlib.h> +#include <unistd.h> #include <errno.h> #include <signal.h> #include <string.h> @@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len) host.release, host.version, host.machine); } +/* + * We cannot use glibc's abort(). It makes use of tgkill() which + * has no effect within UML's kernel threads. + * After that glibc would execute an invalid instruction to kill + * the calling process and UML crashes with SIGSEGV. + */ +static inline void __attribute__ ((noreturn)) uml_abort(void) +{ + sigset_t sig; + + fflush(NULL); + + if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT)) + sigprocmask(SIG_UNBLOCK, &sig, 0); + + for (;;) + if (kill(getpid(), SIGABRT) < 0) + exit(127); +} + void os_dump_core(void) { int pid; @@ -116,5 +137,5 @@ void os_dump_core(void) while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) os_kill_ptraced_process(pid, 0); - abort(); + uml_abort(); } diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index d87988b..34595d5 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -78,6 +78,7 @@ #define APIC_DEST_LOGICAL 0x00800 #define APIC_DEST_PHYSICAL 0x00000 #define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 #define APIC_DM_LOWEST 0x00100 #define APIC_DM_SMI 0x00200 #define APIC_DM_REMRD 0x00300 diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7db7723..d56187c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); +extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 extern void native_pagetable_setup_start(pgd_t *base); extern void native_pagetable_setup_done(pgd_t *base); diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 3e094af..130f1ee 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -94,6 +94,8 @@ /* after this # consecutive successes, bump up the throttle if it was lowered */ #define COMPLETE_THRESHOLD 5 +#define UV_LB_SUBNODEID 0x10 + /* * number of entries in the destination side payload queue */ @@ -124,7 +126,7 @@ * The distribution specification (32 bytes) is interpreted as a 256-bit * distribution vector. Adjacent bits correspond to consecutive even numbered * nodeIDs. The result of adding the index of a given bit to the 15-bit - * 'base_dest_nodeid' field of the header corresponds to the + * 'base_dest_nasid' field of the header corresponds to the * destination nodeID associated with that specified bit. */ struct bau_target_uvhubmask { @@ -176,7 +178,7 @@ struct bau_msg_payload { struct bau_msg_header { unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ /* bits 5:0 */ - unsigned int base_dest_nodeid:15; /* nasid of the */ + unsigned int base_dest_nasid:15; /* nasid of the */ /* bits 20:6 */ /* first bit in uvhub map */ unsigned int command:8; /* message type */ /* bits 28:21 */ @@ -378,6 +380,10 @@ struct ptc_stats { unsigned long d_rcanceled; /* number of messages canceled by resets */ }; +struct hub_and_pnode { + short uvhub; + short pnode; +}; /* * one per-cpu; to locate the software tables */ @@ -399,10 +405,12 @@ struct bau_control { int baudisabled; int set_bau_off; short cpu; + short osnode; short uvhub_cpu; short uvhub; short cpus_in_socket; short cpus_in_uvhub; + short partition_base_pnode; unsigned short message_number; unsigned short uvhub_quiesce; short socket_acknowledge_count[DEST_Q_SIZE]; @@ -422,15 +430,16 @@ struct bau_control { int congested_period; cycles_t period_time; long period_requests; + struct hub_and_pnode *target_hub_and_pnode; }; static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) { return constant_test_bit(uvhub, &dstp->bits[0]); } -static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) +static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) { - __set_bit(uvhub, &dstp->bits[0]); + __set_bit(pnode, &dstp->bits[0]); } static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, int nbits) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a501741..4298002 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -398,6 +398,8 @@ struct uv_blade_info { unsigned short nr_online_cpus; unsigned short pnode; short memory_nid; + spinlock_t nmi_lock; + unsigned long nmi_count; }; extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 20cafea..f5bb64a 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_MMRS_H @@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u { } s; }; +/* ========================================================================= */ +/* UVH_SCRATCH5 */ +/* ========================================================================= */ +#define UVH_SCRATCH5 0x2d0200UL +#define UVH_SCRATCH5_32 0x00778 + +#define UVH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL +union uvh_scratch5_u { + unsigned long v; + struct uvh_scratch5_s { + unsigned long scratch5 : 64; /* RW, W1CS */ + } s; +}; #endif /* __ASM_UV_MMRS_X86_H__ */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 643ebf2..d3d8590 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -68,6 +68,17 @@ struct x86_init_oem { }; /** + * struct x86_init_mapping - platform specific initial kernel pagetable setup + * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage + * + * For more details on the purpose of this hook, look in + * init_memory_mapping and the commit that added it. + */ +struct x86_init_mapping { + void (*pagetable_reserve)(u64 start, u64 end); +}; + +/** * struct x86_init_paging - platform specific paging functions * @pagetable_setup_start: platform specific pre paging_init() call * @pagetable_setup_done: platform specific post paging_init() call @@ -123,6 +134,7 @@ struct x86_init_ops { struct x86_init_mpparse mpparse; struct x86_init_irqs irqs; struct x86_init_oem oem; + struct x86_init_mapping mapping; struct x86_init_paging paging; struct x86_init_timers timers; struct x86_init_iommu iommu; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 33b10a0..7acd2d2 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -37,6 +37,13 @@ #include <asm/smp.h> #include <asm/x86_init.h> #include <asm/emergency-restart.h> +#include <asm/nmi.h> + +/* BMC sets a bit this MMR non-zero before sending an NMI */ +#define UVH_NMI_MMR UVH_SCRATCH5 +#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) +#define UV_NMI_PENDING_MASK (1UL << 63) +DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); DEFINE_PER_CPU(int, x2apic_extra_bits); @@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { + unsigned long real_uv_nmi; + int bid; + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) /* do nothing if entering the crash kernel */ return NOTIFY_OK; + /* - * Use a lock so only one cpu prints at a time - * to prevent intermixed output. + * Each blade has an MMR that indicates when an NMI has been sent + * to cpus on the blade. If an NMI is detected, atomically + * clear the MMR and update a per-blade NMI count used to + * cause each cpu on the blade to notice a new NMI. + */ + bid = uv_numa_blade_id(); + real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); + + if (unlikely(real_uv_nmi)) { + spin_lock(&uv_blade_info[bid].nmi_lock); + real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); + if (real_uv_nmi) { + uv_blade_info[bid].nmi_count++; + uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); + } + spin_unlock(&uv_blade_info[bid].nmi_lock); + } + + if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) + return NOTIFY_DONE; + + __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; + + /* + * Use a lock so only one cpu prints at a time. + * This prevents intermixed output. */ spin_lock(&uv_nmi_lock); - pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); + pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); dump_stack(); spin_unlock(&uv_nmi_lock); @@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) } static struct notifier_block uv_dump_stack_nmi_nb = { - .notifier_call = uv_handle_nmi + .notifier_call = uv_handle_nmi, + .priority = NMI_LOCAL_LOW_PRIOR - 1, }; void uv_register_nmi_notifier(void) @@ -720,8 +756,9 @@ void __init uv_system_init(void) printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = kmalloc(bytes, GFP_KERNEL); + uv_blade_info = kzalloc(bytes, GFP_KERNEL); BUG_ON(!uv_blade_info); + for (blade = 0; blade < uv_num_possible_blades(); blade++) uv_blade_info[blade].memory_nid = -1; @@ -747,6 +784,7 @@ void __init uv_system_init(void) uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + spin_lock_init(&uv_blade_info[blade].nmi_lock); max_pnode = max(pnode, max_pnode); blade++; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bb9eb29..6f9d1f6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) #endif /* As a rule processors have APIC timer running in deep C states */ - if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) + if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) set_cpu_cap(c, X86_FEATURE_ARAT); /* @@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev); */ const int amd_erratum_400[] = - AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf), + AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); EXPORT_SYMBOL_GPL(amd_erratum_400); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 167f97b..bb0adad 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -509,6 +509,7 @@ recurse: out_free: if (b) { kobject_put(&b->kobj); + list_del(&b->miscj); kfree(b); } return err; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6f8c5e9..0f03446 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = lvtthmr_init; /* * The initial value of thermal LVT entries on all APs always reads * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI * sequence to them and LVT registers are reset to 0s except for * the mask bits which are set to 1s when APs receive INIT IPI. - * Always restore the value that BIOS has programmed on AP based on - * BSP's info we saved since BIOS is always setting the same value - * for all threads/cores + * If BIOS takes over the thermal interrupt and sets its interrupt + * delivery mode to SMI (not fixed), it restores the value that the + * BIOS has programmed on AP based on BSP's info we saved since BIOS + * is always setting the same value for all threads/cores. */ - apic_write(APIC_LVTTHMR, lvtthmr_init); + if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) + apic_write(APIC_LVTTHMR, lvtthmr_init); - h = lvtthmr_init; if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index e61539b..447a28d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids }, }, [ C(LL ) ] = { - /* - * TBD: Need Off-core Response Performance Monitoring support - */ [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_WRITE) ] = { - /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, }, [ C(DTLB) ] = { @@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids }, [ C(LL ) ] = { [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, /* * Use RFO, not WRITEBACK, because a write miss would typically occur * on RFO. */ [ C(OP_WRITE) ] = { - /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01bb, - /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01bb, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, }, [ C(DTLB) ] = { @@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids }; /* - * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 + * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; + * See IA32 SDM Vol 3B 30.6.1.3 */ -#define DMND_DATA_RD (1 << 0) -#define DMND_RFO (1 << 1) -#define DMND_WB (1 << 3) -#define PF_DATA_RD (1 << 4) -#define PF_DATA_RFO (1 << 5) -#define RESP_UNCORE_HIT (1 << 8) -#define RESP_MISS (0xf600) /* non uncore hit */ +#define NHM_DMND_DATA_RD (1 << 0) +#define NHM_DMND_RFO (1 << 1) +#define NHM_DMND_IFETCH (1 << 2) +#define NHM_DMND_WB (1 << 3) +#define NHM_PF_DATA_RD (1 << 4) +#define NHM_PF_DATA_RFO (1 << 5) +#define NHM_PF_IFETCH (1 << 6) +#define NHM_OFFCORE_OTHER (1 << 7) +#define NHM_UNCORE_HIT (1 << 8) +#define NHM_OTHER_CORE_HIT_SNP (1 << 9) +#define NHM_OTHER_CORE_HITM (1 << 10) + /* reserved */ +#define NHM_REMOTE_CACHE_FWD (1 << 12) +#define NHM_REMOTE_DRAM (1 << 13) +#define NHM_LOCAL_DRAM (1 << 14) +#define NHM_NON_DRAM (1 << 15) + +#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) + +#define NHM_DMND_READ (NHM_DMND_DATA_RD) +#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) +#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) + +#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) +#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) +#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) static __initconst const u64 nehalem_hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] @@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs { [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, - [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, + [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, - [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, + [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, - [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, + [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, + [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, }, } }; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index c969fd9..f1a6244d 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long flags; /* This is possible if op is under delayed unoptimizing */ if (kprobe_disabled(&op->kp)) return; - preempt_disable(); + local_irq_save(flags); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); } else { @@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); } - preempt_enable_no_resched(); + local_irq_restore(flags); } static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 45892dc..f65e5b5 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) unsigned len, type; struct perf_event *bp; + if (ptrace_get_breakpoints(tsk) < 0) + return -ESRCH; + data &= ~DR_CONTROL_RESERVED; old_dr7 = ptrace_get_dr7(thread->ptrace_bps); restore: @@ -655,6 +658,9 @@ restore: } goto restore; } + + ptrace_put_breakpoints(tsk); + return ((orig_ret < 0) ? orig_ret : rc); } @@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) if (n < HBP_NUM) { struct perf_event *bp; + + if (ptrace_get_breakpoints(tsk) < 0) + return -ESRCH; + bp = thread->ptrace_bps[n]; if (!bp) - return 0; - val = bp->hw.info.address; + val = 0; + else + val = bp->hw.info.address; + + ptrace_put_breakpoints(tsk); } else if (n == 6) { val = thread->debugreg6; } else if (n == 7) { @@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, struct perf_event *bp; struct thread_struct *t = &tsk->thread; struct perf_event_attr attr; + int err = 0; + + if (ptrace_get_breakpoints(tsk) < 0) + return -ESRCH; if (!t->ptrace_bps[nr]) { ptrace_breakpoint_init(&attr); @@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, * writing for the user. And anyway this is the previous * behaviour. */ - if (IS_ERR(bp)) - return PTR_ERR(bp); + if (IS_ERR(bp)) { + err = PTR_ERR(bp); + goto put; + } t->ptrace_bps[nr] = bp; } else { - int err; - bp = t->ptrace_bps[nr]; attr = bp->attr; attr.bp_addr = addr; err = modify_user_hw_breakpoint(bp, &attr); - if (err) - return err; } - - return 0; +put: + ptrace_put_breakpoints(tsk); + return err; } /* diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c11514e9..75ef4b1 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = { .banner = default_banner, }, + .mapping = { + .pagetable_reserve = native_pagetable_reserve, + }, + .paging = { .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 286d289..37b8b0f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); } +void __init native_pagetable_reserve(u64 start, u64 end) +{ + memblock_x86_reserve_range(start, end, "PGTABLE"); +} + struct map_range { unsigned long start; unsigned long end; @@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __flush_tlb_all(); + /* + * Reserve the kernel pagetable pages we used (pgt_buf_start - + * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) + * so that they can be reused for other purposes. + * + * On native it just means calling memblock_x86_reserve_range, on Xen it + * also means marking RW the pagetable pages that we allocated before + * but that haven't been used. + * + * In fact on xen we mark RO the whole range pgt_buf_start - + * pgt_buf_top, because we have to make sure that when + * init_memory_mapping reaches the pagetable pages area, it maps + * RO all the pagetable pages, including the ones that are beyond + * pgt_buf_end at that time. + */ if (!after_bootmem && pgt_buf_end > pgt_buf_start) - memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT, - pgt_buf_end << PAGE_SHIFT, "PGTABLE"); + x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), + PFN_PHYS(pgt_buf_end)); if (!after_bootmem) early_memtest(start, end); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 7cb6424..c58e0ea 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va, unsigned int cpu) { - int tcpu; - int uvhub; int locals = 0; int remotes = 0; int hubs = 0; + int tcpu; + int tpnode; struct bau_desc *bau_desc; struct cpumask *flush_mask; struct ptc_stats *stat; struct bau_control *bcp; struct bau_control *tbcp; + struct hub_and_pnode *hpp; /* kernel was booted 'nobau' */ if (nobau) @@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); - /* cpu statistics */ for_each_cpu(tcpu, flush_mask) { - uvhub = uv_cpu_to_blade_id(tcpu); - bau_uvhub_set(uvhub, &bau_desc->distribution); - if (uvhub == bcp->uvhub) + /* + * The distribution vector is a bit map of pnodes, relative + * to the partition base pnode (and the partition base nasid + * in the header). + * Translate cpu to pnode and hub using an array stored + * in local memory. + */ + hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; + tpnode = hpp->pnode - bcp->partition_base_pnode; + bau_uvhub_set(tpnode, &bau_desc->distribution); + if (hpp->uvhub == bcp->uvhub) locals++; else remotes++; @@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) * an interrupt, but causes an error message to be returned to * the sender. */ -static void uv_enable_timeouts(void) +static void __init uv_enable_timeouts(void) { int uvhub; int nuvhubs; @@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void) } /* - * initialize the sending side's sending buffers + * Initialize the sending side's sending buffers. */ static void -uv_activation_descriptor_init(int node, int pnode) +uv_activation_descriptor_init(int node, int pnode, int base_pnode) { int i; int cpu; @@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode) n = pa >> uv_nshift; m = pa & uv_mmask; + /* the 14-bit pnode */ uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, (n << UV_DESC_BASE_PNODE_SHIFT | m)); - /* - * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each + * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each * cpu even though we only use the first one; one descriptor can * describe a broadcast to 256 uv hubs. */ @@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode) memset(bd2, 0, sizeof(struct bau_desc)); bd2->header.sw_ack_flag = 1; /* - * base_dest_nodeid is the nasid of the first uvhub - * in the partition. The bit map will indicate uvhub numbers, - * which are 0-N in a partition. Pnodes are unique system-wide. + * The base_dest_nasid set in the message header is the nasid + * of the first uvhub in the partition. The bit map will + * indicate destination pnode numbers relative to that base. + * They may not be consecutive if nasid striding is being used. */ - bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); - bd2->header.dest_subnodeid = 0x10; /* the LB */ + bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); + bd2->header.dest_subnodeid = UV_LB_SUBNODEID; bd2->header.command = UV_NET_ENDPOINT_INTD; bd2->header.int_both = 1; /* @@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode) /* * Initialization of each UV hub's structures */ -static void __init uv_init_uvhub(int uvhub, int vector) +static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) { int node; int pnode; @@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector) node = uvhub_to_first_node(uvhub); pnode = uv_blade_to_pnode(uvhub); - uv_activation_descriptor_init(node, pnode); + uv_activation_descriptor_init(node, pnode, base_pnode); uv_payload_queue_init(node, pnode); /* - * the below initialization can't be in firmware because the - * messaging IRQ will be determined by the OS + * The below initialization can't be in firmware because the + * messaging IRQ will be determined by the OS. */ apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, @@ -1491,10 +1500,11 @@ calculate_destination_timeout(void) /* * initialize the bau_control structure for each cpu */ -static int __init uv_init_per_cpu(int nuvhubs) +static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) { int i; int cpu; + int tcpu; int pnode; int uvhub; int have_hmaster; @@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs) bcp = &per_cpu(bau_control, cpu); memset(bcp, 0, sizeof(struct bau_control)); pnode = uv_cpu_hub_info(cpu)->pnode; + if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { + printk(KERN_EMERG + "cpu %d pnode %d-%d beyond %d; BAU disabled\n", + cpu, pnode, base_part_pnode, + UV_DISTRIBUTION_SIZE); + return 1; + } + bcp->osnode = cpu_to_node(cpu); + bcp->partition_base_pnode = uv_partition_base_pnode; uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); bdp = &uvhub_descs[uvhub]; @@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs) bdp->pnode = pnode; /* kludge: 'assuming' one node per socket, and assuming that disabling a socket just leaves a gap in node numbers */ - socket = (cpu_to_node(cpu) & 1); + socket = bcp->osnode & 1; bdp->socket_mask |= (1 << socket); sdp = &bdp->socket[socket]; sdp->cpu_number[sdp->num_cpus] = cpu; @@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs) nextsocket: socket++; socket_mask = (socket_mask >> 1); + /* each socket gets a local array of pnodes/hubs */ + bcp = smaster; + bcp->target_hub_and_pnode = kmalloc_node( + sizeof(struct hub_and_pnode) * + num_possible_cpus(), GFP_KERNEL, bcp->osnode); + memset(bcp->target_hub_and_pnode, 0, + sizeof(struct hub_and_pnode) * + num_possible_cpus()); + for_each_present_cpu(tcpu) { + bcp->target_hub_and_pnode[tcpu].pnode = + uv_cpu_hub_info(tcpu)->pnode; + bcp->target_hub_and_pnode[tcpu].uvhub = + uv_cpu_hub_info(tcpu)->numa_blade_id; + } } } kfree(uvhub_descs); @@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void) spin_lock_init(&disable_lock); congested_cycles = microsec_2_cycles(congested_response_us); - if (uv_init_per_cpu(nuvhubs)) { - nobau = 1; - return 0; - } - uv_partition_base_pnode = 0x7fffffff; - for (uvhub = 0; uvhub < nuvhubs; uvhub++) + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { if (uv_blade_nr_possible_cpus(uvhub) && (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) uv_partition_base_pnode = uv_blade_to_pnode(uvhub); + } + + if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { + nobau = 1; + return 0; + } vector = UV_BAU_MESSAGE; for_each_possible_blade(uvhub) if (uv_blade_nr_possible_cpus(uvhub)) - uv_init_uvhub(uvhub, vector); + uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); uv_enable_timeouts(); alloc_intr_gate(vector, uv_bau_message_intr1); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 55c965b..0684f3c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base) { } +static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) +{ + /* reserve the range used */ + native_pagetable_reserve(start, end); + + /* set as RW the rest */ + printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, + PFN_PHYS(pgt_buf_top)); + while (end < PFN_PHYS(pgt_buf_top)) { + make_lowmem_page_readwrite(__va(end)); + end += PAGE_SIZE; + } +} + static void xen_post_allocator_init(void); static __init void xen_pagetable_setup_done(pgd_t *base) @@ -1463,119 +1477,6 @@ static int xen_pgd_alloc(struct mm_struct *mm) return ret; } -#ifdef CONFIG_X86_64 -static __initdata u64 __last_pgt_set_rw = 0; -static __initdata u64 __pgt_buf_start = 0; -static __initdata u64 __pgt_buf_end = 0; -static __initdata u64 __pgt_buf_top = 0; -/* - * As a consequence of the commit: - * - * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e - * Author: Yinghai Lu <yinghai@kernel.org> - * Date: Fri Dec 17 16:58:28 2010 -0800 - * - * x86-64, mm: Put early page table high - * - * at some point init_memory_mapping is going to reach the pagetable pages - * area and map those pages too (mapping them as normal memory that falls - * in the range of addresses passed to init_memory_mapping as argument). - * Some of those pages are already pagetable pages (they are in the range - * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and - * everything is fine. - * Some of these pages are not pagetable pages yet (they fall in the range - * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they - * are going to be mapped RW. When these pages become pagetable pages and - * are hooked into the pagetable, xen will find that the guest has already - * a RW mapping of them somewhere and fail the operation. - * The reason Xen requires pagetables to be RO is that the hypervisor needs - * to verify that the pagetables are valid before using them. The validation - * operations are called "pinning". - * - * In order to fix the issue we mark all the pages in the entire range - * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation - * is completed only the range pgt_buf_start-pgt_buf_end is reserved by - * init_memory_mapping. Hence the kernel is going to crash as soon as one - * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those - * ranges are RO). - * - * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_ - * the init_memory_mapping has completed (in a perfect world we would - * call this function from init_memory_mapping, but lets ignore that). - * - * Because we are called _after_ init_memory_mapping the pgt_buf_[start, - * end,top] have all changed to new values (b/c init_memory_mapping - * is called and setting up another new page-table). Hence, the first time - * we enter this function, we save away the pgt_buf_start value and update - * the pgt_buf_[end,top]. - * - * When we detect that the "old" pgt_buf_start through pgt_buf_end - * PFNs have been reserved (so memblock_x86_reserve_range has been called), - * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top. - * - * And then we update those "old" pgt_buf_[end|top] with the new ones - * so that we can redo this on the next pagetable. - */ -static __init void mark_rw_past_pgt(void) { - - if (pgt_buf_end > pgt_buf_start) { - u64 addr, size; - - /* Save it away. */ - if (!__pgt_buf_start) { - __pgt_buf_start = pgt_buf_start; - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - return; - } - /* If we get the range that starts at __pgt_buf_end that means - * the range is reserved, and that in 'init_memory_mapping' - * the 'memblock_x86_reserve_range' has been called with the - * outdated __pgt_buf_start, __pgt_buf_end (the "new" - * pgt_buf_[start|end|top] refer now to a new pagetable. - * Note: we are called _after_ the pgt_buf_[..] have been - * updated.*/ - - addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start), - &size, PAGE_SIZE); - - /* Still not reserved, meaning 'memblock_x86_reserve_range' - * hasn't been called yet. Update the _end and _top.*/ - if (addr == PFN_PHYS(__pgt_buf_start)) { - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - return; - } - - /* OK, the area is reserved, meaning it is time for us to - * set RW for the old end->top PFNs. */ - - /* ..unless we had already done this. */ - if (__pgt_buf_end == __last_pgt_set_rw) - return; - - addr = PFN_PHYS(__pgt_buf_end); - - /* set as RW the rest */ - printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", - PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top)); - - while (addr < PFN_PHYS(__pgt_buf_top)) { - make_lowmem_page_readwrite(__va(addr)); - addr += PAGE_SIZE; - } - /* And update everything so that we are ready for the next - * pagetable (the one created for regions past 4GB) */ - __last_pgt_set_rw = __pgt_buf_end; - __pgt_buf_start = pgt_buf_start; - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - } - return; -} -#else -static __init void mark_rw_past_pgt(void) { } -#endif static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) { #ifdef CONFIG_X86_64 @@ -1602,14 +1503,6 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) unsigned long pfn = pte_pfn(pte); /* - * A bit of optimization. We do not need to call the workaround - * when xen_set_pte_init is called with a PTE with 0 as PFN. - * That is b/c the pagetable at that point are just being populated - * with empty values and we can save some cycles by not calling - * the 'memblock' code.*/ - if (pfn) - mark_rw_past_pgt(); - /* * If the new pfn is within the range of the newly allocated * kernel pagetable, and it isn't being mapped into an * early_ioremap fixmap slot as a freshly allocated page, make sure @@ -2118,8 +2011,6 @@ __init void xen_ident_map_ISA(void) static __init void xen_post_allocator_init(void) { - mark_rw_past_pgt(); - #ifdef CONFIG_XEN_DEBUG pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); #endif @@ -2228,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { void __init xen_init_mmu_ops(void) { + x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; pv_mmu_ops = xen_mmu_ops; |