diff options
Diffstat (limited to 'arch')
50 files changed, 789 insertions, 443 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3269576..3146ed3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1283,6 +1283,20 @@ config ARM_ERRATA_364296 processor into full low interrupt latency mode. ARM11MPCore is not affected. +config ARM_ERRATA_764369 + bool "ARM errata: Data cache line maintenance operation by MVA may not succeed" + depends on CPU_V7 && SMP + help + This option enables the workaround for erratum 764369 + affecting Cortex-A9 MPCore with two or more processors (all + current revisions). Under certain timing circumstances, a data + cache line maintenance operation by MVA targeting an Inner + Shareable memory region may fail to proceed up to either the + Point of Coherency or to the Point of Unification of the + system. This workaround adds a DSB instruction before the + relevant cache maintenance functions and sets a specific bit + in the diagnostic control register of the SCU. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 8c73900..253cc86 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -25,17 +25,17 @@ #ifdef CONFIG_SMP -#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +#define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ smp_mb(); \ __asm__ __volatile__( \ - "1: ldrex %1, [%2]\n" \ + "1: ldrex %1, [%3]\n" \ " " insn "\n" \ - "2: strex %1, %0, [%2]\n" \ - " teq %1, #0\n" \ + "2: strex %2, %0, [%3]\n" \ + " teq %2, #0\n" \ " bne 1b\n" \ " mov %0, #0\n" \ - __futex_atomic_ex_table("%4") \ - : "=&r" (ret), "=&r" (oldval) \ + __futex_atomic_ex_table("%5") \ + : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ : "cc", "memory") @@ -73,14 +73,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #include <linux/preempt.h> #include <asm/domain.h> -#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +#define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1, [%2]\n" \ + "1: " T(ldr) " %1, [%3]\n" \ " " insn "\n" \ - "2: " T(str) " %0, [%2]\n" \ + "2: " T(str) " %0, [%3]\n" \ " mov %0, #0\n" \ - __futex_atomic_ex_table("%4") \ - : "=&r" (ret), "=&r" (oldval) \ + __futex_atomic_ex_table("%5") \ + : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ : "cc", "memory") @@ -117,7 +117,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; + int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; @@ -129,19 +129,19 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("mov %0, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("add %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %0, %1, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("orr %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %0, %1, %3", ret, oldval, uaddr, ~oparg); + __futex_atomic_op("and %0, %1, %4", ret, oldval, tmp, uaddr, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %0, %1, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("eor %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; default: ret = -ENOSYS; diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 2c04ed5..c60a294 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -478,8 +478,8 @@ /* * Unimplemented (or alternatively implemented) syscalls */ -#define __IGNORE_fadvise64_64 1 -#define __IGNORE_migrate_pages 1 +#define __IGNORE_fadvise64_64 +#define __IGNORE_migrate_pages #endif /* __KERNEL__ */ #endif /* __ASM_ARM_UNISTD_H */ diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 79ed5e7..7fcddb7 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -13,6 +13,7 @@ #include <asm/smp_scu.h> #include <asm/cacheflush.h> +#include <asm/cputype.h> #define SCU_CTRL 0x00 #define SCU_CONFIG 0x04 @@ -37,6 +38,15 @@ void __init scu_enable(void __iomem *scu_base) { u32 scu_ctrl; +#ifdef CONFIG_ARM_ERRATA_764369 + /* Cortex-A9 only */ + if ((read_cpuid(CPUID_ID) & 0xff0ffff0) == 0x410fc090) { + scu_ctrl = __raw_readl(scu_base + 0x30); + if (!(scu_ctrl & 1)) + __raw_writel(scu_ctrl | 0x1, scu_base + 0x30); + } +#endif + scu_ctrl = __raw_readl(scu_base + SCU_CTRL); /* already enabled? */ if (scu_ctrl & 1) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index bf977f8..4e66f62 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -23,8 +23,10 @@ #if defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK) #define ARM_EXIT_KEEP(x) x +#define ARM_EXIT_DISCARD(x) #else #define ARM_EXIT_KEEP(x) +#define ARM_EXIT_DISCARD(x) x #endif OUTPUT_ARCH(arm) @@ -39,6 +41,11 @@ jiffies = jiffies_64 + 4; SECTIONS { /* + * XXX: The linker does not define how output sections are + * assigned to input sections when there are multiple statements + * matching the same input section name. There is no documented + * order of matching. + * * unwind exit sections must be discarded before the rest of the * unwind sections get included. */ @@ -47,6 +54,9 @@ SECTIONS *(.ARM.extab.exit.text) ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) + ARM_EXIT_DISCARD(EXIT_TEXT) + ARM_EXIT_DISCARD(EXIT_DATA) + EXIT_CALL #ifndef CONFIG_HOTPLUG *(.ARM.exidx.devexit.text) *(.ARM.extab.devexit.text) @@ -58,6 +68,8 @@ SECTIONS #ifndef CONFIG_SMP_ON_UP *(.alt.smp.init) #endif + *(.discard) + *(.discard.*) } #ifdef CONFIG_XIP_KERNEL @@ -279,9 +291,6 @@ SECTIONS STABS_DEBUG .comment 0 : { *(.comment) } - - /* Default discards */ - DISCARDS } /* diff --git a/arch/arm/mach-exynos4/clock.c b/arch/arm/mach-exynos4/clock.c index 79d6cd0..86964d2 100644 --- a/arch/arm/mach-exynos4/clock.c +++ b/arch/arm/mach-exynos4/clock.c @@ -899,8 +899,7 @@ static struct clksrc_clk clksrcs[] = { .reg_div = { .reg = S5P_CLKDIV_CAM, .shift = 28, .size = 4 }, }, { .clk = { - .name = "sclk_cam", - .devname = "exynos4-fimc.0", + .name = "sclk_cam0", .enable = exynos4_clksrc_mask_cam_ctrl, .ctrlbit = (1 << 16), }, @@ -909,8 +908,7 @@ static struct clksrc_clk clksrcs[] = { .reg_div = { .reg = S5P_CLKDIV_CAM, .shift = 16, .size = 4 }, }, { .clk = { - .name = "sclk_cam", - .devname = "exynos4-fimc.1", + .name = "sclk_cam1", .enable = exynos4_clksrc_mask_cam_ctrl, .ctrlbit = (1 << 20), }, diff --git a/arch/arm/mach-s3c2443/clock.c b/arch/arm/mach-s3c2443/clock.c index a1a7176..38058af 100644 --- a/arch/arm/mach-s3c2443/clock.c +++ b/arch/arm/mach-s3c2443/clock.c @@ -128,7 +128,7 @@ static int s3c2443_armclk_setrate(struct clk *clk, unsigned long rate) unsigned long clkcon0; clkcon0 = __raw_readl(S3C2443_CLKDIV0); - clkcon0 &= S3C2443_CLKDIV0_ARMDIV_MASK; + clkcon0 &= ~S3C2443_CLKDIV0_ARMDIV_MASK; clkcon0 |= val << S3C2443_CLKDIV0_ARMDIV_SHIFT; __raw_writel(clkcon0, S3C2443_CLKDIV0); } diff --git a/arch/arm/mach-s5pv210/clock.c b/arch/arm/mach-s5pv210/clock.c index 52a8e60..f5f8fa8 100644 --- a/arch/arm/mach-s5pv210/clock.c +++ b/arch/arm/mach-s5pv210/clock.c @@ -815,8 +815,7 @@ static struct clksrc_clk clksrcs[] = { .reg_div = { .reg = S5P_CLK_DIV3, .shift = 20, .size = 4 }, }, { .clk = { - .name = "sclk_cam", - .devname = "s5pv210-fimc.0", + .name = "sclk_cam0", .enable = s5pv210_clk_mask0_ctrl, .ctrlbit = (1 << 3), }, @@ -825,8 +824,7 @@ static struct clksrc_clk clksrcs[] = { .reg_div = { .reg = S5P_CLK_DIV1, .shift = 12, .size = 4 }, }, { .clk = { - .name = "sclk_cam", - .devname = "s5pv210-fimc.1", + .name = "sclk_cam1", .enable = s5pv210_clk_mask0_ctrl, .ctrlbit = (1 << 4), }, diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 3b24bfa..07c4bc8 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -174,6 +174,10 @@ ENTRY(v7_coherent_user_range) dcache_line_size r2, r3 sub r3, r2, #1 bic r12, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification add r12, r12, r2 @@ -223,6 +227,10 @@ ENTRY(v7_flush_kern_dcache_area) add r1, r0, r1 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line add r0, r0, r2 @@ -247,6 +255,10 @@ v7_dma_inv_range: sub r3, r2, #1 tst r0, r3 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line tst r1, r3 @@ -270,6 +282,10 @@ v7_dma_clean_range: dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c10, 1 @ clean D / U line add r0, r0, r2 @@ -288,6 +304,10 @@ ENTRY(v7_dma_flush_range) dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line add r0, r0, r2 diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 0a0a1e7..c3ff82f 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -324,6 +324,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, if (addr) *handle = pfn_to_dma(dev, page_to_pfn(page)); + else + __dma_free_buffer(page, size); return addr; } diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c index f71078e..f88216d 100644 --- a/arch/arm/plat-s5p/irq-gpioint.c +++ b/arch/arm/plat-s5p/irq-gpioint.c @@ -114,17 +114,18 @@ static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip) { static int used_gpioint_groups = 0; int group = chip->group; - struct s5p_gpioint_bank *bank = NULL; + struct s5p_gpioint_bank *b, *bank = NULL; struct irq_chip_generic *gc; struct irq_chip_type *ct; if (used_gpioint_groups >= S5P_GPIOINT_GROUP_COUNT) return -ENOMEM; - list_for_each_entry(bank, &banks, list) { - if (group >= bank->start && - group < bank->start + bank->nr_groups) + list_for_each_entry(b, &banks, list) { + if (group >= b->start && group < b->start + b->nr_groups) { + bank = b; break; + } } if (!bank) return -EINVAL; diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 5cc8385..31a7d3a 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -561,6 +561,20 @@ static struct pci_ops u4_pcie_pci_ops = .write = u4_pcie_write_config, }; +static void __devinit pmac_pci_fixup_u4_of_node(struct pci_dev *dev) +{ + /* Apple's device-tree "hides" the root complex virtual P2P bridge + * on U4. However, Linux sees it, causing the PCI <-> OF matching + * code to fail to properly match devices below it. This works around + * it by setting the node of the bridge to point to the PHB node, + * which is not entirely correct but fixes the matching code and + * doesn't break anything else. It's also the simplest possible fix. + */ + if (dev->dev.of_node == NULL) + dev->dev.of_node = pcibios_get_phb_of_node(dev->bus); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, 0x5b, pmac_pci_fixup_u4_of_node); + #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC32 diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 64b61bf..547f1a6 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -188,7 +188,8 @@ extern char elf_platform[]; #define SET_PERSONALITY(ex) \ do { \ if (personality(current->personality) != PER_LINUX32) \ - set_personality(PER_LINUX); \ + set_personality(PER_LINUX | \ + (current->personality & ~PER_MASK)); \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ set_thread_flag(TIF_31BIT); \ else \ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 519eb5f..c0cb794 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -658,12 +658,14 @@ static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) * struct gmap_struct - guest address space * @mm: pointer to the parent mm_struct * @table: pointer to the page directory + * @asce: address space control element for gmap page table * @crst_list: list of all crst tables used in the guest address space */ struct gmap { struct list_head list; struct mm_struct *mm; unsigned long *table; + unsigned long asce; struct list_head crst_list; }; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 532fd43..2b45591 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -10,6 +10,7 @@ #include <linux/sched.h> #include <asm/vdso.h> #include <asm/sigp.h> +#include <asm/pgtable.h> /* * Make sure that the compiler is new enough. We want a compiler that @@ -126,6 +127,7 @@ int main(void) DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); @@ -151,6 +153,7 @@ int main(void) DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp)); + DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); #endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 5f729d6..713da07 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -1076,6 +1076,11 @@ sie_loop: lg %r14,__LC_THREAD_INFO # pointer thread_info struct tm __TI_flags+7(%r14),_TIF_EXIT_SIE jnz sie_exit + lg %r14,__LC_GMAP # get gmap pointer + ltgr %r14,%r14 + jz sie_gmap + lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce +sie_gmap: lg %r14,__SF_EMPTY(%r15) # get control block pointer SPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) @@ -1083,6 +1088,7 @@ sie_done: SPP __LC_CMF_HPP # set host id lg %r14,__LC_THREAD_INFO # pointer thread_info struct sie_exit: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f17296e..dc2b580 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -123,6 +123,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: + case KVM_CAP_S390_GMAP: r = 1; break; default: @@ -263,10 +264,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); + gmap_enable(vcpu->arch.gmap); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + gmap_disable(vcpu->arch.gmap); save_fp_regs(&vcpu->arch.guest_fpregs); save_access_regs(vcpu->arch.guest_acrs); restore_fp_regs(&vcpu->arch.host_fpregs); @@ -461,7 +464,6 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) local_irq_disable(); kvm_guest_enter(); local_irq_enable(); - gmap_enable(vcpu->arch.gmap); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { @@ -470,7 +472,6 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); - gmap_disable(vcpu->arch.gmap); local_irq_disable(); kvm_guest_exit(); local_irq_enable(); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4d1f2bc..5d56c2b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -160,6 +160,8 @@ struct gmap *gmap_alloc(struct mm_struct *mm) table = (unsigned long *) page_to_phys(page); crst_table_init(table, _REGION1_ENTRY_EMPTY); gmap->table = table; + gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); list_add(&gmap->list, &mm->context.gmap_list); return gmap; @@ -240,10 +242,6 @@ EXPORT_SYMBOL_GPL(gmap_free); */ void gmap_enable(struct gmap *gmap) { - /* Load primary space page table origin. */ - S390_lowcore.user_asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(gmap->table); - asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); S390_lowcore.gmap = (unsigned long) gmap; } EXPORT_SYMBOL_GPL(gmap_enable); @@ -254,10 +252,6 @@ EXPORT_SYMBOL_GPL(gmap_enable); */ void gmap_disable(struct gmap *gmap) { - /* Load primary space page table origin. */ - S390_lowcore.user_asce = - gmap->mm->context.asce_bits | __pa(gmap->mm->pgd); - asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); S390_lowcore.gmap = 0UL; } EXPORT_SYMBOL_GPL(gmap_disable); @@ -309,15 +303,15 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 20) & 0x7ff); @@ -325,6 +319,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) flush |= gmap_unlink_segment(gmap, table); *table = _SEGMENT_ENTRY_INV; } +out: up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 55a17c6..d06a2660 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -43,6 +43,8 @@ #define SUN4V_CHIP_NIAGARA1 0x01 #define SUN4V_CHIP_NIAGARA2 0x02 #define SUN4V_CHIP_NIAGARA3 0x03 +#define SUN4V_CHIP_NIAGARA4 0x04 +#define SUN4V_CHIP_NIAGARA5 0x05 #define SUN4V_CHIP_UNKNOWN 0xff #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h index 9ed6ff6..ee8edc6 100644 --- a/arch/sparc/include/asm/xor_64.h +++ b/arch/sparc/include/asm/xor_64.h @@ -66,6 +66,8 @@ static struct xor_block_template xor_block_niagara = { ((tlb_type == hypervisor && \ (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \ sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \ - sun4v_chip_type == SUN4V_CHIP_NIAGARA3)) ? \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA5)) ? \ &xor_block_niagara : \ &xor_block_VIS) diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 9810fd8..ba9b1ce 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -481,6 +481,18 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara3"; break; + case SUN4V_CHIP_NIAGARA4: + sparc_cpu_type = "UltraSparc T4 (Niagara4)"; + sparc_fpu_type = "UltraSparc T4 integrated FPU"; + sparc_pmu_type = "niagara4"; + break; + + case SUN4V_CHIP_NIAGARA5: + sparc_cpu_type = "UltraSparc T5 (Niagara5)"; + sparc_fpu_type = "UltraSparc T5 integrated FPU"; + sparc_pmu_type = "niagara5"; + break; + default: printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", prom_cpu_compatible); diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index 4197e8d..9323eaf 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -325,6 +325,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA1: case SUN4V_CHIP_NIAGARA2: case SUN4V_CHIP_NIAGARA3: + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: rover_inc_table = niagara_iterate_method; break; default: diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 0eac1b2..0d810c2 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -133,7 +133,7 @@ prom_sun4v_name: prom_niagara_prefix: .asciz "SUNW,UltraSPARC-T" prom_sparc_prefix: - .asciz "SPARC-T" + .asciz "SPARC-" .align 4 prom_root_compatible: .skip 64 @@ -396,7 +396,7 @@ sun4v_chip_type: or %g1, %lo(prom_cpu_compatible), %g1 sethi %hi(prom_sparc_prefix), %g7 or %g7, %lo(prom_sparc_prefix), %g7 - mov 7, %g3 + mov 6, %g3 90: ldub [%g7], %g2 ldub [%g1], %g4 cmp %g2, %g4 @@ -408,10 +408,23 @@ sun4v_chip_type: sethi %hi(prom_cpu_compatible), %g1 or %g1, %lo(prom_cpu_compatible), %g1 - ldub [%g1 + 7], %g2 + ldub [%g1 + 6], %g2 + cmp %g2, 'T' + be,pt %xcc, 70f + cmp %g2, 'M' + bne,pn %xcc, 4f + nop + +70: ldub [%g1 + 7], %g2 cmp %g2, '3' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA3, %g4 + cmp %g2, '4' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA4, %g4 + cmp %g2, '5' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA5, %g4 ba,pt %xcc, 4f nop @@ -545,6 +558,12 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA3 be,pt %xcc, niagara2_patch nop + cmp %g1, SUN4V_CHIP_NIAGARA4 + be,pt %xcc, niagara2_patch + nop + cmp %g1, SUN4V_CHIP_NIAGARA5 + be,pt %xcc, niagara2_patch + nop call generic_patch_copyops nop diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index c8cc461..f793742 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -380,8 +380,7 @@ void flush_thread(void) #endif } - /* Now, this task is no longer a kernel thread. */ - current->thread.current_ds = USER_DS; + /* This task is no longer a kernel thread. */ if (current->thread.flags & SPARC_FLAG_KTHREAD) { current->thread.flags &= ~SPARC_FLAG_KTHREAD; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index c158a95..d959cd0 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -368,9 +368,6 @@ void flush_thread(void) /* Clear FPU register state. */ t->fpsaved[0] = 0; - - if (get_thread_current_ds() != ASI_AIUS) - set_fs(USER_DS); } /* It's a bit more tricky when 64-bit tasks are involved... */ diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index d26e1f6..3e3e291 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -137,7 +137,7 @@ static void __init process_switch(char c) prom_halt(); break; case 'p': - /* Just ignore, this behavior is now the default. */ + prom_early_console.flags &= ~CON_BOOT; break; default: printk("Unknown boot switch (-%c)\n", c); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 3c5bb78..c965595a 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -106,7 +106,7 @@ static void __init process_switch(char c) prom_halt(); break; case 'p': - /* Just ignore, this behavior is now the default. */ + prom_early_console.flags &= ~CON_BOOT; break; case 'P': /* Force UltraSPARC-III P-Cache on. */ @@ -425,10 +425,14 @@ static void __init init_sparc64_elf_hwcap(void) else if (tlb_type == hypervisor) { if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5) cap |= HWCAP_SPARC_BLKINIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5) cap |= HWCAP_SPARC_N2; } @@ -452,11 +456,15 @@ static void __init init_sparc64_elf_hwcap(void) if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1) cap |= AV_SPARC_ASI_BLK_INIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | AV_SPARC_ASI_BLK_INIT | AV_SPARC_POPC); - if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | AV_SPARC_FMAF); } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 581531d..8e073d8 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -511,6 +511,11 @@ static void __init read_obp_translations(void) for (i = 0; i < prom_trans_ents; i++) prom_trans[i].data &= ~0x0003fe0000000000UL; } + + /* Force execute bit on. */ + for (i = 0; i < prom_trans_ents; i++) + prom_trans[i].data |= (tlb_type == hypervisor ? + _PAGE_EXEC_4V : _PAGE_EXEC_4U); } static void __init hypervisor_tlb_lock(unsigned long vaddr, diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 4886a68..5361095 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -22,25 +22,23 @@ void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif -/* - * Define some priorities for the nmi notifier call chain. - * - * Create a local nmi bit that has a higher priority than - * external nmis, because the local ones are more frequent. - * - * Also setup some default high/normal/low settings for - * subsystems to registers with. Using 4 bits to separate - * the priorities. This can go a lot higher if needed be. - */ - -#define NMI_LOCAL_SHIFT 16 /* randomly picked */ -#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) -#define NMI_HIGH_PRIOR (1ULL << 8) -#define NMI_NORMAL_PRIOR (1ULL << 4) -#define NMI_LOW_PRIOR (1ULL << 0) -#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) -#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) -#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) +#define NMI_FLAG_FIRST 1 + +enum { + NMI_LOCAL=0, + NMI_UNKNOWN, + NMI_MAX +}; + +#define NMI_DONE 0 +#define NMI_HANDLED 1 + +typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); + +int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, + const char *); + +void unregister_nmi_handler(unsigned int, const char *); void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 094fb30..e47cb61 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,6 +29,9 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) +#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) + #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) #define INTEL_ARCH_EVENT_MASK \ @@ -159,7 +162,19 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); ); \ } +struct perf_guest_switch_msr { + unsigned msr; + u64 host, guest; +}; + +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); #else +static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +{ + *nr = 0; + return NULL; +} + static inline void perf_events_lapic_init(void) { } #endif diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 3250e3d..92f29706 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -23,7 +23,7 @@ void machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 -typedef void (*nmi_shootdown_cb)(int, struct die_args*); +typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); #endif /* _ASM_X86_REBOOT_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 82f2912..8baca3c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -19,7 +19,7 @@ endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time.o ioport.o ldt.o dumpstack.o +obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index d5e57db0..31cb9ae 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -60,22 +60,10 @@ void arch_trigger_all_cpu_backtrace(void) } static int __kprobes -arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, - unsigned long cmd, void *__args) +arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = __args; - struct pt_regs *regs; int cpu; - switch (cmd) { - case DIE_NMI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { @@ -86,21 +74,16 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, show_regs(regs); arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + return NMI_DONE; } -static __read_mostly struct notifier_block backtrace_notifier = { - .notifier_call = arch_trigger_all_cpu_backtrace_handler, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - static int __init register_trigger_all_cpu_backtrace(void) { - register_die_notifier(&backtrace_notifier); + register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler, + 0, "arch_bt"); return 0; } early_initcall(register_trigger_all_cpu_backtrace); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 34b1859..75be00e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -672,18 +672,11 @@ void __cpuinit uv_cpu_init(void) /* * When NMI is received, print a stack trace. */ -int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) +int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) { unsigned long real_uv_nmi; int bid; - if (reason != DIE_NMIUNKNOWN) - return NOTIFY_OK; - - if (in_crash_kexec) - /* do nothing if entering the crash kernel */ - return NOTIFY_OK; - /* * Each blade has an MMR that indicates when an NMI has been sent * to cpus on the blade. If an NMI is detected, atomically @@ -704,7 +697,7 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) } if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) - return NOTIFY_DONE; + return NMI_DONE; __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; @@ -717,17 +710,12 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) dump_stack(); spin_unlock(&uv_nmi_lock); - return NOTIFY_STOP; + return NMI_HANDLED; } -static struct notifier_block uv_dump_stack_nmi_nb = { - .notifier_call = uv_handle_nmi, - .priority = NMI_LOCAL_LOW_PRIOR - 1, -}; - void uv_register_nmi_notifier(void) { - if (register_die_notifier(&uv_dump_stack_nmi_nb)) + if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) printk(KERN_WARNING "UV NMI handler failed to register\n"); } diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 0ed633c..6199232 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) static cpumask_var_t mce_inject_cpumask; -static int mce_raise_notify(struct notifier_block *self, - unsigned long val, void *data) +static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) - return NOTIFY_DONE; + if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) + return NMI_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) - raise_exception(m, args->regs); + raise_exception(m, regs); else if (m->status) raise_poll(m); - return NOTIFY_STOP; + return NMI_HANDLED; } -static struct notifier_block mce_raise_nb = { - .notifier_call = mce_raise_notify, - .priority = NMI_LOCAL_NORMAL_PRIOR, -}; - /* Inject mce on current CPU */ static int raise_local(void) { @@ -216,7 +209,8 @@ static int inject_init(void) return -ENOMEM; printk(KERN_INFO "Machine check injector initialized\n"); mce_chrdev_ops.write = mce_write; - register_die_notifier(&mce_raise_nb); + register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, + "mce_notify"); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 08363b0..fce51ad1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -908,9 +908,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) percpu_inc(mce_exception_count); - if (notify_die(DIE_NMI, "machine check", regs, error_code, - 18, SIGKILL) == NOTIFY_STOP) - goto out; if (!banks) goto out; @@ -1140,6 +1137,15 @@ static void mce_start_timer(unsigned long data) add_timer_on(t, smp_processor_id()); } +/* Must not be called in IRQ context where del_timer_sync() can deadlock */ +static void mce_timer_delete_all(void) +{ + int cpu; + + for_each_online_cpu(cpu) + del_timer_sync(&per_cpu(mce_timer, cpu)); +} + static void mce_do_trigger(struct work_struct *work) { call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); @@ -1750,7 +1756,6 @@ static struct syscore_ops mce_syscore_ops = { static void mce_cpu_restart(void *data) { - del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(__this_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); @@ -1760,16 +1765,15 @@ static void mce_cpu_restart(void *data) /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { + mce_timer_delete_all(); on_each_cpu(mce_cpu_restart, NULL, 1); } /* Toggle features for corrected errors */ -static void mce_disable_ce(void *all) +static void mce_disable_cmci(void *data) { if (!mce_available(__this_cpu_ptr(&cpu_info))) return; - if (all) - del_timer_sync(&__get_cpu_var(mce_timer)); cmci_clear(); } @@ -1852,7 +1856,8 @@ static ssize_t set_ignore_ce(struct sys_device *s, if (mce_ignore_ce ^ !!new) { if (new) { /* disable ce features */ - on_each_cpu(mce_disable_ce, (void *)1, 1); + mce_timer_delete_all(); + on_each_cpu(mce_disable_cmci, NULL, 1); mce_ignore_ce = 1; } else { /* enable ce features */ @@ -1875,7 +1880,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s, if (mce_cmci_disabled ^ !!new) { if (new) { /* disable cmci */ - on_each_cpu(mce_disable_ce, NULL, 1); + on_each_cpu(mce_disable_cmci, NULL, 1); mce_cmci_disabled = 1; } else { /* enable cmci */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8ab8911..6408910 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1058,76 +1058,15 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } -struct pmu_nmi_state { - unsigned int marked; - int handled; -}; - -static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); - static int __kprobes -perf_event_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) +perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = __args; - unsigned int this_nmi; - int handled; - if (!atomic_read(&active_events)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - break; - case DIE_NMIUNKNOWN: - this_nmi = percpu_read(irq_stat.__nmi_count); - if (this_nmi != __this_cpu_read(pmu_nmi.marked)) - /* let the kernel handle the unknown nmi */ - return NOTIFY_DONE; - /* - * This one is a PMU back-to-back nmi. Two events - * trigger 'simultaneously' raising two back-to-back - * NMIs. If the first NMI handles both, the latter - * will be empty and daze the CPU. So, we drop it to - * avoid false-positive 'unknown nmi' messages. - */ - return NOTIFY_STOP; - default: - return NOTIFY_DONE; - } - - handled = x86_pmu.handle_irq(args->regs); - if (!handled) - return NOTIFY_DONE; + return NMI_DONE; - this_nmi = percpu_read(irq_stat.__nmi_count); - if ((handled > 1) || - /* the next nmi could be a back-to-back nmi */ - ((__this_cpu_read(pmu_nmi.marked) == this_nmi) && - (__this_cpu_read(pmu_nmi.handled) > 1))) { - /* - * We could have two subsequent back-to-back nmis: The - * first handles more than one counter, the 2nd - * handles only one counter and the 3rd handles no - * counter. - * - * This is the 2nd nmi because the previous was - * handling more than one counter. We will mark the - * next (3rd) and then drop it if unhandled. - */ - __this_cpu_write(pmu_nmi.marked, this_nmi + 1); - __this_cpu_write(pmu_nmi.handled, handled); - } - - return NOTIFY_STOP; + return x86_pmu.handle_irq(regs); } -static __read_mostly struct notifier_block perf_event_nmi_notifier = { - .notifier_call = perf_event_nmi_handler, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - struct event_constraint emptyconstraint; struct event_constraint unconstrained; @@ -1232,7 +1171,7 @@ static int __init init_hw_perf_events(void) ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; perf_events_lapic_init(); - register_die_notifier(&perf_event_nmi_notifier); + register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index fb330b0..b9698d4 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -131,6 +131,13 @@ struct cpu_hw_events { struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; /* + * Intel host/guest exclude bits + */ + u64 intel_ctrl_guest_mask; + u64 intel_ctrl_host_mask; + struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; + + /* * manage shared (per-core, per-cpu) registers * used on Intel NHM/WSM/SNB */ @@ -295,6 +302,11 @@ struct x86_pmu { */ struct extra_reg *extra_regs; unsigned int er_flags; + + /* + * Intel host/guest support (KVM) + */ + struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; #define ERF_NO_HT_SHARING 1 diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 384450d..db8e603 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -138,6 +138,19 @@ static int amd_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.exclude_host && event->attr.exclude_guest) + /* + * When HO == GO == 1 the hardware treats that as GO == HO == 0 + * and will count in both modes. We don't want to count in that + * case so we emulate no-counting by setting US = OS = 0. + */ + event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | + ARCH_PERFMON_EVENTSEL_OS); + else if (event->attr.exclude_host) + event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; + else if (event->attr.exclude_guest) + event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; + if (event->attr.type != PERF_TYPE_RAW) return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 61fa357..e09ca20 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -749,7 +749,8 @@ static void intel_pmu_enable_all(int added) intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, + x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { struct perf_event *event = @@ -872,6 +873,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); @@ -879,6 +881,9 @@ static void intel_pmu_disable_event(struct perf_event *event) return; } + cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); + cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -924,6 +929,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) static void intel_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { if (!__this_cpu_read(cpu_hw_events.enabled)) @@ -933,6 +939,11 @@ static void intel_pmu_enable_event(struct perf_event *event) return; } + if (event->attr.exclude_host) + cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); + if (event->attr.exclude_guest) + cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc); return; @@ -1302,12 +1313,84 @@ static int intel_pmu_hw_config(struct perf_event *event) return 0; } +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +{ + if (x86_pmu.guest_get_msrs) + return x86_pmu.guest_get_msrs(nr); + *nr = 0; + return NULL; +} +EXPORT_SYMBOL_GPL(perf_guest_get_msrs); + +static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; + + arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; + arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; + arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; + + *nr = 1; + return arr; +} + +static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct perf_event *event = cpuc->events[idx]; + + arr[idx].msr = x86_pmu_config_addr(idx); + arr[idx].host = arr[idx].guest = 0; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + arr[idx].host = arr[idx].guest = + event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; + + if (event->attr.exclude_host) + arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + else if (event->attr.exclude_guest) + arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + } + + *nr = x86_pmu.num_counters; + return arr; +} + +static void core_pmu_enable_event(struct perf_event *event) +{ + if (!event->attr.exclude_host) + x86_pmu_enable_event(event); +} + +static void core_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; + + if (!test_bit(idx, cpuc->active_mask) || + cpuc->events[idx]->attr.exclude_host) + continue; + + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); + } +} + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, + .enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, .disable = x86_pmu_disable_event, .hw_config = x86_pmu_hw_config, .schedule_events = x86_schedule_events, @@ -1325,6 +1408,7 @@ static __initconst const struct x86_pmu core_pmu = { .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, + .guest_get_msrs = core_guest_get_msrs, }; struct intel_shared_regs *allocate_shared_regs(int cpu) @@ -1431,6 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, + .guest_get_msrs = intel_guest_get_msrs, }; static void intel_clovertown_quirks(void) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 764c7c2..13ad899 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -32,15 +32,12 @@ int in_crash_kexec; #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) -static void kdump_nmi_callback(int cpu, struct die_args *args) +static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { - struct pt_regs *regs; #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; #endif - regs = args->regs; - #ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 00354d4..faba577 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -511,28 +511,37 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) static int was_in_debug_nmi[NR_CPUS]; -static int __kgdb_notify(struct die_args *args, unsigned long cmd) +static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - struct pt_regs *regs = args->regs; - switch (cmd) { - case DIE_NMI: + case NMI_LOCAL: if (atomic_read(&kgdb_active) != -1) { /* KGDB CPU roundup */ kgdb_nmicallback(raw_smp_processor_id(), regs); was_in_debug_nmi[raw_smp_processor_id()] = 1; touch_nmi_watchdog(); - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + break; - case DIE_NMIUNKNOWN: + case NMI_UNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + break; + default: + /* do nothing */ + break; + } + return NMI_DONE; +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + switch (cmd) { case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) @@ -590,11 +599,6 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) static struct notifier_block kgdb_notifier = { .notifier_call = kgdb_notify, - - /* - * Lowest-prio notifier priority, we want to be notified last: - */ - .priority = NMI_LOCAL_LOW_PRIOR, }; /** @@ -605,7 +609,31 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - return register_die_notifier(&kgdb_notifier); + int retval; + + retval = register_die_notifier(&kgdb_notifier); + if (retval) + goto out; + + retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler, + 0, "kgdb"); + if (retval) + goto out1; + + retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler, + 0, "kgdb"); + + if (retval) + goto out2; + + return retval; + +out2: + unregister_nmi_handler(NMI_LOCAL, "kgdb"); +out1: + unregister_die_notifier(&kgdb_notifier); +out: + return retval; } static void kgdb_hw_overflow_handler(struct perf_event *event, @@ -673,6 +701,8 @@ void kgdb_arch_exit(void) breakinfo[i].pev = NULL; } } + unregister_nmi_handler(NMI_UNKNOWN, "kgdb"); + unregister_nmi_handler(NMI_LOCAL, "kgdb"); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c new file mode 100644 index 0000000..e20f5e7 --- /dev/null +++ b/arch/x86/kernel/nmi.c @@ -0,0 +1,336 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + * Copyright (C) 2011 Don Zickus Red Hat, Inc. + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + */ + +/* + * Handle hardware traps and faults. + */ +#include <linux/spinlock.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/nmi.h> +#include <linux/delay.h> +#include <linux/hardirq.h> +#include <linux/slab.h> + +#if defined(CONFIG_EDAC) +#include <linux/edac.h> +#endif + +#include <linux/atomic.h> +#include <asm/traps.h> +#include <asm/mach_traps.h> +#include <asm/nmi.h> + +#define NMI_MAX_NAMELEN 16 +struct nmiaction { + struct list_head list; + nmi_handler_t handler; + unsigned int flags; + char *name; +}; + +struct nmi_desc { + spinlock_t lock; + struct list_head head; +}; + +static struct nmi_desc nmi_desc[NMI_MAX] = +{ + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock), + .head = LIST_HEAD_INIT(nmi_desc[0].head), + }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), + .head = LIST_HEAD_INIT(nmi_desc[1].head), + }, + +}; + +static int ignore_nmis; + +int unknown_nmi_panic; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. + */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); + +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + +#define nmi_to_desc(type) (&nmi_desc[type]) + +static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs) +{ + struct nmi_desc *desc = nmi_to_desc(type); + struct nmiaction *a; + int handled=0; + + rcu_read_lock(); + + /* + * NMIs are edge-triggered, which means if you have enough + * of them concurrently, you can lose some because only one + * can be latched at any given time. Walk the whole list + * to handle those situations. + */ + list_for_each_entry_rcu(a, &desc->head, list) { + + handled += a->handler(type, regs); + + } + + rcu_read_unlock(); + + /* return total number of NMI events handled */ + return handled; +} + +static int __setup_nmi(unsigned int type, struct nmiaction *action) +{ + struct nmi_desc *desc = nmi_to_desc(type); + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + + /* + * some handlers need to be executed first otherwise a fake + * event confuses some handlers (kdump uses this flag) + */ + if (action->flags & NMI_FLAG_FIRST) + list_add_rcu(&action->list, &desc->head); + else + list_add_tail_rcu(&action->list, &desc->head); + + spin_unlock_irqrestore(&desc->lock, flags); + return 0; +} + +static struct nmiaction *__free_nmi(unsigned int type, const char *name) +{ + struct nmi_desc *desc = nmi_to_desc(type); + struct nmiaction *n; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + + list_for_each_entry_rcu(n, &desc->head, list) { + /* + * the name passed in to describe the nmi handler + * is used as the lookup key + */ + if (!strcmp(n->name, name)) { + WARN(in_nmi(), + "Trying to free NMI (%s) from NMI context!\n", n->name); + list_del_rcu(&n->list); + break; + } + } + + spin_unlock_irqrestore(&desc->lock, flags); + synchronize_rcu(); + return (n); +} + +int register_nmi_handler(unsigned int type, nmi_handler_t handler, + unsigned long nmiflags, const char *devname) +{ + struct nmiaction *action; + int retval = -ENOMEM; + + if (!handler) + return -EINVAL; + + action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); + if (!action) + goto fail_action; + + action->handler = handler; + action->flags = nmiflags; + action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); + if (!action->name) + goto fail_action_name; + + retval = __setup_nmi(type, action); + + if (retval) + goto fail_setup_nmi; + + return retval; + +fail_setup_nmi: + kfree(action->name); +fail_action_name: + kfree(action); +fail_action: + + return retval; +} +EXPORT_SYMBOL_GPL(register_nmi_handler); + +void unregister_nmi_handler(unsigned int type, const char *name) +{ + struct nmiaction *a; + + a = __free_nmi(type, name); + if (a) { + kfree(a->name); + kfree(a); + } +} + +EXPORT_SYMBOL_GPL(unregister_nmi_handler); + +static notrace __kprobes void +pci_serr_error(unsigned char reason, struct pt_regs *regs) +{ + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + /* + * On some machines, PCI SERR line is used to report memory + * errors. EDAC makes use of it. + */ +#if defined(CONFIG_EDAC) + if (edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + pr_emerg("Dazed and confused, but trying to continue\n"); + + /* Clear and disable the PCI SERR error line. */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; + outb(reason, NMI_REASON_PORT); +} + +static notrace __kprobes void +io_check_error(unsigned char reason, struct pt_regs *regs) +{ + unsigned long i; + + pr_emerg( + "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + show_registers(regs); + + if (panic_on_io_nmi) + panic("NMI IOCK error: Not continuing"); + + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); + + i = 20000; + while (--i) { + touch_nmi_watchdog(); + udelay(100); + } + + reason &= ~NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); +} + +static notrace __kprobes void +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) +{ + int handled; + + handled = nmi_handle(NMI_UNKNOWN, regs); + if (handled) + return; +#ifdef CONFIG_MCA + /* + * Might actually be able to figure out what the guilty party + * is: + */ + if (MCA_bus) { + mca_handle_nmi(); + return; + } +#endif + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + pr_emerg("Do you have a strange power saving mode enabled?\n"); + if (unknown_nmi_panic || panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + pr_emerg("Dazed and confused, but trying to continue\n"); +} + +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) +{ + unsigned char reason = 0; + int handled; + + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + handled = nmi_handle(NMI_LOCAL, regs); + if (handled) + return; + + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); + + if (reason & NMI_REASON_MASK) { + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); +#endif + raw_spin_unlock(&nmi_reason_lock); + return; + } + raw_spin_unlock(&nmi_reason_lock); + + unknown_nmi_error(reason, regs); +} + +dotraplinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_enter(); + + inc_irq_stat(__nmi_count); + + if (!ignore_nmis) + default_do_nmi(regs); + + nmi_exit(); +} + +void stop_nmi(void) +{ + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; +} diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9242436..e334be1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -464,7 +464,7 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct die_args *args) +static void vmxoff_nmi(int cpu, struct pt_regs *regs) { cpu_emergency_vmxoff(); } @@ -736,14 +736,10 @@ static nmi_shootdown_cb shootdown_callback; static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(struct notifier_block *self, - unsigned long val, void *data) +static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) { int cpu; - if (val != DIE_NMI) - return NOTIFY_OK; - cpu = raw_smp_processor_id(); /* Don't do anything if this handler is invoked on crashing cpu. @@ -751,10 +747,10 @@ static int crash_nmi_callback(struct notifier_block *self, * an NMI if system was initially booted with nmi_watchdog parameter. */ if (cpu == crashing_cpu) - return NOTIFY_STOP; + return NMI_HANDLED; local_irq_disable(); - shootdown_callback(cpu, (struct die_args *)data); + shootdown_callback(cpu, regs); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -762,7 +758,7 @@ static int crash_nmi_callback(struct notifier_block *self, for (;;) cpu_relax(); - return 1; + return NMI_HANDLED; } static void smp_send_nmi_allbutself(void) @@ -770,12 +766,6 @@ static void smp_send_nmi_allbutself(void) apic->send_IPI_allbutself(NMI_VECTOR); } -static struct notifier_block crash_nmi_nb = { - .notifier_call = crash_nmi_callback, - /* we want to be the first one called */ - .priority = NMI_LOCAL_HIGH_PRIOR+1, -}; - /* Halt all other CPUs, calling the specified function on each of them * * This function can be used to halt all other CPUs on crash @@ -794,7 +784,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ - if (register_die_notifier(&crash_nmi_nb)) + if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, + NMI_FLAG_FIRST, "crash")) return; /* return what? */ /* Ensure the new callback function is set before sending * out the NMI diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 3f2ad26..ccdbc16 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -42,8 +42,11 @@ int mach_set_rtc_mmss(unsigned long nowtime) { int real_seconds, real_minutes, cmos_minutes; unsigned char save_control, save_freq_select; + unsigned long flags; int retval = 0; + spin_lock_irqsave(&rtc_lock, flags); + /* tell the clock it's being set */ save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); @@ -93,12 +96,17 @@ int mach_set_rtc_mmss(unsigned long nowtime) CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock_irqrestore(&rtc_lock, flags); + return retval; } unsigned long mach_get_cmos_time(void) { unsigned int status, year, mon, day, hour, min, sec, century = 0; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); /* * If UIP is clear, then we have >= 244 microseconds before @@ -125,6 +133,8 @@ unsigned long mach_get_cmos_time(void) status = CMOS_READ(RTC_CONTROL); WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); + spin_unlock_irqrestore(&rtc_lock, flags); + if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { sec = bcd2bin(sec); min = bcd2bin(min); @@ -169,24 +179,15 @@ EXPORT_SYMBOL(rtc_cmos_write); int update_persistent_clock(struct timespec now) { - unsigned long flags; - int retval; - - spin_lock_irqsave(&rtc_lock, flags); - retval = x86_platform.set_wallclock(now.tv_sec); - spin_unlock_irqrestore(&rtc_lock, flags); - - return retval; + return x86_platform.set_wallclock(now.tv_sec); } /* not static: needed by APM */ void read_persistent_clock(struct timespec *ts) { - unsigned long retval, flags; + unsigned long retval; - spin_lock_irqsave(&rtc_lock, flags); retval = x86_platform.get_wallclock(); - spin_unlock_irqrestore(&rtc_lock, flags); ts->tv_sec = retval; ts->tv_nsec = 0; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6913369..a8e3eb8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -81,15 +81,6 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); -static int ignore_nmis; - -int unknown_nmi_panic; -/* - * Prevent NMI reason port (0x61) being accessed simultaneously, can - * only be used in NMI handler. - */ -static DEFINE_RAW_SPINLOCK(nmi_reason_lock); - static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -307,152 +298,6 @@ gp_in_kernel: die("general protection fault", regs, error_code); } -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -static notrace __kprobes void -pci_serr_error(unsigned char reason, struct pt_regs *regs) -{ - pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - /* - * On some machines, PCI SERR line is used to report memory - * errors. EDAC makes use of it. - */ -#if defined(CONFIG_EDAC) - if (edac_handler_set()) { - edac_atomic_assert_error(); - return; - } -#endif - - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); - - /* Clear and disable the PCI SERR error line. */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -io_check_error(unsigned char reason, struct pt_regs *regs) -{ - unsigned long i; - - pr_emerg( - "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - show_registers(regs); - - if (panic_on_io_nmi) - panic("NMI IOCK error: Not continuing"); - - /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); - - i = 20000; - while (--i) { - touch_nmi_watchdog(); - udelay(100); - } - - reason &= ~NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs *regs) -{ - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == - NOTIFY_STOP) - return; -#ifdef CONFIG_MCA - /* - * Might actually be able to figure out what the guilty party - * is: - */ - if (MCA_bus) { - mca_handle_nmi(); - return; - } -#endif - pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - pr_emerg("Do you have a strange power saving mode enabled?\n"); - if (unknown_nmi_panic || panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); -} - -static notrace __kprobes void default_do_nmi(struct pt_regs *regs) -{ - unsigned char reason = 0; - - /* - * CPU-specific NMI must be processed before non-CPU-specific - * NMI, otherwise we may lose it, because the CPU-specific - * NMI can not be detected/processed on other CPUs. - */ - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - raw_spin_lock(&nmi_reason_lock); - reason = get_nmi_reason(); - - if (reason & NMI_REASON_MASK) { - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - else if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active - * meanwhile as it's edge-triggered: - */ - reassert_nmi(); -#endif - raw_spin_unlock(&nmi_reason_lock); - return; - } - raw_spin_unlock(&nmi_reason_lock); - - unknown_nmi_error(reason, regs); -} - -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - - inc_irq_stat(__nmi_count); - - if (!ignore_nmis) - default_do_nmi(regs); - - nmi_exit(); -} - -void stop_nmi(void) -{ - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; -} - /* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6f08bc9..8b4cc5f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3603,7 +3603,7 @@ done_prefixes: break; case Src2CL: ctxt->src2.bytes = 1; - ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0x8; + ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0xff; break; case Src2ImmByte: rc = decode_imm(ctxt, &ctxt->src2, 1, true); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1c5b693..8e8da79 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -400,7 +400,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) /* xchg acts as a barrier before the setting of the high bits */ orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low); - orig.spte_high = ssptep->spte_high = sspte.spte_high; + orig.spte_high = ssptep->spte_high; + ssptep->spte_high = sspte.spte_high; count_spte_clear(sptep, spte); return orig.spte; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 7ca4d43..990c35b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -61,26 +61,15 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, } -static int profile_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - if (ctr_running) - model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); - else if (!nmi_enabled) - break; - else - model->stop(&__get_cpu_var(cpu_msrs)); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + if (ctr_running) + model->check_ctrs(regs, &__get_cpu_var(cpu_msrs)); + else if (!nmi_enabled) + return NMI_DONE; + else + model->stop(&__get_cpu_var(cpu_msrs)); + return NMI_HANDLED; } static void nmi_cpu_save_registers(struct op_msrs *msrs) @@ -363,12 +352,6 @@ static void nmi_cpu_setup(void *dummy) apic_write(APIC_LVTPC, APIC_DM_NMI); } -static struct notifier_block profile_exceptions_nb = { - .notifier_call = profile_exceptions_notify, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -508,7 +491,8 @@ static int nmi_setup(void) ctr_running = 0; /* make variables visible to the nmi handler: */ smp_mb(); - err = register_die_notifier(&profile_exceptions_nb); + err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify, + 0, "oprofile"); if (err) goto fail; @@ -538,7 +522,7 @@ static void nmi_shutdown(void) put_online_cpus(); /* make variables visible to the nmi handler: */ smp_mb(); - unregister_die_notifier(&profile_exceptions_nb); + unregister_nmi_handler(NMI_LOCAL, "oprofile"); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); free_msrs(); diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c index 73d70d6..6d5dbcd 100644 --- a/arch/x86/platform/mrst/vrtc.c +++ b/arch/x86/platform/mrst/vrtc.c @@ -58,8 +58,11 @@ EXPORT_SYMBOL_GPL(vrtc_cmos_write); unsigned long vrtc_get_time(void) { u8 sec, min, hour, mday, mon; + unsigned long flags; u32 year; + spin_lock_irqsave(&rtc_lock, flags); + while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP)) cpu_relax(); @@ -70,6 +73,8 @@ unsigned long vrtc_get_time(void) mon = vrtc_cmos_read(RTC_MONTH); year = vrtc_cmos_read(RTC_YEAR); + spin_unlock_irqrestore(&rtc_lock, flags); + /* vRTC YEAR reg contains the offset to 1960 */ year += 1960; @@ -83,8 +88,10 @@ unsigned long vrtc_get_time(void) int vrtc_set_mmss(unsigned long nowtime) { int real_sec, real_min; + unsigned long flags; int vrtc_min; + spin_lock_irqsave(&rtc_lock, flags); vrtc_min = vrtc_cmos_read(RTC_MINUTES); real_sec = nowtime % 60; @@ -95,6 +102,8 @@ int vrtc_set_mmss(unsigned long nowtime) vrtc_cmos_write(real_sec, RTC_SECONDS); vrtc_cmos_write(real_min, RTC_MINUTES); + spin_unlock_irqrestore(&rtc_lock, flags); + return 0; } |