diff options
108 files changed, 1150 insertions, 560 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 358eb01..88c09ca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1186,7 +1186,7 @@ M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-mvebu/ -F: drivers/rtc/armada38x-rtc +F: drivers/rtc/rtc-armada38x.c ARM/Marvell Berlin SoC support M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> @@ -1675,8 +1675,8 @@ F: drivers/misc/eeprom/at24.c F: include/linux/platform_data/at24.h ATA OVER ETHERNET (AOE) DRIVER -M: "Ed L. Cashin" <ecashin@coraid.com> -W: http://support.coraid.com/support/linux +M: "Ed L. Cashin" <ed.cashin@acm.org> +W: http://www.openaoe.org/ S: Supported F: Documentation/aoe/ F: drivers/block/aoe/ @@ -3252,6 +3252,13 @@ S: Maintained F: Documentation/hwmon/dme1737 F: drivers/hwmon/dme1737.c +DMI/SMBIOS SUPPORT +M: Jean Delvare <jdelvare@suse.de> +S: Maintained +F: drivers/firmware/dmi-id.c +F: drivers/firmware/dmi_scan.c +F: include/linux/dmi.h + DOCKING STATION DRIVER M: Shaohua Li <shaohua.li@intel.com> L: linux-acpi@vger.kernel.org diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 114234e..edda76f 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -67,7 +67,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int err; - err = __copy_to_user(&(sf->uc.uc_mcontext.regs), regs, + err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs, sizeof(sf->uc.uc_mcontext.regs.scratch)); err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); @@ -83,7 +83,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) if (!err) set_current_blocked(&set); - err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs), + err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch), sizeof(sf->uc.uc_mcontext.regs.scratch)); return err; @@ -131,6 +131,15 @@ SYSCALL_DEFINE0(rt_sigreturn) /* Don't restart from sigreturn */ syscall_wont_restart(regs); + /* + * Ensure that sigreturn always returns to user mode (in case the + * regs saved on user stack got fudged between save and sigreturn) + * Otherwise it is easy to panic the kernel with a custom + * signal handler and/or restorer which clobberes the status32/ret + * to return to a bogus location in kernel mode. + */ + regs->status32 |= STATUS_U_MASK; + return regs->r0; badframe: @@ -229,8 +238,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) /* * handler returns using sigreturn stub provided already by userpsace + * If not, nuke the process right away */ - BUG_ON(!(ksig->ka.sa.sa_flags & SA_RESTORER)); + if(!(ksig->ka.sa.sa_flags & SA_RESTORER)) + return 1; + regs->blink = (unsigned long)ksig->ka.sa.sa_restorer; /* User Stack for signal handler will be above the frame just carved */ @@ -296,12 +308,12 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { sigset_t *oldset = sigmask_to_save(); - int ret; + int failed; /* Set up the stack frame */ - ret = setup_rt_frame(ksig, oldset, regs); + failed = setup_rt_frame(ksig, oldset, regs); - signal_setup_done(ret, ksig, 0); + signal_setup_done(failed, ksig, 0); } void do_signal(struct pt_regs *regs) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index cb95930..d8c25b7 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -246,14 +246,30 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) -#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) - -#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ - cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \ - o1, o2, n1, n2) +#define _protect_cmpxchg_local(pcp, o, n) \ +({ \ + typeof(*raw_cpu_ptr(&(pcp))) __ret; \ + preempt_disable(); \ + __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ + preempt_enable(); \ + __ret; \ +}) + +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) + +#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + preempt_disable(); \ + __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ + raw_cpu_ptr(&(ptr2)), \ + o1, o2, n1, n2); \ + preempt_enable(); \ + __ret; \ +}) #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index a9eee33..101a42b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); + /* + * init_mm.pgd does not contain any user mappings and it is always + * active for kernel addresses in TTBR1. Just set the reserved TTBR0. + */ + if (next == &init_mm) { + cpu_set_reserved_ttbr0(); + return; + } + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, tsk); } diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 09da25b..4fde8c1 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -204,25 +204,47 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, return ret; } +#define _percpu_read(pcp) \ +({ \ + typeof(pcp) __retval; \ + preempt_disable(); \ + __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \ + sizeof(pcp)); \ + preempt_enable(); \ + __retval; \ +}) + +#define _percpu_write(pcp, val) \ +do { \ + preempt_disable(); \ + __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \ + sizeof(pcp)); \ + preempt_enable(); \ +} while(0) \ + +#define _pcp_protect(operation, pcp, val) \ +({ \ + typeof(pcp) __retval; \ + preempt_disable(); \ + __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \ + (val), sizeof(pcp)); \ + preempt_enable(); \ + __retval; \ +}) + #define _percpu_add(pcp, val) \ - __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + _pcp_protect(__percpu_add, pcp, val) -#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val)) +#define _percpu_add_return(pcp, val) _percpu_add(pcp, val) #define _percpu_and(pcp, val) \ - __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + _pcp_protect(__percpu_and, pcp, val) #define _percpu_or(pcp, val) \ - __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) - -#define _percpu_read(pcp) (typeof(pcp)) \ - (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp))) - -#define _percpu_write(pcp, val) \ - __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)) + _pcp_protect(__percpu_or, pcp, val) #define _percpu_xchg(pcp, val) (typeof(pcp)) \ - (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))) + _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val)) #define this_cpu_add_1(pcp, val) _percpu_add(pcp, val) #define this_cpu_add_2(pcp, val) _percpu_add(pcp, val) diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h index 9359e50..d5779b0 100644 --- a/arch/metag/include/asm/io.h +++ b/arch/metag/include/asm/io.h @@ -2,6 +2,7 @@ #define _ASM_METAG_IO_H #include <linux/types.h> +#include <asm/pgtable-bits.h> #define IO_SPACE_LIMIT 0 diff --git a/arch/metag/include/asm/pgtable-bits.h b/arch/metag/include/asm/pgtable-bits.h new file mode 100644 index 0000000..25ba672 --- /dev/null +++ b/arch/metag/include/asm/pgtable-bits.h @@ -0,0 +1,104 @@ +/* + * Meta page table definitions. + */ + +#ifndef _METAG_PGTABLE_BITS_H +#define _METAG_PGTABLE_BITS_H + +#include <asm/metag_mem.h> + +/* + * Definitions for MMU descriptors + * + * These are the hardware bits in the MMCU pte entries. + * Derived from the Meta toolkit headers. + */ +#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT +#define _PAGE_WRITE MMCU_ENTRY_WR_BIT +#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT +/* Write combine bit - this can cause writes to occur out of order */ +#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT +/* Sys coherent bit - this bit is never used by Linux */ +#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT +#define _PAGE_ALWAYS_ZERO_1 0x020 +#define _PAGE_CACHE_CTRL0 0x040 +#define _PAGE_CACHE_CTRL1 0x080 +#define _PAGE_ALWAYS_ZERO_2 0x100 +#define _PAGE_ALWAYS_ZERO_3 0x200 +#define _PAGE_ALWAYS_ZERO_4 0x400 +#define _PAGE_ALWAYS_ZERO_5 0x800 + +/* These are software bits that we stuff into the gaps in the hardware + * pte entries that are not used. Note, these DO get stored in the actual + * hardware, but the hardware just does not use them. + */ +#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 +#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 + +/* Pages owned, and protected by, the kernel. */ +#define _PAGE_KERNEL _PAGE_PRIV + +/* No cacheing of this page */ +#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) +/* burst cacheing - good for data streaming */ +#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) +/* One cache way per thread */ +#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) +/* Full on cacheing */ +#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) + +#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) + +/* which bits are used for cache control ... */ +#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ + _PAGE_WR_COMBINE) + +/* This is a mask of the bits that pte_modify is allowed to change. */ +#define _PAGE_CHG_MASK (PAGE_MASK) + +#define _PAGE_SZ_SHIFT 1 +#define _PAGE_SZ_4K (0x0) +#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) + +#if defined(CONFIG_PAGE_SIZE_4K) +#define _PAGE_SZ (_PAGE_SZ_4K) +#elif defined(CONFIG_PAGE_SIZE_8K) +#define _PAGE_SZ (_PAGE_SZ_8K) +#elif defined(CONFIG_PAGE_SIZE_16K) +#define _PAGE_SZ (_PAGE_SZ_16K) +#endif +#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) + +#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) +# define _PAGE_SZHUGE (_PAGE_SZ_8K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) +# define _PAGE_SZHUGE (_PAGE_SZ_16K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) +# define _PAGE_SZHUGE (_PAGE_SZ_32K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) +# define _PAGE_SZHUGE (_PAGE_SZ_64K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) +# define _PAGE_SZHUGE (_PAGE_SZ_128K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) +# define _PAGE_SZHUGE (_PAGE_SZ_256K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) +# define _PAGE_SZHUGE (_PAGE_SZ_512K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) +# define _PAGE_SZHUGE (_PAGE_SZ_1M) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) +# define _PAGE_SZHUGE (_PAGE_SZ_2M) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) +# define _PAGE_SZHUGE (_PAGE_SZ_4M) +#endif + +#endif /* _METAG_PGTABLE_BITS_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index d0604c0..ffa3a3a 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -5,6 +5,7 @@ #ifndef _METAG_PGTABLE_H #define _METAG_PGTABLE_H +#include <asm/pgtable-bits.h> #include <asm-generic/pgtable-nopmd.h> /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ @@ -21,100 +22,6 @@ #endif /* - * Definitions for MMU descriptors - * - * These are the hardware bits in the MMCU pte entries. - * Derived from the Meta toolkit headers. - */ -#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT -#define _PAGE_WRITE MMCU_ENTRY_WR_BIT -#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT -/* Write combine bit - this can cause writes to occur out of order */ -#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT -/* Sys coherent bit - this bit is never used by Linux */ -#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT -#define _PAGE_ALWAYS_ZERO_1 0x020 -#define _PAGE_CACHE_CTRL0 0x040 -#define _PAGE_CACHE_CTRL1 0x080 -#define _PAGE_ALWAYS_ZERO_2 0x100 -#define _PAGE_ALWAYS_ZERO_3 0x200 -#define _PAGE_ALWAYS_ZERO_4 0x400 -#define _PAGE_ALWAYS_ZERO_5 0x800 - -/* These are software bits that we stuff into the gaps in the hardware - * pte entries that are not used. Note, these DO get stored in the actual - * hardware, but the hardware just does not use them. - */ -#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 -#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 - -/* Pages owned, and protected by, the kernel. */ -#define _PAGE_KERNEL _PAGE_PRIV - -/* No cacheing of this page */ -#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) -/* burst cacheing - good for data streaming */ -#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) -/* One cache way per thread */ -#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) -/* Full on cacheing */ -#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) - -#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) - -/* which bits are used for cache control ... */ -#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ - _PAGE_WR_COMBINE) - -/* This is a mask of the bits that pte_modify is allowed to change. */ -#define _PAGE_CHG_MASK (PAGE_MASK) - -#define _PAGE_SZ_SHIFT 1 -#define _PAGE_SZ_4K (0x0) -#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) - -#if defined(CONFIG_PAGE_SIZE_4K) -#define _PAGE_SZ (_PAGE_SZ_4K) -#elif defined(CONFIG_PAGE_SIZE_8K) -#define _PAGE_SZ (_PAGE_SZ_8K) -#elif defined(CONFIG_PAGE_SIZE_16K) -#define _PAGE_SZ (_PAGE_SZ_16K) -#endif -#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) - -#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) -# define _PAGE_SZHUGE (_PAGE_SZ_8K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) -# define _PAGE_SZHUGE (_PAGE_SZ_16K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) -# define _PAGE_SZHUGE (_PAGE_SZ_32K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) -# define _PAGE_SZHUGE (_PAGE_SZ_64K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) -# define _PAGE_SZHUGE (_PAGE_SZ_128K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) -# define _PAGE_SZHUGE (_PAGE_SZ_256K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) -# define _PAGE_SZHUGE (_PAGE_SZ_512K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) -# define _PAGE_SZHUGE (_PAGE_SZ_1M) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) -# define _PAGE_SZHUGE (_PAGE_SZ_2M) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) -# define _PAGE_SZHUGE (_PAGE_SZ_4M) -#endif - -/* * The Linux memory management assumes a three-level page table setup. On * Meta, we use that, but "fold" the mid level into the top-level page * table. diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index f213f5b..d174372 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -26,7 +26,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER); -#ifdef CONFIG_64BIT +#if PT_NLEVELS == 3 actual_pgd += PTRS_PER_PGD; /* Populate first pmd with allocated memory. We mark it * with PxD_FLAG_ATTACHED as a signal to the system that this @@ -45,7 +45,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { -#ifdef CONFIG_64BIT +#if PT_NLEVELS == 3 pgd -= PTRS_PER_PGD; #endif free_pages((unsigned long)pgd, PGD_ALLOC_ORDER); @@ -72,12 +72,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { -#ifdef CONFIG_64BIT if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) - /* This is the permanent pmd attached to the pgd; - * cannot free it */ + /* + * This is the permanent pmd attached to the pgd; + * cannot free it. + * Increment the counter to compensate for the decrement + * done by generic mm code. + */ + mm_inc_nr_pmds(mm); return; -#endif free_pages((unsigned long)pmd, PMD_ORDER); } @@ -99,7 +102,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { -#ifdef CONFIG_64BIT +#if PT_NLEVELS == 3 /* preserve the gateway marker if this is the beginning of * the permanent pmd */ if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 5a8997d..8eefb12 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -55,8 +55,8 @@ #define ENTRY_COMP(_name_) .word sys_##_name_ #endif - ENTRY_SAME(restart_syscall) /* 0 */ - ENTRY_SAME(exit) +90: ENTRY_SAME(restart_syscall) /* 0 */ +91: ENTRY_SAME(exit) ENTRY_SAME(fork_wrapper) ENTRY_SAME(read) ENTRY_SAME(write) @@ -439,7 +439,10 @@ ENTRY_SAME(bpf) ENTRY_COMP(execveat) - /* Nothing yet */ + +.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) +.error "size of syscall table does not fit value of __NR_Linux_syscalls" +.endif #undef ENTRY_SAME #undef ENTRY_DIFF diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 03cd858..4cbe23a 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -153,6 +153,7 @@ #define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff #define PPC_INST_MFTMR 0x7c0002dc #define PPC_INST_MSGSND 0x7c00019c +#define PPC_INST_MSGCLR 0x7c0001dc #define PPC_INST_MSGSNDP 0x7c00011c #define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 @@ -309,6 +310,8 @@ ___PPC_RB(b) | __PPC_EH(eh)) #define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ ___PPC_RB(b)) +#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \ + ___PPC_RB(b)) #define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \ ___PPC_RB(b)) #define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1c874fb..af56b5c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -608,13 +608,16 @@ #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ +#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 */ #define SRR1_WAKESYSERR 0x00300000 /* System error */ #define SRR1_WAKEEE 0x00200000 /* External interrupt */ #define SRR1_WAKEMT 0x00280000 /* mtctrl */ #define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ +#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell on P8 */ #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ #define SRR1_WAKERESET 0x00100000 /* System reset */ +#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */ #define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ #define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, * may not be recoverable */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index f337666..f830468 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -437,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* Power8 DD1: Does not support doorbell IPIs */ .pvr_mask = 0xffffff00, .pvr_value = 0x004d0100, diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f421781..2128f3a 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -17,6 +17,7 @@ #include <asm/dbell.h> #include <asm/irq_regs.h> +#include <asm/kvm_ppc.h> #ifdef CONFIG_SMP void doorbell_setup_this_cpu(void) @@ -41,6 +42,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); + kvmppc_set_host_ipi(smp_processor_id(), 0); __this_cpu_inc(irq_stat.doorbell_irqs); smp_ipi_demux(); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c2df815..9519e6b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1408,7 +1408,7 @@ machine_check_handle_early: bne 9f /* continue in V mode if we are. */ 5: -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * We are coming from kernel context. Check if we are coming from * guest. if yes, then we can continue. We will fall through diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index de4018a..de74756 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -636,7 +636,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vpa_update_lock); lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr; if (lppaca) - yield_count = lppaca->yield_count; + yield_count = be32_to_cpu(lppaca->yield_count); spin_unlock(&vcpu->arch.vpa_update_lock); return yield_count; } @@ -942,20 +942,20 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, bool preserve_top32) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; + mutex_lock(&kvm->lock); spin_lock(&vc->lock); /* * If ILE (interrupt little-endian) has changed, update the * MSR_LE bit in the intr_msr for each vcpu in this vcore. */ if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *vcpu; int i; - mutex_lock(&kvm->lock); kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->arch.vcore != vc) continue; @@ -964,7 +964,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, else vcpu->arch.intr_msr &= ~MSR_LE; } - mutex_unlock(&kvm->lock); } /* @@ -981,6 +980,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); + mutex_unlock(&kvm->lock); } static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bb94e6f..6cbf163 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1005,6 +1005,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Save HEIR (HV emulation assist reg) in emul_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,KVM_INST_FETCH_FAILED + stw r3,VCPU_LAST_INST(r9) cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index fc34025..38a4508 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -33,6 +33,8 @@ #include <asm/runlatch.h> #include <asm/code-patching.h> #include <asm/dbell.h> +#include <asm/kvm_ppc.h> +#include <asm/ppc-opcode.h> #include "powernv.h" @@ -149,7 +151,7 @@ static int pnv_smp_cpu_disable(void) static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; - unsigned long srr1; + unsigned long srr1, wmask; u32 idle_states; /* Standard hot unplug procedure */ @@ -161,6 +163,10 @@ static void pnv_smp_cpu_kill_self(void) generic_set_cpu_dead(cpu); smp_wmb(); + wmask = SRR1_WAKEMASK; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + wmask = SRR1_WAKEMASK_P8; + idle_states = pnv_get_supported_cpuidle_states(); /* We don't want to take decrementer interrupts while we are offline, * so clear LPCR:PECE1. We keep PECE2 enabled. @@ -191,10 +197,14 @@ static void pnv_smp_cpu_kill_self(void) * having finished executing in a KVM guest, then srr1 * contains 0. */ - if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) { + if ((srr1 & wmask) == SRR1_WAKEEE) { icp_native_flush_interrupt(); local_paca->irq_happened &= PACA_IRQ_HARD_DIS; smp_mb(); + } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); + asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); + kvmppc_set_host_ipi(cpu, 0); } if (cpu_core_split_required()) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 90cf3dc..8f35d52 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -25,10 +25,10 @@ static struct kobject *mobility_kobj; struct update_props_workarea { - u32 phandle; - u32 state; - u64 reserved; - u32 nprops; + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; } __packed; #define NODE_ACTION_MASK 0xff000000 @@ -54,11 +54,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) return rc; } -static int delete_dt_node(u32 phandle) +static int delete_dt_node(__be32 phandle) { struct device_node *dn; - dn = of_find_node_by_phandle(phandle); + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (!dn) return -ENOENT; @@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, return 0; } -static int update_dt_node(u32 phandle, s32 scope) +static int update_dt_node(__be32 phandle, s32 scope) { struct update_props_workarea *upwa; struct device_node *dn; @@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope) char *prop_data; char *rtas_buf; int update_properties_token; + u32 nprops; u32 vd; update_properties_token = rtas_token("ibm,update-properties"); @@ -146,7 +147,7 @@ static int update_dt_node(u32 phandle, s32 scope) if (!rtas_buf) return -ENOMEM; - dn = of_find_node_by_phandle(phandle); + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (!dn) { kfree(rtas_buf); return -ENOENT; @@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope) break; prop_data = rtas_buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); /* On the first call to ibm,update-properties for a node the * the first property value descriptor contains an empty @@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope) */ if (*prop_data == 0) { prop_data++; - vd = *(u32 *)prop_data; + vd = be32_to_cpu(*(__be32 *)prop_data); prop_data += vd + sizeof(vd); - upwa->nprops--; + nprops--; } - for (i = 0; i < upwa->nprops; i++) { + for (i = 0; i < nprops; i++) { char *prop_name; prop_name = prop_data; prop_data += strlen(prop_name) + 1; - vd = *(u32 *)prop_data; + vd = be32_to_cpu(*(__be32 *)prop_data); prop_data += sizeof(vd); switch (vd) { @@ -212,13 +214,13 @@ static int update_dt_node(u32 phandle, s32 scope) return 0; } -static int add_dt_node(u32 parent_phandle, u32 drc_index) +static int add_dt_node(__be32 parent_phandle, __be32 drc_index) { struct device_node *dn; struct device_node *parent_dn; int rc; - parent_dn = of_find_node_by_phandle(parent_phandle); + parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); if (!parent_dn) return -ENOENT; @@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index) int pseries_devicetree_update(s32 scope) { char *rtas_buf; - u32 *data; + __be32 *data; int update_nodes_token; int rc; @@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope) if (rc && rc != 1) break; - data = (u32 *)rtas_buf + 4; - while (*data & NODE_ACTION_MASK) { + data = (__be32 *)rtas_buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { int i; - u32 action = *data & NODE_ACTION_MASK; - int node_count = *data & NODE_COUNT_MASK; + u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK; + u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; data++; for (i = 0; i < node_count; i++) { - u32 phandle = *data++; - u32 drc_index; + __be32 phandle = *data++; + __be32 drc_index; switch (action) { case DELETE_DT_NODE: diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index c9df40b..c9c875d 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -211,7 +211,7 @@ do { \ extern unsigned long mmap_rnd_mask; -#define STACK_RND_MASK (mmap_rnd_mask) +#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask) #define ARCH_DLINFO \ do { \ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 82c1989..6c79f1b 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -57,6 +57,44 @@ unsigned long ftrace_plt; +static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) +{ +#ifdef CC_USING_HOTPATCH + /* brcl 0,0 */ + insn->opc = 0xc004; + insn->disp = 0; +#else + /* stg r14,8(r15) */ + insn->opc = 0xe3e0; + insn->disp = 0xf0080024; +#endif +} + +static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + if (insn->opc == BREAKPOINT_INSTRUCTION) + return 1; +#endif + return 0; +} + +static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + insn->opc = BREAKPOINT_INSTRUCTION; + insn->disp = KPROBE_ON_FTRACE_NOP; +#endif +} + +static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + insn->opc = BREAKPOINT_INSTRUCTION; + insn->disp = KPROBE_ON_FTRACE_CALL; +#endif +} + int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ -#ifdef CC_USING_HOTPATCH - /* We expect to see brcl 0,0 */ - ftrace_generate_nop_insn(&orig); -#else - /* We expect to see stg r14,8(r15) */ - orig.opc = 0xe3e0; - orig.disp = 0xf0080024; -#endif + ftrace_generate_orig_insn(&orig); ftrace_generate_nop_insn(&new); - } else if (old.opc == BREAKPOINT_INSTRUCTION) { + } else if (is_kprobe_on_ftrace(&old)) { /* * If we find a breakpoint instruction, a kprobe has been * placed at the beginning of the function. We write the @@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * bytes of the original instruction so that the kprobes * handler can execute a nop, if it reaches this breakpoint. */ - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; - orig.disp = KPROBE_ON_FTRACE_CALL; - new.disp = KPROBE_ON_FTRACE_NOP; + ftrace_generate_kprobe_call_insn(&orig); + ftrace_generate_kprobe_nop_insn(&new); } else { /* Replace ftrace call with a nop. */ ftrace_generate_call_insn(&orig, rec->ip); @@ -111,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (old.opc == BREAKPOINT_INSTRUCTION) { + if (is_kprobe_on_ftrace(&old)) { /* * If we find a breakpoint instruction, a kprobe has been * placed at the beginning of the function. We write the @@ -119,9 +149,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * bytes of the original instruction so that the kprobes * handler can execute a brasl if it reaches this breakpoint. */ - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; - orig.disp = KPROBE_ON_FTRACE_NOP; - new.disp = KPROBE_ON_FTRACE_CALL; + ftrace_generate_kprobe_nop_insn(&orig); + ftrace_generate_kprobe_call_insn(&new); } else { /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c3f8d15..e6a1578 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); static struct attribute *cpumsf_pmu_events_attr[] = { CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), - CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), + NULL, NULL, }; @@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void) return -EINVAL; } - if (si.ad) + if (si.ad) { sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + cpumsf_pmu_events_attr[1] = + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); + } sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); if (!sfdbg) diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 6b09fdf..ca62946 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -177,6 +177,17 @@ restart_entry: lhi %r1,1 sigp %r1,%r0,SIGP_SET_ARCHITECTURE sam64 +#ifdef CONFIG_SMP + larl %r1,smp_cpu_mt_shift + icm %r1,15,0(%r1) + jz smt_done + llgfr %r1,%r1 +smt_loop: + sigp %r1,%r0,SIGP_SET_MULTI_THREADING + brc 8,smt_done /* accepted */ + brc 2,smt_loop /* busy, try again */ +smt_done: +#endif larl %r1,.Lnew_pgm_check_psw lpswe 0(%r1) pgm_check_entry: diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 4f6725f..f5b6537 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, unsigned long reg_val); #endif + +#define HV_FAST_M7_GET_PERFREG 0x43 +#define HV_FAST_M7_SET_PERFREG 0x44 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_m7_get_perfreg(unsigned long reg_num, + unsigned long *reg_val); +unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, + unsigned long reg_val); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 @@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, #define HV_GRP_SDIO 0x0108 #define HV_GRP_SDIO_ERR 0x0109 #define HV_GRP_REBOOT_DATA 0x0110 +#define HV_GRP_M7_PERF 0x0114 #define HV_GRP_NIAG_PERF 0x0200 #define HV_GRP_FIRE_PERF 0x0201 #define HV_GRP_N2_CPU 0x0202 diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 5c55145..662500f 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -48,6 +48,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_VT_CPU, }, { .group = HV_GRP_T5_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, + { .group = HV_GRP_M7_PERF, }, }; static DEFINE_SPINLOCK(hvapi_lock); diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index caedf83..afbaba5 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg) retl nop ENDPROC(sun4v_t5_set_perfreg) + +ENTRY(sun4v_m7_get_perfreg) + mov %o1, %o4 + mov HV_FAST_M7_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_m7_get_perfreg) + +ENTRY(sun4v_m7_set_perfreg) + mov HV_FAST_M7_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_m7_set_perfreg) diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 7e967c8..eb978c7 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = { .pcr_nmi_disable = PCR_N4_PICNPT, }; +static u64 m7_pcr_read(unsigned long reg_num) +{ + unsigned long val; + + (void) sun4v_m7_get_perfreg(reg_num, &val); + + return val; +} + +static void m7_pcr_write(unsigned long reg_num, u64 val) +{ + (void) sun4v_m7_set_perfreg(reg_num, val); +} + +static const struct pcr_ops m7_pcr_ops = { + .read_pcr = m7_pcr_read, + .write_pcr = m7_pcr_write, + .read_pic = n4_pic_read, + .write_pic = n4_pic_write, + .nmi_picl_value = n4_picl_value, + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | + PCR_N4_UTRACE | PCR_N4_TOE | + (26 << PCR_N4_SL_SHIFT)), + .pcr_nmi_disable = PCR_N4_PICNPT, +}; static unsigned long perf_hsvc_group; static unsigned long perf_hsvc_major; @@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_T5_CPU; break; + case SUN4V_CHIP_SPARC_M7: + perf_hsvc_group = HV_GRP_M7_PERF; + break; + default: return -ENODEV; } @@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void) pcr_ops = &n5_pcr_ops; break; + case SUN4V_CHIP_SPARC_M7: + pcr_ops = &m7_pcr_ops; + break; + default: ret = -ENODEV; break; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 46a5e45..86eebfa 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = { .num_pic_regs = 4, }; +static void sparc_m7_write_pmc(int idx, u64 val) +{ + u64 pcr; + + pcr = pcr_ops->read_pcr(idx); + /* ensure ov and ntc are reset */ + pcr &= ~(PCR_N4_OV | PCR_N4_NTC); + + pcr_ops->write_pic(idx, val & 0xffffffff); + + pcr_ops->write_pcr(idx, pcr); +} + +static const struct sparc_pmu sparc_m7_pmu = { + .event_map = niagara4_event_map, + .cache_map = &niagara4_cache_map, + .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), + .read_pmc = sparc_vt_read_pmc, + .write_pmc = sparc_m7_write_pmc, + .upper_shift = 5, + .lower_shift = 5, + .event_mask = 0x7ff, + .user_bit = PCR_N4_UTRACE, + .priv_bit = PCR_N4_STRACE, + + /* We explicitly don't support hypervisor tracing. */ + .hv_bit = 0, + + .irq_bit = PCR_N4_TOE, + .upper_nop = 0, + .lower_nop = 0, + .flags = 0, + .max_hw_events = 4, + .num_pcrs = 4, + .num_pic_regs = 4, +}; static const struct sparc_pmu *sparc_pmu __read_mostly; static u64 event_encoding(u64 event_id, int idx) @@ -960,6 +996,8 @@ out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; } +static void sparc_pmu_start(struct perf_event *event, int flags); + /* On this PMU each PIC has it's own PCR control register. */ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) { @@ -972,20 +1010,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) struct perf_event *cp = cpuc->event[i]; struct hw_perf_event *hwc = &cp->hw; int idx = hwc->idx; - u64 enc; if (cpuc->current_idx[i] != PIC_NO_INDEX) continue; - sparc_perf_event_set_period(cp, hwc, idx); cpuc->current_idx[i] = idx; - enc = perf_event_get_enc(cpuc->events[i]); - cpuc->pcr[idx] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) - cpuc->pcr[idx] |= nop_for_index(idx); - else - cpuc->pcr[idx] |= event_encoding(enc, idx); + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: for (i = 0; i < cpuc->n_events; i++) { @@ -1101,7 +1132,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) int i; local_irq_save(flags); - perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { @@ -1127,7 +1157,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) } } - perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -1361,7 +1390,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) unsigned long flags; local_irq_save(flags); - perf_pmu_disable(event->pmu); n0 = cpuc->n_events; if (n0 >= sparc_pmu->max_hw_events) @@ -1394,7 +1422,6 @@ nocheck: ret = 0; out: - perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -1667,6 +1694,10 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara4_pmu; return true; } + if (!strcmp(sparc_pmu_type, "sparc-m7")) { + sparc_pmu = &sparc_m7_pmu; + return true; + } return false; } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 0be7bf9..46a5964 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self) printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n", gp->tpc, gp->o7, gp->i7, gp->rpc); } + + touch_nmi_watchdog(); } memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); @@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void) (cpu == this_cpu ? '*' : ' '), cpu, pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3], pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]); + + touch_nmi_watchdog(); } memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S index b7f6334..857ad4f 100644 --- a/arch/sparc/lib/memmove.S +++ b/arch/sparc/lib/memmove.S @@ -8,9 +8,11 @@ .text ENTRY(memmove) /* o0=dst o1=src o2=len */ - mov %o0, %g1 + brz,pn %o2, 99f + mov %o0, %g1 + cmp %o0, %o1 - bleu,pt %xcc, memcpy + bleu,pt %xcc, 2f add %o1, %o2, %g7 cmp %g7, %o0 bleu,pt %xcc, memcpy @@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */ stb %g7, [%o0] bne,pt %icc, 1b sub %o0, 1, %o0 - +99: retl mov %g1, %o0 + + /* We can't just call memcpy for these memmove cases. On some + * chips the memcpy uses cache initializing stores and when dst + * and src are close enough, those can clobber the source data + * before we've loaded it in. + */ +2: or %o0, %o1, %g7 + or %o2, %g7, %g7 + andcc %g7, 0x7, %g0 + bne,pn %xcc, 4f + nop + +3: ldx [%o1], %g7 + add %o1, 8, %o1 + subcc %o2, 8, %o2 + add %o0, 8, %o0 + bne,pt %icc, 3b + stx %g7, [%o0 - 0x8] + ba,a,pt %xcc, 99b + +4: ldub [%o1], %g7 + add %o1, 1, %o1 + subcc %o2, 1, %o2 + add %o0, 1, %o0 + bne,pt %icc, 4b + stb %g7, [%o0 - 0x1] + ba,a,pt %xcc, 99b ENDPROC(memmove) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index b1947e0..46d4449 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -422,6 +422,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, struct kvm_ioapic *ioapic, int vector, int trigger_mode) { int i; + struct kvm_lapic *apic = vcpu->arch.apic; for (i = 0; i < IOAPIC_NUM_PINS; i++) { union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; @@ -443,7 +444,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); spin_lock(&ioapic->lock); - if (trigger_mode != IOAPIC_LEVEL_TRIG) + if (trigger_mode != IOAPIC_LEVEL_TRIG || + kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) continue; ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bd4e34d..4ee827d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -833,8 +833,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) { - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && - kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { + if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { int trigger_mode; if (apic_test_vector(vector, apic->regs + APIC_TMR)) trigger_mode = IOAPIC_LEVEL_TRIG; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10a481b..ae4f6d3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2479,8 +2479,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_UNRESTRICTED_GUEST; + SECONDARY_EXEC_ENABLE_EPT; vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; @@ -2494,6 +2493,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) } else vmx->nested.nested_vmx_ept_caps = 0; + if (enable_unrestricted_guest) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_UNRESTRICTED_GUEST; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, vmx->nested.nested_vmx_misc_low, diff --git a/block/blk-merge.c b/block/blk-merge.c index fc1ff3b..fd3fee8 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -592,7 +592,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { struct bio_vec *bprev; - bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1]; + bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1]; if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset)) return false; } diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index d53a764..be3290c 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -278,9 +278,11 @@ static int bt_get(struct blk_mq_alloc_data *data, /* * We're out of tags on this hardware queue, kick any * pending IO submits before going to sleep waiting for - * some to complete. + * some to complete. Note that hctx can be NULL here for + * reserved tag allocation. */ - blk_mq_run_hw_queue(hctx, false); + if (hctx) + blk_mq_run_hw_queue(hctx, false); /* * Retry tag allocation after running the hardware queue, diff --git a/block/blk-mq.c b/block/blk-mq.c index 4f4bea2..b7b8933 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1938,7 +1938,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) */ if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) - goto err_map; + goto err_mq_usage; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); blk_queue_rq_timeout(q, 30000); @@ -1981,7 +1981,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) blk_mq_init_cpu_queues(q, set->nr_hw_queues); if (blk_mq_init_hw_queues(q, set)) - goto err_hw; + goto err_mq_usage; mutex_lock(&all_q_mutex); list_add_tail(&q->all_q_node, &all_q_list); @@ -1993,7 +1993,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) return q; -err_hw: +err_mq_usage: blk_cleanup_queue(q); err_hctxs: kfree(map); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 4c35f08..ef150eb 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) return NULL; /* libsas case */ - if (!ap->scsi_host) { + if (ap->flags & ATA_FLAG_SAS_HOST) { tag = ata_sas_allocate_tag(ap); if (tag < 0) return NULL; @@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) tag = qc->tag; if (likely(ata_tag_valid(tag))) { qc->tag = ATA_TAG_POISON; - if (!ap->scsi_host) + if (ap->flags & ATA_FLAG_SAS_HOST) ata_sas_free_tag(tag, ap); } } diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index beb8b27..a13587b 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -243,4 +243,12 @@ extern struct regcache_ops regcache_rbtree_ops; extern struct regcache_ops regcache_lzo_ops; extern struct regcache_ops regcache_flat_ops; +static inline const char *regmap_name(const struct regmap *map) +{ + if (map->dev) + return dev_name(map->dev); + + return map->name; +} + #endif diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index da84f54..87db989 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -218,7 +218,7 @@ int regcache_read(struct regmap *map, ret = map->cache_ops->read(map, reg, value); if (ret == 0) - trace_regmap_reg_read_cache(map->dev, reg, *value); + trace_regmap_reg_read_cache(map, reg, *value); return ret; } @@ -311,7 +311,7 @@ int regcache_sync(struct regmap *map) dev_dbg(map->dev, "Syncing %s cache\n", map->cache_ops->name); name = map->cache_ops->name; - trace_regcache_sync(map->dev, name, "start"); + trace_regcache_sync(map, name, "start"); if (!map->cache_dirty) goto out; @@ -346,7 +346,7 @@ out: regmap_async_complete(map); - trace_regcache_sync(map->dev, name, "stop"); + trace_regcache_sync(map, name, "stop"); return ret; } @@ -381,7 +381,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min, name = map->cache_ops->name; dev_dbg(map->dev, "Syncing %s cache from %d-%d\n", name, min, max); - trace_regcache_sync(map->dev, name, "start region"); + trace_regcache_sync(map, name, "start region"); if (!map->cache_dirty) goto out; @@ -401,7 +401,7 @@ out: regmap_async_complete(map); - trace_regcache_sync(map->dev, name, "stop region"); + trace_regcache_sync(map, name, "stop region"); return ret; } @@ -428,7 +428,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min, map->lock(map->lock_arg); - trace_regcache_drop_region(map->dev, min, max); + trace_regcache_drop_region(map, min, max); ret = map->cache_ops->drop(map, min, max); @@ -455,7 +455,7 @@ void regcache_cache_only(struct regmap *map, bool enable) map->lock(map->lock_arg); WARN_ON(map->cache_bypass && enable); map->cache_only = enable; - trace_regmap_cache_only(map->dev, enable); + trace_regmap_cache_only(map, enable); map->unlock(map->lock_arg); } EXPORT_SYMBOL_GPL(regcache_cache_only); @@ -493,7 +493,7 @@ void regcache_cache_bypass(struct regmap *map, bool enable) map->lock(map->lock_arg); WARN_ON(map->cache_only && enable); map->cache_bypass = enable; - trace_regmap_cache_bypass(map->dev, enable); + trace_regmap_cache_bypass(map, enable); map->unlock(map->lock_arg); } EXPORT_SYMBOL_GPL(regcache_cache_bypass); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index f99b098..dbfe6a6 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1281,7 +1281,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, if (map->async && map->bus->async_write) { struct regmap_async *async; - trace_regmap_async_write_start(map->dev, reg, val_len); + trace_regmap_async_write_start(map, reg, val_len); spin_lock_irqsave(&map->async_lock, flags); async = list_first_entry_or_null(&map->async_free, @@ -1339,8 +1339,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, return ret; } - trace_regmap_hw_write_start(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_write_start(map, reg, val_len / map->format.val_bytes); /* If we're doing a single register write we can probably just * send the work_buf directly, otherwise try to do a gather @@ -1372,8 +1371,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, kfree(buf); } - trace_regmap_hw_write_done(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes); return ret; } @@ -1407,12 +1405,12 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, map->format.format_write(map, reg, val); - trace_regmap_hw_write_start(map->dev, reg, 1); + trace_regmap_hw_write_start(map, reg, 1); ret = map->bus->write(map->bus_context, map->work_buf, map->format.buf_size); - trace_regmap_hw_write_done(map->dev, reg, 1); + trace_regmap_hw_write_done(map, reg, 1); return ret; } @@ -1470,7 +1468,7 @@ int _regmap_write(struct regmap *map, unsigned int reg, dev_info(map->dev, "%x <= %x\n", reg, val); #endif - trace_regmap_reg_write(map->dev, reg, val); + trace_regmap_reg_write(map, reg, val); return map->reg_write(context, reg, val); } @@ -1773,7 +1771,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, for (i = 0; i < num_regs; i++) { int reg = regs[i].reg; int val = regs[i].def; - trace_regmap_hw_write_start(map->dev, reg, 1); + trace_regmap_hw_write_start(map, reg, 1); map->format.format_reg(u8, reg, map->reg_shift); u8 += reg_bytes + pad_bytes; map->format.format_val(u8, val, 0); @@ -1788,7 +1786,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, for (i = 0; i < num_regs; i++) { int reg = regs[i].reg; - trace_regmap_hw_write_done(map->dev, reg, 1); + trace_regmap_hw_write_done(map, reg, 1); } return ret; } @@ -2059,15 +2057,13 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, */ u8[0] |= map->read_flag_mask; - trace_regmap_hw_read_start(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes); ret = map->bus->read(map->bus_context, map->work_buf, map->format.reg_bytes + map->format.pad_bytes, val, val_len); - trace_regmap_hw_read_done(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes); return ret; } @@ -2123,7 +2119,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg, dev_info(map->dev, "%x => %x\n", reg, *val); #endif - trace_regmap_reg_read(map->dev, reg, *val); + trace_regmap_reg_read(map, reg, *val); if (!map->cache_bypass) regcache_write(map, reg, *val); @@ -2480,7 +2476,7 @@ void regmap_async_complete_cb(struct regmap_async *async, int ret) struct regmap *map = async->map; bool wake; - trace_regmap_async_io_complete(map->dev); + trace_regmap_async_io_complete(map); spin_lock(&map->async_lock); list_move(&async->list, &map->async_free); @@ -2525,7 +2521,7 @@ int regmap_async_complete(struct regmap *map) if (!map->bus || !map->bus->async_write) return 0; - trace_regmap_async_complete_start(map->dev); + trace_regmap_async_complete_start(map); wait_event(map->async_waitq, regmap_async_is_done(map)); @@ -2534,7 +2530,7 @@ int regmap_async_complete(struct regmap *map) map->async_ret = 0; spin_unlock_irqrestore(&map->async_lock, flags); - trace_regmap_async_complete_done(map->dev); + trace_regmap_async_complete_done(map); return ret; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4bc2a5c..a98c41f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -803,10 +803,6 @@ static int __init nbd_init(void) return -EINVAL; } - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); - if (!nbd_dev) - return -ENOMEM; - part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); @@ -828,6 +824,10 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; + nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); + if (!nbd_dev) + return -ENOMEM; + for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index ceb32dd..e23be20 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -3003,6 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) } get_device(dev->device); + INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 68161f7..a0b036c 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -192,6 +192,7 @@ config SYS_SUPPORTS_EM_STI config SH_TIMER_CMT bool "Renesas CMT timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_CMT help This enables build of a clocksource and clockevent driver for @@ -201,6 +202,7 @@ config SH_TIMER_CMT config SH_TIMER_MTU2 bool "Renesas MTU2 timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_MTU2 help This enables build of a clockevent driver for the Multi-Function @@ -210,6 +212,7 @@ config SH_TIMER_MTU2 config SH_TIMER_TMU bool "Renesas TMU timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_TMU help This enables build of a clocksource and clockevent driver for diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 5dcbf90..58597fb 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -17,7 +17,6 @@ #include <linux/irq.h> #include <linux/irqreturn.h> #include <linux/reset.h> -#include <linux/sched_clock.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> @@ -137,11 +136,6 @@ static struct irqaction sun5i_timer_irq = { .dev_id = &sun5i_clockevent, }; -static u64 sun5i_timer_sched_read(void) -{ - return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1)); -} - static void __init sun5i_timer_init(struct device_node *node) { struct reset_control *rstc; @@ -172,7 +166,6 @@ static void __init sun5i_timer_init(struct device_node *node) writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, timer_base + TIMER_CTL_REG(1)); - sched_clock_register(sun5i_timer_sched_read, 32, rate); clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name, rate, 340, 32, clocksource_mmio_readl_down); diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f6d04c7..679b10e 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -525,17 +525,6 @@ void drm_framebuffer_reference(struct drm_framebuffer *fb) } EXPORT_SYMBOL(drm_framebuffer_reference); -static void drm_framebuffer_free_bug(struct kref *kref) -{ - BUG(); -} - -static void __drm_framebuffer_unreference(struct drm_framebuffer *fb) -{ - DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount)); - kref_put(&fb->refcount, drm_framebuffer_free_bug); -} - /** * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr * @fb: fb to unregister @@ -1320,7 +1309,7 @@ void drm_plane_force_disable(struct drm_plane *plane) return; } /* disconnect the plane from the fb and crtc: */ - __drm_framebuffer_unreference(plane->old_fb); + drm_framebuffer_unreference(plane->old_fb); plane->old_fb = NULL; plane->fb = NULL; plane->crtc = NULL; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5b20586..27ea6bd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2737,24 +2737,11 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) WARN_ON(i915_verify_lists(ring->dev)); - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate, - * before we free the context associated with the requests. + /* Retire requests first as we use it above for the early return. + * If we retire requests last, we may use a later seqno and so clear + * the requests lists without clearing the active list, leading to + * confusion. */ - while (!list_empty(&ring->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&ring->active_list, - struct drm_i915_gem_object, - ring_list); - - if (!i915_gem_request_completed(obj->last_read_req, true)) - break; - - i915_gem_object_move_to_inactive(obj); - } - - while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; struct intel_ringbuffer *ringbuf; @@ -2789,6 +2776,23 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) i915_gem_free_request(request); } + /* Move any buffers on the active list that are no longer referenced + * by the ringbuffer to the flushing/inactive lists as appropriate, + * before we free the context associated with the requests. + */ + while (!list_empty(&ring->active_list)) { + struct drm_i915_gem_object *obj; + + obj = list_first_entry(&ring->active_list, + struct drm_i915_gem_object, + ring_list); + + if (!i915_gem_request_completed(obj->last_read_req, true)) + break; + + i915_gem_object_move_to_inactive(obj); + } + if (unlikely(ring->trace_irq_req && i915_gem_request_completed(ring->trace_irq_req, true))) { ring->irq_put(ring); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6d22128..f75173c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2438,8 +2438,15 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, if (!intel_crtc->base.primary->fb) return; - if (intel_alloc_plane_obj(intel_crtc, plane_config)) + if (intel_alloc_plane_obj(intel_crtc, plane_config)) { + struct drm_plane *primary = intel_crtc->base.primary; + + primary->state->crtc = &intel_crtc->base; + primary->crtc = &intel_crtc->base; + update_state_fb(primary); + return; + } kfree(intel_crtc->base.primary->fb); intel_crtc->base.primary->fb = NULL; @@ -2462,11 +2469,15 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, continue; if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) { + struct drm_plane *primary = intel_crtc->base.primary; + if (obj->tiling_mode != I915_TILING_NONE) dev_priv->preserve_bios_swizzle = true; drm_framebuffer_reference(c->primary->fb); - intel_crtc->base.primary->fb = c->primary->fb; + primary->fb = c->primary->fb; + primary->state->crtc = &intel_crtc->base; + primary->crtc = &intel_crtc->base; obj->frontbuffer_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); break; } @@ -6663,7 +6674,6 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, plane_config->size); crtc->base.primary->fb = fb; - update_state_fb(crtc->base.primary); } static void chv_crtc_clock_get(struct intel_crtc *crtc, @@ -7704,7 +7714,6 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, plane_config->size); crtc->base.primary->fb = fb; - update_state_fb(crtc->base.primary); return; error: @@ -7798,7 +7807,6 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, plane_config->size); crtc->base.primary->fb = fb; - update_state_fb(crtc->base.primary); } static bool ironlake_get_pipe_config(struct intel_crtc *crtc, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9b641b3..8001fe9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -433,7 +433,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode) dm_get(md); atomic_inc(&md->open_count); - out: spin_unlock(&_minor_lock); @@ -442,16 +441,20 @@ out: static void dm_blk_close(struct gendisk *disk, fmode_t mode) { - struct mapped_device *md = disk->private_data; + struct mapped_device *md; spin_lock(&_minor_lock); + md = disk->private_data; + if (WARN_ON(!md)) + goto out; + if (atomic_dec_and_test(&md->open_count) && (test_bit(DMF_DEFERRED_REMOVE, &md->flags))) queue_work(deferred_remove_workqueue, &deferred_remove_work); dm_put(md); - +out: spin_unlock(&_minor_lock); } @@ -2241,7 +2244,6 @@ static void free_dev(struct mapped_device *md) int minor = MINOR(disk_devt(md->disk)); unlock_fs(md); - bdput(md->bdev); destroy_workqueue(md->wq); if (md->kworker_task) @@ -2252,19 +2254,22 @@ static void free_dev(struct mapped_device *md) mempool_destroy(md->rq_pool); if (md->bs) bioset_free(md->bs); - blk_integrity_unregister(md->disk); - del_gendisk(md->disk); + cleanup_srcu_struct(&md->io_barrier); free_table_devices(&md->table_devices); - free_minor(minor); + dm_stats_cleanup(&md->stats); spin_lock(&_minor_lock); md->disk->private_data = NULL; spin_unlock(&_minor_lock); - + if (blk_get_integrity(md->disk)) + blk_integrity_unregister(md->disk); + del_gendisk(md->disk); put_disk(md->disk); blk_cleanup_queue(md->queue); - dm_stats_cleanup(&md->stats); + bdput(md->bdev); + free_minor(minor); + module_put(THIS_MODULE); kfree(md); } @@ -2642,8 +2647,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait) might_sleep(); - spin_lock(&_minor_lock); map = dm_get_live_table(md, &srcu_idx); + + spin_lock(&_minor_lock); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c index f38ec42..5615522 100644 --- a/drivers/mfd/kempld-core.c +++ b/drivers/mfd/kempld-core.c @@ -739,7 +739,7 @@ static int __init kempld_init(void) for (id = kempld_dmi_table; id->matches[0].slot != DMI_NONE; id++) if (strstr(id->ident, force_device_id)) - if (id->callback && id->callback(id)) + if (id->callback && !id->callback(id)) break; if (id->matches[0].slot == DMI_NONE) return -ENODEV; diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c index ede5024..dbd907d 100644 --- a/drivers/mfd/rtsx_usb.c +++ b/drivers/mfd/rtsx_usb.c @@ -196,18 +196,27 @@ EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) { u16 value; + u8 *buf; + int ret; if (!data) return -EINVAL; - *data = 0; + + buf = kzalloc(sizeof(u8), GFP_KERNEL); + if (!buf) + return -ENOMEM; addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; value = swab16(addr); - return usb_control_msg(ucr->pusb_dev, + ret = usb_control_msg(ucr->pusb_dev, usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, 0, data, 1, 100); + value, 0, buf, 1, 100); + *data = *buf; + + kfree(buf); + return ret; } EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); @@ -288,18 +297,27 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) { int ret; + u16 *buf; if (!status) return -EINVAL; - if (polling_pipe == 0) + if (polling_pipe == 0) { + buf = kzalloc(sizeof(u16), GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(ucr->pusb_dev, usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_POLL, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, 0, status, 2, 100); - else + 0, 0, buf, 2, 100); + *status = *buf; + + kfree(buf); + } else { ret = rtsx_usb_get_status_with_bulk(ucr, status); + } /* usb_control_msg may return positive when success */ if (ret < 0) diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 11d6e65..15a8190 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1543,7 +1543,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) { struct pcnet32_private *lp; int i, media; - int fdx, mii, fset, dxsuflo; + int fdx, mii, fset, dxsuflo, sram; int chip_version; char *chipname; struct net_device *dev; @@ -1580,7 +1580,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) } /* initialize variables */ - fdx = mii = fset = dxsuflo = 0; + fdx = mii = fset = dxsuflo = sram = 0; chip_version = (chip_version >> 12) & 0xffff; switch (chip_version) { @@ -1613,6 +1613,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) chipname = "PCnet/FAST III 79C973"; /* PCI */ fdx = 1; mii = 1; + sram = 1; break; case 0x2626: chipname = "PCnet/Home 79C978"; /* PCI */ @@ -1636,6 +1637,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) chipname = "PCnet/FAST III 79C975"; /* PCI */ fdx = 1; mii = 1; + sram = 1; break; case 0x2628: chipname = "PCnet/PRO 79C976"; @@ -1664,6 +1666,31 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) dxsuflo = 1; } + /* + * The Am79C973/Am79C975 controllers come with 12K of SRAM + * which we can use for the Tx/Rx buffers but most importantly, + * the use of SRAM allow us to use the BCR18:NOUFLO bit to avoid + * Tx fifo underflows. + */ + if (sram) { + /* + * The SRAM is being configured in two steps. First we + * set the SRAM size in the BCR25:SRAM_SIZE bits. According + * to the datasheet, each bit corresponds to a 512-byte + * page so we can have at most 24 pages. The SRAM_SIZE + * holds the value of the upper 8 bits of the 16-bit SRAM size. + * The low 8-bits start at 0x00 and end at 0xff. So the + * address range is from 0x0000 up to 0x17ff. Therefore, + * the SRAM_SIZE is set to 0x17. The next step is to set + * the BCR26:SRAM_BND midway through so the Tx and Rx + * buffers can share the SRAM equally. + */ + a->write_bcr(ioaddr, 25, 0x17); + a->write_bcr(ioaddr, 26, 0xc); + /* And finally enable the NOUFLO bit */ + a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11)); + } + dev = alloc_etherdev(sizeof(*lp)); if (!dev) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 27de37a..27b9fe9 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -354,6 +354,7 @@ struct be_vf_cfg { u16 vlan_tag; u32 tx_rate; u32 plink_tracking; + u32 privileges; }; enum vf_state { @@ -423,6 +424,7 @@ struct be_adapter { u8 __iomem *csr; /* CSR BAR used only for BE2/3 */ u8 __iomem *db; /* Door Bell */ + u8 __iomem *pcicfg; /* On SH,BEx only. Shadow of PCI config space */ struct mutex mbox_lock; /* For serializing mbox cmds to BE card */ struct be_dma_mem mbox_mem; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 36916cf..7f05f30 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1902,15 +1902,11 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd, { int num_eqs, i = 0; - if (lancer_chip(adapter) && num > 8) { - while (num) { - num_eqs = min(num, 8); - __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs); - i += num_eqs; - num -= num_eqs; - } - } else { - __be_cmd_modify_eqd(adapter, set_eqd, num); + while (num) { + num_eqs = min(num, 8); + __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs); + i += num_eqs; + num -= num_eqs; } return 0; @@ -1918,7 +1914,7 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd, /* Uses sycnhronous mcc */ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, - u32 num) + u32 num, u32 domain) { struct be_mcc_wrb *wrb; struct be_cmd_req_vlan_config *req; @@ -1936,6 +1932,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req), wrb, NULL); + req->hdr.domain = domain; req->interface_id = if_id; req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index db761e8e..a7634a3 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2256,7 +2256,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, int be_cmd_get_fw_ver(struct be_adapter *adapter); int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *, int num); int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, - u32 num); + u32 num, u32 domain); int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 status); int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc); int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 0a81685..e6b790f 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1171,7 +1171,7 @@ static int be_vid_config(struct be_adapter *adapter) for_each_set_bit(i, adapter->vids, VLAN_N_VID) vids[num++] = cpu_to_le16(i); - status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num); + status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0); if (status) { dev_err(dev, "Setting HW VLAN filtering failed\n"); /* Set to VLAN promisc mode as setting VLAN filter failed */ @@ -1380,11 +1380,67 @@ static int be_get_vf_config(struct net_device *netdev, int vf, return 0; } +static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan) +{ + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; + u16 vids[BE_NUM_VLANS_SUPPORTED]; + int vf_if_id = vf_cfg->if_handle; + int status; + + /* Enable Transparent VLAN Tagging */ + status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0); + if (status) + return status; + + /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */ + vids[0] = 0; + status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1); + if (!status) + dev_info(&adapter->pdev->dev, + "Cleared guest VLANs on VF%d", vf); + + /* After TVT is enabled, disallow VFs to program VLAN filters */ + if (vf_cfg->privileges & BE_PRIV_FILTMGMT) { + status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges & + ~BE_PRIV_FILTMGMT, vf + 1); + if (!status) + vf_cfg->privileges &= ~BE_PRIV_FILTMGMT; + } + return 0; +} + +static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) +{ + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; + struct device *dev = &adapter->pdev->dev; + int status; + + /* Reset Transparent VLAN Tagging. */ + status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1, + vf_cfg->if_handle, 0); + if (status) + return status; + + /* Allow VFs to program VLAN filtering */ + if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) { + status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges | + BE_PRIV_FILTMGMT, vf + 1); + if (!status) { + vf_cfg->privileges |= BE_PRIV_FILTMGMT; + dev_info(dev, "VF%d: FILTMGMT priv enabled", vf); + } + } + + dev_info(dev, + "Disable/re-enable i/f in VM to clear Transparent VLAN tag"); + return 0; +} + static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) { struct be_adapter *adapter = netdev_priv(netdev); struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; - int status = 0; + int status; if (!sriov_enabled(adapter)) return -EPERM; @@ -1394,24 +1450,19 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if (vlan || qos) { vlan |= qos << VLAN_PRIO_SHIFT; - if (vf_cfg->vlan_tag != vlan) - status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, - vf_cfg->if_handle, 0); + status = be_set_vf_tvt(adapter, vf, vlan); } else { - /* Reset Transparent Vlan Tagging. */ - status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, - vf + 1, vf_cfg->if_handle, 0); + status = be_clear_vf_tvt(adapter, vf); } if (status) { dev_err(&adapter->pdev->dev, - "VLAN %d config on VF %d failed : %#x\n", vlan, - vf, status); + "VLAN %d config on VF %d failed : %#x\n", vlan, vf, + status); return be_cmd_status(status); } vf_cfg->vlan_tag = vlan; - return 0; } @@ -2772,14 +2823,12 @@ void be_detect_error(struct be_adapter *adapter) } } } else { - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_LOW, &ue_lo); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_HIGH, &ue_hi); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask); + ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW); + ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH); + ue_lo_mask = ioread32(adapter->pcicfg + + PCICFG_UE_STATUS_LOW_MASK); + ue_hi_mask = ioread32(adapter->pcicfg + + PCICFG_UE_STATUS_HI_MASK); ue_lo = (ue_lo & ~ue_lo_mask); ue_hi = (ue_hi & ~ue_hi_mask); @@ -3339,7 +3388,6 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, u32 cap_flags, u32 vf) { u32 en_flags; - int status; en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | @@ -3347,10 +3395,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, en_flags &= cap_flags; - status = be_cmd_if_create(adapter, cap_flags, en_flags, - if_handle, vf); - - return status; + return be_cmd_if_create(adapter, cap_flags, en_flags, if_handle, vf); } static int be_vfs_if_create(struct be_adapter *adapter) @@ -3368,8 +3413,13 @@ static int be_vfs_if_create(struct be_adapter *adapter) if (!BE3_chip(adapter)) { status = be_cmd_get_profile_config(adapter, &res, vf + 1); - if (!status) + if (!status) { cap_flags = res.if_cap_flags; + /* Prevent VFs from enabling VLAN promiscuous + * mode + */ + cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS; + } } status = be_if_create(adapter, &vf_cfg->if_handle, @@ -3403,7 +3453,6 @@ static int be_vf_setup(struct be_adapter *adapter) struct device *dev = &adapter->pdev->dev; struct be_vf_cfg *vf_cfg; int status, old_vfs, vf; - u32 privileges; old_vfs = pci_num_vf(adapter->pdev); @@ -3433,15 +3482,18 @@ static int be_vf_setup(struct be_adapter *adapter) for_all_vfs(adapter, vf_cfg, vf) { /* Allow VFs to programs MAC/VLAN filters */ - status = be_cmd_get_fn_privileges(adapter, &privileges, vf + 1); - if (!status && !(privileges & BE_PRIV_FILTMGMT)) { + status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges, + vf + 1); + if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) { status = be_cmd_set_fn_privileges(adapter, - privileges | + vf_cfg->privileges | BE_PRIV_FILTMGMT, vf + 1); - if (!status) + if (!status) { + vf_cfg->privileges |= BE_PRIV_FILTMGMT; dev_info(dev, "VF%d has FILTMGMT privilege\n", vf); + } } /* Allow full available bandwidth */ @@ -4820,24 +4872,37 @@ static int be_roce_map_pci_bars(struct be_adapter *adapter) static int be_map_pci_bars(struct be_adapter *adapter) { + struct pci_dev *pdev = adapter->pdev; u8 __iomem *addr; if (BEx_chip(adapter) && be_physfn(adapter)) { - adapter->csr = pci_iomap(adapter->pdev, 2, 0); + adapter->csr = pci_iomap(pdev, 2, 0); if (!adapter->csr) return -ENOMEM; } - addr = pci_iomap(adapter->pdev, db_bar(adapter), 0); + addr = pci_iomap(pdev, db_bar(adapter), 0); if (!addr) goto pci_map_err; adapter->db = addr; + if (skyhawk_chip(adapter) || BEx_chip(adapter)) { + if (be_physfn(adapter)) { + /* PCICFG is the 2nd BAR in BE2 */ + addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0); + if (!addr) + goto pci_map_err; + adapter->pcicfg = addr; + } else { + adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET; + } + } + be_roce_map_pci_bars(adapter); return 0; pci_map_err: - dev_err(&adapter->pdev->dev, "Error in mapping PCI BARs\n"); + dev_err(&pdev->dev, "Error in mapping PCI BARs\n"); be_unmap_pci_bars(adapter); return -ENOMEM; } diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index fe48f4c..1762ad3 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -46,8 +46,7 @@ enum cx82310_status { }; #define CMD_PACKET_SIZE 64 -/* first command after power on can take around 8 seconds */ -#define CMD_TIMEOUT 15000 +#define CMD_TIMEOUT 100 #define CMD_REPLY_RETRY 5 #define CX82310_MTU 1514 @@ -78,8 +77,9 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply, ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, CMD_EP), buf, CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT); if (ret < 0) { - dev_err(&dev->udev->dev, "send command %#x: error %d\n", - cmd, ret); + if (cmd != CMD_GET_LINK_STATUS) + dev_err(&dev->udev->dev, "send command %#x: error %d\n", + cmd, ret); goto end; } @@ -90,8 +90,10 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply, buf, CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT); if (ret < 0) { - dev_err(&dev->udev->dev, - "reply receive error %d\n", ret); + if (cmd != CMD_GET_LINK_STATUS) + dev_err(&dev->udev->dev, + "reply receive error %d\n", + ret); goto end; } if (actual_len > 0) @@ -134,6 +136,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) int ret; char buf[15]; struct usb_device *udev = dev->udev; + u8 link[3]; + int timeout = 50; /* avoid ADSL modems - continue only if iProduct is "USB NET CARD" */ if (usb_string(udev, udev->descriptor.iProduct, buf, sizeof(buf)) > 0 @@ -160,6 +164,20 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) if (!dev->partial_data) return -ENOMEM; + /* wait for firmware to become ready (indicated by the link being up) */ + while (--timeout) { + ret = cx82310_cmd(dev, CMD_GET_LINK_STATUS, true, NULL, 0, + link, sizeof(link)); + /* the command can time out during boot - it's not an error */ + if (!ret && link[0] == 1 && link[2] == 1) + break; + msleep(500); + }; + if (!timeout) { + dev_err(&udev->dev, "firmware not ready in time\n"); + return -ETIMEDOUT; + } + /* enable ethernet mode (?) */ ret = cx82310_cmd(dev, CMD_ETHERNET_MODE, true, "\x01", 1, NULL, 0); if (ret) { diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index 9205f43..1819831 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev) if (!pmic) return -ENOMEM; + if (of_device_is_compatible(node, "ti,tps659038-pmic")) + palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr = + TPS659038_REGEN2_CTRL; + pmic->dev = &pdev->dev; pmic->palmas = palmas; palmas->pmic = pmic; diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index e2436d1..3a6fd3a 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c @@ -413,8 +413,8 @@ static void rtc_mrst_do_remove(struct device *dev) mrst->dev = NULL; } -#ifdef CONFIG_PM -static int mrst_suspend(struct device *dev, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP +static int mrst_suspend(struct device *dev) { struct mrst_rtc *mrst = dev_get_drvdata(dev); unsigned char tmp; @@ -453,7 +453,7 @@ static int mrst_suspend(struct device *dev, pm_message_t mesg) */ static inline int mrst_poweroff(struct device *dev) { - return mrst_suspend(dev, PMSG_HIBERNATE); + return mrst_suspend(dev); } static int mrst_resume(struct device *dev) @@ -490,9 +490,11 @@ static int mrst_resume(struct device *dev) return 0; } +static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume); +#define MRST_PM_OPS (&mrst_pm_ops) + #else -#define mrst_suspend NULL -#define mrst_resume NULL +#define MRST_PM_OPS NULL static inline int mrst_poweroff(struct device *dev) { @@ -529,9 +531,8 @@ static struct platform_driver vrtc_mrst_platform_driver = { .remove = vrtc_mrst_platform_remove, .shutdown = vrtc_mrst_platform_shutdown, .driver = { - .name = (char *) driver_name, - .suspend = mrst_suspend, - .resume = mrst_resume, + .name = driver_name, + .pm = MRST_PM_OPS, } }; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 9219953..d9afc51 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = { }; static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA, + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | + ATA_FLAG_SAS_HOST, .pio_mask = ATA_PIO4_ONLY, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 932d9cc..9c706d8 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = { }; static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ, + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ | + ATA_FLAG_SAS_HOST, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index 3ce39d1..4f8c798 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -108,7 +108,8 @@ static void dw_spi_dma_tx_done(void *arg) { struct dw_spi *dws = arg; - if (test_and_clear_bit(TX_BUSY, &dws->dma_chan_busy) & BIT(RX_BUSY)) + clear_bit(TX_BUSY, &dws->dma_chan_busy); + if (test_bit(RX_BUSY, &dws->dma_chan_busy)) return; dw_spi_xfer_done(dws); } @@ -156,7 +157,8 @@ static void dw_spi_dma_rx_done(void *arg) { struct dw_spi *dws = arg; - if (test_and_clear_bit(RX_BUSY, &dws->dma_chan_busy) & BIT(TX_BUSY)) + clear_bit(RX_BUSY, &dws->dma_chan_busy); + if (test_bit(TX_BUSY, &dws->dma_chan_busy)) return; dw_spi_xfer_done(dws); } diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index ff9cdbd..2b2c359 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -498,7 +498,7 @@ static int spi_qup_probe(struct platform_device *pdev) struct resource *res; struct device *dev; void __iomem *base; - u32 max_freq, iomode; + u32 max_freq, iomode, num_cs; int ret, irq, size; dev = &pdev->dev; @@ -550,10 +550,11 @@ static int spi_qup_probe(struct platform_device *pdev) } /* use num-cs unless not present or out of range */ - if (of_property_read_u16(dev->of_node, "num-cs", - &master->num_chipselect) || - (master->num_chipselect > SPI_NUM_CHIPSELECTS)) + if (of_property_read_u32(dev->of_node, "num-cs", &num_cs) || + num_cs > SPI_NUM_CHIPSELECTS) master->num_chipselect = SPI_NUM_CHIPSELECTS; + else + master->num_chipselect = num_cs; master->bus_num = pdev->id; master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c64a3e5..57a1950 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1105,13 +1105,14 @@ void spi_finalize_current_message(struct spi_master *master) "failed to unprepare message: %d\n", ret); } } + + trace_spi_message_done(mesg); + master->cur_msg_prepared = false; mesg->state = NULL; if (mesg->complete) mesg->complete(mesg->context); - - trace_spi_message_done(mesg); } EXPORT_SYMBOL_GPL(spi_finalize_current_message); diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c index c8def68..0deaa4f 100644 --- a/drivers/watchdog/imgpdc_wdt.c +++ b/drivers/watchdog/imgpdc_wdt.c @@ -42,10 +42,10 @@ #define PDC_WDT_MIN_TIMEOUT 1 #define PDC_WDT_DEF_TIMEOUT 64 -static int heartbeat; +static int heartbeat = PDC_WDT_DEF_TIMEOUT; module_param(heartbeat, int, 0); -MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. " - "(default = " __MODULE_STRING(PDC_WDT_DEF_TIMEOUT) ")"); +MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds " + "(default=" __MODULE_STRING(PDC_WDT_DEF_TIMEOUT) ")"); static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); @@ -191,6 +191,7 @@ static int pdc_wdt_probe(struct platform_device *pdev) pdc_wdt->wdt_dev.ops = &pdc_wdt_ops; pdc_wdt->wdt_dev.max_timeout = 1 << PDC_WDT_CONFIG_DELAY_MASK; pdc_wdt->wdt_dev.parent = &pdev->dev; + watchdog_set_drvdata(&pdc_wdt->wdt_dev, pdc_wdt); ret = watchdog_init_timeout(&pdc_wdt->wdt_dev, heartbeat, &pdev->dev); if (ret < 0) { @@ -232,7 +233,6 @@ static int pdc_wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(&pdc_wdt->wdt_dev, nowayout); platform_set_drvdata(pdev, pdc_wdt); - watchdog_set_drvdata(&pdc_wdt->wdt_dev, pdc_wdt); ret = watchdog_register_device(&pdc_wdt->wdt_dev); if (ret) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index a87f6df..938b987 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -133,7 +133,7 @@ static int mtk_wdt_start(struct watchdog_device *wdt_dev) u32 reg; struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev); void __iomem *wdt_base = mtk_wdt->wdt_base; - u32 ret; + int ret; ret = mtk_wdt_set_timeout(wdt_dev, wdt_dev->timeout); if (ret < 0) diff --git a/fs/affs/file.c b/fs/affs/file.c index d2468bf..a91795e 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, boff = tmp % bsize; if (boff) { bh = affs_bread_ino(inode, bidx, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + written = PTR_ERR(bh); + goto err_first_bh; + } tmp = min(bsize - boff, to - from); BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); @@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, bidx++; } else if (bidx) { bh = affs_bread_ino(inode, bidx - 1, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + written = PTR_ERR(bh); + goto err_first_bh; + } } while (from + bsize <= to) { prev_bh = bh; bh = affs_getemptyblk_ino(inode, bidx); if (IS_ERR(bh)) - goto out; + goto err_bh; memcpy(AFFS_DATA(bh), data + from, bsize); if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); @@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, prev_bh = bh; bh = affs_bread_ino(inode, bidx, 1); if (IS_ERR(bh)) - goto out; + goto err_bh; tmp = min(bsize, to - from); BUG_ON(tmp > bsize); memcpy(AFFS_DATA(bh), data + from, tmp); @@ -790,12 +794,13 @@ done: if (tmp > inode->i_size) inode->i_size = AFFS_I(inode)->mmu_private = tmp; +err_first_bh: unlock_page(page); page_cache_release(page); return written; -out: +err_bh: bh = prev_bh; if (!written) written = PTR_ERR(bh); diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 6e560d5..754fdf8 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -131,13 +131,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -168,9 +171,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -370,6 +370,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd, hfs_find_rec_by_key); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/include/linux/libata.h b/include/linux/libata.h index fc03efa..6b08cc1 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -232,6 +232,7 @@ enum { * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ + ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index fb0390a..ee7b1ce 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -2999,6 +2999,9 @@ enum usb_irq_events { #define PALMAS_GPADC_TRIM15 0x0E #define PALMAS_GPADC_TRIM16 0x0F +/* TPS659038 regen2_ctrl offset iss different from palmas */ +#define TPS659038_REGEN2_CTRL 0x12 + /* TPS65917 Interrupt registers */ /* Registers for function INTERRUPT */ diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d4ad5b5..045f709 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -316,7 +316,7 @@ struct regulator_desc { * @driver_data: private regulator data * @of_node: OpenFirmware node to parse for device tree bindings (may be * NULL). - * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is + * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is * insufficient. * @ena_gpio_initialized: GPIO controlling regulator enable was properly * initialized, meaning that >= 0 is a valid gpio diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432..a419b65 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1625,11 +1625,11 @@ struct task_struct { /* * numa_faults_locality tracks if faults recorded during the last - * scan window were remote/local. The task scan period is adapted - * based on the locality of the faults with different weights - * depending on whether they were shared or private faults + * scan window were remote/local or failed to migrate. The task scan + * period is adapted based on the locality of the faults with different + * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[2]; + unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ @@ -1719,6 +1719,7 @@ struct task_struct { #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 #define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 534e1f2..57639fc 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -79,6 +79,16 @@ void nf_log_packet(struct net *net, const struct nf_loginfo *li, const char *fmt, ...); +__printf(8, 9) +void nf_log_trace(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li, + const char *fmt, ...); + struct nf_log_buf; struct nf_log_buf *nf_log_buf_open(void); diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h index 23d5615..22317d2 100644 --- a/include/trace/events/regmap.h +++ b/include/trace/events/regmap.h @@ -7,27 +7,26 @@ #include <linux/ktime.h> #include <linux/tracepoint.h> -struct device; -struct regmap; +#include "../../../drivers/base/regmap/internal.h" /* * Log register events */ DECLARE_EVENT_CLASS(regmap_reg, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val), + TP_ARGS(map, reg, val), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, reg ) - __field( unsigned int, val ) + __string( name, regmap_name(map) ) + __field( unsigned int, reg ) + __field( unsigned int, val ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->reg = reg; __entry->val = val; ), @@ -39,45 +38,45 @@ DECLARE_EVENT_CLASS(regmap_reg, DEFINE_EVENT(regmap_reg, regmap_reg_write, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DEFINE_EVENT(regmap_reg, regmap_reg_read, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DEFINE_EVENT(regmap_reg, regmap_reg_read_cache, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DECLARE_EVENT_CLASS(regmap_block, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count), + TP_ARGS(map, reg, count), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, reg ) - __field( int, count ) + __string( name, regmap_name(map) ) + __field( unsigned int, reg ) + __field( int, count ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->reg = reg; __entry->count = count; ), @@ -89,48 +88,48 @@ DECLARE_EVENT_CLASS(regmap_block, DEFINE_EVENT(regmap_block, regmap_hw_read_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_read_done, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_write_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_write_done, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); TRACE_EVENT(regcache_sync, - TP_PROTO(struct device *dev, const char *type, + TP_PROTO(struct regmap *map, const char *type, const char *status), - TP_ARGS(dev, type, status), + TP_ARGS(map, type, status), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __string( status, status ) - __string( type, type ) - __field( int, type ) + __string( name, regmap_name(map) ) + __string( status, status ) + __string( type, type ) + __field( int, type ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __assign_str(status, status); __assign_str(type, type); ), @@ -141,17 +140,17 @@ TRACE_EVENT(regcache_sync, DECLARE_EVENT_CLASS(regmap_bool, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag), + TP_ARGS(map, flag), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( int, flag ) + __string( name, regmap_name(map) ) + __field( int, flag ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->flag = flag; ), @@ -161,32 +160,32 @@ DECLARE_EVENT_CLASS(regmap_bool, DEFINE_EVENT(regmap_bool, regmap_cache_only, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag) + TP_ARGS(map, flag) ); DEFINE_EVENT(regmap_bool, regmap_cache_bypass, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag) + TP_ARGS(map, flag) ); DECLARE_EVENT_CLASS(regmap_async, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev), + TP_ARGS(map), TP_STRUCT__entry( - __string( name, dev_name(dev) ) + __string( name, regmap_name(map) ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); ), TP_printk("%s", __get_str(name)) @@ -194,50 +193,50 @@ DECLARE_EVENT_CLASS(regmap_async, DEFINE_EVENT(regmap_block, regmap_async_write_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_async, regmap_async_io_complete, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); DEFINE_EVENT(regmap_async, regmap_async_complete_start, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); DEFINE_EVENT(regmap_async, regmap_async_complete_done, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); TRACE_EVENT(regcache_drop_region, - TP_PROTO(struct device *dev, unsigned int from, + TP_PROTO(struct regmap *map, unsigned int from, unsigned int to), - TP_ARGS(dev, from, to), + TP_ARGS(map, from, to), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, from ) - __field( unsigned int, to ) + __string( name, regmap_name(map) ) + __field( unsigned int, from ) + __field( unsigned int, to ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->from = from; __entry->to = to; ), diff --git a/kernel/events/core.c b/kernel/events/core.c index 453ef61..2fabc06 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4574,6 +4574,13 @@ static void perf_pending_event(struct irq_work *entry) { struct perf_event *event = container_of(entry, struct perf_event, pending); + int rctx; + + rctx = perf_swevent_get_recursion_context(); + /* + * If we 'fail' here, that's OK, it means recursion is already disabled + * and we won't recurse 'further'. + */ if (event->pending_disable) { event->pending_disable = 0; @@ -4584,6 +4591,9 @@ static void perf_pending_event(struct irq_work *entry) event->pending_wakeup = 0; perf_event_wakeup(event); } + + if (rctx >= 0) + perf_swevent_put_recursion_context(rctx); } /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 88d0d44..ba77ab5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -633,7 +633,7 @@ static int count_matching_names(struct lock_class *new_class) if (!new_class->name) return 0; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + list_for_each_entry_rcu(class, &all_lock_classes, lock_entry) { if (new_class->key - new_class->subclass == class->key) return class->name_version; if (class->name && !strcmp(class->name, new_class->name)) @@ -700,10 +700,12 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) hash_head = classhashentry(key); /* - * We can walk the hash lockfree, because the hash only - * grows, and we are careful when adding entries to the end: + * We do an RCU walk of the hash, see lockdep_free_key_range(). */ - list_for_each_entry(class, hash_head, hash_entry) { + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return NULL; + + list_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample @@ -728,7 +730,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct list_head *hash_head; struct lock_class *class; - unsigned long flags; + + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); class = look_up_lock_class(lock, subclass); if (likely(class)) @@ -750,28 +753,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) key = lock->key->subkeys + subclass; hash_head = classhashentry(key); - raw_local_irq_save(flags); if (!graph_lock()) { - raw_local_irq_restore(flags); return NULL; } /* * We have to do the hash-walk again, to avoid races * with another CPU: */ - list_for_each_entry(class, hash_head, hash_entry) + list_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) goto out_unlock_set; + } + /* * Allocate a new key from the static array, and add it to * the hash: */ if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { if (!debug_locks_off_graph_unlock()) { - raw_local_irq_restore(flags); return NULL; } - raw_local_irq_restore(flags); print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); dump_stack(); @@ -798,7 +799,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) if (verbose(class)) { graph_unlock(); - raw_local_irq_restore(flags); printk("\nnew class %p: %s", class->key, class->name); if (class->name_version > 1) @@ -806,15 +806,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) printk("\n"); dump_stack(); - raw_local_irq_save(flags); if (!graph_lock()) { - raw_local_irq_restore(flags); return NULL; } } out_unlock_set: graph_unlock(); - raw_local_irq_restore(flags); out_set_class_cache: if (!subclass || force) @@ -870,11 +867,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, entry->distance = distance; entry->trace = *trace; /* - * Since we never remove from the dependency list, the list can - * be walked lockless by other CPUs, it's only allocation - * that must be protected by the spinlock. But this also means - * we must make new entries visible only once writes to the - * entry become visible - hence the RCU op: + * Both allocation and removal are done under the graph lock; but + * iteration is under RCU-sched; see look_up_lock_class() and + * lockdep_free_key_range(). */ list_add_tail_rcu(&entry->entry, head); @@ -1025,7 +1020,9 @@ static int __bfs(struct lock_list *source_entry, else head = &lock->class->locks_before; - list_for_each_entry(entry, head, entry) { + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + + list_for_each_entry_rcu(entry, head, entry) { if (!lock_accessed(entry)) { unsigned int cq_depth; mark_lock_accessed(entry, lock); @@ -2022,7 +2019,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, * We can walk it lock-free, because entries only get added * to the hash: */ - list_for_each_entry(chain, hash_head, entry) { + list_for_each_entry_rcu(chain, hash_head, entry) { if (chain->chain_key == chain_key) { cache_hit: debug_atomic_inc(chain_lookup_hits); @@ -2996,8 +2993,18 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, if (unlikely(!debug_locks)) return; - if (subclass) + if (subclass) { + unsigned long flags; + + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + current->lockdep_recursion = 1; register_lock_class(lock, subclass, 1); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); + } } EXPORT_SYMBOL_GPL(lockdep_init_map); @@ -3887,9 +3894,17 @@ static inline int within(const void *addr, void *start, unsigned long size) return addr >= start && addr < start + size; } +/* + * Used in module.c to remove lock classes from memory that is going to be + * freed; and possibly re-used by other modules. + * + * We will have had one sync_sched() before getting here, so we're guaranteed + * nobody will look up these exact classes -- they're properly dead but still + * allocated. + */ void lockdep_free_key_range(void *start, unsigned long size) { - struct lock_class *class, *next; + struct lock_class *class; struct list_head *head; unsigned long flags; int i; @@ -3905,7 +3920,7 @@ void lockdep_free_key_range(void *start, unsigned long size) head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) { + list_for_each_entry_rcu(class, head, hash_entry) { if (within(class->key, start, size)) zap_class(class); else if (within(class->name, start, size)) @@ -3916,11 +3931,25 @@ void lockdep_free_key_range(void *start, unsigned long size) if (locked) graph_unlock(); raw_local_irq_restore(flags); + + /* + * Wait for any possible iterators from look_up_lock_class() to pass + * before continuing to free the memory they refer to. + * + * sync_sched() is sufficient because the read-side is IRQ disable. + */ + synchronize_sched(); + + /* + * XXX at this point we could return the resources to the pool; + * instead we leak them. We would need to change to bitmap allocators + * instead of the linear allocators we have now. + */ } void lockdep_reset_lock(struct lockdep_map *lock) { - struct lock_class *class, *next; + struct lock_class *class; struct list_head *head; unsigned long flags; int i, j; @@ -3948,7 +3977,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) { + list_for_each_entry_rcu(class, head, hash_entry) { int match = 0; for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) diff --git a/kernel/module.c b/kernel/module.c index b3d634e..99fdf94 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1865,7 +1865,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - /* Free lock-classes: */ + /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ @@ -3349,9 +3349,6 @@ static int load_module(struct load_info *info, const char __user *uargs, module_bug_cleanup(mod); mutex_unlock(&module_mutex); - /* Free lock-classes: */ - lockdep_free_key_range(mod->module_core, mod->core_size); - /* we can't deallocate the module until we clear memory protection */ unset_module_init_ro_nx(mod); unset_module_core_ro_nx(mod); @@ -3375,6 +3372,9 @@ static int load_module(struct load_info *info, const char __user *uargs, synchronize_rcu(); mutex_unlock(&module_mutex); free_module: + /* Free lock-classes; relies on the preceding sync_rcu() */ + lockdep_free_key_range(mod->module_core, mod->core_size); + module_deallocate(mod, info); free_copy: free_copy(info); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f0f831e..62671f5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3034,6 +3034,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio) } else { if (dl_prio(oldprio)) p->dl.dl_boosted = 0; + if (rt_prio(oldprio)) + p->rt.timeout = 0; p->sched_class = &fair_sched_class; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7ce18f3..bcfe320 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p, /* * If there were no record hinting faults then either the task is * completely idle or all activity is areas that are not of interest - * to automatic numa balancing. Scan slower + * to automatic numa balancing. Related to that, if there were failed + * migration then it implies we are migrating too quickly or the local + * node is overloaded. In either case, scan slower */ - if (local + shared == 0) { + if (local + shared == 0 || p->numa_faults_locality[2]) { p->numa_scan_period = min(p->numa_scan_period_max, p->numa_scan_period << 1); @@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) if (migrated) p->numa_pages_migrated += pages; + if (flags & TNF_MIGRATE_FAIL) + p->numa_faults_locality[2] += pages; p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index eb682d5..6aac4be 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -49,6 +49,7 @@ static void bc_set_mode(enum clock_event_mode mode, */ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) { + int bc_moved; /* * We try to cancel the timer first. If the callback is on * flight on some other cpu then we let it handle it. If we @@ -60,9 +61,15 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) * restart the timer because we are in the callback, but we * can set the expiry time and let the callback return * HRTIMER_RESTART. + * + * Since we are in the idle loop at this point and because + * hrtimer_{start/cancel} functions call into tracing, + * calls to these functions must be bound within RCU_NONIDLE. */ - if (hrtimer_try_to_cancel(&bctimer) >= 0) { - hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED); + RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ? + !hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) : + 0); + if (bc_moved) { /* Bind the "device" to the cpu */ bc->bound_on = smp_processor_id(); } else if (bc->bound_on == smp_processor_id()) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 626e93d..6817b03 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, int target_nid, last_cpupid = -1; bool page_locked; bool migrated = false; + bool was_writable; int flags = 0; /* A PROT_NONE fault should not end up here */ @@ -1291,17 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, flags |= TNF_FAULT_LOCAL; } - /* - * Avoid grouping on DSO/COW pages in specific and RO pages - * in general, RO pages shouldn't hurt as much anyway since - * they can be in shared cache state. - * - * FIXME! This checks "pmd_dirty()" as an approximation of - * "is this a read-only page", since checking "pmd_write()" - * is even more broken. We haven't actually turned this into - * a writable page, so pmd_write() will always be false. - */ - if (!pmd_dirty(pmd)) + /* See similar comment in do_numa_page for explanation */ + if (!(vma->vm_flags & VM_WRITE)) flags |= TNF_NO_GROUP; /* @@ -1358,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (migrated) { flags |= TNF_MIGRATED; page_nid = target_nid; - } + } else + flags |= TNF_MIGRATE_FAIL; goto out; clear_pmdnuma: BUG_ON(!PageLocked(page)); + was_writable = pmd_write(pmd); pmd = pmd_modify(pmd, vma->vm_page_prot); + pmd = pmd_mkyoung(pmd); + if (was_writable) + pmd = pmd_mkwrite(pmd); set_pmd_at(mm, haddr, pmdp, pmd); update_mmu_cache_pmd(vma, addr, pmdp); unlock_page(page); @@ -1487,6 +1484,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { pmd_t entry; + bool preserve_write = prot_numa && pmd_write(*pmd); ret = 1; /* @@ -1502,9 +1500,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (!prot_numa || !pmd_protnone(*pmd)) { entry = pmdp_get_and_clear_notify(mm, addr, pmd); entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mkwrite(entry); ret = HPAGE_PMD_NR; set_pmd_at(mm, addr, pmd, entry); - BUG_ON(pmd_write(entry)); + BUG_ON(!preserve_write && pmd_write(entry)); } spin_unlock(ptl); } diff --git a/mm/memory.c b/mm/memory.c index 411144f..97839f5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, int last_cpupid; int target_nid; bool migrated = false; + bool was_writable = pte_write(pte); int flags = 0; /* A PROT_NONE fault should not end up here */ @@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Make it present again */ pte = pte_modify(pte, vma->vm_page_prot); pte = pte_mkyoung(pte); + if (was_writable) + pte = pte_mkwrite(pte); set_pte_at(mm, addr, ptep, pte); update_mmu_cache(vma, addr, ptep); @@ -3069,16 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, } /* - * Avoid grouping on DSO/COW pages in specific and RO pages - * in general, RO pages shouldn't hurt as much anyway since - * they can be in shared cache state. - * - * FIXME! This checks "pmd_dirty()" as an approximation of - * "is this a read-only page", since checking "pmd_write()" - * is even more broken. We haven't actually turned this into - * a writable page, so pmd_write() will always be false. + * Avoid grouping on RO pages in general. RO pages shouldn't hurt as + * much anyway since they can be in shared cache state. This misses + * the case where a mapping is writable but the process never writes + * to it but pte_write gets cleared during protection updates and + * pte_dirty has unpredictable behaviour between PTE scan updates, + * background writeback, dirty balancing and application behaviour. */ - if (!pte_dirty(pte)) + if (!(vma->vm_flags & VM_WRITE)) flags |= TNF_NO_GROUP; /* @@ -3102,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (migrated) { page_nid = target_nid; flags |= TNF_MIGRATED; - } + } else + flags |= TNF_MIGRATE_FAIL; out: if (page_nid != -1) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fab107..65842d6 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) return NULL; arch_refresh_nodedata(nid, pgdat); + } else { + /* Reset the nr_zones and classzone_idx to 0 before reuse */ + pgdat->nr_zones = 0; + pgdat->classzone_idx = 0; } /* we can use NODE_DATA(nid) from here */ @@ -1977,15 +1981,6 @@ void try_offline_node(int nid) if (is_vmalloc_addr(zone->wait_table)) vfree(zone->wait_table); } - - /* - * Since there is no way to guarentee the address of pgdat/zone is not - * on stack of any kernel threads or used by other kernel objects - * without reference counting or other symchronizing method, do not - * reset node_data and free pgdat here. Just reset it to 0 and reuse - * the memory when the node is online again. - */ - memset(pgdat, 0, sizeof(*pgdat)); } EXPORT_SYMBOL(try_offline_node); @@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end); importer->anon_vma = exporter->anon_vma; error = anon_vma_clone(importer, exporter); - if (error) { - importer->anon_vma = NULL; + if (error) return error; - } } } diff --git a/mm/mprotect.c b/mm/mprotect.c index 4472781..8858483 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, oldpte = *pte; if (pte_present(oldpte)) { pte_t ptent; + bool preserve_write = prot_numa && pte_write(oldpte); /* * Avoid trapping faults against the zero or KSM @@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ptent = ptep_modify_prot_start(mm, addr, pte); ptent = pte_modify(ptent, newprot); + if (preserve_write) + ptent = pte_mkwrite(ptent); /* Avoid taking write faults for known dirty pages */ if (dirty_accountable && pte_dirty(ptent) && diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 45e187b..644bcb6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi, * bw * elapsed + write_bandwidth * (period - elapsed) * write_bandwidth = --------------------------------------------------- * period + * + * @written may have decreased due to account_page_redirty(). + * Avoid underflowing @bw calculation. */ - bw = written - bdi->written_stamp; + bw = written - min(written, bdi->written_stamp); bw *= HZ; if (unlikely(elapsed > period)) { do_div(bw, elapsed); @@ -922,7 +925,7 @@ static void global_update_bandwidth(unsigned long thresh, unsigned long now) { static DEFINE_SPINLOCK(dirty_lock); - static unsigned long update_time; + static unsigned long update_time = INITIAL_JIFFIES; /* * check locklessly first to optimize away locking for the most time diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 72f5ac3..755a42c 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) if (!is_migrate_isolate_page(buddy)) { __isolate_free_page(page, order); + kernel_map_pages(page, (1 << order), 1); set_page_refcounted(page); isolated_page = page; } diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 75c1f28..29f2f8b 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end, vma = vma->vm_next; err = walk_page_test(start, next, walk); - if (err > 0) + if (err > 0) { + /* + * positive return values are purely for + * controlling the pagewalk, so should never + * be passed to the callers. + */ + err = 0; continue; + } if (err < 0) break; } @@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) return 0; enomem_failure: + /* + * dst->anon_vma is dropped here otherwise its degree can be incorrectly + * decremented in unlink_anon_vmas(). + * We can safely do this because callers of anon_vma_clone() don't care + * about dst->anon_vma if anon_vma_clone() failed. + */ + dst->anon_vma = NULL; unlink_anon_vmas(dst); return -ENOMEM; } @@ -2449,7 +2449,8 @@ redo: do { tid = this_cpu_read(s->cpu_slab->tid); c = raw_cpu_ptr(s->cpu_slab); - } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); + } while (IS_ENABLED(CONFIG_PREEMPT) && + unlikely(tid != READ_ONCE(c->tid))); /* * Irqless object alloc/free algorithm used here depends on sequence @@ -2718,7 +2719,8 @@ redo: do { tid = this_cpu_read(s->cpu_slab->tid); c = raw_cpu_ptr(s->cpu_slab); - } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); + } while (IS_ENABLED(CONFIG_PREEMPT) && + unlikely(tid != READ_ONCE(c->tid))); /* Same with comment on barrier() in slab_alloc_node() */ barrier(); diff --git a/net/compat.c b/net/compat.c index 94d3d5e..f7bd286 100644 --- a/net/compat.c +++ b/net/compat.c @@ -49,6 +49,13 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg, __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + + if (!uaddr) + kmsg->msg_namelen = 0; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 99e810f..cf5e82f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a2a796c..1db253e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); if (skb) break; yield(); } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b4d5e1d..27ca796 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto again; flp6->saddr = saddr; } + err = rt->dst.error; goto out; } again: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e080fbb..bb00c6f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -298,9 +298,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index ab889bb..be2c0ba 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -112,11 +112,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - if (skb_shinfo(skb)->ip6_frag_id) - fptr->identification = skb_shinfo(skb)->ip6_frag_id; - else - ipv6_select_ident(fptr, - (struct rt6_info *)skb_dst(skb)); + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(skb); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 0d8448f..675d12c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -212,6 +212,30 @@ void nf_log_packet(struct net *net, } EXPORT_SYMBOL(nf_log_packet); +void nf_log_trace(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *fmt, ...) +{ + va_list args; + char prefix[NF_LOG_PREFIXLEN]; + const struct nf_logger *logger; + + rcu_read_lock(); + logger = rcu_dereference(net->nf.nf_loggers[pf]); + if (logger) { + va_start(args, fmt); + vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(nf_log_trace); + #define S_SIZE (1024 - (sizeof(unsigned int) + 1)) struct nf_log_buf { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6ab7779..ac1a952 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1225,7 +1225,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_CHAIN_POLICY]) { if ((chain != NULL && - !(chain->flags & NFT_BASE_CHAIN)) || + !(chain->flags & NFT_BASE_CHAIN))) + return -EOPNOTSUPP; + + if (chain == NULL && nla[NFTA_CHAIN_HOOK] == NULL) return -EOPNOTSUPP; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3b90eb2..2d298dc 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -94,10 +94,10 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt, { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); + nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); } unsigned int diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a5599fc..54330fb 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; + /* Not all fields are initialized so first zero the tuple */ + memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); + tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 213584c..65f3e2b 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -133,6 +133,9 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; break; case AF_INET6: + if (proto) + entry->e6.ipv6.flags |= IP6T_F_PROTO; + entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; @@ -344,6 +347,9 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; break; case AF_INET6: + if (proto) + entry->e6.ipv6.flags |= IP6T_F_PROTO; + entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index c82df0a..37c15e6 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -153,6 +153,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, iter->err = err; goto out; } + + continue; } if (iter->count < iter->skip) diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index ef8a926..50e1e5a 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -513,8 +513,8 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; - if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) - && !(i->flags & IP6T_INV_PROTO)) + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && + !(i->invflags & IP6T_INV_PROTO)) return 0; pr_info("Can be used only in combination with " diff --git a/net/socket.c b/net/socket.c index bbedbfc..245330c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1702,6 +1702,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, if (len > INT_MAX) len = INT_MAX; + if (unlikely(!access_ok(VERIFY_READ, buff, len))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1760,6 +1762,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, if (size > INT_MAX) size = INT_MAX; + if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 1684bcc..5fde343 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -152,7 +152,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, goto out; /* No partial writes. */ - length = EINVAL; + length = -EINVAL; if (*ppos != 0) goto out; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4ca3d5d..a8a1e14 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1989,7 +1989,7 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Sunrise Point */ { PCI_DEVICE(0x8086, 0xa170), - .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Sunrise Point-LP */ { PCI_DEVICE(0x8086, 0x9d70), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 526398a..7438213 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -396,7 +396,7 @@ static void alc_auto_setup_eapd(struct hda_codec *codec, bool on) { /* We currently only handle front, HP */ static hda_nid_t pins[] = { - 0x0f, 0x10, 0x14, 0x15, 0 + 0x0f, 0x10, 0x14, 0x15, 0x17, 0 }; hda_nid_t *p; for (p = pins; *p; p++) @@ -5036,6 +5036,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x501e, "Thinkpad L440", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 4e51122..0db5713 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -22,6 +22,14 @@ TARGETS += vm TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# Clear LDFLAGS and MAKEFLAGS if called from main +# Makefile to avoid test build failures when test +# Makefile doesn't have explicit build rules. +ifeq (1,$(MAKELEVEL)) +undefine LDFLAGS +override MAKEFLAGS = +endif + all: for TARGET in $(TARGETS); do \ make -C $$TARGET; \ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a2214d9..cc6a25d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -471,7 +471,7 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); r = -ENOMEM; - kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!kvm->memslots) goto out_err_no_srcu; @@ -522,7 +522,7 @@ out_err_no_srcu: out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); - kfree(kvm->memslots); + kvfree(kvm->memslots); kvm_arch_free_vm(kvm); return ERR_PTR(r); } @@ -578,7 +578,7 @@ static void kvm_free_physmem(struct kvm *kvm) kvm_for_each_memslot(memslot, slots) kvm_free_physmem_slot(kvm, memslot, NULL); - kfree(kvm->memslots); + kvfree(kvm->memslots); } static void kvm_destroy_devices(struct kvm *kvm) @@ -871,10 +871,10 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); + slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { slot = id_to_memslot(slots, mem->slot); @@ -917,7 +917,7 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(kvm, &old, &new); - kfree(old_memslots); + kvfree(old_memslots); /* * IOMMU mapping: New slots need to be mapped. Old slots need to be @@ -936,7 +936,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return 0; out_slots: - kfree(slots); + kvfree(slots); out_free: kvm_free_physmem_slot(kvm, &new, &old); out: |