diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-04-06 17:56:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-04-06 17:56:20 -0700 |
commit | 4157368edbc3d69b05e9294a73c84fc9c96bdec4 (patch) | |
tree | 60072b082dad509d270a9c53427cfedb1c997b3b /arch/tile | |
parent | 9479f0f8018a0317b0b5e0c2b338bec6e26fdf2d (diff) | |
parent | 00a62d4bc9b9a0388abee5c5ea946b9631b149d5 (diff) | |
download | op-kernel-dev-4157368edbc3d69b05e9294a73c84fc9c96bdec4.zip op-kernel-dev-4157368edbc3d69b05e9294a73c84fc9c96bdec4.tar.gz |
Merge branch 'stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
Pull arch/tile bug fixes from Chris Metcalf:
"This includes Paul Gortmaker's change to fix the <asm/system.h>
disintegration issues on tile, a fix to unbreak the tilepro ethernet
driver, and a backlog of bugfix-only changes from internal Tilera
development over the last few months.
They have all been to LKML and on linux-next for the last few days.
The EDAC change to MAINTAINERS is an oddity but discussion on the
linux-edac list suggested I ask you to pull that change through my
tree since they don't have a tree to pull edac changes from at the
moment."
* 'stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile: (39 commits)
drivers/net/ethernet/tile: fix netdev_alloc_skb() bombing
MAINTAINERS: update EDAC information
tilepro ethernet driver: fix a few minor issues
tile-srom.c driver: minor code cleanup
edac: say "TILEGx" not "TILEPro" for the tilegx edac driver
arch/tile: avoid accidentally unmasking NMI-type interrupt accidentally
arch/tile: remove bogus performance optimization
arch/tile: return SIGBUS for addresses that are unaligned AND invalid
arch/tile: fix finv_buffer_remote() for tilegx
arch/tile: use atomic exchange in arch_write_unlock()
arch/tile: stop mentioning the "kvm" subdirectory
arch/tile: export the page_home() function.
arch/tile: fix pointer cast in cacheflush.c
arch/tile: fix single-stepping over swint1 instructions on tilegx
arch/tile: implement panic_smp_self_stop()
arch/tile: add "nop" after "nap" to help GX idle power draw
arch/tile: use proper memparse() for "maxmem" options
arch/tile: fix up locking in pgtable.c slightly
arch/tile: don't leak kernel memory when we unload modules
arch/tile: fix bug in delay_backoff()
...
Diffstat (limited to 'arch/tile')
29 files changed, 429 insertions, 257 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 11270ca..96033e2 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -12,7 +12,7 @@ config TILE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW select SYS_HYPERVISOR - select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386 + select ARCH_HAVE_NMI_SAFE_CMPXCHG # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT @@ -69,6 +69,9 @@ config ARCH_PHYS_ADDR_T_64BIT config ARCH_DMA_ADDR_T_64BIT def_bool y +config NEED_DMA_MAP_STATE + def_bool y + config LOCKDEP_SUPPORT def_bool y @@ -118,7 +121,7 @@ config 64BIT config ARCH_DEFCONFIG string - default "arch/tile/configs/tile_defconfig" if !TILEGX + default "arch/tile/configs/tilepro_defconfig" if !TILEGX default "arch/tile/configs/tilegx_defconfig" if TILEGX source "init/Kconfig" @@ -240,6 +243,7 @@ endchoice config PAGE_OFFSET hex + depends on !64BIT default 0xF0000000 if VMSPLIT_3_75G default 0xE0000000 if VMSPLIT_3_5G default 0xB0000000 if VMSPLIT_2_75G diff --git a/arch/tile/Makefile b/arch/tile/Makefile index 17acce7..9520bc5 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -30,7 +30,8 @@ ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) endif -LIBGCC_PATH := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) +LIBGCC_PATH := \ + $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) # Provide the path to use for "make defconfig". KBUILD_DEFCONFIG := $(ARCH)_defconfig @@ -53,8 +54,6 @@ libs-y += $(LIBGCC_PATH) # See arch/tile/Kbuild for content of core part of the kernel core-y += arch/tile/ -core-$(CONFIG_KVM) += arch/tile/kvm/ - ifdef TILERA_ROOT INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot endif diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h index f548efe..d6ba449 100644 --- a/arch/tile/include/arch/spr_def.h +++ b/arch/tile/include/arch/spr_def.h @@ -60,8 +60,8 @@ _concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,) #define SPR_IPI_EVENT_RESET_K \ _concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,) -#define SPR_IPI_MASK_SET_K \ - _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_SET_K \ + _concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,) #define INT_IPI_K \ _concat4(INT_IPI_, CONFIG_KERNEL_PL,,) diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index bb696da..f246142 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -17,6 +17,8 @@ #ifndef _ASM_TILE_ATOMIC_H #define _ASM_TILE_ATOMIC_H +#include <asm/cmpxchg.h> + #ifndef __ASSEMBLY__ #include <linux/compiler.h> @@ -121,54 +123,6 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) -/* Nonexistent functions intended to cause link errors. */ -extern unsigned long __xchg_called_with_bad_pointer(void); -extern unsigned long __cmpxchg_called_with_bad_pointer(void); - -#define xchg(ptr, x) \ - ({ \ - typeof(*(ptr)) __x; \ - switch (sizeof(*(ptr))) { \ - case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((x)-(x)))(x)); \ - break; \ - case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((x)-(x)))(x)); \ - break; \ - default: \ - __xchg_called_with_bad_pointer(); \ - } \ - __x; \ - }) - -#define cmpxchg(ptr, o, n) \ - ({ \ - typeof(*(ptr)) __x; \ - switch (sizeof(*(ptr))) { \ - case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((o)-(o)))(o), \ - (u32)(typeof((n)-(n)))(n)); \ - break; \ - case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((o)-(o)))(o), \ - (u64)(typeof((n)-(n)))(n)); \ - break; \ - default: \ - __cmpxchg_called_with_bad_pointer(); \ - } \ - __x; \ - }) - -#define tas(ptr) (xchg((ptr), 1)) - #endif /* __ASSEMBLY__ */ #ifndef __tilegx__ diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 466dc4a..54d1da8 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -200,7 +200,7 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. - * Returns the old value of @v. + * Returns non-zero if @v was not @u, and zero otherwise. */ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) { diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h index 58d021a..60b87ee 100644 --- a/arch/tile/include/asm/bitops_64.h +++ b/arch/tile/include/asm/bitops_64.h @@ -38,10 +38,10 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - unsigned long old, mask = (1UL << (nr % BITS_PER_LONG)); - long guess, oldval; + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + unsigned long guess, oldval; addr += nr / BITS_PER_LONG; - old = *addr; + oldval = *addr; do { guess = oldval; oldval = atomic64_cmpxchg((atomic64_t *)addr, @@ -85,7 +85,7 @@ static inline int test_and_change_bit(unsigned nr, volatile unsigned long *addr) { unsigned long mask = (1UL << (nr % BITS_PER_LONG)); - long guess, oldval = *addr; + unsigned long guess, oldval; addr += nr / BITS_PER_LONG; oldval = *addr; do { diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h new file mode 100644 index 0000000..276f067 --- /dev/null +++ b/arch/tile/include/asm/cmpxchg.h @@ -0,0 +1,73 @@ +/* + * cmpxchg.h -- forked from asm/atomic.h with this copyright: + * + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_CMPXCHG_H +#define _ASM_TILE_CMPXCHG_H + +#ifndef __ASSEMBLY__ + +/* Nonexistent functions intended to cause link errors. */ +extern unsigned long __xchg_called_with_bad_pointer(void); +extern unsigned long __cmpxchg_called_with_bad_pointer(void); + +#define xchg(ptr, x) \ + ({ \ + typeof(*(ptr)) __x; \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ + (atomic_t *)(ptr), \ + (u32)(typeof((x)-(x)))(x)); \ + break; \ + case 8: \ + __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ + (atomic64_t *)(ptr), \ + (u64)(typeof((x)-(x)))(x)); \ + break; \ + default: \ + __xchg_called_with_bad_pointer(); \ + } \ + __x; \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + typeof(*(ptr)) __x; \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ + (atomic_t *)(ptr), \ + (u32)(typeof((o)-(o)))(o), \ + (u32)(typeof((n)-(n)))(n)); \ + break; \ + case 8: \ + __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ + (atomic64_t *)(ptr), \ + (u64)(typeof((o)-(o)))(o), \ + (u64)(typeof((n)-(n)))(n)); \ + break; \ + default: \ + __cmpxchg_called_with_bad_pointer(); \ + } \ + __x; \ + }) + +#define tas(ptr) (xchg((ptr), 1)) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_CMPXCHG_H */ diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h index f80f8ce..33cff9a 100644 --- a/arch/tile/include/asm/irq.h +++ b/arch/tile/include/asm/irq.h @@ -21,7 +21,7 @@ #define NR_IRQS 32 /* IRQ numbers used for linux IPIs. */ -#define IRQ_RESCHEDULE 1 +#define IRQ_RESCHEDULE 0 #define irq_canonicalize(irq) (irq) diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h index 72be590..5f8b6a0 100644 --- a/arch/tile/include/asm/spinlock_64.h +++ b/arch/tile/include/asm/spinlock_64.h @@ -137,7 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { __insn_mf(); - rw->lock = 0; + __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. */ } static inline int arch_read_trylock(arch_rwlock_t *rw) diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index 4d97a2d..0e9d382a 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -25,7 +25,6 @@ struct KBacktraceIterator { BacktraceIterator it; struct task_struct *task; /* task we are backtracing */ - pte_t *pgtable; /* page table for user space access */ int end; /* iteration complete. */ int new_context; /* new context is starting */ int profile; /* profiling, so stop on async intrpt */ diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index 5f20f92..e28c3df4 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -64,7 +64,11 @@ void do_breakpoint(struct pt_regs *, int fault_num); #ifdef __tilegx__ +/* kernel/single_step.c */ void gx_singlestep_handle(struct pt_regs *, int fault_num); + +/* kernel/intvec_64.S */ +void fill_ra_stack(void); #endif -#endif /* _ASM_TILE_SYSCALLS_H */ +#endif /* _ASM_TILE_TRAPS_H */ diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 431e9ae6..ec91568 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -85,6 +85,7 @@ STD_ENTRY(cpu_idle_on_new_stack) /* Loop forever on a nap during SMP boot. */ STD_ENTRY(smp_nap) nap + nop /* avoid provoking the icache prefetch with a jump */ j smp_nap /* we are not architecturally guaranteed not to exit nap */ jrp lr /* clue in the backtracer */ STD_ENDPROC(smp_nap) @@ -105,5 +106,6 @@ STD_ENTRY(_cpu_idle) .global _cpu_idle_nap _cpu_idle_nap: nap + nop /* avoid provoking the icache prefetch with a jump */ jrp lr STD_ENDPROC(_cpu_idle) diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index aecc8ed..5d56a1e 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -799,6 +799,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -1237,7 +1241,10 @@ handle_syscall: bzt r30, 1f jal do_syscall_trace FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ +1: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Linvalid_syscall: /* Report an invalid syscall back to the user program */ @@ -1246,7 +1253,10 @@ handle_syscall: movei r28, -ENOSYS } sw r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1262,7 +1272,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* @@ -1376,7 +1389,10 @@ handle_ill: jal send_sigtrap /* issue a SIGTRAP */ FEEDBACK_REENTER(handle_ill) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Ldispatch_normal_ill: { diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 79c93e1..49d9d66 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -22,6 +22,7 @@ #include <asm/irqflags.h> #include <asm/asm-offsets.h> #include <asm/types.h> +#include <asm/signal.h> #include <hv/hypervisor.h> #include <arch/abi.h> #include <arch/interrupts.h> @@ -605,6 +606,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -1039,11 +1044,28 @@ handle_syscall: /* Do syscall trace again, if requested. */ ld r30, r31 - andi r30, r30, _TIF_SYSCALL_TRACE - beqzt r30, 1f + andi r0, r30, _TIF_SYSCALL_TRACE + { + andi r0, r30, _TIF_SINGLESTEP + beqzt r0, 1f + } jal do_syscall_trace FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ + andi r0, r30, _TIF_SINGLESTEP + +1: beqzt r0, 2f + + /* Single stepping -- notify ptrace. */ + { + movei r0, SIGTRAP + jal ptrace_notify + } + FEEDBACK_REENTER(handle_syscall) + +2: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Lcompat_syscall: /* @@ -1077,7 +1099,10 @@ handle_syscall: movei r28, -ENOSYS } st r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1093,7 +1118,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* Various stub interrupt handlers and syscall handlers */ @@ -1156,6 +1184,18 @@ int_unalign: push_extra_callee_saves r0 j do_trap +/* Fill the return address stack with nonzero entries. */ +STD_ENTRY(fill_ra_stack) + { + move r0, lr + jal 1f + } +1: jal 2f +2: jal 3f +3: jal 4f +4: jrp r0 + STD_ENDPROC(fill_ra_stack) + /* Include .intrpt1 array of interrupt vectors */ .section ".intrpt1", "ax" @@ -1166,7 +1206,7 @@ int_unalign: #define do_hardwall_trap bad_intr #endif - int_hand INT_MEM_ERROR, MEM_ERROR, bad_intr + int_hand INT_MEM_ERROR, MEM_ERROR, do_trap int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr #if CONFIG_KERNEL_PL == 2 int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index b90ab99..98d4769 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -67,6 +67,8 @@ void *module_alloc(unsigned long size) area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END); if (!area) goto error; + area->nr_pages = npages; + area->pages = pages; if (map_vm_area(area, prot_rwx, &pages)) { vunmap(area->addr); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 30caeca..2d5ef61 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/tracehook.h> #include <linux/signal.h> #include <asm/stack.h> +#include <asm/switch_to.h> #include <asm/homecache.h> #include <asm/syscalls.h> #include <asm/traps.h> @@ -285,7 +286,7 @@ struct task_struct *validate_current(void) static struct task_struct corrupt = { .comm = "<corrupt>" }; struct task_struct *tsk = current; if (unlikely((unsigned long)tsk < PAGE_OFFSET || - (void *)tsk > high_memory || + (high_memory && (void *)tsk > high_memory) || ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) { pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer); tsk = &corrupt; diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 92a94f4..bff23f4 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -103,13 +103,11 @@ unsigned long __initdata pci_reserve_end_pfn = -1U; static int __init setup_maxmem(char *str) { - long maxmem_mb; - if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 || - maxmem_mb == 0) + unsigned long long maxmem; + if (str == NULL || (maxmem = memparse(str, NULL)) == 0) return -EINVAL; - maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) << - (HPAGE_SHIFT - PAGE_SHIFT); + maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used to no more than %dMB\n", maxmem_pfn >> (20 - PAGE_SHIFT)); return 0; @@ -119,14 +117,15 @@ early_param("maxmem", setup_maxmem); static int __init setup_maxnodemem(char *str) { char *endp; - long maxnodemem_mb, node; + unsigned long long maxnodemem; + long node; node = str ? simple_strtoul(str, &endp, 0) : INT_MAX; - if (node >= MAX_NUMNODES || *endp != ':' || - strict_strtol(endp+1, 0, &maxnodemem_mb) != 0) + if (node >= MAX_NUMNODES || *endp != ':') return -EINVAL; - maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) << + maxnodemem = memparse(endp+1, NULL); + maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used on node %ld to no more than %dMB\n", node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); @@ -913,6 +912,13 @@ void __cpuinit setup_cpu(int boot) #ifdef CONFIG_BLK_DEV_INITRD +/* + * Note that the kernel can potentially support other compression + * techniques than gz, though we don't do so by default. If we ever + * decide to do so we can either look for other filename extensions, + * or just allow a file with this name to be compressed with an + * arbitrary compressor (somewhat counterintuitively). + */ static int __initdata set_initramfs_file; static char __initdata initramfs_file[128] = "initramfs.cpio.gz"; @@ -928,9 +934,9 @@ static int __init setup_initramfs_file(char *str) early_param("initramfs_file", setup_initramfs_file); /* - * We look for an additional "initramfs.cpio.gz" file in the hvfs. + * We look for an "initramfs.cpio.gz" file in the hvfs. * If there is one, we allocate some memory for it and it will be - * unpacked to the initramfs after any built-in initramfs_data. + * unpacked to the initramfs. */ static void __init load_hv_initrd(void) { diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index bc1eb58..9efbc13 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -153,6 +153,25 @@ static tile_bundle_bits rewrite_load_store_unaligned( if (((unsigned long)addr % size) == 0) return bundle; + /* + * Return SIGBUS with the unaligned address, if requested. + * Note that we return SIGBUS even for completely invalid addresses + * as long as they are in fact unaligned; this matches what the + * tilepro hardware would be doing, if it could provide us with the + * actual bad address in an SPR, which it doesn't. + */ + if (unaligned_fixup == 0) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = addr + }; + trace_unhandled_signal("unaligned trap", regs, + (unsigned long)addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return (tilepro_bundle_bits) 0; + } + #ifndef __LITTLE_ENDIAN # error We assume little-endian representation with copy_xx_user size 2 here #endif @@ -192,18 +211,6 @@ static tile_bundle_bits rewrite_load_store_unaligned( return (tile_bundle_bits) 0; } - if (unaligned_fixup == 0) { - siginfo_t info = { - .si_signo = SIGBUS, - .si_code = BUS_ADRALN, - .si_addr = addr - }; - trace_unhandled_signal("unaligned trap", regs, - (unsigned long)addr, SIGBUS); - force_sig_info(info.si_signo, &info, current); - return (tile_bundle_bits) 0; - } - if (unaligned_printk || unaligned_fixup_count == 0) { pr_info("Process %d/%s: PC %#lx: Fixup of" " unaligned %s at %#lx.\n", diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index a44e103..91da0f7 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -103,7 +103,7 @@ static void smp_stop_cpu_interrupt(void) set_cpu_online(smp_processor_id(), 0); arch_local_irq_disable_all(); for (;;) - asm("nap"); + asm("nap; nop"); } /* This function calls the 'stop' function on all other CPUs in the system. */ @@ -113,6 +113,12 @@ void smp_send_stop(void) send_IPI_allbutself(MSG_TAG_STOP_CPU); } +/* On panic, just wait; we may get an smp_send_stop() later on. */ +void panic_smp_self_stop(void) +{ + while (1) + asm("nap; nop"); +} /* * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages. diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 37ee4d0..b2f44c2 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -21,10 +21,12 @@ #include <linux/stacktrace.h> #include <linux/uaccess.h> #include <linux/mmzone.h> +#include <linux/dcache.h> +#include <linux/fs.h> #include <asm/backtrace.h> #include <asm/page.h> -#include <asm/tlbflush.h> #include <asm/ucontext.h> +#include <asm/switch_to.h> #include <asm/sigframe.h> #include <asm/stack.h> #include <arch/abi.h> @@ -44,72 +46,23 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp) return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; } -/* Is address valid for reading? */ -static int valid_address(struct KBacktraceIterator *kbt, unsigned long address) -{ - HV_PTE *l1_pgtable = kbt->pgtable; - HV_PTE *l2_pgtable; - unsigned long pfn; - HV_PTE pte; - struct page *page; - - if (l1_pgtable == NULL) - return 0; /* can't read user space in other tasks */ - -#ifdef CONFIG_64BIT - /* Find the real l1_pgtable by looking in the l0_pgtable. */ - pte = l1_pgtable[HV_L0_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("L0 huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - page = pfn_to_page(pfn); - BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ - l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); -#endif - pte = l1_pgtable[HV_L1_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - - page = pfn_to_page(pfn); - if (PageHighMem(page)) { - pr_err("L2 page table not in LOWMEM (%#llx)\n", - HV_PFN_TO_CPA(pfn)); - return 0; - } - l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); - pte = l2_pgtable[HV_L2_INDEX(address)]; - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); -} - /* Callback for backtracer; basically a glorified memcpy */ static bool read_memory_func(void *result, unsigned long address, unsigned int size, void *vkbt) { int retval; struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; + + if (address == 0) + return 0; if (__kernel_text_address(address)) { /* OK to read kernel code. */ } else if (address >= PAGE_OFFSET) { /* We only tolerate kernel-space reads of this task's stack */ if (!in_kernel_stack(kbt, address)) return 0; - } else if (!valid_address(kbt, address)) { - return 0; /* invalid user-space address */ + } else if (!kbt->is_current) { + return 0; /* can't read from other user address spaces */ } pagefault_disable(); retval = __copy_from_user_inatomic(result, @@ -127,6 +80,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) unsigned long sp = kbt->it.sp; struct pt_regs *p; + if (sp % sizeof(long) != 0) + return NULL; if (!in_kernel_stack(kbt, sp)) return NULL; if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1)) @@ -169,27 +124,27 @@ static int is_sigreturn(unsigned long pc) } /* Return a pt_regs pointer for a valid signal handler frame */ -static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt) +static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, + struct rt_sigframe* kframe) { BacktraceIterator *b = &kbt->it; - if (b->pc == VDSO_BASE) { - struct rt_sigframe *frame; - unsigned long sigframe_top = - b->sp + sizeof(struct rt_sigframe) - 1; - if (!valid_address(kbt, b->sp) || - !valid_address(kbt, sigframe_top)) { - if (kbt->verbose) - pr_err(" (odd signal: sp %#lx?)\n", - (unsigned long)(b->sp)); + if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET && + b->sp % sizeof(long) == 0) { + int retval; + pagefault_disable(); + retval = __copy_from_user_inatomic( + kframe, (void __user __force *)b->sp, + sizeof(*kframe)); + pagefault_enable(); + if (retval != 0 || + (unsigned int)(kframe->info.si_signo) >= _NSIG) return NULL; - } - frame = (struct rt_sigframe *)b->sp; if (kbt->verbose) { pr_err(" <received signal %d>\n", - frame->info.si_signo); + kframe->info.si_signo); } - return (struct pt_regs *)&frame->uc.uc_mcontext; + return (struct pt_regs *)&kframe->uc.uc_mcontext; } return NULL; } @@ -202,10 +157,11 @@ static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt) static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt) { struct pt_regs *p; + struct rt_sigframe kframe; p = valid_fault_handler(kbt); if (p == NULL) - p = valid_sigframe(kbt); + p = valid_sigframe(kbt, &kframe); if (p == NULL) return 0; backtrace_init(&kbt->it, read_memory_func, kbt, @@ -265,41 +221,19 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, /* * Set up callback information. We grab the kernel stack base - * so we will allow reads of that address range, and if we're - * asking about the current process we grab the page table - * so we can check user accesses before trying to read them. - * We flush the TLB to avoid any weird skew issues. + * so we will allow reads of that address range. */ - is_current = (t == NULL); + is_current = (t == NULL || t == current); kbt->is_current = is_current; if (is_current) t = validate_current(); kbt->task = t; - kbt->pgtable = NULL; kbt->verbose = 0; /* override in caller if desired */ kbt->profile = 0; /* override in caller if desired */ kbt->end = KBT_ONGOING; - kbt->new_context = 0; - if (is_current) { - HV_PhysAddr pgdir_pa = hv_inquire_context().page_table; - if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) { - /* - * Not just an optimization: this also allows - * this to work at all before va/pa mappings - * are set up. - */ - kbt->pgtable = swapper_pg_dir; - } else { - struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa)); - if (!PageHighMem(page)) - kbt->pgtable = __va(pgdir_pa); - else - pr_err("page table not in LOWMEM" - " (%#llx)\n", pgdir_pa); - } - local_flush_tlb_all(); + kbt->new_context = 1; + if (is_current) validate_stack(regs); - } if (regs == NULL) { if (is_current || t->state == TASK_RUNNING) { @@ -345,6 +279,78 @@ void KBacktraceIterator_next(struct KBacktraceIterator *kbt) } EXPORT_SYMBOL(KBacktraceIterator_next); +static void describe_addr(struct KBacktraceIterator *kbt, + unsigned long address, + int have_mmap_sem, char *buf, size_t bufsize) +{ + struct vm_area_struct *vma; + size_t namelen, remaining; + unsigned long size, offset, adjust; + char *p, *modname; + const char *name; + int rc; + + /* + * Look one byte back for every caller frame (i.e. those that + * aren't a new context) so we look up symbol data for the + * call itself, not the following instruction, which may be on + * a different line (or in a different function). + */ + adjust = !kbt->new_context; + address -= adjust; + + if (address >= PAGE_OFFSET) { + /* Handle kernel symbols. */ + BUG_ON(bufsize < KSYM_NAME_LEN); + name = kallsyms_lookup(address, &size, &offset, + &modname, buf); + if (name == NULL) { + buf[0] = '\0'; + return; + } + namelen = strlen(buf); + remaining = (bufsize - 1) - namelen; + p = buf + namelen; + rc = snprintf(p, remaining, "+%#lx/%#lx ", + offset + adjust, size); + if (modname && rc < remaining) + snprintf(p + rc, remaining - rc, "[%s] ", modname); + buf[bufsize-1] = '\0'; + return; + } + + /* If we don't have the mmap_sem, we can't show any more info. */ + buf[0] = '\0'; + if (!have_mmap_sem) + return; + + /* Find vma info. */ + vma = find_vma(kbt->task->mm, address); + if (vma == NULL || address < vma->vm_start) { + snprintf(buf, bufsize, "[unmapped address] "); + return; + } + + if (vma->vm_file) { + char *s; + p = d_path(&vma->vm_file->f_path, buf, bufsize); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + } else { + p = "anon"; + } + + /* Generate a string description of the vma info. */ + namelen = strlen(p); + remaining = (bufsize - 1) - namelen; + memmove(buf, p, namelen); + snprintf(buf + namelen, remaining, "[%lx+%lx] ", + vma->vm_start, vma->vm_end - vma->vm_start); +} + /* * This method wraps the backtracer's more generic support. * It is only invoked from the architecture-specific code; show_stack() @@ -353,6 +359,7 @@ EXPORT_SYMBOL(KBacktraceIterator_next); void tile_show_stack(struct KBacktraceIterator *kbt, int headers) { int i; + int have_mmap_sem = 0; if (headers) { /* @@ -369,31 +376,16 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { - char *modname; - const char *name; - unsigned long address = kbt->it.pc; - unsigned long offset, size; char namebuf[KSYM_NAME_LEN+100]; + unsigned long address = kbt->it.pc; - if (address >= PAGE_OFFSET) - name = kallsyms_lookup(address, &size, &offset, - &modname, namebuf); - else - name = NULL; - - if (!name) - namebuf[0] = '\0'; - else { - size_t namelen = strlen(namebuf); - size_t remaining = (sizeof(namebuf) - 1) - namelen; - char *p = namebuf + namelen; - int rc = snprintf(p, remaining, "+%#lx/%#lx ", - offset, size); - if (modname && rc < remaining) - snprintf(p + rc, remaining - rc, - "[%s] ", modname); - namebuf[sizeof(namebuf)-1] = '\0'; - } + /* Try to acquire the mmap_sem as we pass into userspace. */ + if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm) + have_mmap_sem = + down_read_trylock(&kbt->task->mm->mmap_sem); + + describe_addr(kbt, address, have_mmap_sem, + namebuf, sizeof(namebuf)); pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n", i++, address, namebuf, (unsigned long)(kbt->it.sp)); @@ -408,6 +400,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Stack dump stopped; next frame identical to this one\n"); if (headers) pr_err("Stack dump complete\n"); + if (have_mmap_sem) + up_read(&kbt->task->mm->mmap_sem); } EXPORT_SYMBOL(tile_show_stack); diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 2bb6602..73cff81 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -200,7 +200,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, { siginfo_t info = { 0 }; int signo, code; - unsigned long address; + unsigned long address = 0; bundle_bits instr; /* Re-enable interrupts. */ @@ -223,6 +223,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, } switch (fault_num) { + case INT_MEM_ERROR: + signo = SIGBUS; + code = BUS_OBJERR; + break; case INT_ILL: if (copy_from_user(&instr, (void __user *)regs->pc, sizeof(instr))) { @@ -289,7 +293,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, address = regs->pc; break; #ifdef __tilegx__ - case INT_ILL_TRANS: + case INT_ILL_TRANS: { + /* Avoid a hardware erratum with the return address stack. */ + fill_ra_stack(); + signo = SIGSEGV; code = SEGV_MAPERR; if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK) @@ -297,6 +304,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, else address = 0; /* FIXME: GX: single-step for address */ break; + } #endif default: panic("Unexpected do_trap interrupt number %d", fault_num); @@ -308,7 +316,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, info.si_addr = (void __user *)address; if (signo == SIGILL) info.si_trapno = fault_num; - trace_unhandled_signal("trap", regs, address, signo); + if (signo != SIGTRAP) + trace_unhandled_signal("trap", regs, address, signo); force_sig_info(signo, &info, current); } diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 0c26086..985f598 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -7,6 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ strchr_$(BITS).o strlen_$(BITS).o ifeq ($(CONFIG_TILEGX),y) +CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer lib-y += memcpy_user_64.o else lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 8928aac..db4fb89 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) { char *p, *base; size_t step_size, load_count; + + /* + * On TILEPro the striping granularity is a fixed 8KB; on + * TILE-Gx it is configurable, and we rely on the fact that + * the hypervisor always configures maximum striping, so that + * bits 9 and 10 of the PA are part of the stripe function, so + * every 512 bytes we hit a striping boundary. + * + */ +#ifdef __tilegx__ + const unsigned long STRIPE_WIDTH = 512; +#else const unsigned long STRIPE_WIDTH = 8192; +#endif + #ifdef __tilegx__ /* * On TILE-Gx, we must disable the dstream prefetcher before doing @@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * memory, that one load would be sufficient, but since we may * be, we also need to back up to the last load issued to * another memory controller, which would be the point where - * we crossed an 8KB boundary (the granularity of striping + * we crossed a "striping" boundary (the granularity of striping * across memory controllers). Keep backing up and doing this * until we are before the beginning of the buffer, or have * hit all the controllers. @@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * every cache line on a full memory stripe on each * controller" that we simply do that, to simplify the logic. * - * FIXME: See bug 9535 for some issues with this code. + * On TILE-Gx the hash-for-home function is much more complex, + * with the upshot being we can't readily guarantee we have + * hit both entries in the 128-entry AMT that were hit by any + * load in the entire range, so we just re-load them all. + * With larger buffers, we may want to consider using a hypervisor + * trap to issue loads directly to each hash-for-home tile for + * each controller (doing it from Linux would trash the TLB). */ if (hfh) { step_size = L2_CACHE_BYTES; +#ifdef __tilegx__ + load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES; +#else load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * (1 << CHIP_LOG_NUM_MSHIMS()); +#endif } else { step_size = STRIPE_WIDTH; load_count = (1 << CHIP_LOG_NUM_MSHIMS()); @@ -109,7 +133,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) /* Figure out how far back we need to go. */ base = p - (step_size * (load_count - 2)); - if ((long)base < (long)buffer) + if ((unsigned long)base < (unsigned long)buffer) base = buffer; /* diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 4763b3a..37440ca 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c @@ -14,7 +14,13 @@ * Do memcpy(), but trap and return "n" when a load or store faults. * * Note: this idiom only works when memcpy() compiles to a leaf function. - * If "sp" is updated during memcpy, the "jrp lr" will be incorrect. + * Here leaf function not only means it does not have calls, but also + * requires no stack operations (sp, stack frame pointer) and no + * use of callee-saved registers, else "jrp lr" will be incorrect since + * unwinding stack frame is bypassed. Since memcpy() is not complex so + * these conditions are satisfied here, but we need to be careful when + * modifying this file. This is not a clean solution but is the best + * one so far. * * Also note that we are capturing "n" from the containing scope here. */ diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h index c101098..6ac3750 100644 --- a/arch/tile/lib/spinlock_common.h +++ b/arch/tile/lib/spinlock_common.h @@ -60,5 +60,5 @@ static void delay_backoff(int iterations) loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & (loops - 1); - relax(1 << exponent); + relax(loops); } diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index cba30e9..22e58f5 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -130,7 +130,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) } /* - * Handle a fault on the vmalloc or module mapping area + * Handle a fault on the vmalloc area. */ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) { @@ -203,9 +203,14 @@ static pgd_t *get_current_pgd(void) * interrupt or a critical region, and must do as little as possible. * Similarly, we can't use atomic ops here, since we may be handling a * fault caused by an atomic op access. + * + * If we find a migrating PTE while we're in an NMI context, and we're + * at a PC that has a registered exception handler, we don't wait, + * since this thread may (e.g.) have been interrupted while migrating + * its own stack, which would then cause us to self-deadlock. */ static int handle_migrating_pte(pgd_t *pgd, int fault_num, - unsigned long address, + unsigned long address, unsigned long pc, int is_kernel_mode, int write) { pud_t *pud; @@ -227,6 +232,8 @@ static int handle_migrating_pte(pgd_t *pgd, int fault_num, pte_offset_kernel(pmd, address); pteval = *pte; if (pte_migrating(pteval)) { + if (in_nmi() && search_exception_tables(pc)) + return 0; wait_for_migration(pte); return 1; } @@ -300,7 +307,7 @@ static int handle_page_fault(struct pt_regs *regs, * rather than trying to patch up the existing PTE. */ pgd = get_current_pgd(); - if (handle_migrating_pte(pgd, fault_num, address, + if (handle_migrating_pte(pgd, fault_num, address, regs->pc, is_kernel_mode, write)) return 1; @@ -335,9 +342,12 @@ static int handle_page_fault(struct pt_regs *regs, /* * If we're trying to touch user-space addresses, we must * be either at PL0, or else with interrupts enabled in the - * kernel, so either way we can re-enable interrupts here. + * kernel, so either way we can re-enable interrupts here + * unless we are doing atomic access to user space with + * interrupts disabled. */ - local_irq_enable(); + if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) + local_irq_enable(); mm = tsk->mm; @@ -665,7 +675,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, */ if (fault_num == INT_DTLB_ACCESS) write = 1; - if (handle_migrating_pte(pgd, fault_num, address, 1, write)) + if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write)) return state; /* Return zero so that we continue on with normal fault handling. */ diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 1cc6ae4..499f737 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -394,6 +394,7 @@ int page_home(struct page *page) return pte_to_home(*virt_to_pte(NULL, kva)); } } +EXPORT_SYMBOL(page_home); void homecache_change_page_home(struct page *page, int order, int home) { diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 830c490..6a9d20d 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -254,11 +254,6 @@ static pgprot_t __init init_pgprot(ulong address) return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE); } - /* As a performance optimization, keep the boot init stack here. */ - if (address >= (ulong)&init_thread_union && - address < (ulong)&init_thread_union + THREAD_SIZE) - return construct_pgprot(PAGE_KERNEL, smp_processor_id()); - #ifndef __tilegx__ #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() /* Force the atomic_locks[] array page to be hash-for-home. */ @@ -557,6 +552,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) address = MEM_SV_INTRPT; pmd = get_pmd(pgtables, address); + pfn = 0; /* code starts at PA 0 */ if (ktext_small) { /* Allocate an L2 PTE for the kernel text */ int cpu = 0; @@ -579,10 +575,15 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } BUG_ON(address != (unsigned long)_stext); - pfn = 0; /* code starts at PA 0 */ - pte = alloc_pte(); - for (pte_ofs = 0; address < (unsigned long)_einittext; - pfn++, pte_ofs++, address += PAGE_SIZE) { + pte = NULL; + for (; address < (unsigned long)_einittext; + pfn++, address += PAGE_SIZE) { + pte_ofs = pte_index(address); + if (pte_ofs == 0) { + if (pte) + assign_pte(pmd++, pte); + pte = alloc_pte(); + } if (!ktext_local) { prot = set_remote_cache_cpu(prot, cpu); cpu = cpumask_next(cpu, &ktext_mask); @@ -591,7 +592,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } pte[pte_ofs] = pfn_pte(pfn, prot); } - assign_pte(pmd, pte); + if (pte) + assign_pte(pmd, pte); } else { pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); pteval = pte_mkhuge(pteval); @@ -614,7 +616,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) else pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_NO_L3); - *(pte_t *)pmd = pteval; + for (; address < (unsigned long)_einittext; + pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE) + *(pte_t *)(pmd++) = pfn_pte(pfn, pteval); } /* Set swapper_pgprot here so it is flushed to memory right away. */ diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 8730369..2410aa8 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -177,14 +177,10 @@ void shatter_huge_page(unsigned long addr) if (!pmd_huge_page(*pmd)) return; - /* - * Grab the pgd_lock, since we may need it to walk the pgd_list, - * and since we need some kind of lock here to avoid races. - */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock_irqsave(&init_mm.page_table_lock, flags); if (!pmd_huge_page(*pmd)) { /* Lost the race to convert the huge page. */ - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); return; } @@ -194,6 +190,7 @@ void shatter_huge_page(unsigned long addr) #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. */ + spin_lock(&pgd_lock); list_for_each(pos, &pgd_list) { pmd_t *copy_pmd; pgd = list_to_pgd(pos) + pgd_index(addr); @@ -201,6 +198,7 @@ void shatter_huge_page(unsigned long addr) copy_pmd = pmd_offset(pud, addr); __set_pmd(copy_pmd, *pmd); } + spin_unlock(&pgd_lock); #endif /* Tell every cpu to notice the change. */ @@ -208,7 +206,7 @@ void shatter_huge_page(unsigned long addr) cpu_possible_mask, NULL, 0); /* Hold the lock until the TLB flush is finished to avoid races. */ - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); } /* @@ -217,9 +215,13 @@ void shatter_huge_page(unsigned long addr) * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. - * The locking scheme was chosen on the basis of manfred's - * recommendations and having no core impact whatsoever. - * -- wli + * + * The lock is always taken with interrupts disabled, unlike on x86 + * and other platforms, because we need to take the lock in + * shatter_huge_page(), which may be called from an interrupt context. + * We are not at risk from the tlbflush IPI deadlock that was seen on + * x86, since we use the flush_remote() API to have the hypervisor do + * the TLB flushes regardless of irq disabling. */ DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); @@ -469,10 +471,18 @@ void __set_pte(pte_t *ptep, pte_t pte) void set_pte(pte_t *ptep, pte_t pte) { - struct page *page = pfn_to_page(pte_pfn(pte)); - - /* Update the home of a PTE if necessary */ - pte = pte_set_home(pte, page_home(page)); + if (pte_present(pte) && + (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) { + /* The PTE actually references physical memory. */ + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + /* Update the home of the PTE from the struct page. */ + pte = pte_set_home(pte, page_home(pfn_to_page(pfn))); + } else if (hv_pte_get_mode(pte) == 0) { + /* remap_pfn_range(), etc, must supply PTE mode. */ + panic("set_pte(): out-of-range PFN and mode 0\n"); + } + } __set_pte(ptep, pte); } |