path: root/arch/arm/kernel
author    Linus Torvalds <torvalds@linux-foundation.org>  2013-07-03 09:46:29 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-07-03 09:46:29 -0700
commit    fb2af0020a51709ad87ea8055c325d3fbde04158 (patch)
tree      88f3bc38d5604c6eed00597873f209726f9aaeb5 /arch/arm/kernel
parent    790eac5640abf7a57fa3a644386df330e18c11b0 (diff)
parent    3c0c01ab742ddfaf6b6f2d64b890e77cda4b7727 (diff)
Merge branch 'for-linus' of git://git.linaro.org/people/rmk/linux-arm
Pull ARM updates from Russell King:
 "This contains the usual updates from other people (listed below) and
  the usual random muddle of miscellaneous ARM updates which cover some
  low priority bug fixes and performance improvements.

  I've started to put the pull request wording into the merge commits,
  which are:

   - NoMMU stuff:
     This includes the following series sent earlier to the list:
      - nommu-fixes
      - R7 Support
      - MPU support
     I've left out the ARCH_MULTIPLATFORM/!MMU stuff that Arnd and I were
     discussing today until we've reached a conclusion/that's had some
     more review.
     This is rebased (and re-tested) on your devel-stable branch because
     otherwise there were going to be conflicts with Uwe's V7M work now
     that you've merged that.  I've included the fix for limiting MPU to
     CPU_V7.

   - Huge page support
     These changes bring both HugeTLB support and Transparent HugePage
     (THP) support to ARM.  Only long descriptors (LPAE) are supported
     in this series.
     The code has been tested on an Arndale board (Exynos 5250).

   - LPAE updates
     Please pull these miscellaneous LPAE fixes I've been collecting for
     a while now for 3.11.  They've been tested and reviewed by quite a
     few people, and most of the patches are pretty trivial.  -- Will Deacon.

   - arch_timer cleanups
     Please pull these arch_timer cleanups I've been holding onto for a
     while.  They're the same as my last posting, but have been rebased
     to v3.10-rc3.

   - mpidr linearisation (multiprocessor id register - identifies which
     CPU number we are in the system)
     This patch series that implements MPIDR linearization through a
     simple hashing algorithm and updates current cpu_{suspend}/{resume}
     code to use the newly created hash structures to retrieve context
     pointers.  It represents a stepping stone for the implementation of
     power management code on forthcoming multi-cluster ARM systems.
     It has been tested on TC2 (dual cluster A15xA7 system), iMX6q, OMAP4
     and Tegra, with processors hitting low-power states requiring
     warm-boot resume through the cpu_resume code path"

* 'for-linus' of git://git.linaro.org/people/rmk/linux-arm: (77 commits)
  ARM: 7775/1: mm: Remove do_sect_fault from LPAE code
  ARM: 7777/1: Avoid extra calls to the C compiler
  ARM: 7774/1: Fix dtb dependency to use order-only prerequisites
  ARM: 7770/1: remove residual ARMv2 support from decompressor
  ARM: 7769/1: Cortex-A15: fix erratum 798181 implementation
  ARM: 7768/1: prevent risks of out-of-bound access in ASID allocator
  ARM: 7767/1: let the ASID allocator handle suspended animation
  ARM: 7766/1: versatile: don't mark pen as __INIT
  ARM: 7765/1: perf: Record the user-mode PC in the call chain.
  ARM: 7735/2: Preserve the user r/w register TPIDRURW on context switch and fork
  ARM: kernel: implement stack pointer save array through MPIDR hashing
  ARM: kernel: build MPIDR hash function data structure
  ARM: mpu: Ensure that MPU depends on CPU_V7
  ARM: mpu: protect the vectors page with an MPU region
  ARM: mpu: Allow enabling of the MPU via kconfig
  ARM: 7758/1: introduce config HAS_BANDGAP
  ARM: 7757/1: mm: don't flush icache in switch_mm with hardware broadcasting
  ARM: 7751/1: zImage: don't overwrite ourself with a page table
  ARM: 7749/1: spinlock: retry trylock operation if strex fails on free lock
  ARM: 7748/1: oabi: handle faults when loading swi instruction from userspace
  ...
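[Editor's note] The mpidr linearisation described above is implemented further down in this
diff by smp_build_mpidr_hash() in arch/arm/kernel/setup.c and the compute_mpidr_hash macro
in arch/arm/kernel/sleep.S.  As a rough, standalone illustration of the idea only — not the
kernel code itself; the helper names and the example CPU set below are made up for this
sketch — the following C program builds the same per-affinity-level shifts from a set of
MPIDR values and then hashes each MPIDR into a small dense index:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the shift/OR scheme described in the pull request. */
#define AFF(mpidr, level)  (((mpidr) >> (8 * (level))) & 0xff)

struct mpidr_hash_example {
	uint32_t mask;          /* MPIDR bits that actually vary between CPUs   */
	uint32_t shift_aff[3];  /* right shift applied to each affinity level   */
	uint32_t bits;          /* total width of the resulting linear index    */
};

static void build_hash(const uint32_t *mpidrs, int n, struct mpidr_hash_example *h)
{
	uint32_t mask = 0, fs[3], bits[3];

	for (int i = 1; i < n; i++)
		mask |= mpidrs[i] ^ mpidrs[0];          /* bits that ever toggle */

	for (int l = 0; l < 3; l++) {
		uint32_t aff = AFF(mask, l);
		uint32_t ls = aff ? 32 - __builtin_clz(aff) : 0;  /* fls()     */
		fs[l] = aff ? __builtin_ctz(aff) : 0;             /* ffs() - 1 */
		bits[l] = ls - fs[l];
	}

	h->mask = mask;
	h->shift_aff[0] = fs[0];
	h->shift_aff[1] = 8 + fs[1] - bits[0];
	h->shift_aff[2] = 16 + fs[2] - (bits[1] + bits[0]);
	h->bits = bits[0] + bits[1] + bits[2];
}

static uint32_t hash_mpidr(uint32_t mpidr, const struct mpidr_hash_example *h)
{
	uint32_t m = mpidr & h->mask;

	return ((m & 0xff)     >> h->shift_aff[0]) |
	       ((m & 0xff00)   >> h->shift_aff[1]) |
	       ((m & 0xff0000) >> h->shift_aff[2]);
}

int main(void)
{
	/* e.g. a TC2-like system: two CPUs in cluster 0, three in cluster 1 */
	uint32_t cpus[] = { 0x000, 0x001, 0x100, 0x101, 0x102 };
	struct mpidr_hash_example h;

	build_hash(cpus, 5, &h);
	for (int i = 0; i < 5; i++)
		printf("MPIDR 0x%03x -> index %u (of %u slots)\n",
		       cpus[i], hash_mpidr(cpus[i], &h), 1u << h.bits);
	return 0;
}

For the five MPIDRs above the mask comes out as 0x103 and the CPUs map to indices
0, 1, 4, 5 and 6 within 8 slots: collision free, though not minimal, exactly as the
comment in smp_build_mpidr_hash() below describes.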
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r--  arch/arm/kernel/Makefile        |   5
-rw-r--r--  arch/arm/kernel/asm-offsets.c   |   6
-rw-r--r--  arch/arm/kernel/entry-armv.S    |   5
-rw-r--r--  arch/arm/kernel/entry-common.S  |  42
-rw-r--r--  arch/arm/kernel/head-nommu.S    | 160
-rw-r--r--  arch/arm/kernel/head.S          |  10
-rw-r--r--  arch/arm/kernel/hyp-stub.S      |   7
-rw-r--r--  arch/arm/kernel/perf_event.c    |   1
-rw-r--r--  arch/arm/kernel/process.c       |   4
-rw-r--r--  arch/arm/kernel/psci_smp.c      |   4
-rw-r--r--  arch/arm/kernel/ptrace.c        |   2
-rw-r--r--  arch/arm/kernel/setup.c         |  82
-rw-r--r--  arch/arm/kernel/signal.c        |   9
-rw-r--r--  arch/arm/kernel/sleep.S         |  97
-rw-r--r--  arch/arm/kernel/smp.c           |  21
-rw-r--r--  arch/arm/kernel/smp_tlb.c       |  18
-rw-r--r--  arch/arm/kernel/suspend.c       |  76
-rw-r--r--  arch/arm/kernel/traps.c         |   4
18 files changed, 456 insertions(+), 97 deletions(-)
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index f4285b5..fccfbdb 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -38,7 +38,10 @@ obj-$(CONFIG_ARTHUR) += arthur.o
obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o
-obj-$(CONFIG_SMP) += smp.o smp_tlb.o
+obj-$(CONFIG_SMP) += smp.o
+ifdef CONFIG_MMU
+obj-$(CONFIG_SMP) += smp_tlb.o
+endif
obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o
obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o
obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ee68cce..ded0417 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/procinfo.h>
+#include <asm/suspend.h>
#include <asm/hardware/cache-l2x0.h>
#include <linux/kbuild.h>
@@ -145,6 +146,11 @@ int main(void)
#ifdef MULTI_CACHE
DEFINE(CACHE_FLUSH_KERN_ALL, offsetof(struct cpu_cache_fns, flush_kern_all));
#endif
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
+ DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
+ DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
+#endif
BLANK();
DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 582b405..a39cfc2a1 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -685,15 +685,16 @@ ENTRY(__switch_to)
UNWIND(.fnstart )
UNWIND(.cantunwind )
add ip, r1, #TI_CPU_SAVE
- ldr r3, [r2, #TI_TP_VALUE]
ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack
THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack
THUMB( str sp, [ip], #4 )
THUMB( str lr, [ip], #4 )
+ ldr r4, [r2, #TI_TP_VALUE]
+ ldr r5, [r2, #TI_TP_VALUE + 4]
#ifdef CONFIG_CPU_USE_DOMAINS
ldr r6, [r2, #TI_CPU_DOMAIN]
#endif
- set_tls r3, r4, r5
+ switch_tls r1, r4, r5, r3, r7
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
ldr r7, [r2, #TI_TASK]
ldr r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 85a72b0..94104bf 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -366,6 +366,16 @@ ENTRY(vector_swi)
#endif
zero_fp
+#ifdef CONFIG_ALIGNMENT_TRAP
+ ldr ip, __cr_alignment
+ ldr ip, [ip]
+ mcr p15, 0, ip, c1, c0 @ update control register
+#endif
+
+ enable_irq
+ ct_user_exit
+ get_thread_info tsk
+
/*
* Get the system call number.
*/
@@ -379,9 +389,9 @@ ENTRY(vector_swi)
#ifdef CONFIG_ARM_THUMB
tst r8, #PSR_T_BIT
movne r10, #0 @ no thumb OABI emulation
- ldreq r10, [lr, #-4] @ get SWI instruction
+ USER( ldreq r10, [lr, #-4] ) @ get SWI instruction
#else
- ldr r10, [lr, #-4] @ get SWI instruction
+ USER( ldr r10, [lr, #-4] ) @ get SWI instruction
#endif
#ifdef CONFIG_CPU_ENDIAN_BE8
rev r10, r10 @ little endian instruction
@@ -396,22 +406,13 @@ ENTRY(vector_swi)
/* Legacy ABI only, possibly thumb mode. */
tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs
addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in
- ldreq scno, [lr, #-4]
+ USER( ldreq scno, [lr, #-4] )
#else
/* Legacy ABI only. */
- ldr scno, [lr, #-4] @ get SWI instruction
+ USER( ldr scno, [lr, #-4] ) @ get SWI instruction
#endif
-#ifdef CONFIG_ALIGNMENT_TRAP
- ldr ip, __cr_alignment
- ldr ip, [ip]
- mcr p15, 0, ip, c1, c0 @ update control register
-#endif
- enable_irq
- ct_user_exit
-
- get_thread_info tsk
adr tbl, sys_call_table @ load syscall table pointer
#if defined(CONFIG_OABI_COMPAT)
@@ -446,6 +447,21 @@ local_restart:
eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back
bcs arm_syscall
b sys_ni_syscall @ not private func
+
+#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
+ /*
+ * We failed to handle a fault trying to access the page
+ * containing the swi instruction, but we're not really in a
+ * position to return -EFAULT. Instead, return back to the
+ * instruction and re-enter the user fault handling path trying
+ * to page it in. This will likely result in sending SEGV to the
+ * current task.
+ */
+9001:
+ sub lr, lr, #4
+ str lr, [sp, #S_PC]
+ b ret_fast_syscall
+#endif
ENDPROC(vector_swi)
/*
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 8812ce8..75f14cc 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -17,9 +17,12 @@
#include <asm/assembler.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
+#include <asm/memory.h>
#include <asm/cp15.h>
#include <asm/thread_info.h>
#include <asm/v7m.h>
+#include <asm/mpu.h>
+#include <asm/page.h>
/*
* Kernel startup entry point.
@@ -63,12 +66,74 @@ ENTRY(stext)
movs r10, r5 @ invalid processor (r5=0)?
beq __error_p @ yes, error 'p'
- adr lr, BSYM(__after_proc_init) @ return (PIC) address
+#ifdef CONFIG_ARM_MPU
+ /* Calculate the size of a region covering just the kernel */
+ ldr r5, =PHYS_OFFSET @ Region start: PHYS_OFFSET
+ ldr r6, =(_end) @ Cover whole kernel
+ sub r6, r6, r5 @ Minimum size of region to map
+ clz r6, r6 @ Region size must be 2^N...
+ rsb r6, r6, #31 @ ...so round up region size
+ lsl r6, r6, #MPU_RSR_SZ @ Put size in right field
+ orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit
+ bl __setup_mpu
+#endif
+ ldr r13, =__mmap_switched @ address to jump to after
+ @ initialising sctlr
+ adr lr, BSYM(1f) @ return (PIC) address
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
+ 1: b __after_proc_init
ENDPROC(stext)
+#ifdef CONFIG_SMP
+ __CPUINIT
+ENTRY(secondary_startup)
+ /*
+ * Common entry point for secondary CPUs.
+ *
+ * Ensure that we're in SVC mode, and IRQs are disabled. Lookup
+ * the processor type - there is no need to check the machine type
+ * as it has already been validated by the primary processor.
+ */
+ setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
+#ifndef CONFIG_CPU_CP15
+ ldr r9, =CONFIG_PROCESSOR_ID
+#else
+ mrc p15, 0, r9, c0, c0 @ get processor id
+#endif
+ bl __lookup_processor_type @ r5=procinfo r9=cpuid
+ movs r10, r5 @ invalid processor?
+ beq __error_p @ yes, error 'p'
+
+ adr r4, __secondary_data
+ ldmia r4, {r7, r12}
+
+#ifdef CONFIG_ARM_MPU
+ /* Use MPU region info supplied by __cpu_up */
+ ldr r6, [r7] @ get secondary_data.mpu_szr
+ bl __setup_mpu @ Initialize the MPU
+#endif
+
+ adr lr, BSYM(__after_proc_init) @ return address
+ mov r13, r12 @ __secondary_switched address
+ ARM( add pc, r10, #PROCINFO_INITFUNC )
+ THUMB( add r12, r10, #PROCINFO_INITFUNC )
+ THUMB( mov pc, r12 )
+ENDPROC(secondary_startup)
+
+ENTRY(__secondary_switched)
+ ldr sp, [r7, #8] @ set up the stack pointer
+ mov fp, #0
+ b secondary_start_kernel
+ENDPROC(__secondary_switched)
+
+ .type __secondary_data, %object
+__secondary_data:
+ .long secondary_data
+ .long __secondary_switched
+#endif /* CONFIG_SMP */
+
/*
* Set the Control Register and Read the process ID.
*/
@@ -99,10 +164,97 @@ __after_proc_init:
#endif
mcr p15, 0, r0, c1, c0, 0 @ write control reg
#endif /* CONFIG_CPU_CP15 */
-
- b __mmap_switched @ clear the BSS and jump
- @ to start_kernel
+ mov pc, r13
ENDPROC(__after_proc_init)
.ltorg
+#ifdef CONFIG_ARM_MPU
+
+
+/* Set which MPU region should be programmed */
+.macro set_region_nr tmp, rgnr
+ mov \tmp, \rgnr @ Use static region numbers
+ mcr p15, 0, \tmp, c6, c2, 0 @ Write RGNR
+.endm
+
+/* Setup a single MPU region, either D or I side (D-side for unified) */
+.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE
+ mcr p15, 0, \bar, c6, c1, (0 + \side) @ I/DRBAR
+ mcr p15, 0, \acr, c6, c1, (4 + \side) @ I/DRACR
+ mcr p15, 0, \sr, c6, c1, (2 + \side) @ I/DRSR
+.endm
+
+/*
+ * Setup the MPU and initial MPU Regions. We create the following regions:
+ * Region 0: Use this for probing the MPU details, so leave disabled.
+ * Region 1: Background region - covers the whole of RAM as strongly ordered
+ * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6
+ * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page
+ *
+ * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION
+*/
+
+ENTRY(__setup_mpu)
+
+ /* Probe for v7 PMSA compliance */
+ mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0
+ and r0, r0, #(MMFR0_PMSA) @ PMSA field
+ teq r0, #(MMFR0_PMSAv7) @ PMSA v7
+ bne __error_p @ Fail: ARM_MPU on NOT v7 PMSA
+
+ /* Determine whether the D/I-side memory map is unified. We set the
+ * flags here and continue to use them for the rest of this function */
+ mrc p15, 0, r0, c0, c0, 4 @ MPUIR
+ ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU
+ beq __error_p @ Fail: ARM_MPU and no MPU
+ tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified
+
+ /* Setup second region first to free up r6 */
+ set_region_nr r0, #MPU_RAM_REGION
+ isb
+ /* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */
+ ldr r0, =PHYS_OFFSET @ RAM starts at PHYS_OFFSET
+ ldr r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL)
+
+ setup_region r0, r5, r6, MPU_DATA_SIDE @ PHYS_OFFSET, shared, enabled
+ beq 1f @ Memory-map not unified
+ setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled
+1: isb
+
+ /* First/background region */
+ set_region_nr r0, #MPU_BG_REGION
+ isb
+ /* Execute Never, strongly ordered, inaccessible to PL0, rw PL1 */
+ mov r0, #0 @ BG region starts at 0x0
+ ldr r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA)
+ mov r6, #MPU_RSR_ALL_MEM @ 4GB region, enabled
+
+ setup_region r0, r5, r6, MPU_DATA_SIDE @ 0x0, BG region, enabled
+ beq 2f @ Memory-map not unified
+ setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled
+2: isb
+
+ /* Vectors region */
+ set_region_nr r0, #MPU_VECTORS_REGION
+ isb
+ /* Shared, inaccessible to PL0, rw PL1 */
+ mov r0, #CONFIG_VECTORS_BASE @ Cover from VECTORS_BASE
+ ldr r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL)
+ /* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */
+ mov r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN)
+
+ setup_region r0, r5, r6, MPU_DATA_SIDE @ VECTORS_BASE, PL0 NA, enabled
+ beq 3f @ Memory-map not unified
+ setup_region r0, r5, r6, MPU_INSTR_SIDE @ VECTORS_BASE, PL0 NA, enabled
+3: isb
+
+ /* Enable the MPU */
+ mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR
+ bic r0, r0, #CR_BR @ Disable the 'default mem-map'
+ orr r0, r0, #CR_M @ Set SCTRL.M (MPU on)
+ mcr p15, 0, r0, c1, c0, 0 @ Enable MPU
+ isb
+ mov pc,lr
+ENDPROC(__setup_mpu)
+#endif
#include "head-common.S"
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 8bac553..45e8935 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -156,7 +156,7 @@ ENDPROC(stext)
*
* Returns:
* r0, r3, r5-r7 corrupted
- * r4 = physical page table address
+ * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
*/
__create_page_tables:
pgtbl r4, r8 @ page table address
@@ -331,6 +331,7 @@ __create_page_tables:
#endif
#ifdef CONFIG_ARM_LPAE
sub r4, r4, #0x1000 @ point to the PGD table
+ mov r4, r4, lsr #ARCH_PGD_SHIFT
#endif
mov pc, lr
ENDPROC(__create_page_tables)
@@ -408,7 +409,7 @@ __secondary_data:
* r0 = cp#15 control register
* r1 = machine ID
* r2 = atags or dtb pointer
- * r4 = page table pointer
+ * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
* r9 = processor ID
* r13 = *virtual* address to jump to upon completion
*/
@@ -427,10 +428,7 @@ __enable_mmu:
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic r0, r0, #CR_I
#endif
-#ifdef CONFIG_ARM_LPAE
- mov r5, #0
- mcrr p15, 0, r4, r5, c2 @ load TTBR0
-#else
+#ifndef CONFIG_ARM_LPAE
mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 1315c4c..4910232 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -153,6 +153,13 @@ THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE
mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL
orr r7, r7, #3 @ PL1PCEN | PL1PCTEN
mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL
+ mov r7, #0
+ mcrr p15, 4, r7, r7, c14 @ CNTVOFF
+
+ @ Disable virtual timer in case it was counting
+ mrc p15, 0, r7, c14, c3, 1 @ CNTV_CTL
+ bic r7, #1 @ Clear ENABLE
+ mcr p15, 0, r7, c14, c3, 1 @ CNTV_CTL
1:
#endif
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8c3094d..d9f5cd4 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -569,6 +569,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
}
+ perf_callchain_store(entry, regs->ARM_pc);
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 6e8931c..7f1efcd 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
#include <asm/thread_notify.h>
#include <asm/stacktrace.h>
#include <asm/mach/time.h>
+#include <asm/tls.h>
#ifdef CONFIG_CC_STACKPROTECTOR
#include <linux/stackprotector.h>
@@ -374,7 +375,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
clear_ptrace_hw_breakpoint(p);
if (clone_flags & CLONE_SETTLS)
- thread->tp_value = childregs->ARM_r3;
+ thread->tp_value[0] = childregs->ARM_r3;
+ thread->tp_value[1] = get_tpuser();
thread_notify(THREAD_NOTIFY_COPY, thread);
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
index 23a1142..219f1d73 100644
--- a/arch/arm/kernel/psci_smp.c
+++ b/arch/arm/kernel/psci_smp.c
@@ -68,8 +68,6 @@ void __ref psci_cpu_die(unsigned int cpu)
/* We should never return */
panic("psci: cpu %d failed to shutdown\n", cpu);
}
-#else
-#define psci_cpu_die NULL
#endif
bool __init psci_smp_available(void)
@@ -80,5 +78,7 @@ bool __init psci_smp_available(void)
struct smp_operations __initdata psci_smp_ops = {
.smp_boot_secondary = psci_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
.cpu_die = psci_cpu_die,
+#endif
};
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
#endif
case PTRACE_GET_THREAD_AREA:
- ret = put_user(task_thread_info(child)->tp_value,
+ ret = put_user(task_thread_info(child)->tp_value[0],
datap);
break;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1c8278d..9b65327 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -367,7 +367,7 @@ void __init early_print(const char *str, ...)
static void __init cpuid_init_hwcaps(void)
{
- unsigned int divide_instrs;
+ unsigned int divide_instrs, vmsa;
if (cpu_architecture() < CPU_ARCH_ARMv7)
return;
@@ -380,6 +380,11 @@ static void __init cpuid_init_hwcaps(void)
case 1:
elf_hwcap |= HWCAP_IDIVT;
}
+
+ /* LPAE implies atomic ldrd/strd instructions */
+ vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
+ if (vmsa >= 5)
+ elf_hwcap |= HWCAP_LPAE;
}
static void __init feat_v6_fixup(void)
@@ -470,9 +475,82 @@ void __init smp_setup_processor_id(void)
for (i = 1; i < nr_cpu_ids; ++i)
cpu_logical_map(i) = i == cpu ? 0 : i;
+ /*
+ * clear __my_cpu_offset on boot CPU to avoid hang caused by
+ * using percpu variable early, for example, lockdep will
+ * access percpu variable inside lock_release
+ */
+ set_my_cpu_offset(0);
+
printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr);
}
+struct mpidr_hash mpidr_hash;
+#ifdef CONFIG_SMP
+/**
+ * smp_build_mpidr_hash - Pre-compute shifts required at each affinity
+ * level in order to build a linear index from an
+ * MPIDR value. Resulting algorithm is a collision
+ * free hash carried out through shifting and ORing
+ */
+static void __init smp_build_mpidr_hash(void)
+{
+ u32 i, affinity;
+ u32 fs[3], bits[3], ls, mask = 0;
+ /*
+ * Pre-scan the list of MPIDRS and filter out bits that do
+ * not contribute to affinity levels, ie they never toggle.
+ */
+ for_each_possible_cpu(i)
+ mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));
+ pr_debug("mask of set bits 0x%x\n", mask);
+ /*
+ * Find and stash the last and first bit set at all affinity levels to
+ * check how many bits are required to represent them.
+ */
+ for (i = 0; i < 3; i++) {
+ affinity = MPIDR_AFFINITY_LEVEL(mask, i);
+ /*
+ * Find the MSB bit and LSB bits position
+ * to determine how many bits are required
+ * to express the affinity level.
+ */
+ ls = fls(affinity);
+ fs[i] = affinity ? ffs(affinity) - 1 : 0;
+ bits[i] = ls - fs[i];
+ }
+ /*
+ * An index can be created from the MPIDR by isolating the
+ * significant bits at each affinity level and by shifting
+ * them in order to compress the 24 bits values space to a
+ * compressed set of values. This is equivalent to hashing
+ * the MPIDR through shifting and ORing. It is a collision free
+ * hash though not minimal since some levels might contain a number
+ * of CPUs that is not an exact power of 2 and their bit
+ * representation might contain holes, eg MPIDR[7:0] = {0x2, 0x80}.
+ */
+ mpidr_hash.shift_aff[0] = fs[0];
+ mpidr_hash.shift_aff[1] = MPIDR_LEVEL_BITS + fs[1] - bits[0];
+ mpidr_hash.shift_aff[2] = 2*MPIDR_LEVEL_BITS + fs[2] -
+ (bits[1] + bits[0]);
+ mpidr_hash.mask = mask;
+ mpidr_hash.bits = bits[2] + bits[1] + bits[0];
+ pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] mask[0x%x] bits[%u]\n",
+ mpidr_hash.shift_aff[0],
+ mpidr_hash.shift_aff[1],
+ mpidr_hash.shift_aff[2],
+ mpidr_hash.mask,
+ mpidr_hash.bits);
+ /*
+ * 4x is an arbitrary value used to warn on a hash table much bigger
+ * than expected on most systems.
+ */
+ if (mpidr_hash_size() > 4 * num_possible_cpus())
+ pr_warn("Large number of MPIDR hash buckets detected\n");
+ sync_cache_w(&mpidr_hash);
+}
+#endif
+
static void __init setup_processor(void)
{
struct proc_info_list *list;
@@ -820,6 +898,7 @@ void __init setup_arch(char **cmdline_p)
smp_set_ops(mdesc->smp);
}
smp_init_cpus();
+ smp_build_mpidr_hash();
}
#endif
@@ -892,6 +971,7 @@ static const char *hwcap_str[] = {
"vfpv4",
"idiva",
"idivt",
+ "lpae",
NULL
};
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 296786b..1c16c35 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -392,14 +392,19 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig,
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
idx += 3;
+ /*
+ * Put the sigreturn code on the stack no matter which return
+ * mechanism we use in order to remain ABI compliant
+ */
if (__put_user(sigreturn_codes[idx], rc) ||
__put_user(sigreturn_codes[idx+1], rc+1))
return 1;
- if (cpsr & MODE32_BIT) {
+ if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) {
/*
* 32-bit code can use the new high-page
- * signal return code support.
+ * signal return code support except when the MPU has
+ * protected the vectors page from PL0
*/
retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb;
} else {
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 987dcf3..db1536b 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -7,6 +7,49 @@
.text
/*
+ * Implementation of MPIDR hash algorithm through shifting
+ * and OR'ing.
+ *
+ * @dst: register containing hash result
+ * @rs0: register containing affinity level 0 bit shift
+ * @rs1: register containing affinity level 1 bit shift
+ * @rs2: register containing affinity level 2 bit shift
+ * @mpidr: register containing MPIDR value
+ * @mask: register containing MPIDR mask
+ *
+ * Pseudo C-code:
+ *
+ *u32 dst;
+ *
+ *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) {
+ * u32 aff0, aff1, aff2;
+ * u32 mpidr_masked = mpidr & mask;
+ * aff0 = mpidr_masked & 0xff;
+ * aff1 = mpidr_masked & 0xff00;
+ * aff2 = mpidr_masked & 0xff0000;
+ * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2);
+ *}
+ * Input registers: rs0, rs1, rs2, mpidr, mask
+ * Output register: dst
+ * Note: input and output registers must be disjoint register sets
+ (eg: a macro instance with mpidr = r1 and dst = r1 is invalid)
+ */
+ .macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask
+ and \mpidr, \mpidr, \mask @ mask out MPIDR bits
+ and \dst, \mpidr, #0xff @ mask=aff0
+ ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0
+ THUMB( lsr \dst, \dst, \rs0 )
+ and \mask, \mpidr, #0xff00 @ mask = aff1
+ ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1)
+ THUMB( lsr \mask, \mask, \rs1 )
+ THUMB( orr \dst, \dst, \mask )
+ and \mask, \mpidr, #0xff0000 @ mask = aff2
+ ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2)
+ THUMB( lsr \mask, \mask, \rs2 )
+ THUMB( orr \dst, \dst, \mask )
+ .endm
+
+/*
* Save CPU state for a suspend. This saves the CPU general purpose
* registers, and allocates space on the kernel stack to save the CPU
* specific registers and some other data for resume.
@@ -29,12 +72,18 @@ ENTRY(__cpu_suspend)
mov r1, r4 @ size of save block
mov r2, r5 @ virtual SP
ldr r3, =sleep_save_sp
-#ifdef CONFIG_SMP
- ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
- ALT_UP(mov lr, #0)
- and lr, lr, #15
+ ldr r3, [r3, #SLEEP_SAVE_SP_VIRT]
+ ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
+ ALT_UP_B(1f)
+ ldr r8, =mpidr_hash
+ /*
+ * This ldmia relies on the memory layout of the mpidr_hash
+ * struct mpidr_hash.
+ */
+ ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
+ compute_mpidr_hash lr, r5, r6, r7, r9, r4
add r3, r3, lr, lsl #2
-#endif
+1:
bl __cpu_suspend_save
adr lr, BSYM(cpu_suspend_abort)
ldmfd sp!, {r0, pc} @ call suspend fn
@@ -81,15 +130,23 @@ ENDPROC(cpu_resume_after_mmu)
.data
.align
ENTRY(cpu_resume)
-#ifdef CONFIG_SMP
- adr r0, sleep_save_sp
- ALT_SMP(mrc p15, 0, r1, c0, c0, 5)
- ALT_UP(mov r1, #0)
- and r1, r1, #15
- ldr r0, [r0, r1, lsl #2] @ stack phys addr
-#else
- ldr r0, sleep_save_sp @ stack phys addr
-#endif
+ mov r1, #0
+ ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
+ ALT_UP_B(1f)
+ adr r2, mpidr_hash_ptr
+ ldr r3, [r2]
+ add r2, r2, r3 @ r2 = struct mpidr_hash phys address
+ /*
+ * This ldmia relies on the memory layout of the mpidr_hash
+ * struct mpidr_hash.
+ */
+ ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts
+ compute_mpidr_hash r1, r4, r5, r6, r0, r3
+1:
+ adr r0, _sleep_save_sp
+ ldr r0, [r0, #SLEEP_SAVE_SP_PHYS]
+ ldr r0, [r0, r1, lsl #2]
+
setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off
@ load phys pgd, stack, resume fn
ARM( ldmia r0!, {r1, sp, pc} )
@@ -98,7 +155,11 @@ THUMB( mov sp, r2 )
THUMB( bx r3 )
ENDPROC(cpu_resume)
-sleep_save_sp:
- .rept CONFIG_NR_CPUS
- .long 0 @ preserve stack phys ptr here
- .endr
+ .align 2
+mpidr_hash_ptr:
+ .long mpidr_hash - . @ mpidr_hash struct offset
+
+ .type sleep_save_sp, #object
+ENTRY(sleep_save_sp)
+_sleep_save_sp:
+ .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb4..c5fb546 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -45,6 +45,7 @@
#include <asm/smp_plat.h>
#include <asm/virt.h>
#include <asm/mach/arch.h>
+#include <asm/mpu.h>
/*
* as from 2.5, kernels no longer have an init_tasks structure
@@ -78,6 +79,13 @@ void __init smp_set_ops(struct smp_operations *ops)
smp_ops = *ops;
};
+static unsigned long get_arch_pgd(pgd_t *pgd)
+{
+ phys_addr_t pgdir = virt_to_phys(pgd);
+ BUG_ON(pgdir & ARCH_PGD_MASK);
+ return pgdir >> ARCH_PGD_SHIFT;
+}
+
int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
@@ -87,8 +95,14 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
* its stack and the page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
- secondary_data.pgdir = virt_to_phys(idmap_pgd);
- secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+#ifdef CONFIG_ARM_MPU
+ secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr;
+#endif
+
+#ifdef CONFIG_MMU
+ secondary_data.pgdir = get_arch_pgd(idmap_pgd);
+ secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
+#endif
__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
@@ -112,9 +126,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
}
- secondary_data.stack = NULL;
- secondary_data.pgdir = 0;
+ memset(&secondary_data, 0, sizeof(secondary_data));
return ret;
}
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 9a52a07..a98b62d 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void)
static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
{
- int cpu, this_cpu;
+ int this_cpu;
cpumask_t mask = { CPU_BITS_NONE };
if (!erratum_a15_798181())
@@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
dummy_flush_tlb_a15_erratum();
this_cpu = get_cpu();
- for_each_online_cpu(cpu) {
- if (cpu == this_cpu)
- continue;
- /*
- * We only need to send an IPI if the other CPUs are running
- * the same ASID as the one being invalidated. There is no
- * need for locking around the active_asids check since the
- * switch_mm() function has at least one dmb() (as required by
- * this workaround) in case a context switch happens on
- * another CPU after the condition below.
- */
- if (atomic64_read(&mm->context.id) ==
- atomic64_read(&per_cpu(active_asids, cpu)))
- cpumask_set_cpu(cpu, &mask);
- }
+ a15_erratum_get_cpumask(this_cpu, mm, &mask);
smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
put_cpu();
}
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index c59c97e..41cf3cb 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -1,15 +1,54 @@
#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
#include <asm/idmap.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/memory.h>
+#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/tlbflush.h>
extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
extern void cpu_resume_mmu(void);
+#ifdef CONFIG_MMU
+/*
+ * Hide the first two arguments to __cpu_suspend - these are an implementation
+ * detail which platform code shouldn't have to know about.
+ */
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+ struct mm_struct *mm = current->active_mm;
+ int ret;
+
+ if (!idmap_pgd)
+ return -EINVAL;
+
+ /*
+ * Provide a temporary page table with an identity mapping for
+ * the MMU-enable code, required for resuming. On successful
+ * resume (indicated by a zero return code), we need to switch
+ * back to the correct page tables.
+ */
+ ret = __cpu_suspend(arg, fn);
+ if (ret == 0) {
+ cpu_switch_mm(mm->pgd, mm);
+ local_flush_bp_all();
+ local_flush_tlb_all();
+ }
+
+ return ret;
+}
+#else
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+ return __cpu_suspend(arg, fn);
+}
+#define idmap_pgd NULL
+#endif
+
/*
* This is called by __cpu_suspend() to save the state, and do whatever
* flushing is required to ensure that when the CPU goes to sleep we have
@@ -47,30 +86,19 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
virt_to_phys(save_ptr) + sizeof(*save_ptr));
}
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
-int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
-{
- struct mm_struct *mm = current->active_mm;
- int ret;
-
- if (!idmap_pgd)
- return -EINVAL;
+extern struct sleep_save_sp sleep_save_sp;
- /*
- * Provide a temporary page table with an identity mapping for
- * the MMU-enable code, required for resuming. On successful
- * resume (indicated by a zero return code), we need to switch
- * back to the correct page tables.
- */
- ret = __cpu_suspend(arg, fn);
- if (ret == 0) {
- cpu_switch_mm(mm->pgd, mm);
- local_flush_bp_all();
- local_flush_tlb_all();
- }
+static int cpu_suspend_alloc_sp(void)
+{
+ void *ctx_ptr;
+ /* ctx_ptr is an array of physical addresses */
+ ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL);
- return ret;
+ if (WARN_ON(!ctx_ptr))
+ return -ENOMEM;
+ sleep_save_sp.save_ptr_stash = ctx_ptr;
+ sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
+ sync_cache_w(&sleep_save_sp);
+ return 0;
}
+early_initcall(cpu_suspend_alloc_sp);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 486e12a..cab094c 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -581,7 +581,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
return regs->ARM_r0;
case NR(set_tls):
- thread->tp_value = regs->ARM_r0;
+ thread->tp_value[0] = regs->ARM_r0;
if (tls_emu)
return 0;
if (has_tls_reg) {
@@ -699,7 +699,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
int reg = (instr >> 12) & 15;
if (reg == 15)
return 1;
- regs->uregs[reg] = current_thread_info()->tp_value;
+ regs->uregs[reg] = current_thread_info()->tp_value[0];
regs->ARM_pc += 4;
return 0;
}