diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-26 12:20:00 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-26 12:20:00 -0700 |
commit | e8a0b37d28ace440776c0a4fe3c65f5832a9a7ee (patch) | |
tree | 9475608c22849e5039d4dc00d3f8e3a30eeac2b9 /arch/arm | |
parent | abea9629486cf973369a641e190e739b3010bb03 (diff) | |
parent | 002af195a8c720ca47c7884fd0390f3b327423b9 (diff) | |
download | op-kernel-dev-e8a0b37d28ace440776c0a4fe3c65f5832a9a7ee.zip op-kernel-dev-e8a0b37d28ace440776c0a4fe3c65f5832a9a7ee.tar.gz |
Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
Pull ARM updates from Russell King:
"Bigger items included in this update are:
- A series of updates from Arnd for ARM randconfig build failures
- Updates from Dmitry for StrongARM SA-1100 to move IRQ handling to
drivers/irqchip/
- Move ARMs SP804 timer to drivers/clocksource/
- Perf updates from Mark Rutland in preparation to move the ARM perf
code into drivers/ so it can be shared with ARM64.
- MCPM updates from Nicolas
- Add support for taking platform serial number from DT
- Re-implement Keystone2 physical address space switch to conform to
architecture requirements
- Clean up ARMv7 LPAE code, which goes in hand with the Keystone2
changes.
- L2C cleanups to avoid unlocking caches if we're prevented by the
secure support to unlock.
- Avoid cleaning a potentially dirty cache containing stale data on
CPU initialisation
- Add ARM-only entry point for secondary startup (for machines that
can only call into a Thumb kernel in ARM mode). Same thing is also
done for the resume entry point.
- Provide arch_irqs_disabled via asm-generic
- Enlarge ARMv7M vector table
- Always use BFD linker for VDSO, as gold doesn't accept some of the
options we need.
- Fix an incorrect BSYM (for Thumb symbols) usage, and convert all
BSYM compiler macros to a "badr" (for branch address).
- Shut up compiler warnings provoked by our cmpxchg() implementation.
- Ensure bad xchg sizes fail to link"
* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (75 commits)
ARM: Fix build if CLKDEV_LOOKUP is not configured
ARM: fix new BSYM() usage introduced via for-arm-soc branch
ARM: 8383/1: nommu: avoid deprecated source register on mov
ARM: 8391/1: l2c: add options to overwrite prefetching behavior
ARM: 8390/1: irqflags: Get arch_irqs_disabled from asm-generic
ARM: 8387/1: arm/mm/dma-mapping.c: Add arm_coherent_dma_mmap
ARM: 8388/1: tcm: Don't crash when TCM banks are protected by TrustZone
ARM: 8384/1: VDSO: force use of BFD linker
ARM: 8385/1: VDSO: group link options
ARM: cmpxchg: avoid warnings from macro-ized cmpxchg() implementations
ARM: remove __bad_xchg definition
ARM: 8369/1: ARMv7M: define size of vector table for Vybrid
ARM: 8382/1: clocksource: make ARM_TIMER_SP804 depend on GENERIC_SCHED_CLOCK
ARM: 8366/1: move Dual-Timer SP804 driver to drivers/clocksource
ARM: 8365/1: introduce sp804_timer_disable and remove arm_timer.h inclusion
ARM: 8364/1: fix BE32 module loading
ARM: 8360/1: add secondary_startup_arm prototype in header file
ARM: 8359/1: correct secondary_startup_arm mode
ARM: proc-v7: sanitise and document registers around errata
ARM: proc-v7: clean up MIDR access
...
Diffstat (limited to 'arch/arm')
72 files changed, 1770 insertions, 1840 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cf292d3..a750c14 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -33,8 +33,8 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL - select HAVE_ARCH_KGDB + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT @@ -45,7 +45,7 @@ config ARM select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS if MMU - select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) + select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) @@ -59,10 +59,10 @@ config ARM select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ - select HAVE_KPROBES if !XIP_KERNEL + select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_MEMBLOCK - select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND + select HAVE_MOD_ARCH_SPECIFIC select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_OPTPROBES if !THUMB2_KERNEL select HAVE_PERF_EVENTS @@ -173,7 +173,7 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT bool - default y + default !CPU_V7M config RWSEM_XCHGADD_ALGORITHM bool @@ -1010,11 +1010,6 @@ config PLAT_PXA config PLAT_VERSATILE bool -config ARM_TIMER_SP804 - bool - select CLKSRC_MMIO - select CLKSRC_OF if OF - source "arch/arm/firmware/Kconfig" source arch/arm/mm/Kconfig @@ -1342,6 +1337,7 @@ config SMP depends on GENERIC_CLOCKEVENTS depends on HAVE_SMP depends on MMU || ARM_MPU + select IRQ_WORK help This enables support for systems with more than one CPU. If you have a system with only one CPU, say N. If you have a system with more @@ -1717,6 +1713,21 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE config ARCH_WANT_GENERAL_HUGETLB def_bool y +config ARM_MODULE_PLTS + bool "Use PLTs to allow module memory to spill over into vmalloc area" + depends on MODULES + help + Allocate PLTs when loading modules so that jumps and calls whose + targets are too far away for their relative offsets to be encoded + in the instructions themselves can be bounced via veneers in the + module's PLT. This allows modules to be allocated in the generic + vmalloc area after the dedicated module memory area has been + exhausted. The modules will use slightly more memory, but after + rounding up to page size, the actual memory footprint is usually + the same. + + Say y if you are getting out of memory errors while loading modules + source "mm/Kconfig" config FORCE_MAX_ZONEORDER @@ -1987,6 +1998,7 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) + depends on !CPU_V7M help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index a6b5d0e..f1b1579 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -5,6 +5,7 @@ source "lib/Kconfig.debug" config ARM_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL + depends on MMU select DEBUG_FS ---help--- Say Y here if you want to show the kernel pagetable layout in a diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 2a4fae7..07ab3d2 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -19,6 +19,10 @@ LDFLAGS_vmlinux += --be8 LDFLAGS_MODULE += --be8 endif +ifeq ($(CONFIG_ARM_MODULE_PLTS),y) +LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds +endif + OBJCOPYFLAGS :=-O binary -R .comment -S GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 6e1fb2b..7a13aeb 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -103,6 +103,8 @@ extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \ hyp-stub.S +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING + ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2c45b57..06e983f 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -130,7 +130,7 @@ start: .endr ARM( mov r0, r0 ) ARM( b 1f ) - THUMB( adr r12, BSYM(1f) ) + THUMB( badr r12, 1f ) THUMB( bx r12 ) .word _magic_sig @ Magic numbers to help the loader @@ -447,7 +447,7 @@ dtb_check_done: bl cache_clean_flush - adr r0, BSYM(restart) + badr r0, restart add r0, r0, r6 mov pc, r0 diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 70b1eff..6ee5959 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_SHARP_LOCOMO) += locomo.o obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o -obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o CFLAGS_REMOVE_mcpm_entry.o = -pg AFLAGS_mcpm_head.o := -march=armv7-a diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 5f8a52a..a923524 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -20,6 +20,126 @@ #include <asm/cputype.h> #include <asm/suspend.h> +/* + * The public API for this code is documented in arch/arm/include/asm/mcpm.h. + * For a comprehensive description of the main algorithm used here, please + * see Documentation/arm/cluster-pm-race-avoidance.txt. + */ + +struct sync_struct mcpm_sync; + +/* + * __mcpm_cpu_going_down: Indicates that the cpu is being torn down. + * This must be called at the point of committing to teardown of a CPU. + * The CPU cache (SCTRL.C bit) is expected to still be active. + */ +static void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster) +{ + mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN; + sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); +} + +/* + * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the + * cluster can be torn down without disrupting this CPU. + * To avoid deadlocks, this must be called before a CPU is powered down. + * The CPU cache (SCTRL.C bit) is expected to be off. + * However L2 cache might or might not be active. + */ +static void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster) +{ + dmb(); + mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN; + sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); + sev(); +} + +/* + * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section. + * @state: the final state of the cluster: + * CLUSTER_UP: no destructive teardown was done and the cluster has been + * restored to the previous state (CPU cache still active); or + * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off + * (CPU cache disabled, L2 cache either enabled or disabled). + */ +static void __mcpm_outbound_leave_critical(unsigned int cluster, int state) +{ + dmb(); + mcpm_sync.clusters[cluster].cluster = state; + sync_cache_w(&mcpm_sync.clusters[cluster].cluster); + sev(); +} + +/* + * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section. + * This function should be called by the last man, after local CPU teardown + * is complete. CPU cache expected to be active. + * + * Returns: + * false: the critical section was not entered because an inbound CPU was + * observed, or the cluster is already being set up; + * true: the critical section was entered: it is now safe to tear down the + * cluster. + */ +static bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster) +{ + unsigned int i; + struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster]; + + /* Warn inbound CPUs that the cluster is being torn down: */ + c->cluster = CLUSTER_GOING_DOWN; + sync_cache_w(&c->cluster); + + /* Back out if the inbound cluster is already in the critical region: */ + sync_cache_r(&c->inbound); + if (c->inbound == INBOUND_COMING_UP) + goto abort; + + /* + * Wait for all CPUs to get out of the GOING_DOWN state, so that local + * teardown is complete on each CPU before tearing down the cluster. + * + * If any CPU has been woken up again from the DOWN state, then we + * shouldn't be taking the cluster down at all: abort in that case. + */ + sync_cache_r(&c->cpus); + for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) { + int cpustate; + + if (i == cpu) + continue; + + while (1) { + cpustate = c->cpus[i].cpu; + if (cpustate != CPU_GOING_DOWN) + break; + + wfe(); + sync_cache_r(&c->cpus[i].cpu); + } + + switch (cpustate) { + case CPU_DOWN: + continue; + + default: + goto abort; + } + } + + return true; + +abort: + __mcpm_outbound_leave_critical(cluster, CLUSTER_UP); + return false; +} + +static int __mcpm_cluster_state(unsigned int cluster) +{ + sync_cache_r(&mcpm_sync.clusters[cluster].cluster); + return mcpm_sync.clusters[cluster].cluster; +} + extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER]; void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) @@ -78,16 +198,11 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster) bool cpu_is_down, cluster_is_down; int ret = 0; + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); if (!platform_ops) return -EUNATCH; /* try not to shadow power_up errors */ might_sleep(); - /* backward compatibility callback */ - if (platform_ops->power_up) - return platform_ops->power_up(cpu, cluster); - - pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); - /* * Since this is called with IRQs enabled, and no arch_spin_lock_irq * variant exists, we need to disable IRQs manually here. @@ -128,29 +243,17 @@ void mcpm_cpu_power_down(void) bool cpu_going_down, last_man; phys_reset_t phys_reset; + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); if (WARN_ON_ONCE(!platform_ops)) return; BUG_ON(!irqs_disabled()); - /* - * Do this before calling into the power_down method, - * as it might not always be safe to do afterwards. - */ setup_mm_for_reboot(); - /* backward compatibility callback */ - if (platform_ops->power_down) { - platform_ops->power_down(); - goto not_dead; - } - - mpidr = read_cpuid_mpidr(); - cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); - __mcpm_cpu_going_down(cpu, cluster); - arch_spin_lock(&mcpm_lock); BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); @@ -187,7 +290,6 @@ void mcpm_cpu_power_down(void) if (cpu_going_down) wfi(); -not_dead: /* * It is possible for a power_up request to happen concurrently * with a power_down request for the same CPU. In this case the @@ -219,22 +321,11 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster) return ret; } -void mcpm_cpu_suspend(u64 expected_residency) +void mcpm_cpu_suspend(void) { if (WARN_ON_ONCE(!platform_ops)) return; - /* backward compatibility callback */ - if (platform_ops->suspend) { - phys_reset_t phys_reset; - BUG_ON(!irqs_disabled()); - setup_mm_for_reboot(); - platform_ops->suspend(expected_residency); - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset(virt_to_phys(mcpm_entry_point)); - BUG(); - } - /* Some platforms might have to enable special resume modes, etc. */ if (platform_ops->cpu_suspend_prepare) { unsigned int mpidr = read_cpuid_mpidr(); @@ -256,12 +347,6 @@ int mcpm_cpu_powered_up(void) if (!platform_ops) return -EUNATCH; - /* backward compatibility callback */ - if (platform_ops->powered_up) { - platform_ops->powered_up(); - return 0; - } - mpidr = read_cpuid_mpidr(); cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); @@ -334,120 +419,6 @@ int __init mcpm_loopback(void (*cache_disable)(void)) #endif -struct sync_struct mcpm_sync; - -/* - * __mcpm_cpu_going_down: Indicates that the cpu is being torn down. - * This must be called at the point of committing to teardown of a CPU. - * The CPU cache (SCTRL.C bit) is expected to still be active. - */ -void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster) -{ - mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN; - sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); -} - -/* - * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the - * cluster can be torn down without disrupting this CPU. - * To avoid deadlocks, this must be called before a CPU is powered down. - * The CPU cache (SCTRL.C bit) is expected to be off. - * However L2 cache might or might not be active. - */ -void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster) -{ - dmb(); - mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN; - sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); - sev(); -} - -/* - * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section. - * @state: the final state of the cluster: - * CLUSTER_UP: no destructive teardown was done and the cluster has been - * restored to the previous state (CPU cache still active); or - * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off - * (CPU cache disabled, L2 cache either enabled or disabled). - */ -void __mcpm_outbound_leave_critical(unsigned int cluster, int state) -{ - dmb(); - mcpm_sync.clusters[cluster].cluster = state; - sync_cache_w(&mcpm_sync.clusters[cluster].cluster); - sev(); -} - -/* - * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section. - * This function should be called by the last man, after local CPU teardown - * is complete. CPU cache expected to be active. - * - * Returns: - * false: the critical section was not entered because an inbound CPU was - * observed, or the cluster is already being set up; - * true: the critical section was entered: it is now safe to tear down the - * cluster. - */ -bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster) -{ - unsigned int i; - struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster]; - - /* Warn inbound CPUs that the cluster is being torn down: */ - c->cluster = CLUSTER_GOING_DOWN; - sync_cache_w(&c->cluster); - - /* Back out if the inbound cluster is already in the critical region: */ - sync_cache_r(&c->inbound); - if (c->inbound == INBOUND_COMING_UP) - goto abort; - - /* - * Wait for all CPUs to get out of the GOING_DOWN state, so that local - * teardown is complete on each CPU before tearing down the cluster. - * - * If any CPU has been woken up again from the DOWN state, then we - * shouldn't be taking the cluster down at all: abort in that case. - */ - sync_cache_r(&c->cpus); - for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) { - int cpustate; - - if (i == cpu) - continue; - - while (1) { - cpustate = c->cpus[i].cpu; - if (cpustate != CPU_GOING_DOWN) - break; - - wfe(); - sync_cache_r(&c->cpus[i].cpu); - } - - switch (cpustate) { - case CPU_DOWN: - continue; - - default: - goto abort; - } - } - - return true; - -abort: - __mcpm_outbound_leave_critical(cluster, CLUSTER_UP); - return false; -} - -int __mcpm_cluster_state(unsigned int cluster) -{ - sync_cache_r(&mcpm_sync.clusters[cluster].cluster); - return mcpm_sync.clusters[cluster].cluster; -} - extern unsigned long mcpm_power_up_setup_phys; int __init mcpm_sync_init( diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index e02db4b..08b3bb9 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -49,7 +49,7 @@ ENTRY(mcpm_entry_point) ARM_BE8(setend be) - THUMB( adr r12, BSYM(1f) ) + THUMB( badr r12, 1f ) THUMB( bx r12 ) THUMB( .thumb ) 1: diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c deleted file mode 100644 index 1921132..0000000 --- a/arch/arm/common/timer-sp.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * linux/arch/arm/common/timer-sp.c - * - * Copyright (C) 1999 - 2003 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/clk.h> -#include <linux/clocksource.h> -#include <linux/clockchips.h> -#include <linux/err.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/sched_clock.h> - -#include <asm/hardware/arm_timer.h> -#include <asm/hardware/timer-sp.h> - -static long __init sp804_get_clock_rate(struct clk *clk) -{ - long rate; - int err; - - err = clk_prepare(clk); - if (err) { - pr_err("sp804: clock failed to prepare: %d\n", err); - clk_put(clk); - return err; - } - - err = clk_enable(clk); - if (err) { - pr_err("sp804: clock failed to enable: %d\n", err); - clk_unprepare(clk); - clk_put(clk); - return err; - } - - rate = clk_get_rate(clk); - if (rate < 0) { - pr_err("sp804: clock failed to get rate: %ld\n", rate); - clk_disable(clk); - clk_unprepare(clk); - clk_put(clk); - } - - return rate; -} - -static void __iomem *sched_clock_base; - -static u64 notrace sp804_read(void) -{ - return ~readl_relaxed(sched_clock_base + TIMER_VALUE); -} - -void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, - const char *name, - struct clk *clk, - int use_sched_clock) -{ - long rate; - - if (!clk) { - clk = clk_get_sys("sp804", name); - if (IS_ERR(clk)) { - pr_err("sp804: clock not found: %d\n", - (int)PTR_ERR(clk)); - return; - } - } - - rate = sp804_get_clock_rate(clk); - - if (rate < 0) - return; - - /* setup timer 0 as free-running clocksource */ - writel(0, base + TIMER_CTRL); - writel(0xffffffff, base + TIMER_LOAD); - writel(0xffffffff, base + TIMER_VALUE); - writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, - base + TIMER_CTRL); - - clocksource_mmio_init(base + TIMER_VALUE, name, - rate, 200, 32, clocksource_mmio_readl_down); - - if (use_sched_clock) { - sched_clock_base = base; - sched_clock_register(sp804_read, 32, rate); - } -} - - -static void __iomem *clkevt_base; -static unsigned long clkevt_reload; - -/* - * IRQ handler for the timer - */ -static irqreturn_t sp804_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = dev_id; - - /* clear the interrupt */ - writel(1, clkevt_base + TIMER_INTCLR); - - evt->event_handler(evt); - - return IRQ_HANDLED; -} - -static void sp804_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long ctrl = TIMER_CTRL_32BIT | TIMER_CTRL_IE; - - writel(ctrl, clkevt_base + TIMER_CTRL); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - writel(clkevt_reload, clkevt_base + TIMER_LOAD); - ctrl |= TIMER_CTRL_PERIODIC | TIMER_CTRL_ENABLE; - break; - - case CLOCK_EVT_MODE_ONESHOT: - /* period set, and timer enabled in 'next_event' hook */ - ctrl |= TIMER_CTRL_ONESHOT; - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - default: - break; - } - - writel(ctrl, clkevt_base + TIMER_CTRL); -} - -static int sp804_set_next_event(unsigned long next, - struct clock_event_device *evt) -{ - unsigned long ctrl = readl(clkevt_base + TIMER_CTRL); - - writel(next, clkevt_base + TIMER_LOAD); - writel(ctrl | TIMER_CTRL_ENABLE, clkevt_base + TIMER_CTRL); - - return 0; -} - -static struct clock_event_device sp804_clockevent = { - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | - CLOCK_EVT_FEAT_DYNIRQ, - .set_mode = sp804_set_mode, - .set_next_event = sp804_set_next_event, - .rating = 300, -}; - -static struct irqaction sp804_timer_irq = { - .name = "timer", - .flags = IRQF_TIMER | IRQF_IRQPOLL, - .handler = sp804_timer_interrupt, - .dev_id = &sp804_clockevent, -}; - -void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struct clk *clk, const char *name) -{ - struct clock_event_device *evt = &sp804_clockevent; - long rate; - - if (!clk) - clk = clk_get_sys("sp804", name); - if (IS_ERR(clk)) { - pr_err("sp804: %s clock not found: %d\n", name, - (int)PTR_ERR(clk)); - return; - } - - rate = sp804_get_clock_rate(clk); - if (rate < 0) - return; - - clkevt_base = base; - clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ); - evt->name = name; - evt->irq = irq; - evt->cpumask = cpu_possible_mask; - - writel(0, base + TIMER_CTRL); - - setup_irq(irq, &sp804_timer_irq); - clockevents_config_and_register(evt, rate, 0xf, 0xffffffff); -} - -static void __init sp804_of_init(struct device_node *np) -{ - static bool initialized = false; - void __iomem *base; - int irq; - u32 irq_num = 0; - struct clk *clk1, *clk2; - const char *name = of_get_property(np, "compatible", NULL); - - base = of_iomap(np, 0); - if (WARN_ON(!base)) - return; - - /* Ensure timers are disabled */ - writel(0, base + TIMER_CTRL); - writel(0, base + TIMER_2_BASE + TIMER_CTRL); - - if (initialized || !of_device_is_available(np)) - goto err; - - clk1 = of_clk_get(np, 0); - if (IS_ERR(clk1)) - clk1 = NULL; - - /* Get the 2nd clock if the timer has 3 timer clocks */ - if (of_count_phandle_with_args(np, "clocks", "#clock-cells") == 3) { - clk2 = of_clk_get(np, 1); - if (IS_ERR(clk2)) { - pr_err("sp804: %s clock not found: %d\n", np->name, - (int)PTR_ERR(clk2)); - clk2 = NULL; - } - } else - clk2 = clk1; - - irq = irq_of_parse_and_map(np, 0); - if (irq <= 0) - goto err; - - of_property_read_u32(np, "arm,sp804-has-irq", &irq_num); - if (irq_num == 2) { - __sp804_clockevents_init(base + TIMER_2_BASE, irq, clk2, name); - __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1); - } else { - __sp804_clockevents_init(base, irq, clk1 , name); - __sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE, - name, clk2, 1); - } - initialized = true; - - return; -err: - iounmap(base); -} -CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init); - -static void __init integrator_cp_of_init(struct device_node *np) -{ - static int init_count = 0; - void __iomem *base; - int irq; - const char *name = of_get_property(np, "compatible", NULL); - struct clk *clk; - - base = of_iomap(np, 0); - if (WARN_ON(!base)) - return; - clk = of_clk_get(np, 0); - if (WARN_ON(IS_ERR(clk))) - return; - - /* Ensure timer is disabled */ - writel(0, base + TIMER_CTRL); - - if (init_count == 2 || !of_device_is_available(np)) - goto err; - - if (!init_count) - __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); - else { - irq = irq_of_parse_and_map(np, 0); - if (irq <= 0) - goto err; - - __sp804_clockevents_init(base, irq, clk, name); - } - - init_count++; - return; -err: - iounmap(base); -} -CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init); diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 186270b..4abe572 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -178,6 +178,21 @@ .endm /* + * Assembly version of "adr rd, BSYM(sym)". This should only be used to + * reference local symbols in the same assembly file which are to be + * resolved by the assembler. Other usage is undefined. + */ + .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo + .macro badr\c, rd, sym +#ifdef CONFIG_THUMB2_KERNEL + adr\c \rd, \sym + 1 +#else + adr\c \rd, \sym +#endif + .endm + .endr + +/* * Get current thread_info. */ .macro get_thread_info, rd @@ -326,7 +341,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) bne 1f orr \reg, \reg, #PSR_A_BIT - adr lr, BSYM(2f) + badr lr, 2f msr spsr_cxsf, \reg __MSR_ELR_HYP(14) __ERET diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 2d46862..4812cda 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -482,10 +482,17 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) : : : "r0","r1","r2","r3","r4","r5","r6","r7", \ "r9","r10","lr","memory" ) +#ifdef CONFIG_MMU int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); +#else +static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +#endif #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index abb2c37..1692a05 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -94,6 +94,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; #endif default: + /* Cause a link-time error, the xchg() size is not supported */ __bad_xchg(ptr, size), ret = 0; break; } @@ -102,8 +103,10 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size return ret; } -#define xchg(ptr,x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define xchg(ptr, x) ({ \ + (__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), \ + sizeof(*(ptr))); \ +}) #include <asm-generic/cmpxchg-local.h> @@ -118,14 +121,16 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make * them available. */ -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ - (unsigned long)(n), sizeof(*(ptr)))) +#define cmpxchg_local(ptr, o, n) ({ \ + (__typeof(*ptr))__cmpxchg_local_generic((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ +}) + #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) -#ifndef CONFIG_SMP #include <asm-generic/cmpxchg.h> -#endif #else /* min ARCH >= ARMv6 */ @@ -201,11 +206,12 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, return ret; } -#define cmpxchg(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) +#define cmpxchg(ptr,o,n) ({ \ + (__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ +}) static inline unsigned long __cmpxchg_local(volatile void *ptr, unsigned long old, @@ -227,6 +233,13 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, return ret; } +#define cmpxchg_local(ptr, o, n) ({ \ + (__typeof(*ptr))__cmpxchg_local((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ +}) + static inline unsigned long long __cmpxchg64(unsigned long long *ptr, unsigned long long old, unsigned long long new) @@ -252,6 +265,14 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr, return oldval; } +#define cmpxchg64_relaxed(ptr, o, n) ({ \ + (__typeof__(*(ptr)))__cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ +}) + +#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n)) + static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr, unsigned long long old, unsigned long long new) @@ -265,23 +286,11 @@ static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr, return ret; } -#define cmpxchg_local(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) - -#define cmpxchg64(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n))) - -#define cmpxchg64_relaxed(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n))) - -#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n)) +#define cmpxchg64(ptr, o, n) ({ \ + (__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ +}) #endif /* __LINUX_ARM_ARCH__ >= 6 */ diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S index 469a2b3..609184f 100644 --- a/arch/arm/include/asm/entry-macro-multi.S +++ b/arch/arm/include/asm/entry-macro-multi.S @@ -10,7 +10,7 @@ @ @ routine called with r0 = irq number, r1 = struct pt_regs * @ - adrne lr, BSYM(1b) + badrne lr, 1b bne asm_do_IRQ #ifdef CONFIG_SMP @@ -23,7 +23,7 @@ ALT_SMP(test_for_ipi r0, r2, r6, lr) ALT_UP_B(9997f) movne r1, sp - adrne lr, BSYM(1b) + badrne lr, 1b bne do_IPI #endif 9997: diff --git a/arch/arm/include/asm/hardware/arm_timer.h b/arch/arm/include/asm/hardware/arm_timer.h deleted file mode 100644 index d6030ff..0000000 --- a/arch/arm/include/asm/hardware/arm_timer.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef __ASM_ARM_HARDWARE_ARM_TIMER_H -#define __ASM_ARM_HARDWARE_ARM_TIMER_H - -/* - * ARM timer implementation, found in Integrator, Versatile and Realview - * platforms. Not all platforms support all registers and bits in these - * registers, so we mark them with A for Integrator AP, C for Integrator - * CP, V for Versatile and R for Realview. - * - * Integrator AP has 16-bit timers, Integrator CP, Versatile and Realview - * can have 16-bit or 32-bit selectable via a bit in the control register. - * - * Every SP804 contains two identical timers. - */ -#define TIMER_1_BASE 0x00 -#define TIMER_2_BASE 0x20 - -#define TIMER_LOAD 0x00 /* ACVR rw */ -#define TIMER_VALUE 0x04 /* ACVR ro */ -#define TIMER_CTRL 0x08 /* ACVR rw */ -#define TIMER_CTRL_ONESHOT (1 << 0) /* CVR */ -#define TIMER_CTRL_32BIT (1 << 1) /* CVR */ -#define TIMER_CTRL_DIV1 (0 << 2) /* ACVR */ -#define TIMER_CTRL_DIV16 (1 << 2) /* ACVR */ -#define TIMER_CTRL_DIV256 (2 << 2) /* ACVR */ -#define TIMER_CTRL_IE (1 << 5) /* VR */ -#define TIMER_CTRL_PERIODIC (1 << 6) /* ACVR */ -#define TIMER_CTRL_ENABLE (1 << 7) /* ACVR */ - -#define TIMER_INTCLR 0x0c /* ACVR wo */ -#define TIMER_RIS 0x10 /* CVR ro */ -#define TIMER_MIS 0x14 /* CVR ro */ -#define TIMER_BGLOAD 0x18 /* CVR rw */ - -#endif diff --git a/arch/arm/include/asm/hardware/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h deleted file mode 100644 index bb28af7..0000000 --- a/arch/arm/include/asm/hardware/timer-sp.h +++ /dev/null @@ -1,23 +0,0 @@ -struct clk; - -void __sp804_clocksource_and_sched_clock_init(void __iomem *, - const char *, struct clk *, int); -void __sp804_clockevents_init(void __iomem *, unsigned int, - struct clk *, const char *); - -static inline void sp804_clocksource_init(void __iomem *base, const char *name) -{ - __sp804_clocksource_and_sched_clock_init(base, name, NULL, 0); -} - -static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base, - const char *name) -{ - __sp804_clocksource_and_sched_clock_init(base, name, NULL, 1); -} - -static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name) -{ - __sp804_clockevents_init(base, irq, NULL, name); - -} diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 1b7677d..1c3938f 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -23,6 +23,7 @@ #ifdef __KERNEL__ +#include <linux/string.h> #include <linux/types.h> #include <linux/blk_types.h> #include <asm/byteorder.h> @@ -73,17 +74,16 @@ void __raw_readsl(const volatile void __iomem *addr, void *data, int longlen); static inline void __raw_writew(u16 val, volatile void __iomem *addr) { asm volatile("strh %1, %0" - : "+Q" (*(volatile u16 __force *)addr) - : "r" (val)); + : : "Q" (*(volatile u16 __force *)addr), "r" (val)); } #define __raw_readw __raw_readw static inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; - asm volatile("ldrh %1, %0" - : "+Q" (*(volatile u16 __force *)addr), - "=r" (val)); + asm volatile("ldrh %0, %1" + : "=r" (val) + : "Q" (*(volatile u16 __force *)addr)); return val; } #endif @@ -92,25 +92,23 @@ static inline u16 __raw_readw(const volatile void __iomem *addr) static inline void __raw_writeb(u8 val, volatile void __iomem *addr) { asm volatile("strb %1, %0" - : "+Qo" (*(volatile u8 __force *)addr) - : "r" (val)); + : : "Qo" (*(volatile u8 __force *)addr), "r" (val)); } #define __raw_writel __raw_writel static inline void __raw_writel(u32 val, volatile void __iomem *addr) { asm volatile("str %1, %0" - : "+Qo" (*(volatile u32 __force *)addr) - : "r" (val)); + : : "Qo" (*(volatile u32 __force *)addr), "r" (val)); } #define __raw_readb __raw_readb static inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; - asm volatile("ldrb %1, %0" - : "+Qo" (*(volatile u8 __force *)addr), - "=r" (val)); + asm volatile("ldrb %0, %1" + : "=r" (val) + : "Qo" (*(volatile u8 __force *)addr)); return val; } @@ -118,9 +116,9 @@ static inline u8 __raw_readb(const volatile void __iomem *addr) static inline u32 __raw_readl(const volatile void __iomem *addr) { u32 val; - asm volatile("ldr %1, %0" - : "+Qo" (*(volatile u32 __force *)addr), - "=r" (val)); + asm volatile("ldr %0, %1" + : "=r" (val) + : "Qo" (*(volatile u32 __force *)addr)); return val; } @@ -319,9 +317,33 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define writesw(p,d,l) __raw_writesw(p,d,l) #define writesl(p,d,l) __raw_writesl(p,d,l) +#ifndef __ARMBE__ +static inline void memset_io(volatile void __iomem *dst, unsigned c, + size_t count) +{ + memset((void __force *)dst, c, count); +} +#define memset_io(dst,c,count) memset_io(dst,c,count) + +static inline void memcpy_fromio(void *to, const volatile void __iomem *from, + size_t count) +{ + memcpy(to, (const void __force *)from, count); +} +#define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count) + +static inline void memcpy_toio(volatile void __iomem *to, const void *from, + size_t count) +{ + memcpy((void __force *)to, from, count); +} +#define memcpy_toio(to,from,count) memcpy_toio(to,from,count) + +#else #define memset_io(c,v,l) _memset_io(c,(v),(l)) #define memcpy_fromio(a,c,l) _memcpy_fromio((a),c,(l)) #define memcpy_toio(c,a,l) _memcpy_toio(c,(a),(l)) +#endif #endif /* readl */ diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h index 3b763d6..4390814 100644 --- a/arch/arm/include/asm/irqflags.h +++ b/arch/arm/include/asm/irqflags.h @@ -20,6 +20,7 @@ #if __LINUX_ARM_ARCH__ >= 6 +#define arch_local_irq_save arch_local_irq_save static inline unsigned long arch_local_irq_save(void) { unsigned long flags; @@ -31,6 +32,7 @@ static inline unsigned long arch_local_irq_save(void) return flags; } +#define arch_local_irq_enable arch_local_irq_enable static inline void arch_local_irq_enable(void) { asm volatile( @@ -40,6 +42,7 @@ static inline void arch_local_irq_enable(void) : "memory", "cc"); } +#define arch_local_irq_disable arch_local_irq_disable static inline void arch_local_irq_disable(void) { asm volatile( @@ -56,6 +59,7 @@ static inline void arch_local_irq_disable(void) /* * Save the current interrupt enable state & disable IRQs */ +#define arch_local_irq_save arch_local_irq_save static inline unsigned long arch_local_irq_save(void) { unsigned long flags, temp; @@ -73,6 +77,7 @@ static inline unsigned long arch_local_irq_save(void) /* * Enable IRQs */ +#define arch_local_irq_enable arch_local_irq_enable static inline void arch_local_irq_enable(void) { unsigned long temp; @@ -88,6 +93,7 @@ static inline void arch_local_irq_enable(void) /* * Disable IRQs */ +#define arch_local_irq_disable arch_local_irq_disable static inline void arch_local_irq_disable(void) { unsigned long temp; @@ -135,6 +141,7 @@ static inline void arch_local_irq_disable(void) /* * Save the current interrupt enable state. */ +#define arch_local_save_flags arch_local_save_flags static inline unsigned long arch_local_save_flags(void) { unsigned long flags; @@ -147,6 +154,7 @@ static inline unsigned long arch_local_save_flags(void) /* * restore saved IRQ & FIQ state */ +#define arch_local_irq_restore arch_local_irq_restore static inline void arch_local_irq_restore(unsigned long flags) { asm volatile( @@ -156,10 +164,13 @@ static inline void arch_local_irq_restore(unsigned long flags) : "memory", "cc"); } +#define arch_irqs_disabled_flags arch_irqs_disabled_flags static inline int arch_irqs_disabled_flags(unsigned long flags) { return flags & IRQMASK_I_BIT; } +#include <asm-generic/irqflags.h> + #endif /* ifdef __KERNEL__ */ #endif /* ifndef __ASM_ARM_IRQFLAGS_H */ diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 0406cb3..cb3a407 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -51,7 +51,7 @@ struct machine_desc { bool (*smp_init)(void); void (*fixup)(struct tag *, char **); void (*dt_fixup)(void); - void (*init_meminfo)(void); + long long (*pv_fixup)(void); void (*reserve)(void);/* reserve mem blocks */ void (*map_io)(void);/* IO mapping function */ void (*init_early)(void); diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 50b378f..acd4983 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -137,17 +137,12 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster); /** * mcpm_cpu_suspend - bring the calling CPU in a suspended state * - * @expected_residency: duration in microseconds the CPU is expected - * to remain suspended, or 0 if unknown/infinity. - * - * The calling CPU is suspended. The expected residency argument is used - * as a hint by the platform specific backend to implement the appropriate - * sleep state level according to the knowledge it has on wake-up latency - * for the given hardware. + * The calling CPU is suspended. This is similar to mcpm_cpu_power_down() + * except for possible extra platform specific configuration steps to allow + * an asynchronous wake-up e.g. with a pending interrupt. * * If this CPU is found to be the "last man standing" in the cluster - * then the cluster may be prepared for power-down too, if the expected - * residency makes it worthwhile. + * then the cluster may be prepared for power-down too. * * This must be called with interrupts disabled. * @@ -157,7 +152,7 @@ int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster); * This will return if mcpm_platform_register() has not been called * previously in which case the caller should take appropriate action. */ -void mcpm_cpu_suspend(u64 expected_residency); +void mcpm_cpu_suspend(void); /** * mcpm_cpu_powered_up - housekeeping workafter a CPU has been powered up @@ -234,12 +229,6 @@ struct mcpm_platform_ops { void (*cpu_is_up)(unsigned int cpu, unsigned int cluster); void (*cluster_is_up)(unsigned int cluster); int (*wait_for_powerdown)(unsigned int cpu, unsigned int cluster); - - /* deprecated callbacks */ - int (*power_up)(unsigned int cpu, unsigned int cluster); - void (*power_down)(void); - void (*suspend)(u64); - void (*powered_up)(void); }; /** @@ -251,35 +240,6 @@ struct mcpm_platform_ops { */ int __init mcpm_platform_register(const struct mcpm_platform_ops *ops); -/* Synchronisation structures for coordinating safe cluster setup/teardown: */ - -/* - * When modifying this structure, make sure you update the MCPM_SYNC_ defines - * to match. - */ -struct mcpm_sync_struct { - /* individual CPU states */ - struct { - s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE); - } cpus[MAX_CPUS_PER_CLUSTER]; - - /* cluster state */ - s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE); - - /* inbound-side state */ - s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE); -}; - -struct sync_struct { - struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS]; -}; - -void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster); -void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster); -void __mcpm_outbound_leave_critical(unsigned int cluster, int state); -bool __mcpm_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster); -int __mcpm_cluster_state(unsigned int cluster); - /** * mcpm_sync_init - Initialize the cluster synchronization support * @@ -318,6 +278,29 @@ int __init mcpm_loopback(void (*cache_disable)(void)); void __init mcpm_smp_set_ops(void); +/* + * Synchronisation structures for coordinating safe cluster setup/teardown. + * This is private to the MCPM core code and shared between C and assembly. + * When modifying this structure, make sure you update the MCPM_SYNC_ defines + * to match. + */ +struct mcpm_sync_struct { + /* individual CPU states */ + struct { + s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE); + } cpus[MAX_CPUS_PER_CLUSTER]; + + /* cluster state */ + s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE); + + /* inbound-side state */ + s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE); +}; + +struct sync_struct { + struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS]; +}; + #else /* diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 184def0..3a72d69 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,8 +18,6 @@ #include <linux/types.h> #include <linux/sizes.h> -#include <asm/cache.h> - #ifdef CONFIG_NEED_MACH_MEMORY_H #include <mach/memory.h> #endif @@ -133,20 +131,6 @@ #define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) /* - * Minimum guaranted alignment in pgd_alloc(). The page table pointers passed - * around in head.S and proc-*.S are shifted by this amount, in order to - * leave spare high bits for systems with physical address extension. This - * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but - * gives us about 38-bits or so. - */ -#ifdef CONFIG_ARM_LPAE -#define ARCH_PGD_SHIFT L1_CACHE_SHIFT -#else -#define ARCH_PGD_SHIFT 0 -#endif -#define ARCH_PGD_MASK ((1 << ARCH_PGD_SHIFT) - 1) - -/* * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical * memory. This is used for XIP and NoMMU kernels, and on platforms that don't * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index ed690c4..e358b79 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -16,11 +16,21 @@ enum { ARM_SEC_UNLIKELY, ARM_SEC_MAX, }; +#endif struct mod_arch_specific { +#ifdef CONFIG_ARM_UNWIND struct unwind_table *unwind[ARM_SEC_MAX]; -}; #endif +#ifdef CONFIG_ARM_MODULE_PLTS + struct elf32_shdr *core_plt; + struct elf32_shdr *init_plt; + int core_plt_count; + int init_plt_count; +#endif +}; + +u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val); /* * Add the ARM architecture version to the version magic string diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index d9cf138..4f9dec4 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -19,4 +19,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) #endif +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ARM_pc = (__ip); \ + (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ + (regs)->ARM_sp = current_stack_pointer; \ + (regs)->ARM_cpsr = SVC_MODE; \ +} + #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 675e4ab..3fc87df 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -24,22 +24,10 @@ * interrupt and passed the address of the low level handler, * and can be used to implement any platform specific handling * before or after calling it. - * @runtime_resume: an optional handler which will be called by the - * runtime PM framework following a call to pm_runtime_get(). - * Note that if pm_runtime_get() is called more than once in - * succession this handler will only be called once. - * @runtime_suspend: an optional handler which will be called by the - * runtime PM framework following a call to pm_runtime_put(). - * Note that if pm_runtime_get() is called more than once in - * succession this handler will only be called following the - * final call to pm_runtime_put() that actually disables the - * hardware. */ struct arm_pmu_platdata { irqreturn_t (*handle_irq)(int irq, void *dev, irq_handler_t pmu_handler); - int (*runtime_resume)(struct device *dev); - int (*runtime_suspend)(struct device *dev); }; #ifdef CONFIG_HW_PERF_EVENTS @@ -92,6 +80,7 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + cpumask_t supported_cpus; int *irq_affinity; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); @@ -122,8 +111,6 @@ struct arm_pmu { #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) -extern const struct dev_pm_ops armpmu_dev_pm_ops; - int armpmu_register(struct arm_pmu *armpmu, int type); u64 armpmu_event_update(struct perf_event *event); @@ -158,6 +145,10 @@ struct pmu_probe_info { #define XSCALE_PMU_PROBE(_version, _fn) \ PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table); + #endif /* CONFIG_HW_PERF_EVENTS */ #endif /* __ARM_PMU_H__ */ diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 5324c11..8877ad5 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -125,13 +125,6 @@ extern void cpu_resume(void); ttbr; \ }) -#define cpu_set_ttbr(nr, val) \ - do { \ - u64 ttbr = val; \ - __asm__("mcrr p15, " #nr ", %Q0, %R0, c2" \ - : : "r" (ttbr)); \ - } while (0) - #define cpu_get_pgd() \ ({ \ u64 pg = cpu_get_ttbr(0); \ diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 18f5a55..2f3ac1b 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -61,7 +61,7 @@ asmlinkage void secondary_start_kernel(void); struct secondary_data { union { unsigned long mpu_rgn_szr; - unsigned long pgdir; + u64 pgdir; }; unsigned long swapper_pg_dir; void *stack; @@ -69,6 +69,7 @@ struct secondary_data { extern struct secondary_data secondary_data; extern volatile int pen_release; extern void secondary_startup(void); +extern void secondary_startup_arm(void); extern int __cpu_disable(void); diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h index 720ea03..3860cbd40 100644 --- a/arch/arm/include/asm/system_info.h +++ b/arch/arm/include/asm/system_info.h @@ -17,6 +17,7 @@ /* information about the system we're running on */ extern unsigned int system_rev; +extern const char *system_serial; extern unsigned int system_serial_low; extern unsigned int system_serial_high; extern unsigned int mem_fclk_21285; diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index 200f9a7..a91ae49 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -45,7 +45,6 @@ #define THUMB(x...) x #ifdef __ASSEMBLY__ #define W(instr) instr.w -#define BSYM(sym) sym + 1 #else #define WASM(instr) #instr ".w" #endif @@ -59,7 +58,6 @@ #define THUMB(x...) #ifdef __ASSEMBLY__ #define W(instr) instr -#define BSYM(sym) sym #else #define WASM(instr) #instr #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 752725d..e69f7a1 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o +obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o @@ -70,7 +71,9 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o \ + perf_event_xscale.o perf_event_v6.o \ + perf_event_v7.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 570306c..f8f7398 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -40,7 +40,7 @@ #ifdef CONFIG_MULTI_IRQ_HANDLER ldr r1, =handle_arch_irq mov r0, sp - adr lr, BSYM(9997f) + badr lr, 9997f ldr pc, [r1] #else arch_irq_handler_default @@ -273,7 +273,7 @@ __und_svc: str r4, [sp, #S_PC] orr r0, r9, r0, lsl #16 #endif - adr r9, BSYM(__und_svc_finish) + badr r9, __und_svc_finish mov r2, r4 bl call_fpe @@ -469,7 +469,7 @@ __und_usr: @ instruction, or the more conventional lr if we are to treat @ this as a real undefined instruction @ - adr r9, BSYM(ret_from_exception) + badr r9, ret_from_exception @ IRQs must be enabled before attempting to read the instruction from @ user space since that could cause a page/translation fault if the @@ -486,7 +486,7 @@ __und_usr: @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction @ lr = 32-bit undefined instruction function - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 b call_fpe __und_usr_thumb: @@ -522,7 +522,7 @@ ARM_BE8(rev16 r0, r0) @ little endian instruction add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 @ r0 = the two 16-bit Thumb instructions which caused the exception @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) @ r4 = PC value for the first 16-bit Thumb instruction @@ -716,7 +716,7 @@ __und_usr_fault_32: __und_usr_fault_16: mov r1, #2 1: mov r0, sp - adr lr, BSYM(ret_from_exception) + badr lr, ret_from_exception b __und_fault ENDPROC(__und_usr_fault_32) ENDPROC(__und_usr_fault_16) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 4e7f40c..92828a1 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -90,7 +90,7 @@ ENTRY(ret_from_fork) bl schedule_tail cmp r5, #0 movne r0, r4 - adrne lr, BSYM(1f) + badrne lr, 1f retne r5 1: get_thread_info tsk b ret_slow_syscall @@ -198,7 +198,7 @@ local_restart: bne __sys_trace cmp scno, #NR_syscalls @ check upper syscall limit - adr lr, BSYM(ret_fast_syscall) @ return address + badr lr, ret_fast_syscall @ return address ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF @@ -233,7 +233,7 @@ __sys_trace: add r0, sp, #S_OFF bl syscall_trace_enter - adr lr, BSYM(__sys_trace_return) @ return address + badr lr, __sys_trace_return @ return address mov scno, r0 @ syscall number (possibly new) add r1, sp, #S_R0 + S_OFF @ pointer to regs cmp scno, #NR_syscalls @ check upper syscall limit diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index fe57c73..c73c403 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -87,7 +87,7 @@ 1: mcount_get_lr r1 @ lr of instrumented func mcount_adjust_addr r0, lr @ instrumented function - adr lr, BSYM(2f) + badr lr, 2f mov pc, r2 2: mcount_exit .endm diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 8944f49..b6c8bb9 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -117,9 +117,14 @@ ENTRY(__switch_to) ENDPROC(__switch_to) .data - .align 8 +#if CONFIG_CPU_V7M_NUM_IRQ <= 112 + .align 9 +#else + .align 10 +#endif + /* - * Vector table (64 words => 256 bytes natural alignment) + * Vector table (Natural alignment need to be ensured) */ ENTRY(vector_table) .long 0 @ 0 - Reset stack pointer @@ -138,6 +143,6 @@ ENTRY(vector_table) .long __invalid_entry @ 13 - Reserved .long __pendsv_entry @ 14 - PendSV .long __invalid_entry @ 15 - SysTick - .rept 64 - 16 - .long __irq_entry @ 16..64 - External Interrupts + .rept CONFIG_CPU_V7M_NUM_IRQ + .long __irq_entry @ External Interrupts .endr diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index aebfbf7..9b8c5a1 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -46,7 +46,7 @@ ENTRY(stext) .arm ENTRY(stext) - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -77,13 +77,13 @@ ENTRY(stext) orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit bl __setup_mpu #endif - ldr r13, =__mmap_switched @ address to jump to after - @ initialising sctlr - adr lr, BSYM(1f) @ return (PIC) address + + badr lr, 1f @ return (PIC) address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 - 1: b __after_proc_init +1: bl __after_proc_init + b __mmap_switched ENDPROC(stext) #ifdef CONFIG_SMP @@ -106,8 +106,7 @@ ENTRY(secondary_startup) movs r10, r5 @ invalid processor? beq __error_p @ yes, error 'p' - adr r4, __secondary_data - ldmia r4, {r7, r12} + ldr r7, __secondary_data #ifdef CONFIG_ARM_MPU /* Use MPU region info supplied by __cpu_up */ @@ -115,23 +114,19 @@ ENTRY(secondary_startup) bl __setup_mpu @ Initialize the MPU #endif - adr lr, BSYM(__after_proc_init) @ return address - mov r13, r12 @ __secondary_switched address + badr lr, 1f @ return (PIC) address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 -ENDPROC(secondary_startup) - -ENTRY(__secondary_switched) - ldr sp, [r7, #8] @ set up the stack pointer +1: bl __after_proc_init + ldr sp, [r7, #12] @ set up the stack pointer mov fp, #0 b secondary_start_kernel -ENDPROC(__secondary_switched) +ENDPROC(secondary_startup) .type __secondary_data, %object __secondary_data: .long secondary_data - .long __secondary_switched #endif /* CONFIG_SMP */ /* @@ -164,7 +159,7 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - ret r13 + ret lr ENDPROC(__after_proc_init) .ltorg diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 3637973..bd755d9 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -80,7 +80,7 @@ ENTRY(stext) ARM_BE8(setend be ) @ ensure we are in BE8 mode - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -131,13 +131,30 @@ ENTRY(stext) * The following calls CPU specific code in a position independent * manner. See arch/arm/mm/proc-*.S for details. r10 = base of * xxx_proc_info structure selected by __lookup_processor_type - * above. On return, the CPU will be ready for the MMU to be - * turned on, and r0 will hold the CPU control register value. + * above. + * + * The processor init function will be called with: + * r1 - machine type + * r2 - boot data (atags/dt) pointer + * r4 - translation table base (low word) + * r5 - translation table base (high word, if LPAE) + * r8 - translation table base 1 (pfn if LPAE) + * r9 - cpuid + * r13 - virtual address for __enable_mmu -> __turn_mmu_on + * + * On return, the CPU will be ready for the MMU to be turned on, + * r0 will hold the CPU control register value, r1, r2, r4, and + * r9 will be preserved. r5 will also be preserved if LPAE. */ ldr r13, =__mmap_switched @ address to jump to after @ mmu has been enabled - adr lr, BSYM(1f) @ return (PIC) address + badr lr, 1f @ return (PIC) address +#ifdef CONFIG_ARM_LPAE + mov r5, #0 @ high TTBR0 + mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn +#else mov r8, r4 @ set TTBR1 to swapper_pg_dir +#endif ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 @@ -158,7 +175,7 @@ ENDPROC(stext) * * Returns: * r0, r3, r5-r7 corrupted - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = physical page table address */ __create_page_tables: pgtbl r4, r8 @ page table address @@ -333,7 +350,6 @@ __create_page_tables: #endif #ifdef CONFIG_ARM_LPAE sub r4, r4, #0x1000 @ point to the PGD table - mov r4, r4, lsr #ARCH_PGD_SHIFT #endif ret lr ENDPROC(__create_page_tables) @@ -346,9 +362,9 @@ __turn_mmu_on_loc: #if defined(CONFIG_SMP) .text -ENTRY(secondary_startup_arm) .arm - THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. +ENTRY(secondary_startup_arm) + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -381,10 +397,10 @@ ENTRY(secondary_startup) adr r4, __secondary_data ldmia r4, {r5, r7, r12} @ address to jump to after sub lr, r4, r5 @ mmu has been enabled - ldr r4, [r7, lr] @ get secondary_data.pgdir - add r7, r7, #4 - ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir - adr lr, BSYM(__enable_mmu) @ return address + add r3, r7, lr + ldrd r4, [r3, #0] @ get secondary_data.pgdir + ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir + badr lr, __enable_mmu @ return address mov r13, r12 @ __secondary_switched address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 @ initialise processor @@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm) * r6 = &secondary_data */ ENTRY(__secondary_switched) - ldr sp, [r7, #4] @ get secondary_data.stack + ldr sp, [r7, #12] @ get secondary_data.stack mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) @@ -416,12 +432,14 @@ __secondary_data: /* * Setup common bits before finally enabling the MMU. Essentially * this is just loading the page table pointer and domain access - * registers. + * registers. All these registers need to be preserved by the + * processor setup function (or set in the case of r0) * * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = TTBR pointer (low word) + * r5 = TTBR pointer (high word if LPAE) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ @@ -440,7 +458,9 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif -#ifndef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r4, r5, c2 @ load TTBR0 +#else mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c new file mode 100644 index 0000000..097e2e2 --- /dev/null +++ b/arch/arm/kernel/module-plts.c @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cache.h> +#include <asm/opcodes.h> + +#define PLT_ENT_STRIDE L1_CACHE_BYTES +#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) +#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) + +#ifdef CONFIG_THUMB2_KERNEL +#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ + (PLT_ENT_STRIDE - 4)) +#else +#define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \ + (PLT_ENT_STRIDE - 8)) +#endif + +struct plt_entries { + u32 ldr[PLT_ENT_COUNT]; + u32 lit[PLT_ENT_COUNT]; +}; + +static bool in_init(const struct module *mod, u32 addr) +{ + return addr - (u32)mod->module_init < mod->init_size; +} + +u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) +{ + struct plt_entries *plt, *plt_end; + int c, *count; + + if (in_init(mod, loc)) { + plt = (void *)mod->arch.init_plt->sh_addr; + plt_end = (void *)plt + mod->arch.init_plt->sh_size; + count = &mod->arch.init_plt_count; + } else { + plt = (void *)mod->arch.core_plt->sh_addr; + plt_end = (void *)plt + mod->arch.core_plt->sh_size; + count = &mod->arch.core_plt_count; + } + + /* Look for an existing entry pointing to 'val' */ + for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { + int i; + + if (!c) { + /* Populate a new set of entries */ + *plt = (struct plt_entries){ + { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, + { val, } + }; + ++*count; + return (u32)plt->ldr; + } + for (i = 0; i < PLT_ENT_COUNT; i++) { + if (!plt->lit[i]) { + plt->lit[i] = val; + ++*count; + } + if (plt->lit[i] == val) + return (u32)&plt->ldr[i]; + } + } + BUG(); +} + +static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num, + u32 mask) +{ + u32 *loc1, *loc2; + int i; + + for (i = 0; i < num; i++) { + if (rel[i].r_info != rel[num].r_info) + continue; + + /* + * Identical relocation types against identical symbols can + * still result in different PLT entries if the addend in the + * place is different. So resolve the target of the relocation + * to compare the values. + */ + loc1 = (u32 *)(base + rel[i].r_offset); + loc2 = (u32 *)(base + rel[num].r_offset); + if (((*loc1 ^ *loc2) & mask) == 0) + return 1; + } + return 0; +} + +/* Count how many PLT entries we may need */ +static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) +{ + unsigned int ret = 0; + int i; + + /* + * Sure, this is order(n^2), but it's usually short, and not + * time critical + */ + for (i = 0; i < num; i++) + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_ARM_CALL: + case R_ARM_PC24: + case R_ARM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_arm(0x00ffffff))) + ret++; + break; +#ifdef CONFIG_THUMB2_KERNEL + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_thumb32(0x07ff2fff))) + ret++; +#endif + } + return ret; +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned long core_plts = 0, init_plts = 0; + Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; + + /* + * To store the PLTs, we expand the .text section for core module code + * and the .init.text section for initialization code. + */ + for (s = sechdrs; s < sechdrs_end; ++s) + if (strcmp(".core.plt", secstrings + s->sh_name) == 0) + mod->arch.core_plt = s; + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) + mod->arch.init_plt = s; + + if (!mod->arch.core_plt || !mod->arch.init_plt) { + pr_err("%s: sections missing\n", mod->name); + return -ENOEXEC; + } + + for (s = sechdrs + 1; s < sechdrs_end; ++s) { + const Elf32_Rel *rels = (void *)ehdr + s->sh_offset; + int numrels = s->sh_size / sizeof(Elf32_Rel); + Elf32_Shdr *dstsec = sechdrs + s->sh_info; + + if (s->sh_type != SHT_REL) + continue; + + if (strstr(secstrings + s->sh_name, ".init")) + init_plts += count_plts(dstsec->sh_addr, rels, numrels); + else + core_plts += count_plts(dstsec->sh_addr, rels, numrels); + } + + mod->arch.core_plt->sh_type = SHT_NOBITS; + mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.core_plt_count = 0; + + mod->arch.init_plt->sh_type = SHT_NOBITS; + mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.init_plt_count = 0; + pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, + mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); + return 0; +} diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index af791f4..efdddcb 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -40,7 +40,12 @@ #ifdef CONFIG_MMU void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) + return p; + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -110,6 +115,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x04000000; offset += sym->st_value - loc; + + /* + * Route through a PLT entry if 'offset' exceeds the + * supported range. Note that 'offset + loc + 8' + * contains the absolute jump target, i.e., + * @sym + addend, corrected for the +8 PC bias. + */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xfe000000 || + offset >= (s32)0x02000000)) + offset = get_module_plt(module, loc, + offset + loc + 8) + - loc - 8; + if (offset <= (s32)0xfe000000 || offset >= (s32)0x02000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", @@ -203,6 +222,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x02000000; offset += sym->st_value - loc; + /* + * Route through a PLT entry if 'offset' exceeds the + * supported range. + */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xff000000 || + offset >= (s32)0x01000000)) + offset = get_module_plt(module, loc, + offset + loc + 4) + - loc - 4; + if (offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds new file mode 100644 index 0000000..3682fa1 --- /dev/null +++ b/arch/arm/kernel/module.lds @@ -0,0 +1,4 @@ +SECTIONS { + .core.plt : { BYTE(0) } + .init.plt : { BYTE(0) } +} diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 4a86a01..357f57e 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -11,12 +11,18 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include <linux/bitmap.h> +#include <linux/cpumask.h> +#include <linux/export.h> #include <linux/kernel.h> +#include <linux/of.h> #include <linux/platform_device.h> -#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/irq.h> #include <linux/irqdesc.h> +#include <asm/cputype.h> #include <asm/irq_regs.h> #include <asm/pmu.h> @@ -229,6 +235,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return -ENOENT; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -344,20 +354,12 @@ static void armpmu_release_hardware(struct arm_pmu *armpmu) { armpmu->free_irq(armpmu); - pm_runtime_put_sync(&armpmu->plat_device->dev); } static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int err; - struct platform_device *pmu_device = armpmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - pm_runtime_get_sync(&pmu_device->dev); - err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); + int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { armpmu_release_hardware(armpmu); return err; @@ -454,6 +456,17 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + /* + * Reject CPU-affine events for CPUs that are of a different class to + * that which this PMU handles. Process-following events (where + * event->cpu == -1) can be migrated between CPUs, and thus we have to + * reject them later (in armpmu_add) if they're scheduled on a + * different class of CPU. + */ + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; @@ -489,6 +502,10 @@ static void armpmu_enable(struct pmu *pmu) struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + if (enabled) armpmu->start(armpmu); } @@ -496,34 +513,25 @@ static void armpmu_enable(struct pmu *pmu) static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(armpmu); -} - -#ifdef CONFIG_PM -static int armpmu_runtime_resume(struct device *dev) -{ - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - if (plat && plat->runtime_resume) - return plat->runtime_resume(dev); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; - return 0; + armpmu->stop(armpmu); } -static int armpmu_runtime_suspend(struct device *dev) +/* + * In heterogeneous systems, events are specific to a particular + * microarchitecture, and aren't suitable for another. Thus, only match CPUs of + * the same microarchitecture. + */ +static int armpmu_filter_match(struct perf_event *event) { - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - - if (plat && plat->runtime_suspend) - return plat->runtime_suspend(dev); - - return 0; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + return cpumask_test_cpu(cpu, &armpmu->supported_cpus); } -#endif - -const struct dev_pm_ops armpmu_dev_pm_ops = { - SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL) -}; static void armpmu_init(struct arm_pmu *armpmu) { @@ -539,15 +547,349 @@ static void armpmu_init(struct arm_pmu *armpmu) .start = armpmu_start, .stop = armpmu_stop, .read = armpmu_read, + .filter_match = armpmu_filter_match, }; } int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); - pm_runtime_enable(&armpmu->plat_device->dev); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *__oprofile_cpu_pmu; + +/* + * Despite the names, these two functions are CPU-specific and are used + * by the OProfile/perf code. + */ +const char *perf_pmu_name(void) +{ + if (!__oprofile_cpu_pmu) + return NULL; + + return __oprofile_cpu_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + int max_events = 0; + + if (__oprofile_cpu_pmu != NULL) + max_events = __oprofile_cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +static void cpu_pmu_enable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + disable_percpu_irq(irq); +} + +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &hw_events->percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } + } +} + +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; + } + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &hw_events->percpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); + } + } + + return 0; +} + +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. + */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. + */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int i, irq, *irqs; + struct platform_device *pdev = pmu->plat_device; + + /* Don't bother with PPIs; they're already affine */ + irq = platform_get_irq(pdev, 0); + if (irq >= 0 && irq_is_percpu(irq)) + return 0; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + for (i = 0; i < pdev->num_resources; ++i) { + struct device_node *dn; + int cpu; + + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", + i); + if (!dn) { + pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", + of_node_full_name(pdev->dev.of_node), i); + break; + } + + for_each_possible_cpu(cpu) + if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + break; + + of_node_put(dn); + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + break; + } + + irqs[i] = cpu; + cpumask_set_cpu(cpu, &pmu->supported_cpus); + } + + if (i == pdev->num_resources) { + pmu->irq_affinity = irqs; + } else { + kfree(irqs); + cpumask_setall(&pmu->supported_cpus); + } + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c deleted file mode 100644 index 3b8c283..0000000 --- a/arch/arm/kernel/perf_event_cpu.c +++ /dev/null @@ -1,421 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> - */ -#define pr_fmt(fmt) "CPU PMU: " fmt - -#include <linux/bitmap.h> -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/irq.h> -#include <linux/irqdesc.h> - -#include <asm/cputype.h> -#include <asm/irq_regs.h> -#include <asm/pmu.h> - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; - -/* - * Despite the names, these two functions are CPU-specific and are used - * by the OProfile/perf code. - */ -const char *perf_pmu_name(void) -{ - if (!cpu_pmu) - return NULL; - - return cpu_pmu->name; -} -EXPORT_SYMBOL_GPL(perf_pmu_name); - -int perf_num_counters(void) -{ - int max_events = 0; - - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -/* Include the PMU-specific implementations. */ -#include "perf_event_xscale.c" -#include "perf_event_v6.c" -#include "perf_event_v7.c" - -static void cpu_pmu_enable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static void cpu_pmu_disable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - disable_percpu_irq(irq); -} - -static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) -{ - int i, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &hw_events->percpu_pmu); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - } - } -} - -static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) -{ - int i, err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { - pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); - return 0; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - continue; - } - - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - - cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); - } - } - - return 0; -} - -/* - * PMU hardware loses all context when a CPU goes offline. - * When a CPU is hotplugged back in, since some hardware registers are - * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading - * junk values out of them. - */ -static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) -{ - struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); - - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - - if (pmu->reset) - pmu->reset(pmu); - else - return NOTIFY_DONE; - - return NOTIFY_OK; -} - -static int cpu_pmu_init(struct arm_pmu *cpu_pmu) -{ - int err; - int cpu; - struct pmu_hw_events __percpu *cpu_hw_events; - - cpu_hw_events = alloc_percpu(struct pmu_hw_events); - if (!cpu_hw_events) - return -ENOMEM; - - cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; - err = register_cpu_notifier(&cpu_pmu->hotplug_nb); - if (err) - goto out_hw_events; - - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); - raw_spin_lock_init(&events->pmu_lock); - events->percpu_pmu = cpu_pmu; - } - - cpu_pmu->hw_events = cpu_hw_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; - - /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); - - /* If no interrupts available, set the corresponding capability flag */ - if (!platform_get_irq(cpu_pmu->plat_device, 0)) - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - - return 0; - -out_hw_events: - free_percpu(cpu_hw_events); - return err; -} - -static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) -{ - unregister_cpu_notifier(&cpu_pmu->hotplug_nb); - free_percpu(cpu_pmu->hw_events); -} - -/* - * PMU platform driver and devicetree bindings. - */ -static const struct of_device_id cpu_pmu_of_device_ids[] = { - {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, - {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, - {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, - {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, - {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, - {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, - {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, - {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, - {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, - {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, - {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, - {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, - {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, - {}, -}; - -static struct platform_device_id cpu_pmu_plat_device_ids[] = { - {.name = "arm-pmu"}, - {.name = "armv6-pmu"}, - {.name = "armv7-pmu"}, - {.name = "xscale-pmu"}, - {}, -}; - -static const struct pmu_probe_info pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), - { /* sentinel value */ } -}; - -/* - * CPU PMU identification and probing. - */ -static int probe_current_pmu(struct arm_pmu *pmu) -{ - int cpu = get_cpu(); - unsigned int cpuid = read_cpuid_id(); - int ret = -ENODEV; - const struct pmu_probe_info *info; - - pr_info("probing PMU on CPU %d\n", cpu); - - for (info = pmu_probe_table; info->init != NULL; info++) { - if ((cpuid & info->mask) != info->cpuid) - continue; - ret = info->init(pmu); - break; - } - - put_cpu(); - return ret; -} - -static int of_pmu_irq_cfg(struct platform_device *pdev) -{ - int i, irq; - int *irqs; - - /* Don't bother with PPIs; they're already affine */ - irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) - return 0; - - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - - for (i = 0; i < pdev->num_resources; ++i) { - struct device_node *dn; - int cpu; - - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", - i); - if (!dn) { - pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(pdev->dev.of_node), i); - break; - } - - for_each_possible_cpu(cpu) - if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) - break; - - of_node_put(dn); - if (cpu >= nr_cpu_ids) { - pr_warn("Failed to find logical CPU for %s\n", - dn->name); - break; - } - - irqs[i] = cpu; - } - - if (i == pdev->num_resources) - cpu_pmu->irq_affinity = irqs; - else - kfree(irqs); - - return 0; -} - -static int cpu_pmu_device_probe(struct platform_device *pdev) -{ - const struct of_device_id *of_id; - const int (*init_fn)(struct arm_pmu *); - struct device_node *node = pdev->dev.of_node; - struct arm_pmu *pmu; - int ret = -ENODEV; - - if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!\n"); - return -ENOSPC; - } - - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); - if (!pmu) { - pr_info("failed to allocate PMU device!\n"); - return -ENOMEM; - } - - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; - - if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { - init_fn = of_id->data; - - ret = of_pmu_irq_cfg(pdev); - if (!ret) - ret = init_fn(pmu); - } else { - ret = probe_current_pmu(pmu); - } - - if (ret) { - pr_info("failed to probe PMU!\n"); - goto out_free; - } - - ret = cpu_pmu_init(cpu_pmu); - if (ret) - goto out_free; - - ret = armpmu_register(cpu_pmu, -1); - if (ret) - goto out_destroy; - - return 0; - -out_destroy: - cpu_pmu_destroy(cpu_pmu); -out_free: - pr_info("failed to register PMU devices!\n"); - kfree(pmu); - return ret; -} - -static struct platform_driver cpu_pmu_driver = { - .driver = { - .name = "arm-pmu", - .pm = &armpmu_dev_pm_ops, - .of_match_table = cpu_pmu_of_device_ids, - }, - .probe = cpu_pmu_device_probe, - .id_table = cpu_pmu_plat_device_ids, -}; - -static int __init register_pmu_driver(void) -{ - return platform_driver_register(&cpu_pmu_driver); -} -device_initcall(register_pmu_driver); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index f2ffd5c..09f83e4 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -31,6 +31,14 @@ */ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#include <linux/of.h> +#include <linux/platform_device.h> + enum armv6_perf_types { ARMV6_PERFCTR_ICACHE_MISS = 0x0, ARMV6_PERFCTR_IBUF_STALL = 0x1, @@ -543,24 +551,39 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static struct of_device_id armv6_pmu_of_device_ids[] = { + {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, + {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, + { /* sentinel value */ } +}; -static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) +static const struct pmu_probe_info armv6_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), + { /* sentinel value */ } +}; + +static int armv6_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, + armv6_pmu_probe_table); } -static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver armv6_pmu_driver = { + .driver = { + .name = "armv6-pmu", + .of_match_table = armv6_pmu_of_device_ids, + }, + .probe = armv6_pmu_device_probe, +}; + +static int __init register_armv6_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&armv6_pmu_driver); } +device_initcall(register_armv6_pmu_driver); #endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index f4207a4..f9b37f8 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -19,9 +19,15 @@ #ifdef CONFIG_CPU_V7 #include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> #include <asm/vfp.h> #include "../vfp/vfpinstr.h" +#include <linux/of.h> +#include <linux/platform_device.h> + /* * Common ARMv7 event types * @@ -1056,15 +1062,22 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->max_period = (1LLU << 32) - 1; }; -static u32 armv7_read_num_pmnc_events(void) +static void armv7_read_num_pmnc_events(void *info) { - u32 nb_cnt; + int *nb_cnt = info; /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; + /* Add the CPU cycles counter */ + *nb_cnt += 1; +} + +static int armv7_probe_num_events(struct arm_pmu *arm_pmu) +{ + return smp_call_function_any(&arm_pmu->supported_cpus, + armv7_read_num_pmnc_events, + &arm_pmu->num_events, 1); } static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) @@ -1072,8 +1085,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) @@ -1081,8 +1093,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) @@ -1090,8 +1101,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) @@ -1099,9 +1109,8 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) @@ -1109,9 +1118,8 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) @@ -1119,16 +1127,15 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { - armv7_a12_pmu_init(cpu_pmu); + int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; - return 0; + return ret; } /* @@ -1508,14 +1515,13 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->map_event = krait_map_event_no_branch; else cpu_pmu->map_event = krait_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; cpu_pmu->reset = krait_pmu_reset; cpu_pmu->enable = krait_pmu_enable_event; cpu_pmu->disable = krait_pmu_disable_event; cpu_pmu->get_event_idx = krait_pmu_get_event_idx; cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } /* @@ -1833,13 +1839,12 @@ static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_scorpion"; cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->reset = scorpion_pmu_reset; cpu_pmu->enable = scorpion_pmu_enable_event; cpu_pmu->disable = scorpion_pmu_disable_event; cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) @@ -1847,62 +1852,52 @@ static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_scorpion_mp"; cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->reset = scorpion_pmu_reset; cpu_pmu->enable = scorpion_pmu_enable_event; cpu_pmu->disable = scorpion_pmu_disable_event; cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return 0; -} -#else -static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} + return armv7_probe_num_events(cpu_pmu); +} + +static const struct of_device_id armv7_pmu_of_device_ids[] = { + {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, + {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, + {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, + {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, + {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, + {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, + {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, + {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, + {}, +}; -static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static const struct pmu_probe_info armv7_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), + { /* sentinel value */ } +}; -static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) +static int armv7_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, + armv7_pmu_probe_table); } -static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static struct platform_driver armv7_pmu_driver = { + .driver = { + .name = "armv7-pmu", + .of_match_table = armv7_pmu_of_device_ids, + }, + .probe = armv7_pmu_device_probe, +}; -static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +static int __init register_armv7_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&armv7_pmu_driver); } +device_initcall(register_armv7_pmu_driver); #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 8af9f1f..304d056 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -13,6 +13,14 @@ */ #ifdef CONFIG_CPU_XSCALE + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#include <linux/of.h> +#include <linux/platform_device.h> + enum xscale_perf_types { XSCALE_PERFCTR_ICACHE_MISS = 0x00, XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, @@ -740,14 +748,28 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu) + +static const struct pmu_probe_info xscale_pmu_probe_table[] = { + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), + { /* sentinel value */ } +}; + +static int xscale_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table); } -static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver xscale_pmu_driver = { + .driver = { + .name = "xscale-pmu", + }, + .probe = xscale_pmu_device_probe, +}; + +static int __init register_xscale_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&xscale_pmu_driver); } +device_initcall(register_xscale_pmu_driver); #endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 6c777e9..e6d8c76 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -75,8 +75,7 @@ __setup("fpe=", fpe_setup); extern void init_default_cache_policy(unsigned long); extern void paging_init(const struct machine_desc *desc); -extern void early_paging_init(const struct machine_desc *, - struct proc_info_list *); +extern void early_paging_init(const struct machine_desc *); extern void sanity_check_meminfo(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -93,6 +92,9 @@ unsigned int __atags_pointer __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); +const char *system_serial; +EXPORT_SYMBOL(system_serial); + unsigned int system_serial_low; EXPORT_SYMBOL(system_serial_low); @@ -839,8 +841,25 @@ arch_initcall(customize_machine); static int __init init_machine_late(void) { + struct device_node *root; + int ret; + if (machine_desc->init_late) machine_desc->init_late(); + + root = of_find_node_by_path("/"); + if (root) { + ret = of_property_read_string(root, "serial-number", + &system_serial); + if (ret) + system_serial = NULL; + } + + if (!system_serial) + system_serial = kasprintf(GFP_KERNEL, "%08x%08x", + system_serial_high, + system_serial_low); + return 0; } late_initcall(init_machine_late); @@ -936,7 +955,9 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - early_paging_init(mdesc, lookup_processor_type(read_cpuid_id())); +#ifdef CONFIG_MMU + early_paging_init(mdesc); +#endif setup_dma_zone(mdesc); sanity_check_meminfo(); arm_memblock_init(mdesc); @@ -1109,8 +1130,7 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); - seq_printf(m, "Serial\t\t: %08x%08x\n", - system_serial_high, system_serial_low); + seq_printf(m, "Serial\t\t: %s\n", system_serial); return 0; } diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 6060dbc..0f6c100 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -81,7 +81,7 @@ ENTRY(__cpu_suspend) mov r1, r4 @ size of save block add r0, sp, #8 @ pointer to save block bl __cpu_suspend_save - adr lr, BSYM(cpu_suspend_abort) + badr lr, cpu_suspend_abort ldmfd sp!, {r0, pc} @ call suspend fn ENDPROC(__cpu_suspend) .ltorg @@ -122,7 +122,7 @@ ENDPROC(cpu_resume_after_mmu) #ifdef CONFIG_MMU .arm ENTRY(cpu_resume_arm) - THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cca5b87..90dfbed 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops) static unsigned long get_arch_pgd(pgd_t *pgd) { - phys_addr_t pgdir = virt_to_idmap(pgd); - BUG_ON(pgdir & ARCH_PGD_MASK); - return pgdir >> ARCH_PGD_SHIFT; +#ifdef CONFIG_ARM_LPAE + return __phys_to_pfn(virt_to_phys(pgd)); +#else + return virt_to_phys(pgd); +#endif } int __cpu_up(unsigned int cpu, struct task_struct *idle) @@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) #endif #ifdef CONFIG_MMU - secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.pgdir = virt_to_phys(idmap_pgd); secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); #endif sync_cache_w(&secondary_data); diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 7a3be1d..b10e136 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -17,6 +17,9 @@ #include <asm/mach/map.h> #include <asm/memory.h> #include <asm/system_info.h> +#include <asm/traps.h> + +#define TCMTR_FORMAT_MASK 0xe0000000U static struct gen_pool *tcm_pool; static bool dtcm_present; @@ -176,6 +179,77 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, } /* + * When we are running in the non-secure world and the secure world + * has not explicitly given us access to the TCM we will get an + * undefined error when reading the TCM region register in the + * setup_tcm_bank function (above). + * + * There are two variants of this register read that we need to trap, + * the read for the data TCM and the read for the instruction TCM: + * c0370628: ee196f11 mrc 15, 0, r6, cr9, cr1, {0} + * c0370674: ee196f31 mrc 15, 0, r6, cr9, cr1, {1} + * + * Our undef hook mask explicitly matches all fields of the encoded + * instruction other than the destination register. The mask also + * only allows operand 2 to have the values 0 or 1. + * + * The undefined hook is defined as __init and __initdata, and therefore + * must be removed before tcm_init returns. + * + * In this particular case (MRC with ARM condition code ALways) the + * Thumb-2 and ARM instruction encoding are identical, so this hook + * will work on a Thumb-2 kernel. + * + * See A8.8.107, DDI0406C_C ARM Architecture Reference Manual, Encoding + * T1/A1 for the bit-by-bit details. + * + * mrc p15, 0, XX, c9, c1, 0 + * mrc p15, 0, XX, c9, c1, 1 + * | | | | | | | +---- opc2 0|1 = 000|001 + * | | | | | | +------- CRm 0 = 0001 + * | | | | | +----------- CRn 0 = 1001 + * | | | | +--------------- Rt ? = ???? + * | | | +------------------- opc1 0 = 000 + * | | +----------------------- coproc 15 = 1111 + * | +-------------------------- condition ALways = 1110 + * +----------------------------- instruction MRC = 1110 + * + * Encoding this as per A8.8.107 of DDI0406C, Encoding T1/A1, yields: + * 1111 1111 1111 1111 0000 1111 1101 1111 Required Mask + * 1110 1110 0001 1001 ???? 1111 0001 0001 mrc p15, 0, XX, c9, c1, 0 + * 1110 1110 0001 1001 ???? 1111 0011 0001 mrc p15, 0, XX, c9, c1, 1 + * [ ] [ ] [ ]| [ ] [ ] [ ] [ ]| +--- CRm + * | | | | | | | | +----- SBO + * | | | | | | | +------- opc2 + * | | | | | | +----------- coproc + * | | | | | +---------------- Rt + * | | | | +--------------------- CRn + * | | | +------------------------- SBO + * | | +--------------------------- opc1 + * | +------------------------------- instruction + * +------------------------------------ condition + */ +#define TCM_REGION_READ_MASK 0xffff0fdf +#define TCM_REGION_READ_INSTR 0xee190f11 +#define DEST_REG_SHIFT 12 +#define DEST_REG_MASK 0xf + +static int __init tcm_handler(struct pt_regs *regs, unsigned int instr) +{ + regs->uregs[(instr >> DEST_REG_SHIFT) & DEST_REG_MASK] = 0; + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook tcm_hook __initdata = { + .instr_mask = TCM_REGION_READ_MASK, + .instr_val = TCM_REGION_READ_INSTR, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = tcm_handler +}; + +/* * This initializes the TCM memory */ void __init tcm_init(void) @@ -204,9 +278,18 @@ void __init tcm_init(void) } tcm_status = read_cpuid_tcmstatus(); + + /* + * This code only supports v6-compatible TCMTR implementations. + */ + if (tcm_status & TCMTR_FORMAT_MASK) + return; + dtcm_banks = (tcm_status >> 16) & 0x03; itcm_banks = (tcm_status & 0x03); + register_undef_hook(&tcm_hook); + /* Values greater than 2 for D/ITCM banks are "reserved" */ if (dtcm_banks > 2) dtcm_banks = 0; @@ -218,7 +301,7 @@ void __init tcm_init(void) for (i = 0; i < dtcm_banks; i++) { ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into DTCM */ if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) { @@ -227,6 +310,12 @@ void __init tcm_init(void) dtcm_code_sz, (dtcm_end - DTCM_OFFSET)); goto no_dtcm; } + /* + * This means that the DTCM sizes were 0 or the DTCM banks + * were inaccessible due to TrustZone configuration. + */ + if (!(dtcm_end - DTCM_OFFSET)) + goto no_dtcm; dtcm_res.end = dtcm_end - 1; request_resource(&iomem_resource, &dtcm_res); dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET; @@ -250,15 +339,21 @@ no_dtcm: for (i = 0; i < itcm_banks; i++) { ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into ITCM */ if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) { pr_info("CPU ITCM: %u bytes of code compiled to " "ITCM but only %lu bytes of ITCM present\n", itcm_code_sz, (itcm_end - ITCM_OFFSET)); - return; + goto unregister; } + /* + * This means that the ITCM sizes were 0 or the ITCM banks + * were inaccessible due to TrustZone configuration. + */ + if (!(itcm_end - ITCM_OFFSET)) + goto unregister; itcm_res.end = itcm_end - 1; request_resource(&iomem_resource, &itcm_res); itcm_iomap[0].length = itcm_end - ITCM_OFFSET; @@ -275,6 +370,9 @@ no_dtcm: pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no " "ITCM banks present in CPU\n", itcm_code_sz); } + +unregister: + unregister_undef_hook(&tcm_hook); } /* diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3dce1a3..d358226 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -749,14 +749,6 @@ late_initcall(arm_mrc_hook_init); #endif -void __bad_xchg(volatile void *ptr, int size) -{ - pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", - __builtin_return_address(0), ptr, size); - BUG(); -} -EXPORT_SYMBOL(__bad_xchg); - /* * A data abort trap was taken, but we did not handle the instruction. * Try to abort the user program, or panic if it was the kernel. diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index f7db3a5..568494d 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -307,7 +307,7 @@ ENTRY(kvm_call_hyp) THUMB( orr r2, r2, #PSR_T_BIT ) msr spsr_cxsf, r2 mrs r1, ELR_hyp - ldr r2, =BSYM(panic) + ldr r2, =panic msr ELR_hyp, r2 ldr r0, =\panic_str clrex @ Clear exclusive monitor diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S index ed1a421..bf3a408 100644 --- a/arch/arm/lib/call_with_stack.S +++ b/arch/arm/lib/call_with_stack.S @@ -35,7 +35,7 @@ ENTRY(call_with_stack) mov r2, r0 mov r0, r1 - adr lr, BSYM(1f) + badr lr, 1f ret r2 1: ldr lr, [sp] diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 96866d0..f572219 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -311,13 +311,7 @@ static int exynos5420_cpu_suspend(unsigned long arg) if (IS_ENABLED(CONFIG_EXYNOS5420_MCPM)) { mcpm_set_entry_vector(cpu, cluster, exynos_cpu_resume); - - /* - * Residency value passed to mcpm_cpu_suspend back-end - * has to be given clear semantics. Set to 0 as a - * temporary value. - */ - mcpm_cpu_suspend(0); + mcpm_cpu_suspend(); } pr_info("Failed to suspend the system\n"); diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index 280f3f1..b5f8f5f 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -6,6 +6,8 @@ * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. */ +#include <linux/init.h> +#include <linux/smp.h> #include <linux/delay.h> #include <linux/io.h> #include <linux/memblock.h> @@ -13,7 +15,9 @@ #include <asm/cputype.h> #include <asm/cp15.h> -#include <asm/mcpm.h> +#include <asm/cacheflush.h> +#include <asm/smp.h> +#include <asm/smp_plat.h> #include "core.h" @@ -94,11 +98,16 @@ static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on) } while (data != readl_relaxed(fabric + FAB_SF_MODE)); } -static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster) +static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) { + unsigned int mpidr, cpu, cluster; unsigned long data; void __iomem *sys_dreq, *sys_status; + mpidr = cpu_logical_map(l_cpu); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + if (!sysctrl) return -ENODEV; if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) @@ -118,6 +127,7 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster) cpu_relax(); data = readl_relaxed(sys_status); } while (data & CLUSTER_DEBUG_RESET_STATUS); + hip04_set_snoop_filter(cluster, 1); } data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \ @@ -126,11 +136,15 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster) do { cpu_relax(); } while (data == readl_relaxed(sys_status)); + /* * We may fail to power up core again without this delay. * It's not mentioned in document. It's found by test. */ udelay(20); + + arch_send_wakeup_ipi_mask(cpumask_of(l_cpu)); + out: hip04_cpu_table[cluster][cpu]++; spin_unlock_irq(&boot_lock); @@ -138,31 +152,30 @@ out: return 0; } -static void hip04_mcpm_power_down(void) +#ifdef CONFIG_HOTPLUG_CPU +static void hip04_cpu_die(unsigned int l_cpu) { unsigned int mpidr, cpu, cluster; - bool skip_wfi = false, last_man = false; + bool last_man; - mpidr = read_cpuid_mpidr(); + mpidr = cpu_logical_map(l_cpu); cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - __mcpm_cpu_going_down(cpu, cluster); - spin_lock(&boot_lock); - BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); hip04_cpu_table[cluster][cpu]--; if (hip04_cpu_table[cluster][cpu] == 1) { /* A power_up request went ahead of us. */ - skip_wfi = true; + spin_unlock(&boot_lock); + return; } else if (hip04_cpu_table[cluster][cpu] > 1) { pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); BUG(); } last_man = hip04_cluster_is_down(cluster); - if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { - spin_unlock(&boot_lock); + spin_unlock(&boot_lock); + if (last_man) { /* Since it's Cortex A15, disable L2 prefetching. */ asm volatile( "mcr p15, 1, %0, c15, c0, 3 \n\t" @@ -170,34 +183,30 @@ static void hip04_mcpm_power_down(void) "dsb " : : "r" (0x400) ); v7_exit_coherency_flush(all); - hip04_set_snoop_filter(cluster, 0); - __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); } else { - spin_unlock(&boot_lock); v7_exit_coherency_flush(louis); } - __mcpm_cpu_down(cpu, cluster); - - if (!skip_wfi) + for (;;) wfi(); } -static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) +static int hip04_cpu_kill(unsigned int l_cpu) { + unsigned int mpidr, cpu, cluster; unsigned int data, tries, count; - int ret = -ETIMEDOUT; + mpidr = cpu_logical_map(l_cpu); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); BUG_ON(cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER); count = TIMEOUT_MSEC / POLL_MSEC; spin_lock_irq(&boot_lock); for (tries = 0; tries < count; tries++) { - if (hip04_cpu_table[cluster][cpu]) { - ret = -EBUSY; + if (hip04_cpu_table[cluster][cpu]) goto err; - } cpu_relax(); data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); if (data & CORE_WFI_STATUS(cpu)) @@ -220,64 +229,22 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) } if (tries >= count) goto err; + if (hip04_cluster_is_down(cluster)) + hip04_set_snoop_filter(cluster, 0); spin_unlock_irq(&boot_lock); - return 0; + return 1; err: spin_unlock_irq(&boot_lock); - return ret; -} - -static void hip04_mcpm_powered_up(void) -{ - unsigned int mpidr, cpu, cluster; - - mpidr = read_cpuid_mpidr(); - cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - - spin_lock(&boot_lock); - if (!hip04_cpu_table[cluster][cpu]) - hip04_cpu_table[cluster][cpu] = 1; - spin_unlock(&boot_lock); -} - -static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level) -{ - asm volatile (" \n" -" cmp r0, #0 \n" -" bxeq lr \n" - /* calculate fabric phys address */ -" adr r2, 2f \n" -" ldmia r2, {r1, r3} \n" -" sub r0, r2, r1 \n" -" ldr r2, [r0, r3] \n" - /* get cluster id from MPIDR */ -" mrc p15, 0, r0, c0, c0, 5 \n" -" ubfx r1, r0, #8, #8 \n" - /* 1 << cluster id */ -" mov r0, #1 \n" -" mov r3, r0, lsl r1 \n" -" ldr r0, [r2, #"__stringify(FAB_SF_MODE)"] \n" -" tst r0, r3 \n" -" bxne lr \n" -" orr r1, r0, r3 \n" -" str r1, [r2, #"__stringify(FAB_SF_MODE)"] \n" -"1: ldr r0, [r2, #"__stringify(FAB_SF_MODE)"] \n" -" tst r0, r3 \n" -" beq 1b \n" -" bx lr \n" - -" .align 2 \n" -"2: .word . \n" -" .word fabric_phys_addr \n" - ); + return 0; } - -static const struct mcpm_platform_ops hip04_mcpm_ops = { - .power_up = hip04_mcpm_power_up, - .power_down = hip04_mcpm_power_down, - .wait_for_powerdown = hip04_mcpm_wait_for_powerdown, - .powered_up = hip04_mcpm_powered_up, +#endif + +static struct smp_operations __initdata hip04_smp_ops = { + .smp_boot_secondary = hip04_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_die = hip04_cpu_die, + .cpu_kill = hip04_cpu_kill, +#endif }; static bool __init hip04_cpu_table_init(void) @@ -298,7 +265,7 @@ static bool __init hip04_cpu_table_init(void) return true; } -static int __init hip04_mcpm_init(void) +static int __init hip04_smp_init(void) { struct device_node *np, *np_sctl, *np_fab; struct resource fab_res; @@ -353,10 +320,6 @@ static int __init hip04_mcpm_init(void) ret = -EINVAL; goto err_table; } - ret = mcpm_platform_register(&hip04_mcpm_ops); - if (ret) { - goto err_table; - } /* * Fill the instruction address that is used after secondary core @@ -364,13 +327,11 @@ static int __init hip04_mcpm_init(void) */ writel_relaxed(hip04_boot_method[0], relocation); writel_relaxed(0xa5a5a5a5, relocation + 4); /* magic number */ - writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8); + writel_relaxed(virt_to_phys(secondary_startup), relocation + 8); writel_relaxed(0, relocation + 12); iounmap(relocation); - mcpm_sync_init(hip04_mcpm_power_up_setup); - mcpm_smp_set_ops(); - pr_info("HiP04 MCPM initialized\n"); + smp_set_ops(&hip04_smp_ops); return ret; err_table: iounmap(fabric); @@ -383,4 +344,4 @@ err_reloc: err: return ret; } -early_initcall(hip04_mcpm_init); +early_initcall(hip04_smp_init); diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 30003ba..5b0e363 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -37,7 +37,6 @@ #include <linux/stat.h> #include <linux/termios.h> -#include <asm/hardware/arm_timer.h> #include <asm/setup.h> #include <asm/param.h> /* HZ */ #include <asm/mach-types.h> diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index 0662087..e288010 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -27,7 +27,6 @@ #include "keystone.h" -static struct notifier_block platform_nb; static unsigned long keystone_dma_pfn_offset __read_mostly; static int keystone_platform_notifier(struct notifier_block *nb, @@ -49,11 +48,18 @@ static int keystone_platform_notifier(struct notifier_block *nb, return NOTIFY_OK; } +static struct notifier_block platform_nb = { + .notifier_call = keystone_platform_notifier, +}; + static void __init keystone_init(void) { - keystone_pm_runtime_init(); - if (platform_nb.notifier_call) + if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) { + keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START - + KEYSTONE_LOW_PHYS_START); bus_register_notifier(&platform_bus_type, &platform_nb); + } + keystone_pm_runtime_init(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } @@ -62,11 +68,9 @@ static phys_addr_t keystone_virt_to_idmap(unsigned long x) return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START; } -static void __init keystone_init_meminfo(void) +static long long __init keystone_pv_fixup(void) { - bool lpae = IS_ENABLED(CONFIG_ARM_LPAE); - bool pvpatch = IS_ENABLED(CONFIG_ARM_PATCH_PHYS_VIRT); - phys_addr_t offset = PHYS_OFFSET - KEYSTONE_LOW_PHYS_START; + long long offset; phys_addr_t mem_start, mem_end; mem_start = memblock_start_of_DRAM(); @@ -75,32 +79,21 @@ static void __init keystone_init_meminfo(void) /* nothing to do if we are running out of the <32-bit space */ if (mem_start >= KEYSTONE_LOW_PHYS_START && mem_end <= KEYSTONE_LOW_PHYS_END) - return; - - if (!lpae || !pvpatch) { - pr_crit("Enable %s%s%s to run outside 32-bit space\n", - !lpae ? __stringify(CONFIG_ARM_LPAE) : "", - (!lpae && !pvpatch) ? " and " : "", - !pvpatch ? __stringify(CONFIG_ARM_PATCH_PHYS_VIRT) : ""); - } + return 0; if (mem_start < KEYSTONE_HIGH_PHYS_START || mem_end > KEYSTONE_HIGH_PHYS_END) { pr_crit("Invalid address space for memory (%08llx-%08llx)\n", - (u64)mem_start, (u64)mem_end); + (u64)mem_start, (u64)mem_end); + return 0; } - offset += KEYSTONE_HIGH_PHYS_START; - __pv_phys_pfn_offset = PFN_DOWN(offset); - __pv_offset = (offset - PAGE_OFFSET); + offset = KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START; /* Populate the arch idmap hook */ arch_virt_to_idmap = keystone_virt_to_idmap; - platform_nb.notifier_call = keystone_platform_notifier; - keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START - - KEYSTONE_LOW_PHYS_START); - pr_info("Switching to high address space at 0x%llx\n", (u64)offset); + return offset; } static const char *const keystone_match[] __initconst = { @@ -115,5 +108,5 @@ DT_MACHINE_START(KEYSTONE, "Keystone") .smp = smp_ops(keystone_smp_ops), .init_machine = keystone_init, .dt_compat = keystone_match, - .init_meminfo = keystone_init_meminfo, + .pv_fixup = keystone_pv_fixup, MACHINE_END diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c index 5f46a7c..4bbb184 100644 --- a/arch/arm/mach-keystone/platsmp.c +++ b/arch/arm/mach-keystone/platsmp.c @@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu, return error; } -#ifdef CONFIG_ARM_LPAE -static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu) -{ - pgd_t *pgd0 = pgd_offset_k(0); - cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); - local_flush_tlb_all(); -} -#else -static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu) -{} -#endif - struct smp_operations keystone_smp_ops __initdata = { .smp_boot_secondary = keystone_smp_boot_secondary, - .smp_secondary_init = keystone_smp_secondary_initmem, }; diff --git a/arch/arm/mach-nspire/nspire.c b/arch/arm/mach-nspire/nspire.c index 3445a56..34c2a1b3 100644 --- a/arch/arm/mach-nspire/nspire.c +++ b/arch/arm/mach-nspire/nspire.c @@ -22,8 +22,6 @@ #include <asm/mach-types.h> #include <asm/mach/map.h> -#include <asm/hardware/timer-sp.h> - #include "mmio.h" #include "clcd.h" diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index c309593..44575ed 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -35,20 +35,19 @@ #include <linux/mtd/physmap.h> #include <linux/memblock.h> +#include <clocksource/timer-sp804.h> + #include <mach/hardware.h> #include <asm/irq.h> #include <asm/mach-types.h> -#include <asm/hardware/arm_timer.h> #include <asm/hardware/icst.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> #include <asm/mach/map.h> - #include <mach/platform.h> #include <mach/irqs.h> -#include <asm/hardware/timer-sp.h> #include <plat/sched_clock.h> @@ -381,10 +380,10 @@ void __init realview_timer_init(unsigned int timer_irq) /* * Initialise to a known state (all timers off) */ - writel(0, timer0_va_base + TIMER_CTRL); - writel(0, timer1_va_base + TIMER_CTRL); - writel(0, timer2_va_base + TIMER_CTRL); - writel(0, timer3_va_base + TIMER_CTRL); + sp804_timer_disable(timer0_va_base); + sp804_timer_disable(timer1_va_base); + sp804_timer_disable(timer2_va_base); + sp804_timer_disable(timer3_va_base); sp804_clocksource_init(timer3_va_base, "timer3"); sp804_clockevents_init(timer0_va_base, timer_irq, "timer0"); diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile index 61ff91e..ebc4d58 100644 --- a/arch/arm/mach-sa1100/Makefile +++ b/arch/arm/mach-sa1100/Makefile @@ -3,7 +3,7 @@ # # Common support -obj-y := clock.o generic.o irq.o #nmi-oopser.o +obj-y := clock.o generic.o #nmi-oopser.o # Specific board support obj-$(CONFIG_SA1100_ASSABET) += assabet.o diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 40e0d86..345e63f 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -20,9 +20,12 @@ #include <linux/ioport.h> #include <linux/platform_device.h> #include <linux/reboot.h> +#include <linux/irqchip/irq-sa11x0.h> #include <video/sa1100fb.h> +#include <soc/sa1100/pwer.h> + #include <asm/div64.h> #include <asm/mach/map.h> #include <asm/mach/flash.h> @@ -375,6 +378,18 @@ void __init sa1100_timer_init(void) pxa_timer_nodt_init(IRQ_OST0, io_p2v(0x90000000), 3686400); } +static struct resource irq_resource = + DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs"); + +void __init sa1100_init_irq(void) +{ + request_resource(&iomem_resource, &irq_resource); + + sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start); + + sa1100_init_gpio(); +} + /* * Disable the memory bus request/grant signals on the SA1110 to * ensure that we don't receive spurious memory requests. We set @@ -416,3 +431,25 @@ void sa1110_mb_enable(void) local_irq_restore(flags); } +int sa11x0_gpio_set_wake(unsigned int gpio, unsigned int on) +{ + if (on) + PWER |= BIT(gpio); + else + PWER &= ~BIT(gpio); + + return 0; +} + +int sa11x0_sc_set_wake(unsigned int irq, unsigned int on) +{ + if (BIT(irq) != IC_RTCAlrm) + return -EINVAL; + + if (on) + PWER |= PWER_RTC; + else + PWER &= ~PWER_RTC; + + return 0; +} diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c deleted file mode 100644 index 65aebfa..0000000 --- a/arch/arm/mach-sa1100/irq.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * linux/arch/arm/mach-sa1100/irq.c - * - * Copyright (C) 1999-2001 Nicolas Pitre - * - * Generic IRQ handling for the SA11x0, GPIO 11-27 IRQ demultiplexing. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/irqdomain.h> -#include <linux/ioport.h> -#include <linux/syscore_ops.h> - -#include <mach/hardware.h> -#include <mach/irqs.h> -#include <asm/mach/irq.h> -#include <asm/exception.h> - -#include "generic.h" - - -/* - * We don't need to ACK IRQs on the SA1100 unless they're GPIOs - * this is for internal IRQs i.e. from IRQ LCD to RTCAlrm. - */ -static void sa1100_mask_irq(struct irq_data *d) -{ - ICMR &= ~BIT(d->hwirq); -} - -static void sa1100_unmask_irq(struct irq_data *d) -{ - ICMR |= BIT(d->hwirq); -} - -/* - * Apart form GPIOs, only the RTC alarm can be a wakeup event. - */ -static int sa1100_set_wake(struct irq_data *d, unsigned int on) -{ - if (BIT(d->hwirq) == IC_RTCAlrm) { - if (on) - PWER |= PWER_RTC; - else - PWER &= ~PWER_RTC; - return 0; - } - return -EINVAL; -} - -static struct irq_chip sa1100_normal_chip = { - .name = "SC", - .irq_ack = sa1100_mask_irq, - .irq_mask = sa1100_mask_irq, - .irq_unmask = sa1100_unmask_irq, - .irq_set_wake = sa1100_set_wake, -}; - -static int sa1100_normal_irqdomain_map(struct irq_domain *d, - unsigned int irq, irq_hw_number_t hwirq) -{ - irq_set_chip_and_handler(irq, &sa1100_normal_chip, - handle_level_irq); - set_irq_flags(irq, IRQF_VALID); - - return 0; -} - -static struct irq_domain_ops sa1100_normal_irqdomain_ops = { - .map = sa1100_normal_irqdomain_map, - .xlate = irq_domain_xlate_onetwocell, -}; - -static struct irq_domain *sa1100_normal_irqdomain; - -static struct resource irq_resource = - DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs"); - -static struct sa1100irq_state { - unsigned int saved; - unsigned int icmr; - unsigned int iclr; - unsigned int iccr; -} sa1100irq_state; - -static int sa1100irq_suspend(void) -{ - struct sa1100irq_state *st = &sa1100irq_state; - - st->saved = 1; - st->icmr = ICMR; - st->iclr = ICLR; - st->iccr = ICCR; - - /* - * Disable all GPIO-based interrupts. - */ - ICMR &= ~(IC_GPIO11_27|IC_GPIO10|IC_GPIO9|IC_GPIO8|IC_GPIO7| - IC_GPIO6|IC_GPIO5|IC_GPIO4|IC_GPIO3|IC_GPIO2| - IC_GPIO1|IC_GPIO0); - - return 0; -} - -static void sa1100irq_resume(void) -{ - struct sa1100irq_state *st = &sa1100irq_state; - - if (st->saved) { - ICCR = st->iccr; - ICLR = st->iclr; - - ICMR = st->icmr; - } -} - -static struct syscore_ops sa1100irq_syscore_ops = { - .suspend = sa1100irq_suspend, - .resume = sa1100irq_resume, -}; - -static int __init sa1100irq_init_devicefs(void) -{ - register_syscore_ops(&sa1100irq_syscore_ops); - return 0; -} - -device_initcall(sa1100irq_init_devicefs); - -static asmlinkage void __exception_irq_entry -sa1100_handle_irq(struct pt_regs *regs) -{ - uint32_t icip, icmr, mask; - - do { - icip = (ICIP); - icmr = (ICMR); - mask = icip & icmr; - - if (mask == 0) - break; - - handle_domain_irq(sa1100_normal_irqdomain, - ffs(mask) - 1, regs); - } while (1); -} - -void __init sa1100_init_irq(void) -{ - request_resource(&iomem_resource, &irq_resource); - - /* disable all IRQs */ - ICMR = 0; - - /* all IRQs are IRQ, not FIQ */ - ICLR = 0; - - /* - * Whatever the doc says, this has to be set for the wait-on-irq - * instruction to work... on a SA1100 rev 9 at least. - */ - ICCR = 1; - - sa1100_normal_irqdomain = irq_domain_add_simple(NULL, - 32, IRQ_GPIO0_SC, - &sa1100_normal_irqdomain_ops, NULL); - - set_handle_irq(sa1100_handle_irq); - - sa1100_init_gpio(); -} diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 6ea09fe..23a04fe 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -41,8 +41,9 @@ #include <linux/bitops.h> #include <linux/reboot.h> +#include <clocksource/timer-sp804.h> + #include <asm/irq.h> -#include <asm/hardware/arm_timer.h> #include <asm/hardware/icst.h> #include <asm/mach-types.h> @@ -52,7 +53,6 @@ #include <asm/mach/map.h> #include <mach/hardware.h> #include <mach/platform.h> -#include <asm/hardware/timer-sp.h> #include <plat/sched_clock.h> @@ -798,10 +798,10 @@ void __init versatile_timer_init(void) /* * Initialise to a known state (all timers off) */ - writel(0, TIMER0_VA_BASE + TIMER_CTRL); - writel(0, TIMER1_VA_BASE + TIMER_CTRL); - writel(0, TIMER2_VA_BASE + TIMER_CTRL); - writel(0, TIMER3_VA_BASE + TIMER_CTRL); + sp804_timer_disable(TIMER0_VA_BASE); + sp804_timer_disable(TIMER1_VA_BASE); + sp804_timer_disable(TIMER2_VA_BASE); + sp804_timer_disable(TIMER3_VA_BASE); sp804_clocksource_init(TIMER3_VA_BASE, "timer3"); sp804_clockevents_init(TIMER0_VA_BASE, IRQ_TIMERINT0_1, "timer0"); diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index b4f92b9..7c6b976 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -6,7 +6,7 @@ comment "Processor Type" # ARM7TDMI config CPU_ARM7TDMI - bool "Support ARM7TDMI processor" + bool depends on !MMU select CPU_32v4T select CPU_ABRT_LV4T @@ -56,7 +56,7 @@ config CPU_ARM740T # ARM9TDMI config CPU_ARM9TDMI - bool "Support ARM9TDMI processor" + bool depends on !MMU select CPU_32v4T select CPU_ABRT_NOMMU @@ -604,6 +604,22 @@ config CPU_USE_DOMAINS This option enables or disables the use of domain switching via the set_fs() function. +config CPU_V7M_NUM_IRQ + int "Number of external interrupts connected to the NVIC" + depends on CPU_V7M + default 90 if ARCH_STM32 + default 38 if ARCH_EFM32 + default 112 if SOC_VF610 + default 240 + help + This option indicates the number of interrupts connected to the NVIC. + The value can be larger than the real number of interrupts supported + by the system, but must not be lower. + The default value is 240, corresponding to the maximum number of + interrupts supported by the NVIC on Cortex-M family. + + If unsure, keep default value. + # # CPU supports 36-bit I/O # @@ -624,6 +640,10 @@ config ARM_LPAE If unsure, say N. +config ARM_PV_FIXUP + def_bool y + depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE + config ARCH_PHYS_ADDR_T_64BIT def_bool ARM_LPAE diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index d3afdf9..57c8df5 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o @@ -55,6 +56,8 @@ obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o +CFLAGS_copypage-feroceon.o := -march=armv5te + obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index e309c8f..71b3d33 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -38,10 +38,11 @@ struct l2c_init_data { unsigned way_size_0; unsigned num_lock; void (*of_parse)(const struct device_node *, u32 *, u32 *); - void (*enable)(void __iomem *, u32, unsigned); + void (*enable)(void __iomem *, unsigned); void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); void (*save)(void __iomem *); void (*configure)(void __iomem *); + void (*unlock)(void __iomem *, unsigned); struct outer_cache_fns outer_cache; }; @@ -110,14 +111,6 @@ static inline void l2c_unlock(void __iomem *base, unsigned num) static void l2c_configure(void __iomem *base) { - if (outer_cache.configure) { - outer_cache.configure(&l2x0_saved_regs); - return; - } - - if (l2x0_data->configure) - l2x0_data->configure(base); - l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL); } @@ -125,18 +118,16 @@ static void l2c_configure(void __iomem *base) * Enable the L2 cache controller. This function must only be * called when the cache controller is known to be disabled. */ -static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void l2c_enable(void __iomem *base, unsigned num_lock) { unsigned long flags; - /* Do not touch the controller if already enabled. */ - if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN) - return; - - l2x0_saved_regs.aux_ctrl = aux; - l2c_configure(base); + if (outer_cache.configure) + outer_cache.configure(&l2x0_saved_regs); + else + l2x0_data->configure(base); - l2c_unlock(base, num_lock); + l2x0_data->unlock(base, num_lock); local_irq_save(flags); __l2c_op_way(base + L2X0_INV_WAY); @@ -163,7 +154,11 @@ static void l2c_save(void __iomem *base) static void l2c_resume(void) { - l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock); + void __iomem *base = l2x0_base; + + /* Do not touch the controller if already enabled. */ + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_data->num_lock); } /* @@ -252,6 +247,8 @@ static const struct l2c_init_data l2c210_data __initconst = { .num_lock = 1, .enable = l2c_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -391,16 +388,22 @@ static void l2c220_sync(void) raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void l2c220_enable(void __iomem *base, unsigned num_lock) { /* * Always enable non-secure access to the lockdown registers - * we write to them as part of the L2C enable sequence so they * need to be accessible. */ - aux |= L220_AUX_CTRL_NS_LOCKDOWN; + l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN; - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); +} + +static void l2c220_unlock(void __iomem *base, unsigned num_lock) +{ + if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN) + l2c_unlock(base, num_lock); } static const struct l2c_init_data l2c220_data = { @@ -409,6 +412,8 @@ static const struct l2c_init_data l2c220_data = { .num_lock = 1, .enable = l2c220_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c220_unlock, .outer_cache = { .inv_range = l2c220_inv_range, .clean_range = l2c220_clean_range, @@ -569,6 +574,8 @@ static void l2c310_configure(void __iomem *base) { unsigned revision; + l2c_configure(base); + /* restore pl310 setup */ l2c_write_sec(l2x0_saved_regs.tag_latency, base, L310_TAG_LATENCY_CTRL); @@ -603,10 +610,11 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v return NOTIFY_OK; } -static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void __init l2c310_enable(void __iomem *base, unsigned num_lock) { unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; + u32 aux = l2x0_saved_regs.aux_ctrl; if (rev >= L310_CACHE_ID_RTL_R2P0) { if (cortex_a9) { @@ -649,9 +657,9 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) * we write to them as part of the L2C enable sequence so they * need to be accessible. */ - aux |= L310_AUX_CTRL_NS_LOCKDOWN; + l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN; - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); /* Read back resulting AUX_CTRL value as it could have been altered. */ aux = readl_relaxed(base + L2X0_AUX_CTRL); @@ -755,6 +763,12 @@ static void l2c310_resume(void) set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); } +static void l2c310_unlock(void __iomem *base, unsigned num_lock) +{ + if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN) + l2c_unlock(base, num_lock); +} + static const struct l2c_init_data l2c310_init_fns __initconst = { .type = "L2C-310", .way_size_0 = SZ_8K, @@ -763,6 +777,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -856,8 +871,11 @@ static int __init __l2c_init(const struct l2c_init_data *data, * Check if l2x0 controller is already enabled. If we are booting * in non-secure mode accessing the below registers will fault. */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) - data->enable(l2x0_base, aux, data->num_lock); + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + l2x0_saved_regs.aux_ctrl = aux; + + data->enable(l2x0_base, data->num_lock); + } outer_cache = fns; @@ -1066,6 +1084,8 @@ static const struct l2c_init_data of_l2c210_data __initconst = { .of_parse = l2x0_of_parse, .enable = l2c_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1084,6 +1104,8 @@ static const struct l2c_init_data of_l2c220_data __initconst = { .of_parse = l2x0_of_parse, .enable = l2c220_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c220_unlock, .outer_cache = { .inv_range = l2c220_inv_range, .clean_range = l2c220_clean_range, @@ -1199,6 +1221,26 @@ static void __init l2c310_of_parse(const struct device_node *np, pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n"); } + ret = of_property_read_u32(np, "prefetch-data", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH; + else + prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF prefetch-data property value is missing\n"); + } + + ret = of_property_read_u32(np, "prefetch-instr", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH; + else + prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF prefetch-instr property value is missing\n"); + } + l2x0_saved_regs.prefetch_ctrl = prefetch; } @@ -1211,6 +1253,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1240,6 +1283,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1366,7 +1410,7 @@ static void aurora_save(void __iomem *base) * For Aurora cache in no outer mode, enable via the CP15 coprocessor * broadcasting of cache commands to L2. */ -static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, +static void __init aurora_enable_no_outer(void __iomem *base, unsigned num_lock) { u32 u; @@ -1377,7 +1421,7 @@ static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, isb(); - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); } static void __init aurora_fixup(void __iomem *base, u32 cache_id, @@ -1416,6 +1460,8 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = { .enable = l2c_enable, .fixup = aurora_fixup, .save = aurora_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = aurora_inv_range, .clean_range = aurora_clean_range, @@ -1435,6 +1481,8 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = { .enable = aurora_enable_no_outer, .fixup = aurora_fixup, .save = aurora_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .resume = l2c_resume, }, @@ -1585,6 +1633,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = { .enable = l2c310_enable, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = bcm_inv_range, .clean_range = bcm_clean_range, @@ -1608,6 +1657,7 @@ static void __init tauros3_save(void __iomem *base) static void tauros3_configure(void __iomem *base) { + l2c_configure(base); writel_relaxed(l2x0_saved_regs.aux2_ctrl, base + TAUROS3_AUX2_CTRL); writel_relaxed(l2x0_saved_regs.prefetch_ctrl, @@ -1621,6 +1671,7 @@ static const struct l2c_init_data of_tauros3_data __initconst = { .enable = l2c_enable, .save = tauros3_save, .configure = tauros3_configure, + .unlock = l2c_unlock, /* Tauros3 broadcasts L1 cache operations to L2 */ .outer_cache = { .resume = l2c_resume, diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7e7583d..1ced8a0 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -148,11 +148,14 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs); +static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs); struct dma_map_ops arm_coherent_dma_ops = { .alloc = arm_coherent_dma_alloc, .free = arm_coherent_dma_free, - .mmap = arm_dma_mmap, + .mmap = arm_coherent_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, .map_sg = arm_dma_map_sg, @@ -690,10 +693,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size, attrs, __builtin_return_address(0)); } -/* - * Create userspace mapping for the DMA-coherent memory. - */ -int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, +static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { @@ -704,8 +704,6 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn = dma_to_pfn(dev, dma_addr); unsigned long off = vma->vm_pgoff; - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -721,6 +719,26 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, } /* + * Create userspace mapping for the DMA-coherent memory. + */ +static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ +#ifdef CONFIG_MMU + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); +#endif /* CONFIG_MMU */ + return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +/* * Free a buffer as defined by the above mapping. */ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 7186382..6ca7d9a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1387,123 +1387,98 @@ static void __init map_lowmem(void) } } -#ifdef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_PV_FIXUP +extern unsigned long __atags_pointer; +typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); +pgtables_remap lpae_pgtables_remap_asm; + /* * early_paging_init() recreates boot time page table setup, allowing machines * to switch over to a high (>4G) address space on LPAE systems */ -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) +void __init early_paging_init(const struct machine_desc *mdesc) { - pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags; - unsigned long map_start, map_end; - pgd_t *pgd0, *pgdk; - pud_t *pud0, *pudk, *pud_start; - pmd_t *pmd0, *pmdk; - phys_addr_t phys; - int i; + pgtables_remap *lpae_pgtables_remap; + unsigned long pa_pgd; + unsigned int cr, ttbcr; + long long offset; + void *boot_data; - if (!(mdesc->init_meminfo)) + if (!mdesc->pv_fixup) return; - /* remap kernel code and data */ - map_start = init_mm.start_code & PMD_MASK; - map_end = ALIGN(init_mm.brk, PMD_SIZE); + offset = mdesc->pv_fixup(); + if (offset == 0) + return; - /* get a handle on things... */ - pgd0 = pgd_offset_k(0); - pud_start = pud0 = pud_offset(pgd0, 0); - pmd0 = pmd_offset(pud0, 0); + /* + * Get the address of the remap function in the 1:1 identity + * mapping setup by the early page table assembly code. We + * must get this prior to the pv update. The following barrier + * ensures that this is complete before we fixup any P:V offsets. + */ + lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); + pa_pgd = __pa(swapper_pg_dir); + boot_data = __va(__atags_pointer); + barrier(); - pgdk = pgd_offset_k(map_start); - pudk = pud_offset(pgdk, map_start); - pmdk = pmd_offset(pudk, map_start); + pr_info("Switching physical address space to 0x%08llx\n", + (u64)PHYS_OFFSET + offset); - mdesc->init_meminfo(); + /* Re-set the phys pfn offset, and the pv offset */ + __pv_offset += offset; + __pv_phys_pfn_offset += PFN_DOWN(offset); /* Run the patch stub to update the constants */ fixup_pv_table(&__pv_table_begin, (&__pv_table_end - &__pv_table_begin) << 2); /* - * Cache cleaning operations for self-modifying code - * We should clean the entries by MVA but running a - * for loop over every pv_table entry pointer would - * just complicate the code. - */ - flush_cache_louis(); - dsb(ishst); - isb(); - - /* - * FIXME: This code is not architecturally compliant: we modify - * the mappings in-place, indeed while they are in use by this - * very same code. This may lead to unpredictable behaviour of - * the CPU. - * - * Even modifying the mappings in a separate page table does - * not resolve this. - * - * The architecture strongly recommends that when a mapping is - * changed, that it is changed by first going via an invalid - * mapping and back to the new mapping. This is to ensure that - * no TLB conflicts (caused by the TLB having more than one TLB - * entry match a translation) can occur. However, doing that - * here will result in unmapping the code we are running. - */ - pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n"); - add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); - - /* - * Remap level 1 table. This changes the physical addresses - * used to refer to the level 2 page tables to the high - * physical address alias, leaving everything else the same. - */ - for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { - set_pud(pud0, - __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); - pmd0 += PTRS_PER_PMD; - } - - /* - * Remap the level 2 table, pointing the mappings at the high - * physical address alias of these pages. - */ - phys = __pa(map_start); - do { - *pmdk++ = __pmd(phys | pmdprot); - phys += PMD_SIZE; - } while (phys < map_end); - - /* - * Ensure that the above updates are flushed out of the cache. - * This is not strictly correct; on a system where the caches - * are coherent with each other, but the MMU page table walks - * may not be coherent, flush_cache_all() may be a no-op, and - * this will fail. + * We changing not only the virtual to physical mapping, but also + * the physical addresses used to access memory. We need to flush + * all levels of cache in the system with caching disabled to + * ensure that all data is written back, and nothing is prefetched + * into the caches. We also need to prevent the TLB walkers + * allocating into the caches too. Note that this is ARMv7 LPAE + * specific. */ + cr = get_cr(); + set_cr(cr & ~(CR_I | CR_C)); + asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr)); + asm volatile("mcr p15, 0, %0, c2, c0, 2" + : : "r" (ttbcr & ~(3 << 8 | 3 << 10))); flush_cache_all(); /* - * Re-write the TTBR values to point them at the high physical - * alias of the page tables. We expect __va() will work on - * cpu_get_pgd(), which returns the value of TTBR0. + * Fixup the page tables - this must be in the idmap region as + * we need to disable the MMU to do this safely, and hence it + * needs to be assembly. It's fairly simple, as we're using the + * temporary tables setup by the initial assembly code. */ - cpu_switch_mm(pgd0, &init_mm); - cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); + lpae_pgtables_remap(offset, pa_pgd, boot_data); - /* Finally flush any stale TLB values. */ - local_flush_bp_all(); - local_flush_tlb_all(); + /* Re-enable the caches and cacheable TLB walks */ + asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); + set_cr(cr); } #else -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) +void __init early_paging_init(const struct machine_desc *mdesc) { - if (mdesc->init_meminfo) - mdesc->init_meminfo(); + long long offset; + + if (!mdesc->pv_fixup) + return; + + offset = mdesc->pv_fixup(); + if (offset == 0) + return; + + pr_crit("Physical address space modification is only to support Keystone2.\n"); + pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n"); + pr_crit("feature. Your kernel may crash now, have a good day.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); } #endif diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index a014dfa..afd7e05 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -304,15 +304,6 @@ void __init sanity_check_meminfo(void) } /* - * early_paging_init() recreates boot time page table setup, allowing machines - * to switch over to a high (>4G) address space on LPAE systems - */ -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) -{ -} - -/* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. */ diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 10405b8..c6141a5 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -36,14 +36,16 @@ * * It is assumed that: * - we are not using split page tables + * + * Note that we always need to flush BTAC/BTB if IBE is set + * even on Cortex-A8 revisions not affected by 430973. + * If IBE is not set, the flush BTAC/BTB won't do anything. */ ENTRY(cpu_ca8_switch_mm) #ifdef CONFIG_MMU mov r2, #0 -#ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif -#endif ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mmid r1, r1 @ get mm->context.id @@ -148,10 +150,10 @@ ENDPROC(cpu_v7_set_pte_ext) * Macro for setting up the TTBRx and TTBCR registers. * - \ttb0 and \ttb1 updated with the corresponding flags. */ - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp mcr p15, 0, \zero, c2, c0, 2 @ TTB control register - ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP) - ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP) + ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP) ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index d3daed0..5e5720e 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext) * Macro for setting up the TTBRx and TTBCR registers. * - \ttbr1 updated. */ - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? - mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register + cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET? + mrc p15, 0, \tmp, c2, c0, 2 @ TTB control egister orr \tmp, \tmp, #TTB_EAE ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) @@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext) */ orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR - mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits - mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits + mov \tmp, \ttbr1, lsr #20 + mov \ttbr1, \ttbr1, lsl #12 addls \ttbr1, \ttbr1, #TTBR1_OFFSET mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1 - mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits - mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits - mcrr p15, 0, \ttbr0, \tmp, c2 @ load TTBR0 .endm /* diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 75ae721..0716bbe 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -252,6 +252,12 @@ ENDPROC(cpu_pj4b_do_resume) * Initialise TLB, Caches, and MMU state ready to switch the MMU * on. Return in r0 the new CP15 C1 control register setting. * + * r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack + * r4: TTBR0 (low word) + * r5: TTBR0 (high word if LPAE) + * r8: TTBR1 + * r9: Main ID register + * * This should be able to cover all ARMv7 cores. * * It is assumed that: @@ -279,6 +285,78 @@ __v7_ca17mp_setup: #endif b __v7_setup +/* + * Errata: + * r0, r10 available for use + * r1, r2, r4, r5, r9, r13: must be preserved + * r3: contains MIDR rX number in bits 23-20 + * r6: contains MIDR rXpY as 8-bit XY number + * r9: MIDR + */ +__ca8_errata: +#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) + teq r3, #0x00100000 @ only present in r1p* + mrceq p15, 0, r0, c1, c0, 1 @ read aux control register + orreq r0, r0, #(1 << 6) @ set IBE to 1 + mcreq p15, 0, r0, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_458693 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 0, r0, c1, c0, 1 @ read aux control register + orreq r0, r0, #(1 << 5) @ set L1NEON to 1 + orreq r0, r0, #(1 << 9) @ set PLDNOP to 1 + mcreq p15, 0, r0, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_460075 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 1, r0, c9, c0, 2 @ read L2 cache aux ctrl register + tsteq r0, #1 << 22 + orreq r0, r0, #(1 << 22) @ set the Write Allocate disable bit + mcreq p15, 1, r0, c9, c0, 2 @ write the L2 cache aux ctrl register +#endif + b __errata_finish + +__ca9_errata: +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r0, c15, c0, 1 @ read diagnostic register + orrle r0, r0, #1 << 4 @ set bit #4 + mcrle p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register + orreq r0, r0, #1 << 12 @ set bit #12 + orreq r0, r0, #1 << 22 @ set bit #22 + mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_743622 + teq r3, #0x00200000 @ only present in r2p* + mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register + orreq r0, r0, #1 << 6 @ set bit #6 + mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) + ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 + ALT_UP_B(1f) + mrclt p15, 0, r0, c15, c0, 1 @ read diagnostic register + orrlt r0, r0, #1 << 11 @ set bit #11 + mcrlt p15, 0, r0, c15, c0, 1 @ write diagnostic register +1: +#endif + b __errata_finish + +__ca15_errata: +#ifdef CONFIG_ARM_ERRATA_773022 + cmp r6, #0x4 @ only present up to r0p4 + mrcle p15, 0, r0, c1, c0, 1 @ read aux control register + orrle r0, r0, #1 << 1 @ disable loop buffer + mcrle p15, 0, r0, c1, c0, 1 @ write aux control register +#endif + b __errata_finish + __v7_pj4b_setup: #ifdef CONFIG_CPU_PJ4B @@ -339,96 +417,38 @@ __v7_setup: bl v7_invalidate_l1 ldmia r12, {r0-r5, r7, r9, r11, lr} - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - and r10, r0, #0xff000000 @ ARM? - teq r10, #0x41000000 - bne 3f - and r5, r0, #0x00f00000 @ variant - and r6, r0, #0x0000000f @ revision - orr r6, r6, r5, lsr #20-4 @ combine variant and revision - ubfx r0, r0, #4, #12 @ primary part number + and r0, r9, #0xff000000 @ ARM? + teq r0, #0x41000000 + bne __errata_finish + and r3, r9, #0x00f00000 @ variant + and r6, r9, #0x0000000f @ revision + orr r6, r6, r3, lsr #20-4 @ combine variant and revision + ubfx r0, r9, #4, #12 @ primary part number /* Cortex-A8 Errata */ ldr r10, =0x00000c08 @ Cortex-A8 primary part number teq r0, r10 - bne 2f -#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) - - teq r5, #0x00100000 @ only present in r1p* - mrceq p15, 0, r10, c1, c0, 1 @ read aux control register - orreq r10, r10, #(1 << 6) @ set IBE to 1 - mcreq p15, 0, r10, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_458693 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 0, r10, c1, c0, 1 @ read aux control register - orreq r10, r10, #(1 << 5) @ set L1NEON to 1 - orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 - mcreq p15, 0, r10, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_460075 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register - tsteq r10, #1 << 22 - orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit - mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register -#endif - b 3f + beq __ca8_errata /* Cortex-A9 Errata */ -2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number + ldr r10, =0x00000c09 @ Cortex-A9 primary part number teq r0, r10 - bne 3f -#ifdef CONFIG_ARM_ERRATA_742230 - cmp r6, #0x22 @ only present up to r2p2 - mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrle r10, r10, #1 << 4 @ set bit #4 - mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_742231 - teq r6, #0x20 @ present in r2p0 - teqne r6, #0x21 @ present in r2p1 - teqne r6, #0x22 @ present in r2p2 - mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register - orreq r10, r10, #1 << 12 @ set bit #12 - orreq r10, r10, #1 << 22 @ set bit #22 - mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_743622 - teq r5, #0x00200000 @ only present in r2p* - mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register - orreq r10, r10, #1 << 6 @ set bit #6 - mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) - ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 - ALT_UP_B(1f) - mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrlt r10, r10, #1 << 11 @ set bit #11 - mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register -1: -#endif + beq __ca9_errata /* Cortex-A15 Errata */ -3: ldr r10, =0x00000c0f @ Cortex-A15 primary part number + ldr r10, =0x00000c0f @ Cortex-A15 primary part number teq r0, r10 - bne 4f + beq __ca15_errata -#ifdef CONFIG_ARM_ERRATA_773022 - cmp r6, #0x4 @ only present up to r0p4 - mrcle p15, 0, r10, c1, c0, 1 @ read aux control register - orrle r10, r10, #1 << 1 @ disable loop buffer - mcrle p15, 0, r10, c1, c0, 1 @ write aux control register -#endif - -4: mov r10, #0 +__errata_finish: + mov r10, #0 mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #ifdef CONFIG_MMU mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs - v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup - ldr r5, =PRRR @ PRRR + v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup + ldr r3, =PRRR @ PRRR ldr r6, =NMRR @ NMRR - mcr p15, 0, r5, c10, c2, 0 @ write PRRR + mcr p15, 0, r3, c10, c2, 0 @ write PRRR mcr p15, 0, r6, c10, c2, 1 @ write NMRR #endif dsb @ Complete invalidations @@ -437,22 +457,22 @@ __v7_setup: and r0, r0, #(0xf << 12) @ ThumbEE enabled field teq r0, #(1 << 12) @ check if ThumbEE is present bne 1f - mov r5, #0 - mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0 + mov r3, #0 + mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0 mrc p14, 6, r0, c0, c0, 0 @ load TEECR orr r0, r0, #1 @ set the 1st bit in order to mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access 1: #endif - adr r5, v7_crval - ldmia r5, {r5, r6} + adr r3, v7_crval + ldmia r3, {r3, r6} ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables #ifdef CONFIG_SWP_EMULATE - orr r5, r5, #(1 << 10) @ set SW bit in "clear" + orr r3, r3, #(1 << 10) @ set SW bit in "clear" bic r6, r6, #(1 << 10) @ clear it in "mmuset" #endif mrc p15, 0, r0, c1, c0, 0 @ read control register - bic r0, r0, r5 @ clear bits them + bic r0, r0, r3 @ clear bits them orr r0, r0, r6 @ set them THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions ret lr @ return to head.S:__ret diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index e08e1f2..67d9209 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -98,7 +98,7 @@ __v7m_setup: str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority @ SVC to run the kernel in this mode - adr r1, BSYM(1f) + badr r1, 1f ldr r5, [r12, #11 * 4] @ read the SVC vector entry str r1, [r12, #11 * 4] @ write the temporary SVC vector entry mov r6, lr @ save LR diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S new file mode 100644 index 0000000..1867f3e4 --- /dev/null +++ b/arch/arm/mm/pv-fixup-asm.S @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2015 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This assembly is required to safely remap the physical address space + * for Keystone 2 + */ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/cp15.h> +#include <asm/memory.h> +#include <asm/pgtable.h> + + .section ".idmap.text", "ax" + +#define L1_ORDER 3 +#define L2_ORDER 3 + +ENTRY(lpae_pgtables_remap_asm) + stmfd sp!, {r4-r8, lr} + + mrc p15, 0, r8, c1, c0, 0 @ read control reg + bic ip, r8, #CR_M @ disable caches and MMU + mcr p15, 0, ip, c1, c0, 0 + dsb + isb + + /* Update level 2 entries covering the kernel */ + ldr r6, =(_end - 1) + add r7, r2, #0x1000 + add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER + add r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER) +1: ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L2_ORDER + cmp r7, r6 + bls 1b + + /* Update level 2 entries for the boot data */ + add r7, r2, #0x1000 + add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER + bic r7, r7, #(1 << L2_ORDER) - 1 + ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L2_ORDER + ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7] + + /* Update level 1 entries */ + mov r6, #4 + mov r7, r2 +2: ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L1_ORDER + subs r6, r6, #1 + bne 2b + + mrrc p15, 0, r4, r5, c2 @ read TTBR0 + adds r4, r4, r0 @ update physical address + adc r5, r5, r1 + mcrr p15, 0, r4, r5, c2 @ write back TTBR0 + mrrc p15, 1, r4, r5, c2 @ read TTBR1 + adds r4, r4, r0 @ update physical address + adc r5, r5, r1 + mcrr p15, 1, r4, r5, c2 @ write back TTBR1 + + dsb + + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ I+BTB cache invalidate + mcr p15, 0, ip, c8, c7, 0 @ local_flush_tlb_all() + dsb + isb + + mcr p15, 0, r8, c1, c0, 0 @ re-enable MMU + dsb + isb + + ldmfd sp!, {r4-r8, pc} +ENDPROC(lpae_pgtables_remap_asm) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 8aa79105..9d259d9 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -6,9 +6,15 @@ obj-vdso := vgettimeofday.o datapage.o targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING -ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds @@ -40,10 +46,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Actual build commands quiet_cmd_vdsold = VDSO $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \ - $(call cc-ldoption, -Wl$(comma)--build-id) \ - -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \ - -Wl,-z,common-page-size=4096 -o $@ + cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ |