From 4d6b7a779be34e1df296abc1dc555134a8cf34af Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Nov 2010 18:15:53 +0100 Subject: ARM: 6512/1: perf: fix warnings generated by sparse Russell reported a number of warnings coming from sparse when checking the ARM perf_event.c files: | perf_event.c seems to also have problems too: | | CHECK arch/arm/kernel/perf_event.c | arch/arm/kernel/perf_event.c:37:1: warning: symbol 'pmu_lock' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:70:1: warning: symbol 'cpu_hw_events' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:1006:1: warning: symbol 'armv6pmu_enable_event' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:1113:1: warning: symbol 'armv6pmu_stop' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:1956:6: warning: symbol 'armv7pmu_enable_event' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:3072:14: warning: incorrect type in argument 1 (different address spaces) | arch/arm/kernel/perf_event.c:3072:14: expected void const volatile [noderef] * | arch/arm/kernel/perf_event.c:3072:14: got struct frame_tail *tail | arch/arm/kernel/perf_event.c:3074:49: warning: incorrect type in argument 2 (different address spaces) | arch/arm/kernel/perf_event.c:3074:49: expected void const [noderef] *from | arch/arm/kernel/perf_event.c:3074:49: got struct frame_tail *tail This patch resolves these issues so we can live in silence again. Reported-by: Russell King Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 18 +++++++++--------- arch/arm/kernel/perf_event_v6.c | 10 +++++----- arch/arm/kernel/perf_event_v7.c | 10 +++++----- arch/arm/kernel/perf_event_xscale.c | 8 ++++---- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 421a4bb..50c197b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -32,7 +32,7 @@ static struct platform_device *pmu_device; * Hardware lock to serialize accesses to PMU registers. Needed for the * read/modify/write sequences. */ -DEFINE_SPINLOCK(pmu_lock); +static DEFINE_SPINLOCK(pmu_lock); /* * ARMv6 supports a maximum of 3 events, starting from index 1. If we add @@ -65,7 +65,7 @@ struct cpu_hw_events { */ unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; }; -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); struct arm_pmu { enum arm_perf_pmu_ids id; @@ -673,17 +673,17 @@ arch_initcall(init_hw_perf_events); * This code has been adapted from the ARM OProfile support. */ struct frame_tail { - struct frame_tail *fp; - unsigned long sp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long sp; + unsigned long lr; } __attribute__((packed)); /* * Get the return address for a single stackframe and return a pointer to the * next frame tail. */ -static struct frame_tail * -user_backtrace(struct frame_tail *tail, +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, struct perf_callchain_entry *entry) { struct frame_tail buftail; @@ -709,10 +709,10 @@ user_backtrace(struct frame_tail *tail, void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail *tail; + struct frame_tail __user *tail; - tail = (struct frame_tail *)regs->ARM_fp - 1; + tail = (struct frame_tail __user *)regs->ARM_fp - 1; while (tail && !((unsigned long)tail & 0x3)) tail = user_backtrace(tail, entry); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 7aeb07d..3f427aa 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -400,7 +400,7 @@ armv6pmu_write_counter(int counter, WARN_ONCE(1, "invalid counter number (%d)\n", counter); } -void +static void armv6pmu_enable_event(struct hw_perf_event *hwc, int idx) { @@ -625,7 +625,7 @@ static const struct arm_pmu armv6pmu = { .max_period = (1LLU << 32) - 1, }; -const struct arm_pmu *__init armv6pmu_init(void) +static const struct arm_pmu *__init armv6pmu_init(void) { return &armv6pmu; } @@ -655,17 +655,17 @@ static const struct arm_pmu armv6mpcore_pmu = { .max_period = (1LLU << 32) - 1, }; -const struct arm_pmu *__init armv6mpcore_pmu_init(void) +static const struct arm_pmu *__init armv6mpcore_pmu_init(void) { return &armv6mpcore_pmu; } #else -const struct arm_pmu *__init armv6pmu_init(void) +static const struct arm_pmu *__init armv6pmu_init(void) { return NULL; } -const struct arm_pmu *__init armv6mpcore_pmu_init(void) +static const struct arm_pmu *__init armv6mpcore_pmu_init(void) { return NULL; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4d04239..a68ff1c 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -681,7 +681,7 @@ static void armv7_pmnc_dump_regs(void) } #endif -void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) { unsigned long flags; @@ -874,7 +874,7 @@ static u32 __init armv7_reset_read_pmnc(void) return nb_cnt + 1; } -const struct arm_pmu *__init armv7_a8_pmu_init(void) +static const struct arm_pmu *__init armv7_a8_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA8; armv7pmu.name = "ARMv7 Cortex-A8"; @@ -884,7 +884,7 @@ const struct arm_pmu *__init armv7_a8_pmu_init(void) return &armv7pmu; } -const struct arm_pmu *__init armv7_a9_pmu_init(void) +static const struct arm_pmu *__init armv7_a9_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA9; armv7pmu.name = "ARMv7 Cortex-A9"; @@ -894,12 +894,12 @@ const struct arm_pmu *__init armv7_a9_pmu_init(void) return &armv7pmu; } #else -const struct arm_pmu *__init armv7_a8_pmu_init(void) +static const struct arm_pmu *__init armv7_a8_pmu_init(void) { return NULL; } -const struct arm_pmu *__init armv7_a9_pmu_init(void) +static const struct arm_pmu *__init armv7_a9_pmu_init(void) { return NULL; } diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 4e95927..f14fbb6 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -428,7 +428,7 @@ static const struct arm_pmu xscale1pmu = { .max_period = (1LLU << 32) - 1, }; -const struct arm_pmu *__init xscale1pmu_init(void) +static const struct arm_pmu *__init xscale1pmu_init(void) { return &xscale1pmu; } @@ -790,17 +790,17 @@ static const struct arm_pmu xscale2pmu = { .max_period = (1LLU << 32) - 1, }; -const struct arm_pmu *__init xscale2pmu_init(void) +static const struct arm_pmu *__init xscale2pmu_init(void) { return &xscale2pmu; } #else -const struct arm_pmu *__init xscale1pmu_init(void) +static const struct arm_pmu *__init xscale1pmu_init(void) { return NULL; } -const struct arm_pmu *__init xscale2pmu_init(void) +static const struct arm_pmu *__init xscale2pmu_init(void) { return NULL; } -- cgit v1.1 From 961ec6daa7b14f376c30d447a830fa4783a2112c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Dec 2010 18:01:49 +0100 Subject: ARM: 6521/1: perf: use raw_spinlock_t for pmu_lock For kernels built with PREEMPT_RT, critical sections protected by standard spinlocks are preemptible. This is not acceptable on perf as (a) we may be scheduled onto a different CPU whilst reading/writing banked PMU registers and (b) the latency when reading the PMU registers becomes unpredictable. This patch upgrades the pmu_lock spinlock to a raw_spinlock instead. Reported-by: Jamie Iles Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 2 +- arch/arm/kernel/perf_event_v6.c | 20 ++++++++++---------- arch/arm/kernel/perf_event_v7.c | 16 ++++++++-------- arch/arm/kernel/perf_event_xscale.c | 32 ++++++++++++++++---------------- 4 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 50c197b..624e2a5 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -32,7 +32,7 @@ static struct platform_device *pmu_device; * Hardware lock to serialize accesses to PMU registers. Needed for the * read/modify/write sequences. */ -static DEFINE_SPINLOCK(pmu_lock); +static DEFINE_RAW_SPINLOCK(pmu_lock); /* * ARMv6 supports a maximum of 3 events, starting from index 1. If we add diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 3f427aa..c058bfc 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -426,12 +426,12 @@ armv6pmu_enable_event(struct hw_perf_event *hwc, * Mask out the current event and set the counter to count the event * that we're interested in. */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static irqreturn_t @@ -500,11 +500,11 @@ armv6pmu_start(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val |= ARMV6_PMCR_ENABLE; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -512,11 +512,11 @@ armv6pmu_stop(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~ARMV6_PMCR_ENABLE; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int @@ -570,12 +570,12 @@ armv6pmu_disable_event(struct hw_perf_event *hwc, * of ETM bus signal assertion cycles. The external reporting should * be disabled and so this should never increment. */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -599,12 +599,12 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, * Unlike UP ARMv6, we don't have a way of stopping the counters. We * simply disable the interrupt reporting. */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static const struct arm_pmu armv6pmu = { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index a68ff1c..2e14025 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -689,7 +689,7 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* * Disable counter @@ -713,7 +713,7 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) */ armv7_pmnc_enable_counter(idx); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) @@ -723,7 +723,7 @@ static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) /* * Disable counter and interrupt */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* * Disable counter @@ -735,7 +735,7 @@ static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) */ armv7_pmnc_disable_intens(idx); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) @@ -805,20 +805,20 @@ static void armv7pmu_start(void) { unsigned long flags; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* Enable all counters */ armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void armv7pmu_stop(void) { unsigned long flags; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* Disable all counters */ armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index f14fbb6..28cd3b0 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -291,12 +291,12 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) return; } - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~mask; val |= evt; xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -322,12 +322,12 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) return; } - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~mask; val |= evt; xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int @@ -355,11 +355,11 @@ xscale1pmu_start(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val |= XSCALE_PMU_ENABLE; xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -367,11 +367,11 @@ xscale1pmu_stop(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~XSCALE_PMU_ENABLE; xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static inline u32 @@ -635,10 +635,10 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) return; } - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); xscale2pmu_write_event_select(evtsel); xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -678,10 +678,10 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) return; } - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); xscale2pmu_write_event_select(evtsel); xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int @@ -705,11 +705,11 @@ xscale2pmu_start(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; val |= XSCALE_PMU_ENABLE; xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -717,11 +717,11 @@ xscale2pmu_stop(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale2pmu_read_pmnc(); val &= ~XSCALE_PMU_ENABLE; xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static inline u32 -- cgit v1.1 From ac88e07122fc0eb5cbad403be97ef02c317a06b7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 24 Nov 2010 16:51:17 +0000 Subject: ARM: hw_breakpoint: ensure OS lock is clear before writing to debug registers ARMv7 architects a system for saving and restoring the debug registers across low-power modes. At the heart of this system is a lock register which, when set, forbids writes to the debug registers. While locked, writes to debug registers via the co-processor interface will result in undefined instruction traps. Linux currently doesn't make use of this feature because we update the debug registers on context switch anyway, however the status of the lock is IMPLEMENTATION DEFINED on reset. This patch ensures that the lock is cleared during boot so that we can write to the debug registers safely. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 21e3a4a..793959e 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -768,6 +768,23 @@ static void __init reset_ctrl_regs(void *unused) { int i; + /* + * v7 debug contains save and restore registers so that debug state + * can be maintained across low-power modes without leaving + * the debug logic powered up. It is IMPLEMENTATION DEFINED whether + * we can write to the debug registers out of reset, so we must + * unlock the OS Lock Access Register to avoid taking undefined + * instruction exceptions later on. + */ + if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) { + /* + * Unconditionally clear the lock by writing a value + * other than 0xC5ACCE55 to the access register. + */ + asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); + isb(); + } + if (enable_monitor_mode()) return; @@ -810,17 +827,17 @@ static int __init arch_hw_breakpoint_init(void) pr_warning("halting debug mode enabled. Assuming maximum " "watchpoint size of 4 bytes."); } else { - /* Work out the maximum supported watchpoint length. */ - max_watchpoint_len = get_max_wp_len(); - pr_info("maximum watchpoint size is %u bytes.\n", - max_watchpoint_len); - /* * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ smp_call_function(reset_ctrl_regs, NULL, 1); reset_ctrl_regs(NULL); + + /* Work out the maximum supported watchpoint length. */ + max_watchpoint_len = get_max_wp_len(); + pr_info("maximum watchpoint size is %u bytes.\n", + max_watchpoint_len); } /* Register debug fault handler. */ -- cgit v1.1 From 7d99331e4793b52d488e911876ef11d843c6c8c9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 24 Nov 2010 17:45:49 +0000 Subject: ARM: hw_breakpoint: reset control registers in hotplug path The ARMv7 debug architecture doesn't make any guarantees about the contents of debug control registers following a debug logic reset. This patch ensures that we reset the control registers when a cpu comes ONLINE (for example, with hotplug) so that when we enable monitor mode while inserting a breakpoint we won't exhibit random behaviour. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 793959e..515a3c4 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -764,7 +764,7 @@ out: /* * One-time initialisation. */ -static void __init reset_ctrl_regs(void *unused) +static void reset_ctrl_regs(void *unused) { int i; @@ -799,6 +799,18 @@ static void __init reset_ctrl_regs(void *unused) } } +static int __cpuinit dbg_reset_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + if (action == CPU_ONLINE) + smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1); + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata dbg_reset_nb = { + .notifier_call = dbg_reset_notify, +}; + static int __init arch_hw_breakpoint_init(void) { int ret = 0; @@ -846,6 +858,8 @@ static int __init arch_hw_breakpoint_init(void) hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, "breakpoint debug exception"); + /* Register hotplug notifier. */ + register_cpu_notifier(&dbg_reset_nb); out: return ret; } -- cgit v1.1 From 6ee33c2712fcdff2568d9bbadb25c8e5a7c36212 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 25 Nov 2010 12:01:54 +0000 Subject: ARM: hw_breakpoint: correct and simplify alignment fixup code The current hw_breakpoint code tries to fix up the alignment of breakpoints so that we can make use of sparse byte-address-select bits in the control register and give the illusion that we can set breakpoints on unaligned addresses. Although this works on v6 cores, v7 forbids this behaviour, instead requiring breakpoints to be set on aligned addresses and have contiguous byte-address-select ranges depending on the instruction set in use. For ARM the only supported size is 4 bytes, whilst Thumb-2 also permits 2 byte breakpoints (watchpoints can be of 1, 2, 4 or 8 bytes long). This patch simplifies the alignment fixup code so that we require addresses to be aligned to the size of the corresponding breakpoint. This allows us to handle the common case of breaking on a half-word aligned Thumb-2 instruction and also allows us to set byte watchpoints on arbitrary addresses. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 57 ++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 26 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 515a3c4..d37ed35 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -537,6 +537,17 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; } + /* + * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes. + * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported + * by the hardware and must be aligned to the appropriate number of + * bytes. + */ + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE && + info->ctrl.len != ARM_BREAKPOINT_LEN_2 && + info->ctrl.len != ARM_BREAKPOINT_LEN_4) + return -EINVAL; + /* Address */ info->address = bp->attr.bp_addr; @@ -561,7 +572,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret = 0; - u32 bytelen, max_len, offset, alignment_mask = 0x3; + u32 offset, alignment_mask = 0x3; /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp); @@ -571,32 +582,27 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) /* Check address alignment. */ if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; - if (info->address & alignment_mask) { - /* - * Try to fix the alignment. This may result in a length - * that is too large, so we must check for that. - */ - bytelen = get_hbp_len(info->ctrl.len); - max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 : - max_watchpoint_len; - - if (max_len >= 8) - offset = info->address & 0x7; - else - offset = info->address & 0x3; - - if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) { - ret = -EFBIG; - goto out; - } - - info->ctrl.len <<= offset; - info->address &= ~offset; - - pr_debug("breakpoint alignment fixup: length = 0x%x, " - "address = 0x%x\n", info->ctrl.len, info->address); + offset = info->address & alignment_mask; + switch (offset) { + case 0: + /* Aligned */ + break; + case 1: + /* Allow single byte watchpoint. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; + case 2: + /* Allow halfword watchpoints and breakpoints. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + break; + default: + ret = -EINVAL; + goto out; } + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + /* * Currently we rely on an overflow handler to take * care of single-stepping the breakpoint when it fires. @@ -607,7 +613,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), "overflow handler required but none found")) { ret = -EINVAL; - goto out; } out: return ret; -- cgit v1.1 From 7e20269647169e7ea08a62bdc4979a3ba32e615c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 28 Nov 2010 14:57:24 +0000 Subject: ARM: hw_breakpoint: disable preemption during debug exception handling On ARM, debug exceptions occur in the form of data or prefetch aborts. One difference is that debug exceptions require access to per-cpu banked registers and data structures which are not saved in the low-level exception code. For kernels built with CONFIG_PREEMPT, there is an unlikely scenario that the debug handler ends up running on a different CPU from the one that originally signalled the event, resulting in random data being read from the wrong registers. This patch adds a debug_entry macro to the low-level exception handling code which checks whether the taken exception is a debug exception. If it is, the preempt count for the faulting process is incremented. After the debug handler has finished, the count is decremented. Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm/kernel/entry-armv.S | 4 ++++ arch/arm/kernel/entry-header.S | 19 +++++++++++++++++++ arch/arm/kernel/hw_breakpoint.c | 18 +++++++++++++----- 3 files changed, 36 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c09e357..34bbef0 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -198,6 +198,7 @@ __dabt_svc: @ @ set desired IRQ state, then call main handler @ + debug_entry r1 msr cpsr_c, r9 mov r2, sp bl do_DataAbort @@ -324,6 +325,7 @@ __pabt_svc: #else bl CPU_PABORT_HANDLER #endif + debug_entry r1 msr cpsr_c, r9 @ Maybe enable interrupts mov r2, sp @ regs bl do_PrefetchAbort @ call abort handler @@ -439,6 +441,7 @@ __dabt_usr: @ @ IRQs on, then call the main handler @ + debug_entry r1 enable_irq mov r2, sp adr lr, BSYM(ret_from_exception) @@ -703,6 +706,7 @@ __pabt_usr: #else bl CPU_PABORT_HANDLER #endif + debug_entry r1 enable_irq @ Enable interrupts mov r2, sp @ regs bl do_PrefetchAbort @ call abort handler diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index d93f976..ae94649 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -165,6 +165,25 @@ .endm #endif /* !CONFIG_THUMB2_KERNEL */ + @ + @ Debug exceptions are taken as prefetch or data aborts. + @ We must disable preemption during the handler so that + @ we can access the debug registers safely. + @ + .macro debug_entry, fsr +#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT) + ldr r4, =0x40f @ mask out fsr.fs + and r5, r4, \fsr + cmp r5, #2 @ debug exception + bne 1f + get_thread_info r10 + ldr r6, [r10, #TI_PREEMPT] @ get preempt count + add r11, r6, #1 @ increment it + str r11, [r10, #TI_PREEMPT] +1: +#endif + .endm + /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index d37ed35..eeba380 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt #include +#include #include #include #include @@ -736,14 +737,17 @@ unlock: /* * Called from either the Data Abort Handler [watchpoint] or the - * Prefetch Abort Handler [breakpoint]. + * Prefetch Abort Handler [breakpoint] with preemption disabled. */ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int ret = 1; /* Unhandled fault. */ + int ret = 0; u32 dscr; + /* We must be called with preemption disabled. */ + WARN_ON(preemptible()); + /* We only handle watchpoints and hardware breakpoints. */ ARM_DBG_READ(c1, 0, dscr); @@ -758,11 +762,15 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, watchpoint_handler(addr, regs); break; default: - goto out; + ret = 1; /* Unhandled fault. */ } - ret = 0; -out: + /* + * Re-enable preemption after it was disabled in the + * low-level exception handling code. + */ + preempt_enable(); + return ret; } -- cgit v1.1 From 0017ff42ac37ff6aeb87d0b006c5d32b9a39f5fc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 28 Nov 2010 15:09:36 +0000 Subject: ARM: hw_breakpoint: don't advertise reserved breakpoints To permit handling of watchpoint exceptions without signalling a debugger, it is necessary to reserve breakpoint registers for in-kernel use only. This patch ensures that we record and subtract the number of reserved breakpoints from the number of usable breakpoint registers that we advertise to userspace via the ptrace API. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 206 +++++++++++++++++++++++----------------- 1 file changed, 117 insertions(+), 89 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index eeba380..b16c456 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -45,6 +45,7 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); /* Number of BRP/WRP registers on this CPU. */ static int core_num_brps; +static int core_num_reserved_brps; static int core_num_wrps; /* Debug architecture version. */ @@ -53,87 +54,6 @@ static u8 debug_arch; /* Maximum supported watchpoint length. */ static u8 max_watchpoint_len; -/* Determine number of BRP registers available. */ -static int get_num_brps(void) -{ - u32 didr; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 24) & 0xf) + 1; -} - -/* Determine number of WRP registers available. */ -static int get_num_wrps(void) -{ - /* - * FIXME: When a watchpoint fires, the only way to work out which - * watchpoint it was is by disassembling the faulting instruction - * and working out the address of the memory access. - * - * Furthermore, we can only do this if the watchpoint was precise - * since imprecise watchpoints prevent us from calculating register - * based addresses. - * - * For the time being, we only report 1 watchpoint register so we - * always know which watchpoint fired. In the future we can either - * add a disassembler and address generation emulator, or we can - * insert a check to see if the DFAR is set on watchpoint exception - * entry [the ARM ARM states that the DFAR is UNKNOWN, but - * experience shows that it is set on some implementations]. - */ - -#if 0 - u32 didr, wrps; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 28) & 0xf) + 1; -#endif - - return 1; -} - -int hw_breakpoint_slots(int type) -{ - /* - * We can be called early, so don't rely on - * our static variables being initialised. - */ - switch (type) { - case TYPE_INST: - return get_num_brps(); - case TYPE_DATA: - return get_num_wrps(); - default: - pr_warning("unknown slot type: %d\n", type); - return 0; - } -} - -/* Determine debug architecture. */ -static u8 get_debug_arch(void) -{ - u32 didr; - - /* Do we implement the extended CPUID interface? */ - if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { - pr_warning("CPUID feature registers not supported. " - "Assuming v6 debug is present.\n"); - return ARM_DEBUG_ARCH_V6; - } - - ARM_DBG_READ(c0, 0, didr); - return (didr >> 16) & 0xf; -} - -/* Does this core support mismatch breakpoints? */ -static int core_has_mismatch_bps(void) -{ - return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1; -} - -u8 arch_get_debug_arch(void) -{ - return debug_arch; -} - #define READ_WB_REG_CASE(OP2, M, VAL) \ case ((OP2 << 4) + M): \ ARM_DBG_READ(c ## M, OP2, VAL); \ @@ -211,6 +131,111 @@ static void write_wb_reg(int n, u32 val) isb(); } +/* Determine debug architecture. */ +static u8 get_debug_arch(void) +{ + u32 didr; + + /* Do we implement the extended CPUID interface? */ + if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { + pr_warning("CPUID feature registers not supported. " + "Assuming v6 debug is present.\n"); + return ARM_DEBUG_ARCH_V6; + } + + ARM_DBG_READ(c0, 0, didr); + return (didr >> 16) & 0xf; +} + +u8 arch_get_debug_arch(void) +{ + return debug_arch; +} + +/* Determine number of BRP register available. */ +static int get_num_brp_resources(void) +{ + u32 didr; + ARM_DBG_READ(c0, 0, didr); + return ((didr >> 24) & 0xf) + 1; +} + +/* Does this core support mismatch breakpoints? */ +static int core_has_mismatch_brps(void) +{ + return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 && + get_num_brp_resources() > 1); +} + +/* Determine number of usable WRPs available. */ +static int get_num_wrps(void) +{ + /* + * FIXME: When a watchpoint fires, the only way to work out which + * watchpoint it was is by disassembling the faulting instruction + * and working out the address of the memory access. + * + * Furthermore, we can only do this if the watchpoint was precise + * since imprecise watchpoints prevent us from calculating register + * based addresses. + * + * Providing we have more than 1 breakpoint register, we only report + * a single watchpoint register for the time being. This way, we always + * know which watchpoint fired. In the future we can either add a + * disassembler and address generation emulator, or we can insert a + * check to see if the DFAR is set on watchpoint exception entry + * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows + * that it is set on some implementations]. + */ + +#if 0 + int wrps; + u32 didr; + ARM_DBG_READ(c0, 0, didr); + wrps = ((didr >> 28) & 0xf) + 1; +#endif + int wrps = 1; + + if (core_has_mismatch_brps() && wrps >= get_num_brp_resources()) + wrps = get_num_brp_resources() - 1; + + return wrps; +} + +/* We reserve one breakpoint for each watchpoint. */ +static int get_num_reserved_brps(void) +{ + if (core_has_mismatch_brps()) + return get_num_wrps(); + return 0; +} + +/* Determine number of usable BRPs available. */ +static int get_num_brps(void) +{ + int brps = get_num_brp_resources(); + if (core_has_mismatch_brps()) + brps -= get_num_reserved_brps(); + return brps; +} + +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + /* * In order to access the breakpoint/watchpoint control registers, * we must be running in debug monitor mode. Unfortunately, we can @@ -326,7 +351,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps - 1; + max_slots = core_num_brps; if (bp_is_single_step(bp)) { info->ctrl.mismatch = 1; @@ -377,7 +402,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) /* Breakpoint */ base = ARM_BASE_BCR; slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps - 1; + max_slots = core_num_brps; if (bp_is_single_step(bp)) { i = max_slots; @@ -611,7 +636,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * we can use the mismatch feature as a poor-man's hardware single-step. */ if (WARN_ONCE(!bp->overflow_handler && - (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()), "overflow handler required but none found")) { ret = -EINVAL; } @@ -698,7 +723,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; - for (i = 0; i < core_num_brps; ++i) { + for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { rcu_read_lock(); bp = slots[i]; @@ -801,7 +826,8 @@ static void reset_ctrl_regs(void *unused) if (enable_monitor_mode()) return; - for (i = 0; i < core_num_brps; ++i) { + /* We must also reset any reserved registers. */ + for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { write_wb_reg(ARM_BASE_BCR + i, 0UL); write_wb_reg(ARM_BASE_BVR + i, 0UL); } @@ -839,13 +865,15 @@ static int __init arch_hw_breakpoint_init(void) /* Determine how many BRPs/WRPs are available. */ core_num_brps = get_num_brps(); + core_num_reserved_brps = get_num_reserved_brps(); core_num_wrps = get_num_wrps(); pr_info("found %d breakpoint and %d watchpoint registers.\n", - core_num_brps, core_num_wrps); + core_num_brps + core_num_reserved_brps, core_num_wrps); - if (core_has_mismatch_bps()) - pr_info("1 breakpoint reserved for watchpoint single-step.\n"); + if (core_num_reserved_brps) + pr_info("%d breakpoint(s) reserved for watchpoint " + "single-step.\n", core_num_reserved_brps); ARM_DBG_READ(c1, 0, dscr); if (dscr & ARM_DSCR_HDBGEN) { -- cgit v1.1 From 93a04a3416da12647c47840ebe2bb812fcb801d0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Nov 2010 16:56:01 +0000 Subject: ARM: hw_breakpoint: do not allocate new breakpoints with preemption disabled The watchpoint single-stepping code calls register_user_hw_breakpoint to register a mismatch breakpoint for stepping over the watchpoint. This is performed with preemption disabled, which is unsafe as we may end up scheduling whilst in_atomic(). Furthermore, using the perf API is rather overkill since we are already in the hw-breakpoint backend and only require access to reserved breakpoints anyway. This patch reworks the watchpoint stepping code so that we don't require another perf_event for the mismatch breakpoint. Instead, we hold a separate arch_hw_breakpoint_ctrl struct inside the watchpoint which is used exclusively for stepping. We can check whether or not stepping is enabled when installing or uninstalling the watchpoint and operate on the breakpoint accordingly. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 134 +++++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 57 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b16c456..81b63e9 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -316,23 +316,6 @@ u8 arch_get_max_wp_len(void) } /* - * Handler for reactivating a suspended watchpoint when the single - * step `mismatch' breakpoint is triggered. - */ -static void wp_single_step_handler(struct perf_event *bp, int unused, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - perf_event_enable(counter_arch_bp(bp)->suspended_wp); - unregister_hw_breakpoint(bp); -} - -static int bp_is_single_step(struct perf_event *bp) -{ - return bp->overflow_handler == wp_single_step_handler; -} - -/* * Install a perf counter breakpoint. */ int arch_install_hw_breakpoint(struct perf_event *bp) @@ -340,29 +323,35 @@ int arch_install_hw_breakpoint(struct perf_event *bp) struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct perf_event **slot, **slots; int i, max_slots, ctrl_base, val_base, ret = 0; + u32 addr, ctrl; /* Ensure that we are in monitor mode and halting mode is disabled. */ ret = enable_monitor_mode(); if (ret) goto out; + addr = info->address; + ctrl = encode_ctrl_reg(info->ctrl) | 0x1; + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; - - if (bp_is_single_step(bp)) { - info->ctrl.mismatch = 1; - i = max_slots; - slots[i] = bp; - goto setup; - } } else { /* Watchpoint */ - ctrl_base = ARM_BASE_WCR; - val_base = ARM_BASE_WVR; + if (info->step_ctrl.enabled) { + /* Install into the reserved breakpoint region. */ + ctrl_base = ARM_BASE_BCR + core_num_brps; + val_base = ARM_BASE_BVR + core_num_brps; + /* Override the watchpoint data with the step data. */ + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + } else { + ctrl_base = ARM_BASE_WCR; + val_base = ARM_BASE_WVR; + } slots = __get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -381,12 +370,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp) goto out; } -setup: /* Setup the address register. */ - write_wb_reg(val_base + i, info->address); + write_wb_reg(val_base + i, addr); /* Setup the control register. */ - write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1); + write_wb_reg(ctrl_base + i, ctrl); out: return ret; @@ -403,15 +391,12 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) base = ARM_BASE_BCR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; - - if (bp_is_single_step(bp)) { - i = max_slots; - slots[i] = NULL; - goto reset; - } } else { /* Watchpoint */ - base = ARM_BASE_WCR; + if (info->step_ctrl.enabled) + base = ARM_BASE_BCR + core_num_brps; + else + base = ARM_BASE_WCR; slots = __get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -429,7 +414,6 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) return; -reset: /* Reset the control register. */ write_wb_reg(base + i, 0); } @@ -579,7 +563,7 @@ static int arch_build_bp_info(struct perf_event *bp) /* Privilege */ info->ctrl.privilege = ARM_BREAKPOINT_USER; - if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp)) + if (arch_check_bp_in_kernelspace(bp)) info->ctrl.privilege |= ARM_BREAKPOINT_PRIV; /* Enabled? */ @@ -664,22 +648,18 @@ static void update_mismatch_flag(int idx, int flag) static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); struct arch_hw_breakpoint *info; - struct perf_event_attr attr; /* Without a disassembler, we can only handle 1 watchpoint. */ BUG_ON(core_num_wrps > 1); - hw_breakpoint_init(&attr); - attr.bp_addr = regs->ARM_pc & ~0x3; - attr.bp_len = HW_BREAKPOINT_LEN_4; - attr.bp_type = HW_BREAKPOINT_X; - for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); - if (slots[i] == NULL) { + wp = slots[i]; + + if (wp == NULL) { rcu_read_unlock(); continue; } @@ -689,24 +669,60 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) * single watchpoint, we can set the trigger to the lowest * possible faulting address. */ - info = counter_arch_bp(slots[i]); - info->trigger = slots[i]->attr.bp_addr; + info = counter_arch_bp(wp); + info->trigger = wp->attr.bp_addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); - perf_bp_event(slots[i], regs); + perf_bp_event(wp, regs); /* * If no overflow handler is present, insert a temporary * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!slots[i]->overflow_handler) { - bp = register_user_hw_breakpoint(&attr, - wp_single_step_handler, - current); - counter_arch_bp(bp)->suspended_wp = slots[i]; - perf_event_disable(slots[i]); + if (!wp->overflow_handler) { + arch_uninstall_hw_breakpoint(wp); + info->step_ctrl.mismatch = 1; + info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; + info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; + info->step_ctrl.privilege = info->ctrl.privilege; + info->step_ctrl.enabled = 1; + info->trigger = regs->ARM_pc; + arch_install_hw_breakpoint(wp); + } + + rcu_read_unlock(); + } +} + +static void watchpoint_single_step_handler(unsigned long pc) +{ + int i; + struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct arch_hw_breakpoint *info; + + for (i = 0; i < core_num_reserved_brps; ++i) { + rcu_read_lock(); + + wp = slots[i]; + + if (wp == NULL) + goto unlock; + + info = counter_arch_bp(wp); + if (!info->step_ctrl.enabled) + goto unlock; + + /* + * Restore the original watchpoint if we've completed the + * single-step. + */ + if (info->trigger != pc) { + arch_uninstall_hw_breakpoint(wp); + info->step_ctrl.enabled = 0; + arch_install_hw_breakpoint(wp); } +unlock: rcu_read_unlock(); } } @@ -723,7 +739,8 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; - for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { + /* Check the currently installed breakpoints first. */ + for (i = 0; i < core_num_brps; ++i) { rcu_read_lock(); bp = slots[i]; @@ -750,7 +767,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) } unlock: - if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) { + if (mismatch && !info->ctrl.mismatch) { pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); } @@ -758,6 +775,9 @@ unlock: update_mismatch_flag(i, mismatch); rcu_read_unlock(); } + + /* Handle any pending watchpoint single-step breakpoints. */ + watchpoint_single_step_handler(addr); } /* -- cgit v1.1 From 9ebb3cbcc39d4e61ae6751167086acfb5c201e6f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Dec 2010 14:12:13 +0000 Subject: ARM: hw_breakpoint: unify single-stepping code for watchpoints and breakpoints The single-stepping code is currently different depending on whether we are stepping over a breakpoint or a watchpoint. There is no good reason for this, so let's sort it out. This patch adds functions for enabling/disabling single-step for a particular hw_breakpoint and integrates this with the exception handling code. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 81 ++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 41 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 81b63e9..36cd768 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -339,6 +339,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp) val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; + if (info->step_ctrl.enabled) { + /* Override the breakpoint data with the step data. */ + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + } } else { /* Watchpoint */ if (info->step_ctrl.enabled) { @@ -628,21 +633,28 @@ out: return ret; } -static void update_mismatch_flag(int idx, int flag) +/* + * Enable/disable single-stepping over the breakpoint bp at address addr. + */ +static void enable_single_step(struct perf_event *bp, u32 addr) { - struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]); - struct arch_hw_breakpoint *info; - - if (bp == NULL) - return; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); - info = counter_arch_bp(bp); + arch_uninstall_hw_breakpoint(bp); + info->step_ctrl.mismatch = 1; + info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; + info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; + info->step_ctrl.privilege = info->ctrl.privilege; + info->step_ctrl.enabled = 1; + info->trigger = addr; + arch_install_hw_breakpoint(bp); +} - /* Update the mismatch field to enter/exit `single-step' mode */ - if (!bp->overflow_handler && info->ctrl.mismatch != flag) { - info->ctrl.mismatch = flag; - write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1); - } +static void disable_single_step(struct perf_event *bp) +{ + arch_uninstall_hw_breakpoint(bp); + counter_arch_bp(bp)->step_ctrl.enabled = 0; + arch_install_hw_breakpoint(bp); } static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) @@ -679,16 +691,8 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!wp->overflow_handler) { - arch_uninstall_hw_breakpoint(wp); - info->step_ctrl.mismatch = 1; - info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; - info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; - info->step_ctrl.privilege = info->ctrl.privilege; - info->step_ctrl.enabled = 1; - info->trigger = regs->ARM_pc; - arch_install_hw_breakpoint(wp); - } + if (!wp->overflow_handler) + enable_single_step(wp, instruction_pointer(regs)); rcu_read_unlock(); } @@ -716,11 +720,8 @@ static void watchpoint_single_step_handler(unsigned long pc) * Restore the original watchpoint if we've completed the * single-step. */ - if (info->trigger != pc) { - arch_uninstall_hw_breakpoint(wp); - info->step_ctrl.enabled = 0; - arch_install_hw_breakpoint(wp); - } + if (info->trigger != pc) + disable_single_step(wp); unlock: rcu_read_unlock(); @@ -730,7 +731,6 @@ unlock: static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - int mismatch; u32 ctrl_reg, val, addr; struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); struct arch_hw_breakpoint *info; @@ -745,34 +745,33 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) bp = slots[i]; - if (bp == NULL) { - rcu_read_unlock(); - continue; - } + if (bp == NULL) + goto unlock; - mismatch = 0; + info = counter_arch_bp(bp); /* Check if the breakpoint value matches. */ val = read_wb_reg(ARM_BASE_BVR + i); if (val != (addr & ~0x3)) - goto unlock; + goto mismatch; /* Possible match, check the byte address select to confirm. */ ctrl_reg = read_wb_reg(ARM_BASE_BCR + i); decode_ctrl_reg(ctrl_reg, &ctrl); if ((1 << (addr & 0x3)) & ctrl.len) { - mismatch = 1; - info = counter_arch_bp(bp); info->trigger = addr; - } - -unlock: - if (mismatch && !info->ctrl.mismatch) { pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); + if (!bp->overflow_handler) + enable_single_step(bp, addr); + goto unlock; } - update_mismatch_flag(i, mismatch); +mismatch: + /* If we're stepping a breakpoint, it can now be restored. */ + if (info->step_ctrl.enabled) + disable_single_step(bp); +unlock: rcu_read_unlock(); } -- cgit v1.1 From 3ce70b2e24cd35cc9f2df8cf5205b8ab4e6178e1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Dec 2010 17:05:24 +0000 Subject: ARM: hw_breakpoint: disallow per-cpu breakpoints without overflow handler Single-stepping a breakpoint requires us to disable it temporarily so that we don't get stuck in a recursive debug trap. With per-cpu breakpoints this presents a problem where an interrupt can be taken before the single-step has completed and a new task is eventually scheduled. This new task will not hit the breakpoint because it will have been disabled during the previous handling code. This patch disallows per-cpu breakpoints on ARM when an overflow handler is not present. A similar effect can be created by placing breakpoints on a shell and then running applications there. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 36cd768..eef1b1e 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -622,10 +622,12 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Currently we rely on an overflow handler to take * care of single-stepping the breakpoint when it fires. * In the case of userspace breakpoints on a core with V7 debug, - * we can use the mismatch feature as a poor-man's hardware single-step. + * we can use the mismatch feature as a poor-man's hardware + * single-step, but this only works for per-task breakpoints. */ if (WARN_ONCE(!bp->overflow_handler && - (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()), + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps() + || !bp->hw.bp_target), "overflow handler required but none found")) { ret = -EINVAL; } -- cgit v1.1 From ce9b1b09520789223f72a9fefd5f0e329f8d89d0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 25 Nov 2010 12:59:31 +0000 Subject: ARM: ptrace: fix style issue with hw_breakpoint interface This patch fixes a trivial style issue in ptrace.c. Signed-off-by: Will Deacon --- arch/arm/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 3e97483..19c6816 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -1060,8 +1060,8 @@ static int ptrace_sethbpregs(struct task_struct *tsk, long num, goto out; if ((gen_type & implied_type) != gen_type) { - ret = -EINVAL; - goto out; + ret = -EINVAL; + goto out; } attr.bp_len = gen_len; -- cgit v1.1 From 4a55c18e2023096c8684fae5fa1cfa96a03172ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Nov 2010 17:06:53 +0000 Subject: ARM: hw_breakpoint: fix warnings generated by sparse sparse doesn't like per-cpu accesses such as: static DEFINE_PER_CPU(struct perf_event *, foo[MAXLEN]); struct perf_event **bar = __get_cpu_var(foo); and shouts quite loudly about it: | warning: incorrect type in assignment (different modifiers) | expected struct perf_event **slots | got struct perf_event *[noderef] * This patch adds casts to these sorts of assignments in hw_breakpoint.c in order to silence the warnings. Reported-by: Russell King Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index eef1b1e..56ed9a6 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -337,7 +337,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; - slots = __get_cpu_var(bp_on_reg); + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); max_slots = core_num_brps; if (info->step_ctrl.enabled) { /* Override the breakpoint data with the step data. */ @@ -357,7 +357,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) ctrl_base = ARM_BASE_WCR; val_base = ARM_BASE_WVR; } - slots = __get_cpu_var(wp_on_reg); + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -394,7 +394,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ base = ARM_BASE_BCR; - slots = __get_cpu_var(bp_on_reg); + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); max_slots = core_num_brps; } else { /* Watchpoint */ @@ -402,7 +402,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) base = ARM_BASE_BCR + core_num_brps; else base = ARM_BASE_WCR; - slots = __get_cpu_var(wp_on_reg); + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -662,9 +662,11 @@ static void disable_single_step(struct perf_event *bp) static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + /* Without a disassembler, we can only handle 1 watchpoint. */ BUG_ON(core_num_wrps > 1); @@ -703,9 +705,11 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) static void watchpoint_single_step_handler(unsigned long pc) { int i; - struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + for (i = 0; i < core_num_reserved_brps; ++i) { rcu_read_lock(); @@ -734,10 +738,12 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; u32 ctrl_reg, val, addr; - struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); + struct perf_event *bp, **slots; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; -- cgit v1.1 From 8fbf397c3389c1dedfa9ee412715046ab28fd82d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Dec 2010 17:37:45 +0000 Subject: ARM: hw_breakpoint: do not fail initcall if monitor mode is disabled The debug registers can only be manipulated from software if monitor debug mode is enabled. On some cores, this can never be enabled (i.e. the corresponding bit in the DSCR is RAZ/WI). This patch ensures we can handle this hardware configuration and fail gracefully, rather than blow up the kernel during boot. Reported-by: Cyril Chemparathy Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 54 +++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 29 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 56ed9a6..c9f3f04 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -219,23 +219,6 @@ static int get_num_brps(void) return brps; } -int hw_breakpoint_slots(int type) -{ - /* - * We can be called early, so don't rely on - * our static variables being initialised. - */ - switch (type) { - case TYPE_INST: - return get_num_brps(); - case TYPE_DATA: - return get_num_wrps(); - default: - pr_warning("unknown slot type: %d\n", type); - return 0; - } -} - /* * In order to access the breakpoint/watchpoint control registers, * we must be running in debug monitor mode. Unfortunately, we can @@ -256,8 +239,12 @@ static int enable_monitor_mode(void) goto out; } + /* If monitor mode is already enabled, just return. */ + if (dscr & ARM_DSCR_MDBGEN) + goto out; + /* Write to the corresponding DSCR. */ - switch (debug_arch) { + switch (get_debug_arch()) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); @@ -272,15 +259,30 @@ static int enable_monitor_mode(void) /* Check that the write made it through. */ ARM_DBG_READ(c1, 0, dscr); - if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), - "failed to enable monitor mode.")) { + if (!(dscr & ARM_DSCR_MDBGEN)) ret = -EPERM; - } out: return ret; } +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + /* * Check if 8-bit byte-address select is available. * This clobbers WRP 0. @@ -294,9 +296,6 @@ static u8 get_max_wp_len(void) if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14) goto out; - if (enable_monitor_mode()) - goto out; - memset(&ctrl, 0, sizeof(ctrl)); ctrl.len = ARM_BREAKPOINT_LEN_8; ctrl_reg = encode_ctrl_reg(ctrl); @@ -879,15 +878,13 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = { static int __init arch_hw_breakpoint_init(void) { - int ret = 0; u32 dscr; debug_arch = get_debug_arch(); if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) { pr_info("debug architecture 0x%x unsupported.\n", debug_arch); - ret = -ENODEV; - goto out; + return 0; } /* Determine how many BRPs/WRPs are available. */ @@ -928,8 +925,7 @@ static int __init arch_hw_breakpoint_init(void) /* Register hotplug notifier. */ register_cpu_notifier(&dbg_reset_nb); -out: - return ret; + return 0; } arch_initcall(arch_hw_breakpoint_init); -- cgit v1.1 From ef6c84454f8567d4968c210d7d194fb711ed3739 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Wed, 24 Nov 2010 11:54:25 +0800 Subject: ARM: pxa: add iwmmx support for PJ4 iwmmxt is used in XScale, XScale3, Mohawk and PJ4 core. But the instructions of accessing CP0 and CP1 is changed in PJ4. Append more files to support iwmmxt in PJ4 core. Signed-off-by: Zhou Zhu Signed-off-by: Haojian Zhuang Acked-by: Nicolas Pitre Signed-off-by: Eric Miao --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/iwmmxt.S | 55 ++++++++++++++++++++------- arch/arm/kernel/pj4-cp0.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 13 deletions(-) create mode 100644 arch/arm/kernel/pj4-cp0.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268..b0f11fa 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -50,6 +50,7 @@ AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312 obj-$(CONFIG_CPU_XSCALE) += xscale-cp0.o obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o +obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_CPU_HAS_PMU) += pmu.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index b63b528..7fa3bb0 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -19,6 +19,14 @@ #include #include +#if defined(CONFIG_CPU_PJ4) +#define PJ4(code...) code +#define XSC(code...) +#else +#define PJ4(code...) +#define XSC(code...) code +#endif + #define MMX_WR0 (0x00) #define MMX_WR1 (0x08) #define MMX_WR2 (0x10) @@ -58,11 +66,17 @@ ENTRY(iwmmxt_task_enable) - mrc p15, 0, r2, c15, c1, 0 - tst r2, #0x3 @ CP0 and CP1 accessible? + XSC(mrc p15, 0, r2, c15, c1, 0) + PJ4(mrc p15, 0, r2, c1, c0, 2) + @ CP0 and CP1 accessible? + XSC(tst r2, #0x3) + PJ4(tst r2, #0xf) movne pc, lr @ if so no business here - orr r2, r2, #0x3 @ enable access to CP0 and CP1 - mcr p15, 0, r2, c15, c1, 0 + @ enable access to CP0 and CP1 + XSC(orr r2, r2, #0x3) + XSC(mcr p15, 0, r2, c15, c1, 0) + PJ4(orr r2, r2, #0xf) + PJ4(mcr p15, 0, r2, c1, c0, 2) ldr r3, =concan_owner add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area @@ -179,17 +193,26 @@ ENTRY(iwmmxt_task_disable) teqne r1, r2 @ or specified one? bne 1f @ no: quit - mrc p15, 0, r4, c15, c1, 0 - orr r4, r4, #0x3 @ enable access to CP0 and CP1 - mcr p15, 0, r4, c15, c1, 0 + @ enable access to CP0 and CP1 + XSC(mrc p15, 0, r4, c15, c1, 0) + XSC(orr r4, r4, #0xf) + XSC(mcr p15, 0, r4, c15, c1, 0) + PJ4(mrc p15, 0, r4, c1, c0, 2) + PJ4(orr r4, r4, #0x3) + PJ4(mcr p15, 0, r4, c1, c0, 2) + mov r0, #0 @ nothing to load str r0, [r3] @ no more current owner mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait bl concan_save - bic r4, r4, #0x3 @ disable access to CP0 and CP1 - mcr p15, 0, r4, c15, c1, 0 + @ disable access to CP0 and CP1 + XSC(bic r4, r4, #0x3) + XSC(mcr p15, 0, r4, c15, c1, 0) + PJ4(bic r4, r4, #0xf) + PJ4(mcr p15, 0, r4, c1, c0, 2) + mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait @@ -277,8 +300,11 @@ ENTRY(iwmmxt_task_restore) */ ENTRY(iwmmxt_task_switch) - mrc p15, 0, r1, c15, c1, 0 - tst r1, #0x3 @ CP0 and CP1 accessible? + XSC(mrc p15, 0, r1, c15, c1, 0) + PJ4(mrc p15, 0, r1, c1, c0, 2) + @ CP0 and CP1 accessible? + XSC(tst r1, #0x3) + PJ4(tst r1, #0xf) bne 1f @ yes: block them for next task ldr r2, =concan_owner @@ -287,8 +313,11 @@ ENTRY(iwmmxt_task_switch) teq r2, r3 @ next task owns it? movne pc, lr @ no: leave Concan disabled -1: eor r1, r1, #3 @ flip Concan access - mcr p15, 0, r1, c15, c1, 0 +1: @ flip Conan access + XSC(eor r1, r1, #0x3) + XSC(mcr p15, 0, r1, c15, c1, 0) + PJ4(eor r1, r1, #0xf) + PJ4(mcr p15, 0, r1, c1, c0, 2) mrc p15, 0, r1, c2, c0, 0 sub pc, lr, r1, lsr #32 @ cpwait and return diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c new file mode 100644 index 0000000..a4b1b07 --- /dev/null +++ b/arch/arm/kernel/pj4-cp0.c @@ -0,0 +1,94 @@ +/* + * linux/arch/arm/kernel/pj4-cp0.c + * + * PJ4 iWMMXt coprocessor context switching and handling + * + * Copyright (c) 2010 Marvell International Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t) +{ + struct thread_info *thread = t; + + switch (cmd) { + case THREAD_NOTIFY_FLUSH: + /* + * flush_thread() zeroes thread->fpstate, so no need + * to do anything here. + * + * FALLTHROUGH: Ensure we don't try to overwrite our newly + * initialised state information on the first fault. + */ + + case THREAD_NOTIFY_EXIT: + iwmmxt_task_release(thread); + break; + + case THREAD_NOTIFY_SWITCH: + iwmmxt_task_switch(thread); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block iwmmxt_notifier_block = { + .notifier_call = iwmmxt_do, +}; + + +static u32 __init pj4_cp_access_read(void) +{ + u32 value; + + __asm__ __volatile__ ( + "mrc p15, 0, %0, c1, c0, 2\n\t" + : "=r" (value)); + return value; +} + +static void __init pj4_cp_access_write(u32 value) +{ + u32 temp; + + __asm__ __volatile__ ( + "mcr p15, 0, %1, c1, c0, 2\n\t" + "mrc p15, 0, %0, c1, c0, 2\n\t" + "mov %0, %0\n\t" + "sub pc, pc, #4\n\t" + : "=r" (temp) : "r" (value)); +} + + +/* + * Disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy + * switch code handle iWMMXt context switching. + */ +static int __init pj4_cp0_init(void) +{ + u32 cp_access; + + cp_access = pj4_cp_access_read() & ~0xf; + pj4_cp_access_write(cp_access); + + printk(KERN_INFO "PJ4 iWMMXt coprocessor enabled.\n"); + elf_hwcap |= HWCAP_IWMMXT; + thread_register_notifier(&iwmmxt_notifier_block); + + return 0; +} + +late_initcall(pj4_cp0_init); -- cgit v1.1