diff options
author | Arnd Bergmann <arnd@arndb.de> | 2014-11-20 13:49:52 +0100 |
---|---|---|
committer | Arnd Bergmann <arnd@arndb.de> | 2014-11-20 13:49:52 +0100 |
commit | b9e0e5a9e075575cc47940da8271d4908d3ae9c3 (patch) | |
tree | 1ad6cafc584265817fc7ff772dc6c27745c7e55d /arch/arm/kernel | |
parent | c3e6dc65f2ce83dacc0a18104bf44931e7eb8a5d (diff) | |
parent | af66abfe2ec8bd82211e9e4f036a64c902ff4cdb (diff) | |
download | op-kernel-dev-b9e0e5a9e075575cc47940da8271d4908d3ae9c3.zip op-kernel-dev-b9e0e5a9e075575cc47940da8271d4908d3ae9c3.tar.gz |
Merge tag 'arm-perf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into next/drivers
Pull "ARM: perf: updates for 3.19" from Will Deacon:
This patch series takes us slightly further on the road to big.LITTLE
support in perf. The main change enabling this is moving the CCI PMU
driver away from the arm-pmu abstraction, allowing the arch code to
focus specifically on support for CPU PMUs.
* tag 'arm-perf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux:
arm: perf: fold hotplug notifier into arm_pmu
arm: perf: dynamically allocate cpu hardware data
arm: perf: fold percpu_pmu into pmu_hw_events
arm: perf: kill get_hw_events()
arm: perf: limit size of accounting data
arm: perf: use IDR types for CPU PMUs
arm: perf: make PMU probing data-driven
arm: perf: add missing pr_info newlines
arm: perf: factor out callchain code
ARM: perf: use pr_* instead of printk
ARM: perf: remove useless return and check of idx in counter handling
bus: cci: move away from arm_pmu framework
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r-- | arch/arm/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/arm/kernel/perf_callchain.c | 136 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event.c | 162 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_cpu.c | 181 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v6.c | 12 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 72 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_xscale.c | 20 |
7 files changed, 294 insertions, 291 deletions
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 38ddd9f..8dcbed5 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -82,7 +82,7 @@ obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o -obj-$(CONFIG_PERF_EVENTS) += perf_regs.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c new file mode 100644 index 0000000..4e02ae5 --- /dev/null +++ b/arch/arm/kernel/perf_callchain.c @@ -0,0 +1,136 @@ +/* + * ARM callchain support + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> + * + * This code is based on the ARM OProfile backtrace code. + */ +#include <linux/perf_event.h> +#include <linux/uaccess.h> + +#include <asm/stacktrace.h> + +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct frame_tail { + struct frame_tail __user *fp; + unsigned long sp; + unsigned long lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct frame_tail buftail; + unsigned long err; + + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= buftail.fp) + return NULL; + + return buftail.fp - 1; +} + +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) +{ + struct frame_tail __user *tail; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->ARM_pc); + + if (!current->mm) + return; + + tail = (struct frame_tail __user *)regs->ARM_fp - 1; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) + tail = user_backtrace(tail, entry); +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called + * whist unwinding the stackframe and is like a subroutine return so we use + * the PC. + */ +static int +callchain_trace(struct stackframe *fr, + void *data) +{ + struct perf_callchain_entry *entry = data; + perf_callchain_store(entry, fr->pc); + return 0; +} + +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) +{ + struct stackframe fr; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + arm_get_current_stackframe(regs, &fr); + walk_stackframe(&fr, callchain_trace, entry); +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 266cba4..e34934f 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -7,21 +7,18 @@ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> * * This code is based on the sparc64 perf event code, which is in turn based - * on the x86 code. Callchain code is based on the ARM OProfile backtrace - * code. + * on the x86 code. */ #define pr_fmt(fmt) "hw perfevents: " fmt #include <linux/kernel.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> -#include <linux/uaccess.h> #include <linux/irq.h> #include <linux/irqdesc.h> #include <asm/irq_regs.h> #include <asm/pmu.h> -#include <asm/stacktrace.h> static int armpmu_map_cache_event(const unsigned (*cache_map) @@ -80,8 +77,12 @@ armpmu_map_event(struct perf_event *event, u32 raw_event_mask) { u64 config = event->attr.config; + int type = event->attr.type; - switch (event->attr.type) { + if (type == event->pmu->type) + return armpmu_map_raw_event(raw_event_mask, config); + + switch (type) { case PERF_TYPE_HARDWARE: return armpmu_map_hw_event(event_map, config); case PERF_TYPE_HW_CACHE: @@ -200,7 +201,7 @@ static void armpmu_del(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -217,7 +218,7 @@ static int armpmu_add(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; @@ -274,14 +275,12 @@ validate_group(struct perf_event *event) { struct perf_event *sibling, *leader = event->group_leader; struct pmu_hw_events fake_pmu; - DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); /* * Initialise the fake PMU. We only need to populate the * used_mask for the purposes of validation. */ - memset(fake_used_mask, 0, sizeof(fake_used_mask)); - fake_pmu.used_mask = fake_used_mask; + memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); if (!validate_event(&fake_pmu, leader)) return -EINVAL; @@ -305,17 +304,21 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) int ret; u64 start_clock, finish_clock; - if (irq_is_percpu(irq)) - dev = *(void **)dev; - armpmu = dev; + /* + * we request the IRQ with a (possibly percpu) struct arm_pmu**, but + * the handlers expect a struct arm_pmu*. The percpu_irq framework will + * do any necessary shifting, we just need to perform the first + * dereference. + */ + armpmu = *(void **)dev; plat_device = armpmu->plat_device; plat = dev_get_platdata(&plat_device->dev); start_clock = sched_clock(); if (plat && plat->handle_irq) - ret = plat->handle_irq(irq, dev, armpmu->handle_irq); + ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); else - ret = armpmu->handle_irq(irq, dev); + ret = armpmu->handle_irq(irq, armpmu); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); @@ -468,7 +471,7 @@ static int armpmu_event_init(struct perf_event *event) static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); if (enabled) @@ -533,130 +536,3 @@ int armpmu_register(struct arm_pmu *armpmu, int type) return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } -/* - * Callchain handling code. - */ - -/* - * The registers we're interested in are at the end of the variable - * length saved register structure. The fp points at the end of this - * structure so the address of this struct is: - * (struct frame_tail *)(xxx->fp)-1 - * - * This code has been adapted from the ARM OProfile support. - */ -struct frame_tail { - struct frame_tail __user *fp; - unsigned long sp; - unsigned long lr; -} __attribute__((packed)); - -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static struct frame_tail __user * -user_backtrace(struct frame_tail __user *tail, - struct perf_callchain_entry *entry) -{ - struct frame_tail buftail; - unsigned long err; - - if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - perf_callchain_store(entry, buftail.lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail + 1 >= buftail.fp) - return NULL; - - return buftail.fp - 1; -} - -void -perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) -{ - struct frame_tail __user *tail; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - - perf_callchain_store(entry, regs->ARM_pc); - - if (!current->mm) - return; - - tail = (struct frame_tail __user *)regs->ARM_fp - 1; - - while ((entry->nr < PERF_MAX_STACK_DEPTH) && - tail && !((unsigned long)tail & 0x3)) - tail = user_backtrace(tail, entry); -} - -/* - * Gets called by walk_stackframe() for every stackframe. This will be called - * whist unwinding the stackframe and is like a subroutine return so we use - * the PC. - */ -static int -callchain_trace(struct stackframe *fr, - void *data) -{ - struct perf_callchain_entry *entry = data; - perf_callchain_store(entry, fr->pc); - return 0; -} - -void -perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) -{ - struct stackframe fr; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - - arm_get_current_stackframe(regs, &fr); - walk_stackframe(&fr, callchain_trace, entry); -} - -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); - - return instruction_pointer(regs); -} - -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - int misc = 0; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } - - return misc; -} diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index eb2c4d55..dd9acc9 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -35,11 +35,6 @@ /* Set at runtime when we know what CPU type we are. */ static struct arm_pmu *cpu_pmu; -static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu); -static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); -static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); -static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); - /* * Despite the names, these two functions are CPU-specific and are used * by the OProfile/perf code. @@ -69,11 +64,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #include "perf_event_v6.c" #include "perf_event_v7.c" -static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) -{ - return this_cpu_ptr(&cpu_hw_events); -} - static void cpu_pmu_enable_percpu_irq(void *data) { int irq = *(int *)data; @@ -92,20 +82,21 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) { int i, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; irqs = min(pmu_device->num_resources, num_possible_cpus()); irq = platform_get_irq(pmu_device, 0); if (irq >= 0 && irq_is_percpu(irq)) { on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &percpu_pmu); + free_percpu_irq(irq, &hw_events->percpu_pmu); } else { for (i = 0; i < irqs; ++i) { if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); if (irq >= 0) - free_irq(irq, cpu_pmu); + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i)); } } } @@ -114,19 +105,21 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) { int i, err, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; if (!pmu_device) return -ENODEV; irqs = min(pmu_device->num_resources, num_possible_cpus()); if (irqs < 1) { - printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); return 0; } irq = platform_get_irq(pmu_device, 0); if (irq >= 0 && irq_is_percpu(irq)) { - err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu); + err = request_percpu_irq(irq, handler, "arm-pmu", + &hw_events->percpu_pmu); if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); @@ -153,7 +146,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) err = request_irq(irq, handler, IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - cpu_pmu); + per_cpu_ptr(&hw_events->percpu_pmu, i)); if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); @@ -167,18 +160,50 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) return 0; } -static void cpu_pmu_init(struct arm_pmu *cpu_pmu) +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. + */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) { + int err; int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); - events->events = per_cpu(hw_events, cpu); - events->used_mask = per_cpu(used_mask, cpu); + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); raw_spin_lock_init(&events->pmu_lock); - per_cpu(percpu_pmu, cpu) = cpu_pmu; + events->percpu_pmu = cpu_pmu; } - cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; + cpu_pmu->hw_events = cpu_hw_events; cpu_pmu->request_irq = cpu_pmu_request_irq; cpu_pmu->free_irq = cpu_pmu_free_irq; @@ -189,31 +214,19 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) /* If no interrupts available, set the corresponding capability flag */ if (!platform_get_irq(cpu_pmu->plat_device, 0)) cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; -} - -/* - * PMU hardware loses all context when a CPU goes offline. - * When a CPU is hotplugged back in, since some hardware registers are - * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading - * junk values out of them. - */ -static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) -{ - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - if (cpu_pmu && cpu_pmu->reset) - cpu_pmu->reset(cpu_pmu); - else - return NOTIFY_DONE; + return 0; - return NOTIFY_OK; +out_hw_events: + free_percpu(cpu_hw_events); + return err; } -static struct notifier_block cpu_pmu_hotplug_notifier = { - .notifier_call = cpu_pmu_notify, -}; +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} /* * PMU platform driver and devicetree bindings. @@ -241,48 +254,34 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { {}, }; +static const struct pmu_probe_info pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), + { /* sentinel value */ } +}; + /* * CPU PMU identification and probing. */ static int probe_current_pmu(struct arm_pmu *pmu) { int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); int ret = -ENODEV; + const struct pmu_probe_info *info; pr_info("probing PMU on CPU %d\n", cpu); - switch (read_cpuid_part()) { - /* ARM Ltd CPUs. */ - case ARM_CPU_PART_ARM1136: - ret = armv6_1136_pmu_init(pmu); - break; - case ARM_CPU_PART_ARM1156: - ret = armv6_1156_pmu_init(pmu); - break; - case ARM_CPU_PART_ARM1176: - ret = armv6_1176_pmu_init(pmu); - break; - case ARM_CPU_PART_ARM11MPCORE: - ret = armv6mpcore_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A8: - ret = armv7_a8_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A9: - ret = armv7_a9_pmu_init(pmu); - break; - - default: - if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) { - switch (xscale_cpu_arch_version()) { - case ARM_CPU_XSCALE_ARCH_V1: - ret = xscale1pmu_init(pmu); - break; - case ARM_CPU_XSCALE_ARCH_V2: - ret = xscale2pmu_init(pmu); - break; - } - } + for (info = pmu_probe_table; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); break; } @@ -299,13 +298,13 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) int ret = -ENODEV; if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!"); + pr_info("attempt to register multiple PMU devices!\n"); return -ENOSPC; } pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); if (!pmu) { - pr_info("failed to allocate PMU device!"); + pr_info("failed to allocate PMU device!\n"); return -ENOMEM; } @@ -320,18 +319,24 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) } if (ret) { - pr_info("failed to probe PMU!"); + pr_info("failed to probe PMU!\n"); goto out_free; } - cpu_pmu_init(cpu_pmu); - ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); + ret = cpu_pmu_init(cpu_pmu); + if (ret) + goto out_free; - if (!ret) - return 0; + ret = armpmu_register(cpu_pmu, -1); + if (ret) + goto out_destroy; + return 0; + +out_destroy: + cpu_pmu_destroy(cpu_pmu); out_free: - pr_info("failed to register PMU devices!"); + pr_info("failed to register PMU devices!\n"); kfree(pmu); return ret; } @@ -348,16 +353,6 @@ static struct platform_driver cpu_pmu_driver = { static int __init register_pmu_driver(void) { - int err; - - err = register_cpu_notifier(&cpu_pmu_hotplug_notifier); - if (err) - return err; - - err = platform_driver_register(&cpu_pmu_driver); - if (err) - unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); - - return err; + return platform_driver_register(&cpu_pmu_driver); } device_initcall(register_pmu_driver); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index abfeb04..f2ffd5c 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -262,7 +262,7 @@ static void armv6pmu_enable_event(struct perf_event *event) unsigned long val, mask, evt, flags; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { @@ -300,7 +300,7 @@ armv6pmu_handle_irq(int irq_num, unsigned long pmcr = armv6_pmcr_read(); struct perf_sample_data data; struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; - struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -356,7 +356,7 @@ armv6pmu_handle_irq(int irq_num, static void armv6pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); val = armv6_pmcr_read(); @@ -368,7 +368,7 @@ static void armv6pmu_start(struct arm_pmu *cpu_pmu) static void armv6pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); val = armv6_pmcr_read(); @@ -409,7 +409,7 @@ static void armv6pmu_disable_event(struct perf_event *event) unsigned long val, mask, evt, flags; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { @@ -444,7 +444,7 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event) unsigned long val, mask, flags, evt = 0; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 116758b..8993770 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -564,13 +564,11 @@ static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); } -static inline int armv7_pmnc_select_counter(int idx) +static inline void armv7_pmnc_select_counter(int idx) { u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); isb(); - - return idx; } static inline u32 armv7pmu_read_counter(struct perf_event *event) @@ -580,13 +578,15 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event) int idx = hwc->idx; u32 value = 0; - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); - else if (idx == ARMV7_IDX_CYCLE_COUNTER) + } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if (armv7_pmnc_select_counter(idx) == idx) + } else { + armv7_pmnc_select_counter(idx); asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); + } return value; } @@ -597,45 +597,43 @@ static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); - else if (idx == ARMV7_IDX_CYCLE_COUNTER) + } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if (armv7_pmnc_select_counter(idx) == idx) + } else { + armv7_pmnc_select_counter(idx); asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); + } } static inline void armv7_pmnc_write_evtsel(int idx, u32 val) { - if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTYPE_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); - } + armv7_pmnc_select_counter(idx); + val &= ARMV7_EVTYPE_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); } -static inline int armv7_pmnc_enable_counter(int idx) +static inline void armv7_pmnc_enable_counter(int idx) { u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); - return idx; } -static inline int armv7_pmnc_disable_counter(int idx) +static inline void armv7_pmnc_disable_counter(int idx) { u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); - return idx; } -static inline int armv7_pmnc_enable_intens(int idx) +static inline void armv7_pmnc_enable_intens(int idx) { u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); - return idx; } -static inline int armv7_pmnc_disable_intens(int idx) +static inline void armv7_pmnc_disable_intens(int idx) { u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); @@ -643,8 +641,6 @@ static inline int armv7_pmnc_disable_intens(int idx) /* Clear the overflow flag in case an interrupt is pending. */ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); isb(); - - return idx; } static inline u32 armv7_pmnc_getreset_flags(void) @@ -667,34 +663,34 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) u32 val; unsigned int cnt; - printk(KERN_INFO "PMNC registers dump:\n"); + pr_info("PMNC registers dump:\n"); asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); + pr_info("PMNC =0x%08x\n", val); asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); + pr_info("CNTENS=0x%08x\n", val); asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); + pr_info("INTENS=0x%08x\n", val); asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); + pr_info("FLAGS =0x%08x\n", val); asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); + pr_info("SELECT=0x%08x\n", val); asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); + pr_info("CCNT =0x%08x\n", val); for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { armv7_pmnc_select_counter(cnt); asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - printk(KERN_INFO "CNT[%d] count =0x%08x\n", + pr_info("CNT[%d] count =0x%08x\n", ARMV7_IDX_TO_COUNTER(cnt), val); asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", + pr_info("CNT[%d] evtsel=0x%08x\n", ARMV7_IDX_TO_COUNTER(cnt), val); } } @@ -705,7 +701,7 @@ static void armv7pmu_enable_event(struct perf_event *event) unsigned long flags; struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { @@ -751,7 +747,7 @@ static void armv7pmu_disable_event(struct perf_event *event) unsigned long flags; struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { @@ -783,7 +779,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) u32 pmnc; struct perf_sample_data data; struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; - struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -843,7 +839,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) static void armv7pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Enable all counters */ @@ -854,7 +850,7 @@ static void armv7pmu_start(struct arm_pmu *cpu_pmu) static void armv7pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable all counters */ @@ -1287,7 +1283,7 @@ static void krait_pmu_disable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); /* Disable counter and interrupt */ raw_spin_lock_irqsave(&events->pmu_lock, flags); @@ -1313,7 +1309,7 @@ static void krait_pmu_enable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); /* * Enable counter and interrupt, and set the counter to count diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 08da0af..8af9f1f 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -138,7 +138,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) unsigned long pmnc; struct perf_sample_data data; struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; - struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -198,7 +198,7 @@ static void xscale1pmu_enable_event(struct perf_event *event) unsigned long val, mask, evt, flags; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; switch (idx) { @@ -234,7 +234,7 @@ static void xscale1pmu_disable_event(struct perf_event *event) unsigned long val, mask, evt, flags; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; switch (idx) { @@ -287,7 +287,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, static void xscale1pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); val = xscale1pmu_read_pmnc(); @@ -299,7 +299,7 @@ static void xscale1pmu_start(struct arm_pmu *cpu_pmu) static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); val = xscale1pmu_read_pmnc(); @@ -485,7 +485,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) unsigned long pmnc, of_flags; struct perf_sample_data data; struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; - struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -539,7 +539,7 @@ static void xscale2pmu_enable_event(struct perf_event *event) unsigned long flags, ien, evtsel; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; ien = xscale2pmu_read_int_enable(); @@ -585,7 +585,7 @@ static void xscale2pmu_disable_event(struct perf_event *event) unsigned long flags, ien, evtsel, of_flags; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); int idx = hwc->idx; ien = xscale2pmu_read_int_enable(); @@ -651,7 +651,7 @@ out: static void xscale2pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; @@ -663,7 +663,7 @@ static void xscale2pmu_start(struct arm_pmu *cpu_pmu) static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); val = xscale2pmu_read_pmnc(); |