diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 13:14:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 13:14:16 -0700 |
commit | 2c856e14dad8cb1b085ae1f30c5e125c6d46019b (patch) | |
tree | 0b0067f6b83a536e7edb41d400a50c383c26e686 | |
parent | d34687ab97731b3a707106f78756342b61f46dbc (diff) | |
parent | 357b565d5d52b2dc2a51390eb8f887a9caa8597f (diff) | |
download | op-kernel-dev-2c856e14dad8cb1b085ae1f30c5e125c6d46019b.zip op-kernel-dev-2c856e14dad8cb1b085ae1f30c5e125c6d46019b.tar.gz |
Merge tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm[64] perf updates from Will Deacon:
"I have another mixed bag of ARM-related perf patches here.
It's about 25% CPU and 75% interconnect, but with drivers/bus/
languishing without an obvious maintainer or tree, Olof and I agreed
to keep all of these PMU patches together. I suspect a whole load of
code from drivers/bus/arm-* can be moved under drivers/perf/, so
that's on the radar for the future.
Summary:
- Initial support for ARMv8.1 CPU PMUs
- Support for the CPU PMU in Cavium ThunderX
- CPU PMU support for systems running 32-bit Linux in secure mode
- Support for the system PMU in ARM CCI-550 (Cache Coherent Interconnect)"
* tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (26 commits)
drivers/perf: arm_pmu: avoid NULL dereference when not using devicetree
arm64: perf: Extend ARMV8_EVTYPE_MASK to include PMCR.LC
arm-cci: remove unused variable
arm-cci: don't return value from void function
arm-cci: make private functions static
arm-cci: CoreLink CCI-550 PMU driver
arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU
arm-cci: CCI-500: Work around PMU counter writes
arm-cci: Provide hook for writing to PMU counters
arm-cci: Add helper to enable PMU without synchornising counters
arm-cci: Add routines to save/restore all counters
arm-cci: Get the status of a counter
arm-cci: write_counter: Remove redundant check
arm-cci: Delay PMU counter writes to pmu::pmu_enable
arm-cci: Refactor CCI PMU enable/disable methods
arm-cci: Group writes to counter
arm-cci: fix handling cpumask_any_but return value
arm-cci: simplify sysfs attr handling
drivers/perf: arm_pmu: implement CPU_PM notifier
arm64: dts: Add Cavium ThunderX specific PMU
...
-rw-r--r-- | Documentation/devicetree/bindings/arm/cci.txt | 2 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/arm/pmu.txt | 11 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 13 | ||||
-rw-r--r-- | arch/arm64/boot/dts/cavium/thunder-88xx.dtsi | 5 | ||||
-rw-r--r-- | arch/arm64/kernel/perf_event.c | 122 | ||||
-rw-r--r-- | drivers/bus/Kconfig | 10 | ||||
-rw-r--r-- | drivers/bus/arm-cci.c | 621 | ||||
-rw-r--r-- | drivers/perf/arm_pmu.c | 109 | ||||
-rw-r--r-- | include/linux/perf/arm_pmu.h | 2 |
9 files changed, 667 insertions, 228 deletions
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt index aef1d20..a1a5a7e 100644 --- a/Documentation/devicetree/bindings/arm/cci.txt +++ b/Documentation/devicetree/bindings/arm/cci.txt @@ -34,6 +34,7 @@ specific to ARM. Definition: must contain one of the following: "arm,cci-400" "arm,cci-500" + "arm,cci-550" - reg Usage: required @@ -101,6 +102,7 @@ specific to ARM. "arm,cci-400-pmu" - DEPRECATED, permitted only where OS has secure acces to CCI registers "arm,cci-500-pmu,r0" + "arm,cci-550-pmu,r0" - reg: Usage: required Value type: Integer cells. A register entry, expressed diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 5651883..6eb73be 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -25,6 +25,7 @@ Required properties: "qcom,scorpion-pmu" "qcom,scorpion-mp-pmu" "qcom,krait-pmu" + "cavium,thunder-pmu" - interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu interrupt (PPI) then 1 interrupt should be specified. @@ -46,6 +47,16 @@ Optional properties: - qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd events. +- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register + (SDER) is accessible. This will cause the driver to do + any setup required that is only possible in ARMv7 secure + state. If not present the ARMv7 SDER will not be touched, + which means the PMU may fail to operate unless external + code (bootloader or security monitor) has performed the + appropriate initialisation. Note that this property is + not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux + in Non-secure state. + Example: pmu { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4152158..1506385 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -712,6 +712,11 @@ static const struct attribute_group *armv7_pmuv2_attr_groups[] = { #define ARMV7_EXCLUDE_USER (1 << 30) #define ARMV7_INCLUDE_HYP (1 << 27) +/* + * Secure debug enable reg + */ +#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */ + static inline u32 armv7_pmnc_read(void) { u32 val; @@ -1094,7 +1099,13 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event, static void armv7pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 idx, nb_cnt = cpu_pmu->num_events; + u32 idx, nb_cnt = cpu_pmu->num_events, val; + + if (cpu_pmu->secure_access) { + asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val)); + val |= ARMV7_SDER_SUNIDEN; + asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val)); + } /* The counter and interrupt enable registers are unknown at reset. */ for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index 9cb7cf94..2eb9b22 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -360,6 +360,11 @@ <1 10 0xff01>; }; + pmu { + compatible = "cavium,thunder-pmu", "arm,armv8-pmuv3"; + interrupts = <1 7 4>; + }; + soc { compatible = "simple-bus"; #address-cells = <2>; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 1b52269..767c4f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -88,16 +88,25 @@ #define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F #define ARMV8_PMUV3_PERFCTR_L21_TLB 0x30 +/* ARMv8 implementation defined event types. */ +#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD 0x40 +#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST 0x41 +#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD 0x42 +#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST 0x43 +#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD 0x4C +#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST 0x4D +#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD 0x4E +#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST 0x4F + /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2 -/* ARMv8 Cortex-A57 and Cortex-A72 specific event types. */ -#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD 0x40 -#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST 0x41 -#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD 0x42 -#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST 0x43 -#define ARMV8_A57_PERFCTR_DTLB_REFILL_LD 0x4c -#define ARMV8_A57_PERFCTR_DTLB_REFILL_ST 0x4d +/* ARMv8 Cavium ThunderX specific event types. */ +#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST 0xE9 +#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS 0xEA +#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS 0xEB +#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS 0xEC +#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS 0xED /* PMUv3 HW events mapping. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { @@ -132,6 +141,18 @@ static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, }; +static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, +}; + static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -175,16 +196,46 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST, + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST, [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST, [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, @@ -325,7 +376,6 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = { NULL, }; - /* * Perf Events' indices */ @@ -356,9 +406,10 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = { #define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ #define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ #define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV8_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ #define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ #define ARMV8_PMCR_N_MASK 0x1f -#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ +#define ARMV8_PMCR_MASK 0x7f /* Mask for writable bits */ /* * PMOVSR: counters overflow flag status reg @@ -369,8 +420,8 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = { /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */ -#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */ +#define ARMV8_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ +#define ARMV8_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ /* * Event filters for PMUv3 @@ -445,9 +496,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) - asm volatile("msr pmccntr_el0, %0" :: "r" (value)); - else if (armv8pmu_select_counter(idx) == idx) + else if (idx == ARMV8_IDX_CYCLE_COUNTER) { + /* + * Set the upper 32bits as this is a 64bit counter but we only + * count using the lower 32bits and we want an interrupt when + * it overflows. + */ + u64 value64 = 0xffffffff00000000ULL | value; + + asm volatile("msr pmccntr_el0, %0" :: "r" (value64)); + } else if (armv8pmu_select_counter(idx) == idx) asm volatile("msr pmxevcntr_el0, %0" :: "r" (value)); } @@ -722,8 +780,11 @@ static void armv8pmu_reset(void *info) armv8pmu_disable_intens(idx); } - /* Initialize & Reset PMNC: C and P bits. */ - armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); + /* + * Initialize & Reset PMNC. Request overflow interrupt for + * 64 bit cycle counter but cheat in armv8pmu_write_counter(). + */ + armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C | ARMV8_PMCR_LC); } static int armv8_pmuv3_map_event(struct perf_event *event) @@ -747,6 +808,13 @@ static int armv8_a57_map_event(struct perf_event *event) ARMV8_EVTYPE_EVENT); } +static int armv8_thunder_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv8_thunder_perf_map, + &armv8_thunder_perf_cache_map, + ARMV8_EVTYPE_EVENT); +} + static void armv8pmu_read_num_pmnc_events(void *info) { int *nb_cnt = info; @@ -815,11 +883,21 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) return armv8pmu_probe_num_events(cpu_pmu); } +static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_cavium_thunder"; + cpu_pmu->map_event = armv8_thunder_map_event; + cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + return armv8pmu_probe_num_events(cpu_pmu); +} + static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, + {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, {}, }; diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 9a92c07..d4a3a31 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -34,15 +34,15 @@ config ARM_CCI400_PORT_CTRL Low level power management driver for CCI400 cache coherent interconnect for ARM platforms. -config ARM_CCI500_PMU - bool "ARM CCI500 PMU support" +config ARM_CCI5xx_PMU + bool "ARM CCI-500/CCI-550 PMU support" depends on (ARM && CPU_V7) || ARM64 depends on PERF_EVENTS select ARM_CCI_PMU help - Support for PMU events monitoring on the ARM CCI-500 cache coherent - interconnect. CCI-500 provides 8 independent event counters, which - can count events pertaining to the slave/master interfaces as well + Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache + coherent interconnects. Both of them provide 8 independent event counters, + which can count events pertaining to the slave/master interfaces as well as the internal events to the CCI. If unsure, say Y diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 577cc4b..a49b283 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -52,8 +52,9 @@ static const struct of_device_id arm_cci_matches[] = { #ifdef CONFIG_ARM_CCI400_COMMON {.compatible = "arm,cci-400", .data = CCI400_PORTS_DATA }, #endif -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU { .compatible = "arm,cci-500", }, + { .compatible = "arm,cci-550", }, #endif {}, }; @@ -92,7 +93,7 @@ static const struct of_device_id arm_cci_matches[] = { enum { CCI_IF_SLAVE, CCI_IF_MASTER, -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU CCI_IF_GLOBAL, #endif CCI_IF_MAX, @@ -121,13 +122,12 @@ struct cci_pmu_model { u32 fixed_hw_cntrs; u32 num_hw_cntrs; u32 cntr_size; - u64 nformat_attrs; - u64 nevent_attrs; - struct dev_ext_attribute *format_attrs; - struct dev_ext_attribute *event_attrs; + struct attribute **format_attrs; + struct attribute **event_attrs; struct event_range event_ranges[CCI_IF_MAX]; int (*validate_hw_event)(struct cci_pmu *, unsigned long); int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long); + void (*write_counters)(struct cci_pmu *, unsigned long *); }; static struct cci_pmu_model cci_pmu_models[]; @@ -155,19 +155,24 @@ enum cci_models { CCI400_R0, CCI400_R1, #endif -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU CCI500_R0, + CCI550_R0, #endif CCI_MODEL_MAX }; +static void pmu_write_counters(struct cci_pmu *cci_pmu, + unsigned long *mask); static ssize_t cci_pmu_format_show(struct device *dev, struct device_attribute *attr, char *buf); static ssize_t cci_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf); -#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \ - { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } +#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \ + &((struct dev_ext_attribute[]) { \ + { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } \ + })[0].attr.attr #define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \ CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config) @@ -242,12 +247,13 @@ enum cci400_perf_events { static ssize_t cci400_pmu_cycle_event_show(struct device *dev, struct device_attribute *attr, char *buf); -static struct dev_ext_attribute cci400_pmu_format_attrs[] = { +static struct attribute *cci400_pmu_format_attrs[] = { CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"), + NULL }; -static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = { +static struct attribute *cci400_r0_pmu_event_attrs[] = { /* Slave events */ CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0), CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01), @@ -279,9 +285,10 @@ static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = { CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A), /* Special event for cycles counter */ CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff), + NULL }; -static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = { +static struct attribute *cci400_r1_pmu_event_attrs[] = { /* Slave events */ CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0), CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01), @@ -325,6 +332,7 @@ static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = { CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11), /* Special event for cycles counter */ CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff), + NULL }; static ssize_t cci400_pmu_cycle_event_show(struct device *dev, @@ -420,72 +428,68 @@ static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev } #endif /* CONFIG_ARM_CCI400_PMU */ -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU /* - * CCI500 provides 8 independent event counters that can count - * any of the events available. - * - * CCI500 PMU event id is an 9-bit value made of two parts. + * CCI5xx PMU event id is an 9-bit value made of two parts. * bits [8:5] - Source for the event - * 0x0-0x6 - Slave interfaces - * 0x8-0xD - Master interfaces - * 0xf - Global Events - * 0x7,0xe - Reserved - * * bits [4:0] - Event code (specific to type of interface) + * + * */ /* Port ids */ -#define CCI500_PORT_S0 0x0 -#define CCI500_PORT_S1 0x1 -#define CCI500_PORT_S2 0x2 -#define CCI500_PORT_S3 0x3 -#define CCI500_PORT_S4 0x4 -#define CCI500_PORT_S5 0x5 -#define CCI500_PORT_S6 0x6 - -#define CCI500_PORT_M0 0x8 -#define CCI500_PORT_M1 0x9 -#define CCI500_PORT_M2 0xa -#define CCI500_PORT_M3 0xb -#define CCI500_PORT_M4 0xc -#define CCI500_PORT_M5 0xd - -#define CCI500_PORT_GLOBAL 0xf - -#define CCI500_PMU_EVENT_MASK 0x1ffUL -#define CCI500_PMU_EVENT_SOURCE_SHIFT 0x5 -#define CCI500_PMU_EVENT_SOURCE_MASK 0xf -#define CCI500_PMU_EVENT_CODE_SHIFT 0x0 -#define CCI500_PMU_EVENT_CODE_MASK 0x1f - -#define CCI500_PMU_EVENT_SOURCE(event) \ - ((event >> CCI500_PMU_EVENT_SOURCE_SHIFT) & CCI500_PMU_EVENT_SOURCE_MASK) -#define CCI500_PMU_EVENT_CODE(event) \ - ((event >> CCI500_PMU_EVENT_CODE_SHIFT) & CCI500_PMU_EVENT_CODE_MASK) - -#define CCI500_SLAVE_PORT_MIN_EV 0x00 -#define CCI500_SLAVE_PORT_MAX_EV 0x1f -#define CCI500_MASTER_PORT_MIN_EV 0x00 -#define CCI500_MASTER_PORT_MAX_EV 0x06 -#define CCI500_GLOBAL_PORT_MIN_EV 0x00 -#define CCI500_GLOBAL_PORT_MAX_EV 0x0f - - -#define CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \ - CCI_EXT_ATTR_ENTRY(_name, cci500_pmu_global_event_show, \ +#define CCI5xx_PORT_S0 0x0 +#define CCI5xx_PORT_S1 0x1 +#define CCI5xx_PORT_S2 0x2 +#define CCI5xx_PORT_S3 0x3 +#define CCI5xx_PORT_S4 0x4 +#define CCI5xx_PORT_S5 0x5 +#define CCI5xx_PORT_S6 0x6 + +#define CCI5xx_PORT_M0 0x8 +#define CCI5xx_PORT_M1 0x9 +#define CCI5xx_PORT_M2 0xa +#define CCI5xx_PORT_M3 0xb +#define CCI5xx_PORT_M4 0xc +#define CCI5xx_PORT_M5 0xd +#define CCI5xx_PORT_M6 0xe + +#define CCI5xx_PORT_GLOBAL 0xf + +#define CCI5xx_PMU_EVENT_MASK 0x1ffUL +#define CCI5xx_PMU_EVENT_SOURCE_SHIFT 0x5 +#define CCI5xx_PMU_EVENT_SOURCE_MASK 0xf +#define CCI5xx_PMU_EVENT_CODE_SHIFT 0x0 +#define CCI5xx_PMU_EVENT_CODE_MASK 0x1f + +#define CCI5xx_PMU_EVENT_SOURCE(event) \ + ((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK) +#define CCI5xx_PMU_EVENT_CODE(event) \ + ((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK) + +#define CCI5xx_SLAVE_PORT_MIN_EV 0x00 +#define CCI5xx_SLAVE_PORT_MAX_EV 0x1f +#define CCI5xx_MASTER_PORT_MIN_EV 0x00 +#define CCI5xx_MASTER_PORT_MAX_EV 0x06 +#define CCI5xx_GLOBAL_PORT_MIN_EV 0x00 +#define CCI5xx_GLOBAL_PORT_MAX_EV 0x0f + + +#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \ + CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \ (unsigned long) _config) -static ssize_t cci500_pmu_global_event_show(struct device *dev, +static ssize_t cci5xx_pmu_global_event_show(struct device *dev, struct device_attribute *attr, char *buf); -static struct dev_ext_attribute cci500_pmu_format_attrs[] = { +static struct attribute *cci5xx_pmu_format_attrs[] = { CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"), + NULL, }; -static struct dev_ext_attribute cci500_pmu_event_attrs[] = { +static struct attribute *cci5xx_pmu_event_attrs[] = { /* Slave events */ CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0), CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1), @@ -530,63 +534,73 @@ static struct dev_ext_attribute cci500_pmu_event_attrs[] = { CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6), /* Global events */ - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF), + NULL }; -static ssize_t cci500_pmu_global_event_show(struct device *dev, +static ssize_t cci5xx_pmu_global_event_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *eattr = container_of(attr, struct dev_ext_attribute, attr); /* Global events have single fixed source code */ return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n", - (unsigned long)eattr->var, CCI500_PORT_GLOBAL); + (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL); } +/* + * CCI500 provides 8 independent event counters that can count + * any of the events available. + * CCI500 PMU event source ids + * 0x0-0x6 - Slave interfaces + * 0x8-0xD - Master interfaces + * 0xf - Global Events + * 0x7,0xe - Reserved + */ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event) { - u32 ev_source = CCI500_PMU_EVENT_SOURCE(hw_event); - u32 ev_code = CCI500_PMU_EVENT_CODE(hw_event); + u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event); + u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event); int if_type; - if (hw_event & ~CCI500_PMU_EVENT_MASK) + if (hw_event & ~CCI5xx_PMU_EVENT_MASK) return -ENOENT; switch (ev_source) { - case CCI500_PORT_S0: - case CCI500_PORT_S1: - case CCI500_PORT_S2: - case CCI500_PORT_S3: - case CCI500_PORT_S4: - case CCI500_PORT_S5: - case CCI500_PORT_S6: + case CCI5xx_PORT_S0: + case CCI5xx_PORT_S1: + case CCI5xx_PORT_S2: + case CCI5xx_PORT_S3: + case CCI5xx_PORT_S4: + case CCI5xx_PORT_S5: + case CCI5xx_PORT_S6: if_type = CCI_IF_SLAVE; break; - case CCI500_PORT_M0: - case CCI500_PORT_M1: - case CCI500_PORT_M2: - case CCI500_PORT_M3: - case CCI500_PORT_M4: - case CCI500_PORT_M5: + case CCI5xx_PORT_M0: + case CCI5xx_PORT_M1: + case CCI5xx_PORT_M2: + case CCI5xx_PORT_M3: + case CCI5xx_PORT_M4: + case CCI5xx_PORT_M5: if_type = CCI_IF_MASTER; break; - case CCI500_PORT_GLOBAL: + case CCI5xx_PORT_GLOBAL: if_type = CCI_IF_GLOBAL; break; default: @@ -599,7 +613,118 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, return -ENOENT; } -#endif /* CONFIG_ARM_CCI500_PMU */ + +/* + * CCI550 provides 8 independent event counters that can count + * any of the events available. + * CCI550 PMU event source ids + * 0x0-0x6 - Slave interfaces + * 0x8-0xe - Master interfaces + * 0xf - Global Events + * 0x7 - Reserved + */ +static int cci550_validate_hw_event(struct cci_pmu *cci_pmu, + unsigned long hw_event) +{ + u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event); + u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event); + int if_type; + + if (hw_event & ~CCI5xx_PMU_EVENT_MASK) + return -ENOENT; + + switch (ev_source) { + case CCI5xx_PORT_S0: + case CCI5xx_PORT_S1: + case CCI5xx_PORT_S2: + case CCI5xx_PORT_S3: + case CCI5xx_PORT_S4: + case CCI5xx_PORT_S5: + case CCI5xx_PORT_S6: + if_type = CCI_IF_SLAVE; + break; + case CCI5xx_PORT_M0: + case CCI5xx_PORT_M1: + case CCI5xx_PORT_M2: + case CCI5xx_PORT_M3: + case CCI5xx_PORT_M4: + case CCI5xx_PORT_M5: + case CCI5xx_PORT_M6: + if_type = CCI_IF_MASTER; + break; + case CCI5xx_PORT_GLOBAL: + if_type = CCI_IF_GLOBAL; + break; + default: + return -ENOENT; + } + + if (ev_code >= cci_pmu->model->event_ranges[if_type].min && + ev_code <= cci_pmu->model->event_ranges[if_type].max) + return hw_event; + + return -ENOENT; +} + +#endif /* CONFIG_ARM_CCI5xx_PMU */ + +/* + * Program the CCI PMU counters which have PERF_HES_ARCH set + * with the event period and mark them ready before we enable + * PMU. + */ +static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) +{ + int i; + struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; + + DECLARE_BITMAP(mask, cci_pmu->num_cntrs); + + bitmap_zero(mask, cci_pmu->num_cntrs); + for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_hw->events[i]; + + if (WARN_ON(!event)) + continue; + + /* Leave the events which are not counting */ + if (event->hw.state & PERF_HES_STOPPED) + continue; + if (event->hw.state & PERF_HES_ARCH) { + set_bit(i, mask); + event->hw.state &= ~PERF_HES_ARCH; + } + } + + pmu_write_counters(cci_pmu, mask); +} + +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu) +{ + u32 val; + + /* Enable all the PMU counters. */ + val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; + writel(val, cci_ctrl_base + CCI_PMCR); +} + +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu) +{ + cci_pmu_sync_counters(cci_pmu); + __cci_pmu_enable_nosync(cci_pmu); +} + +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_disable(void) +{ + u32 val; + + /* Disable all the PMU counters. */ + val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; + writel(val, cci_ctrl_base + CCI_PMCR); +} static ssize_t cci_pmu_format_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -633,8 +758,8 @@ static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offs static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value, int idx, unsigned int offset) { - return writel_relaxed(value, cci_pmu->base + - CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset); + writel_relaxed(value, cci_pmu->base + + CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset); } static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx) @@ -647,12 +772,56 @@ static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx) pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL); } +static bool __maybe_unused +pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx) +{ + return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0; +} + static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event) { pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL); } /* + * For all counters on the CCI-PMU, disable any 'enabled' counters, + * saving the changed counters in the mask, so that we can restore + * it later using pmu_restore_counters. The mask is private to the + * caller. We cannot rely on the used_mask maintained by the CCI_PMU + * as it only tells us if the counter is assigned to perf_event or not. + * The state of the perf_event cannot be locked by the PMU layer, hence + * we check the individual counter status (which can be locked by + * cci_pm->hw_events->pmu_lock). + * + * @mask should be initialised to empty by the caller. + */ +static void __maybe_unused +pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + + for (i = 0; i < cci_pmu->num_cntrs; i++) { + if (pmu_counter_is_enabled(cci_pmu, i)) { + set_bit(i, mask); + pmu_disable_counter(cci_pmu, i); + } + } +} + +/* + * Restore the status of the counters. Reversal of the pmu_save_counters(). + * For each counter set in the mask, enable the counter back. + */ +static void __maybe_unused +pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) + pmu_enable_counter(cci_pmu, i); +} + +/* * Returns the number of programmable counters actually implemented * by the cci */ @@ -754,18 +923,98 @@ static u32 pmu_read_counter(struct perf_event *event) return value; } -static void pmu_write_counter(struct perf_event *event, u32 value) +static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx) { - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - struct hw_perf_event *hw_counter = &event->hw; - int idx = hw_counter->idx; + pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); +} - if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) - dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); +static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_hw->events[i]; + + if (WARN_ON(!event)) + continue; + pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); + } +} + +static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + if (cci_pmu->model->write_counters) + cci_pmu->model->write_counters(cci_pmu, mask); else - pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); + __pmu_write_counters(cci_pmu, mask); } +#ifdef CONFIG_ARM_CCI5xx_PMU + +/* + * CCI-500/CCI-550 has advanced power saving policies, which could gate the + * clocks to the PMU counters, which makes the writes to them ineffective. + * The only way to write to those counters is when the global counters + * are enabled and the particular counter is enabled. + * + * So we do the following : + * + * 1) Disable all the PMU counters, saving their current state + * 2) Enable the global PMU profiling, now that all counters are + * disabled. + * + * For each counter to be programmed, repeat steps 3-7: + * + * 3) Write an invalid event code to the event control register for the + counter, so that the counters are not modified. + * 4) Enable the counter control for the counter. + * 5) Set the counter value + * 6) Disable the counter + * 7) Restore the event in the target counter + * + * 8) Disable the global PMU. + * 9) Restore the status of the rest of the counters. + * + * We choose an event which for CCI-5xx is guaranteed not to count. + * We use the highest possible event code (0x1f) for the master interface 0. + */ +#define CCI5xx_INVALID_EVENT ((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \ + (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT)) +static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); + + bitmap_zero(saved_mask, cci_pmu->num_cntrs); + pmu_save_counters(cci_pmu, saved_mask); + + /* + * Now that all the counters are disabled, we can safely turn the PMU on, + * without syncing the status of the counters + */ + __cci_pmu_enable_nosync(cci_pmu); + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_pmu->hw_events.events[i]; + + if (WARN_ON(!event)) + continue; + + pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT); + pmu_enable_counter(cci_pmu, i); + pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); + pmu_disable_counter(cci_pmu, i); + pmu_set_event(cci_pmu, i, event->hw.config_base); + } + + __cci_pmu_disable(); + + pmu_restore_counters(cci_pmu, saved_mask); +} + +#endif /* CONFIG_ARM_CCI5xx_PMU */ + static u64 pmu_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -789,7 +1038,7 @@ static void pmu_read(struct perf_event *event) pmu_event_update(event); } -void pmu_event_set_period(struct perf_event *event) +static void pmu_event_set_period(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; /* @@ -800,7 +1049,14 @@ void pmu_event_set_period(struct perf_event *event) */ u64 val = 1ULL << 31; local64_set(&hwc->prev_count, val); - pmu_write_counter(event, val); + + /* + * CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose + * values needs to be sync-ed with the s/w state before the PMU is + * enabled. + * Mark this counter for sync. + */ + hwc->state |= PERF_HES_ARCH; } static irqreturn_t pmu_handle_irq(int irq_num, void *dev) @@ -811,6 +1067,9 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) int idx, handled = IRQ_NONE; raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable the PMU while we walk through the counters */ + __cci_pmu_disable(); /* * Iterate over counters and update the corresponding perf events. * This should work regardless of whether we have per-counter overflow @@ -818,13 +1077,10 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) */ for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) { struct perf_event *event = events->events[idx]; - struct hw_perf_event *hw_counter; if (!event) continue; - hw_counter = &event->hw; - /* Did this counter overflow? */ if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) & CCI_PMU_OVRFLW_FLAG)) @@ -837,6 +1093,9 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) pmu_event_set_period(event); handled = IRQ_HANDLED; } + + /* Enable the PMU and sync possibly overflowed counters */ + __cci_pmu_enable_sync(cci_pmu); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); return IRQ_RETVAL(handled); @@ -875,16 +1134,12 @@ static void cci_pmu_enable(struct pmu *pmu) struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs); unsigned long flags; - u32 val; if (!enabled) return; raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - - /* Enable all the PMU counters. */ - val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; - writel(val, cci_ctrl_base + CCI_PMCR); + __cci_pmu_enable_sync(cci_pmu); raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); } @@ -894,13 +1149,9 @@ static void cci_pmu_disable(struct pmu *pmu) struct cci_pmu *cci_pmu = to_cci_pmu(pmu); struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; unsigned long flags; - u32 val; raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - - /* Disable all the PMU counters. */ - val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; - writel(val, cci_ctrl_base + CCI_PMCR); + __cci_pmu_disable(); raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); } @@ -1176,9 +1427,8 @@ static int cci_pmu_event_init(struct perf_event *event) static ssize_t pmu_cpumask_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dev_ext_attribute *eattr = container_of(attr, - struct dev_ext_attribute, attr); - struct cci_pmu *cci_pmu = eattr->var; + struct pmu *pmu = dev_get_drvdata(dev); + struct cci_pmu *cci_pmu = to_cci_pmu(pmu); int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", cpumask_pr_args(&cci_pmu->cpus)); @@ -1187,13 +1437,11 @@ static ssize_t pmu_cpumask_attr_show(struct device *dev, return n; } -static struct dev_ext_attribute pmu_cpumask_attr = { - __ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL), - NULL, /* Populated in cci_pmu_init */ -}; +static struct device_attribute pmu_cpumask_attr = + __ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL); static struct attribute *pmu_attrs[] = { - &pmu_cpumask_attr.attr.attr, + &pmu_cpumask_attr.attr, NULL, }; @@ -1218,60 +1466,14 @@ static const struct attribute_group *pmu_attr_groups[] = { NULL }; -static struct attribute **alloc_attrs(struct platform_device *pdev, - int n, struct dev_ext_attribute *source) -{ - int i; - struct attribute **attrs; - - /* Alloc n + 1 (for terminating NULL) */ - attrs = devm_kcalloc(&pdev->dev, n + 1, sizeof(struct attribute *), - GFP_KERNEL); - if (!attrs) - return attrs; - for(i = 0; i < n; i++) - attrs[i] = &source[i].attr.attr; - return attrs; -} - -static int cci_pmu_init_attrs(struct cci_pmu *cci_pmu, struct platform_device *pdev) -{ - const struct cci_pmu_model *model = cci_pmu->model; - struct attribute **attrs; - - /* - * All allocations below are managed, hence doesn't need to be - * free'd explicitly in case of an error. - */ - - if (model->nevent_attrs) { - attrs = alloc_attrs(pdev, model->nevent_attrs, - model->event_attrs); - if (!attrs) - return -ENOMEM; - pmu_event_attr_group.attrs = attrs; - } - if (model->nformat_attrs) { - attrs = alloc_attrs(pdev, model->nformat_attrs, - model->format_attrs); - if (!attrs) - return -ENOMEM; - pmu_format_attr_group.attrs = attrs; - } - pmu_cpumask_attr.var = cci_pmu; - - return 0; -} - static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) { - char *name = cci_pmu->model->name; + const struct cci_pmu_model *model = cci_pmu->model; + char *name = model->name; u32 num_cntrs; - int rc; - rc = cci_pmu_init_attrs(cci_pmu, pdev); - if (rc) - return rc; + pmu_event_attr_group.attrs = model->event_attrs; + pmu_format_attr_group.attrs = model->format_attrs; cci_pmu->pmu = (struct pmu) { .name = cci_pmu->model->name, @@ -1314,7 +1516,7 @@ static int cci_pmu_cpu_notifier(struct notifier_block *self, if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus)) break; target = cpumask_any_but(cpu_online_mask, cpu); - if (target < 0) // UP, last CPU + if (target >= nr_cpu_ids) // UP, last CPU break; /* * TODO: migrate context once core races on event->ctx have @@ -1336,9 +1538,7 @@ static struct cci_pmu_model cci_pmu_models[] = { .num_hw_cntrs = 4, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, - .nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs), .event_attrs = cci400_r0_pmu_event_attrs, - .nevent_attrs = ARRAY_SIZE(cci400_r0_pmu_event_attrs), .event_ranges = { [CCI_IF_SLAVE] = { CCI400_R0_SLAVE_PORT_MIN_EV, @@ -1358,9 +1558,7 @@ static struct cci_pmu_model cci_pmu_models[] = { .num_hw_cntrs = 4, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, - .nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs), .event_attrs = cci400_r1_pmu_event_attrs, - .nevent_attrs = ARRAY_SIZE(cci400_r1_pmu_event_attrs), .event_ranges = { [CCI_IF_SLAVE] = { CCI400_R1_SLAVE_PORT_MIN_EV, @@ -1375,31 +1573,54 @@ static struct cci_pmu_model cci_pmu_models[] = { .get_event_idx = cci400_get_event_idx, }, #endif -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU [CCI500_R0] = { .name = "CCI_500", .fixed_hw_cntrs = 0, .num_hw_cntrs = 8, .cntr_size = SZ_64K, - .format_attrs = cci500_pmu_format_attrs, - .nformat_attrs = ARRAY_SIZE(cci500_pmu_format_attrs), - .event_attrs = cci500_pmu_event_attrs, - .nevent_attrs = ARRAY_SIZE(cci500_pmu_event_attrs), + .format_attrs = cci5xx_pmu_format_attrs, + .event_attrs = cci5xx_pmu_event_attrs, .event_ranges = { [CCI_IF_SLAVE] = { - CCI500_SLAVE_PORT_MIN_EV, - CCI500_SLAVE_PORT_MAX_EV, + CCI5xx_SLAVE_PORT_MIN_EV, + CCI5xx_SLAVE_PORT_MAX_EV, }, [CCI_IF_MASTER] = { - CCI500_MASTER_PORT_MIN_EV, - CCI500_MASTER_PORT_MAX_EV, + CCI5xx_MASTER_PORT_MIN_EV, + CCI5xx_MASTER_PORT_MAX_EV, }, [CCI_IF_GLOBAL] = { - CCI500_GLOBAL_PORT_MIN_EV, - CCI500_GLOBAL_PORT_MAX_EV, + CCI5xx_GLOBAL_PORT_MIN_EV, + CCI5xx_GLOBAL_PORT_MAX_EV, }, }, .validate_hw_event = cci500_validate_hw_event, + .write_counters = cci5xx_pmu_write_counters, + }, + [CCI550_R0] = { + .name = "CCI_550", + .fixed_hw_cntrs = 0, + .num_hw_cntrs = 8, + .cntr_size = SZ_64K, + .format_attrs = cci5xx_pmu_format_attrs, + .event_attrs = cci5xx_pmu_event_attrs, + .event_ranges = { + [CCI_IF_SLAVE] = { + CCI5xx_SLAVE_PORT_MIN_EV, + CCI5xx_SLAVE_PORT_MAX_EV, + }, + [CCI_IF_MASTER] = { + CCI5xx_MASTER_PORT_MIN_EV, + CCI5xx_MASTER_PORT_MAX_EV, + }, + [CCI_IF_GLOBAL] = { + CCI5xx_GLOBAL_PORT_MIN_EV, + CCI5xx_GLOBAL_PORT_MAX_EV, + }, + }, + .validate_hw_event = cci550_validate_hw_event, + .write_counters = cci5xx_pmu_write_counters, }, #endif }; @@ -1419,11 +1640,15 @@ static const struct of_device_id arm_cci_pmu_matches[] = { .data = &cci_pmu_models[CCI400_R1], }, #endif -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU { .compatible = "arm,cci-500-pmu,r0", .data = &cci_pmu_models[CCI500_R0], }, + { + .compatible = "arm,cci-550-pmu,r0", + .data = &cci_pmu_models[CCI550_R0], + }, #endif {}, }; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 166637f..32346b5 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -13,6 +13,7 @@ #include <linux/bitmap.h> #include <linux/cpumask.h> +#include <linux/cpu_pm.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/of_device.h> @@ -710,6 +711,93 @@ static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, return NOTIFY_OK; } +#ifdef CONFIG_CPU_PM +static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) +{ + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + struct perf_event *event; + int idx; + + for (idx = 0; idx < armpmu->num_events; idx++) { + /* + * If the counter is not used skip it, there is no + * need of stopping/restarting it. + */ + if (!test_bit(idx, hw_events->used_mask)) + continue; + + event = hw_events->events[idx]; + + switch (cmd) { + case CPU_PM_ENTER: + /* + * Stop and update the counter + */ + armpmu_stop(event, PERF_EF_UPDATE); + break; + case CPU_PM_EXIT: + case CPU_PM_ENTER_FAILED: + /* Restore and enable the counter */ + armpmu_start(event, PERF_EF_RELOAD); + break; + default: + break; + } + } +} + +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, + void *v) +{ + struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return NOTIFY_DONE; + + /* + * Always reset the PMU registers on power-up even if + * there are no events running. + */ + if (cmd == CPU_PM_EXIT && armpmu->reset) + armpmu->reset(armpmu); + + if (!enabled) + return NOTIFY_OK; + + switch (cmd) { + case CPU_PM_ENTER: + armpmu->stop(armpmu); + cpu_pm_pmu_setup(armpmu, cmd); + break; + case CPU_PM_EXIT: + cpu_pm_pmu_setup(armpmu, cmd); + case CPU_PM_ENTER_FAILED: + armpmu->start(armpmu); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify; + return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb); +} + +static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) +{ + cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb); +} +#else +static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; } +static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { } +#endif + static int cpu_pmu_init(struct arm_pmu *cpu_pmu) { int err; @@ -725,6 +813,10 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) if (err) goto out_hw_events; + err = cpu_pm_pmu_register(cpu_pmu); + if (err) + goto out_unregister; + for_each_possible_cpu(cpu) { struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); raw_spin_lock_init(&events->pmu_lock); @@ -746,6 +838,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) return 0; +out_unregister: + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); out_hw_events: free_percpu(cpu_hw_events); return err; @@ -753,6 +847,7 @@ out_hw_events: static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) { + cpu_pm_pmu_unregister(cpu_pmu); unregister_cpu_notifier(&cpu_pmu->hotplug_nb); free_percpu(cpu_pmu->hw_events); } @@ -889,6 +984,15 @@ int arm_pmu_device_probe(struct platform_device *pdev, if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { init_fn = of_id->data; + pmu->secure_access = of_property_read_bool(pdev->dev.of_node, + "secure-reg-access"); + + /* arm64 systems boot only as non-secure */ + if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) { + pr_warn("ignoring \"secure-reg-access\" property for arm64\n"); + pmu->secure_access = false; + } + ret = of_pmu_irq_cfg(pmu); if (!ret) ret = init_fn(pmu); @@ -898,7 +1002,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, } if (ret) { - pr_info("failed to probe PMU!\n"); + pr_info("%s: failed to probe PMU!\n", of_node_full_name(node)); goto out_free; } @@ -918,7 +1022,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, out_destroy: cpu_pmu_destroy(pmu); out_free: - pr_info("failed to register PMU devices!\n"); + pr_info("%s: failed to register PMU devices!\n", + of_node_full_name(node)); kfree(pmu); return ret; } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 83b5e34..4196c90 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -104,9 +104,11 @@ struct arm_pmu { atomic_t active_events; struct mutex reserve_mutex; u64 max_period; + bool secure_access; /* 32-bit ARM only */ struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct notifier_block hotplug_nb; + struct notifier_block cpu_pm_nb; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) |