diff options
author | Andi Kleen <ak@linux.intel.com> | 2017-03-20 13:17:00 -0700 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2017-03-21 16:04:11 -0300 |
commit | 430daf2dc7aff16096a137347e6fd03d4af609e9 (patch) | |
tree | 03e4adb546b2cafce5a57b9376098012390dbb71 | |
parent | fbe51fba82901fd15d3e0a068388fcd7d02dc047 (diff) | |
download | op-kernel-dev-430daf2dc7aff16096a137347e6fd03d4af609e9.zip op-kernel-dev-430daf2dc7aff16096a137347e6fd03d4af609e9.tar.gz |
perf stat: Collapse identically named events
The uncore has a lot of duplicated PMUs for different subsystems.
When expanding an uncore alias we usually end up with a large
number of identically named aliases, which makes perf stat
output difficult to read.
Automatically sum them up in perf stat, unless --no-merge is specified.
This can be the default because generally only the uncore PMUs have
duplicated aliases; other PMUs have unique names.
Before:
% perf stat --no-merge -a -e unc_c_llc_lookup.any sleep 1
Performance counter stats for 'system wide':
694,976 Bytes unc_c_llc_lookup.any
706,304 Bytes unc_c_llc_lookup.any
956,608 Bytes unc_c_llc_lookup.any
782,720 Bytes unc_c_llc_lookup.any
605,696 Bytes unc_c_llc_lookup.any
442,816 Bytes unc_c_llc_lookup.any
659,328 Bytes unc_c_llc_lookup.any
509,312 Bytes unc_c_llc_lookup.any
263,936 Bytes unc_c_llc_lookup.any
592,448 Bytes unc_c_llc_lookup.any
672,448 Bytes unc_c_llc_lookup.any
608,640 Bytes unc_c_llc_lookup.any
641,024 Bytes unc_c_llc_lookup.any
856,896 Bytes unc_c_llc_lookup.any
808,832 Bytes unc_c_llc_lookup.any
684,864 Bytes unc_c_llc_lookup.any
710,464 Bytes unc_c_llc_lookup.any
538,304 Bytes unc_c_llc_lookup.any
1.002577660 seconds time elapsed
After:
% perf stat -a -e unc_c_llc_lookup.any sleep 1
Performance counter stats for 'system wide':
2,685,120 Bytes unc_c_llc_lookup.any
1.002648032 seconds time elapsed
v2: Split collect_aliases. Rename alias flag.
v3: Make sure unsupported/not counted is always printed.
v4: Factor out callback change into separate patch.
v5: Move check for bad results here
Move merged check into collect_data
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20170320201711.14142-3-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 3 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 38 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 1 |
3 files changed, 38 insertions, 4 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 9785481..bd0e441 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -236,6 +236,9 @@ To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. +--no-merge:: +Do not merge results from same PMUs. + EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5c13a0f..a4da10a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -140,6 +140,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; static bool force_metric_only = false; +static bool no_merge = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -1182,12 +1183,37 @@ static void aggr_update_shadow(void) } } -static void collect_data(struct perf_evsel *counter, +static void collect_all_aliases(struct perf_evsel *counter, void (*cb)(struct perf_evsel *counter, void *data, bool first), void *data) { + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evsel_list->entries), node); + list_for_each_entry_continue (alias, &evsel_list->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + nsec_counter(alias) != nsec_counter(counter)) + break; + alias->merged_stat = true; + cb(alias, data, false); + } +} + +static bool collect_data(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + if (counter->merged_stat) + return false; cb(counter, data, true); + if (!no_merge) + collect_all_aliases(counter, cb, data); + return true; } struct aggr_data { @@ -1245,7 +1271,8 @@ static void print_aggr(char *prefix) 
evlist__for_each_entry(evsel_list, counter) { ad.val = ad.ena = ad.run = 0; ad.nr = 0; - collect_data(counter, aggr_cb, &ad); + if (!collect_data(counter, aggr_cb, &ad)) + continue; nr = ad.nr; ena = ad.ena; run = ad.run; @@ -1318,7 +1345,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) double uval; struct caggr_data cd = { .avg = 0.0 }; - collect_data(counter, counter_aggr_cb, &cd); + if (!collect_data(counter, counter_aggr_cb, &cd)) + return; if (prefix && !metric_only) fprintf(output, "%s", prefix); @@ -1353,7 +1381,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; - collect_data(counter, counter_cb, &ad); + if (!collect_data(counter, counter_cb, &ad)) + return; val = ad.val; ena = ad.ena; run = ad.run; @@ -1701,6 +1730,7 @@ static const struct option stat_options[] = { "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), + OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 06ef6f2..bd2e9b1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -131,6 +131,7 @@ struct perf_evsel { bool cmdline_group_boundary; struct list_head config_terms; int bpf_fd; + bool merged_stat; }; union u64_swap { |