summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2018-03-09 08:27:55 +0100
committerIngo Molnar <mingo@kernel.org>2018-03-09 08:27:55 +0100
commitfbf8a1e12c3ba3afdf0804bc80f5f13dfec1cffe (patch)
tree6b0dd23c7646cd4ec13b0636cdda11188d6845a3
parent1af22eba248efe2de25658041a80a3d40fb3e92e (diff)
parent2427b432e63b4b911100f717c48289195b7a7d62 (diff)
downloadop-kernel-dev-fbf8a1e12c3ba3afdf0804bc80f5f13dfec1cffe.zip
op-kernel-dev-fbf8a1e12c3ba3afdf0804bc80f5f13dfec1cffe.tar.gz
Merge tag 'perf-core-for-mingo-4.17-20180308' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Support to display the IPC/Cycle in 'annotate' TUI, for systems where this info can be obtained, like Intel's >= Skylake (Jin Yao) - Support wildcards on PMU name in dynamic PMU events (Agustin Vega-Frias) - Display pmu name when printing unmerged events in stat (Agustin Vega-Frias) - Auto-merge PMU events created by prefix or glob match (Agustin Vega-Frias) - Fix s390 'call' operations target function annotation (Thomas Richter) - Handle s390 PC relative load and store instruction in the augmented 'annotate', code, used so far in the TUI modes of 'perf report' and 'perf annotate' (Thomas Richter) - Provide libtraceevent with a kernel symbol resolver, so that symbols in tracepoint fields can be resolved when showing them in tools such as 'perf report' (Wang YanQing) - Refactor the cgroups code to look more like other code in tools/perf, using cgroup__{put,get} for refcount operations instead of its open-coded equivalent, breaking larger functions, etc (Arnaldo Carvalho de Melo) - Implement support for the -G/--cgroup target in 'perf trace', allowing strace like tracing (plus other events, backtraces, etc) for cgroups (Arnaldo Carvalho de Melo) - Update thread shortname in 'perf sched map' when the thread's COMM changes (Changbin Du) - refcount 'struct mem_info', for better sharing it over several users, avoid duplicating structs and fixing crashes related to use after free (Jiri Olsa) - Display perf.data version, offsets in 'perf report --header' (Jiri Olsa) - Record the machine's memory topology information in a perf.data feature section, to be used by tools such as 'perf c2c' (Jiri Olsa) - Fix output of forced groups in the header for 'perf report' --stdio and --tui (Jiri Olsa) - Better support llvm, clang, cxx make tests in the build process (Jiri Olsa) - Streamline the 'struct perf_mmap' methods, storing some info in the struct instead of passing it via various methods, shortening its 
signatures (Kan Liang) - Update the quipper perf.data parser library site information (Stephane Eranian) - Correct perf's man pages title markers for asciidoctor (Takashi Iwai) - Intel PT fixes and refactorings paving the way for implementing support for AUX area sampling (Adrian Hunter) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--tools/build/Makefile.feature6
-rw-r--r--tools/build/feature/Makefile14
-rw-r--r--tools/include/linux/bitmap.h2
-rw-r--r--tools/perf/Documentation/perf-data.txt2
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt2
-rw-r--r--tools/perf/Documentation/perf-kallsyms.txt2
-rw-r--r--tools/perf/Documentation/perf-list.txt8
-rw-r--r--tools/perf/Documentation/perf-sched.txt2
-rw-r--r--tools/perf/Documentation/perf-script-perl.txt2
-rw-r--r--tools/perf/Documentation/perf-stat.txt17
-rw-r--r--tools/perf/Documentation/perf-trace.txt25
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt7
-rw-r--r--tools/perf/Makefile.perf6
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c116
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c7
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c14
-rw-r--r--tools/perf/builtin-annotate.c88
-rw-r--r--tools/perf/builtin-c2c.c24
-rw-r--r--tools/perf/builtin-kvm.c9
-rw-r--r--tools/perf/builtin-record.c45
-rw-r--r--tools/perf/builtin-report.c26
-rw-r--r--tools/perf/builtin-sched.c133
-rw-r--r--tools/perf/builtin-stat.c29
-rw-r--r--tools/perf/builtin-top.c7
-rw-r--r--tools/perf/builtin-trace.c57
-rw-r--r--tools/perf/tests/backward-ring-buffer.c5
-rw-r--r--tools/perf/tests/bpf.c5
-rw-r--r--tools/perf/tests/code-reading.c7
-rw-r--r--tools/perf/tests/keep-tracking.c7
-rw-r--r--tools/perf/tests/mmap-basic.c7
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c7
-rw-r--r--tools/perf/tests/perf-record.c7
-rw-r--r--tools/perf/tests/sw-clock.c7
-rw-r--r--tools/perf/tests/switch-tracking.c7
-rw-r--r--tools/perf/tests/task-exit.c7
-rw-r--r--tools/perf/ui/browsers/hists.c5
-rw-r--r--tools/perf/util/annotate.c2
-rw-r--r--tools/perf/util/auxtrace.c37
-rw-r--r--tools/perf/util/auxtrace.h2
-rw-r--r--tools/perf/util/cgroup.c111
-rw-r--r--tools/perf/util/cgroup.h13
-rw-r--r--tools/perf/util/env.h9
-rw-r--r--tools/perf/util/evlist.c8
-rw-r--r--tools/perf/util/evsel.c23
-rw-r--r--tools/perf/util/evsel.h6
-rw-r--r--tools/perf/util/header.c312
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/hist.c4
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c64
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h2
-rw-r--r--tools/perf/util/intel-pt.c110
-rw-r--r--tools/perf/util/machine.c2
-rw-r--r--tools/perf/util/mmap.c63
-rw-r--r--tools/perf/util/mmap.h16
-rw-r--r--tools/perf/util/parse-events.c21
-rw-r--r--tools/perf/util/parse-events.h2
-rw-r--r--tools/perf/util/parse-events.l2
-rw-r--r--tools/perf/util/parse-events.y18
-rw-r--r--tools/perf/util/python.c7
-rw-r--r--tools/perf/util/symbol.c22
-rw-r--r--tools/perf/util/symbol.h19
-rw-r--r--tools/perf/util/thread.h1
62 files changed, 1197 insertions, 401 deletions
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index c378f00..5b6dda3 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -82,7 +82,11 @@ FEATURE_TESTS_EXTRA := \
liberty-z \
libunwind-debug-frame \
libunwind-debug-frame-arm \
- libunwind-debug-frame-aarch64
+ libunwind-debug-frame-aarch64 \
+ cxx \
+ llvm \
+ llvm-version \
+ clang
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 0a490cb..dac9563 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -54,7 +54,10 @@ FILES= \
test-jvmti.bin \
test-sched_getcpu.bin \
test-setns.bin \
- test-libopencsd.bin
+ test-libopencsd.bin \
+ test-clang.bin \
+ test-llvm.bin \
+ test-llvm-version.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@@ -257,11 +260,13 @@ $(OUTPUT)test-llvm.bin:
-I$(shell $(LLVM_CONFIG) --includedir) \
-L$(shell $(LLVM_CONFIG) --libdir) \
$(shell $(LLVM_CONFIG) --libs Core BPF) \
- $(shell $(LLVM_CONFIG) --system-libs)
+ $(shell $(LLVM_CONFIG) --system-libs) \
+ > $(@:.bin=.make.output) 2>&1
$(OUTPUT)test-llvm-version.bin:
$(BUILDXX) -std=gnu++11 \
- -I$(shell $(LLVM_CONFIG) --includedir)
+ -I$(shell $(LLVM_CONFIG) --includedir) \
+ > $(@:.bin=.make.output) 2>&1
$(OUTPUT)test-clang.bin:
$(BUILDXX) -std=gnu++11 \
@@ -271,7 +276,8 @@ $(OUTPUT)test-clang.bin:
-lclangFrontend -lclangEdit -lclangLex \
-lclangAST -Wl,--end-group \
$(shell $(LLVM_CONFIG) --libs Core option) \
- $(shell $(LLVM_CONFIG) --system-libs)
+ $(shell $(LLVM_CONFIG) --system-libs) \
+ > $(@:.bin=.make.output) 2>&1
-include $(OUTPUT)*.d
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca16027..63440cc 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -98,7 +98,7 @@ static inline int test_and_set_bit(int nr, unsigned long *addr)
/**
* bitmap_alloc - Allocate bitmap
- * @nr: Bit to set
+ * @nbits: Number of bits
*/
static inline unsigned long *bitmap_alloc(int nbits)
{
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index 90bb4aa..c871807 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -1,5 +1,5 @@
perf-data(1)
-==============
+============
NAME
----
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 721a447..b80c843 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -1,5 +1,5 @@
perf-ftrace(1)
-=============
+==============
NAME
----
diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
index cf9f404..f3c6209 100644
--- a/tools/perf/Documentation/perf-kallsyms.txt
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -1,5 +1,5 @@
perf-kallsyms(1)
-==============
+================
NAME
----
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index e2a897a..2549c34 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -141,7 +141,13 @@ on the first memory controller on socket 0 of a Intel Xeon system
Each memory controller has its own PMU. Measuring the complete system
bandwidth would require specifying all imc PMUs (see perf list output),
-and adding the values together.
+and adding the values together. To simplify creation of multiple events,
+prefix and glob matching is supported in the PMU name, and the prefix
+'uncore_' is also ignored when performing the match. So the command above
+can be expanded to all memory controllers by using the syntaxes:
+
+ perf stat -C 0 -a imc/cas_count_read/,imc/cas_count_write/ -I 1000 ...
+ perf stat -C 0 -a *imc*/cas_count_read/,*imc*/cas_count_write/ -I 1000 ...
This example measures the combined core power every second
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index c7e50f2..bb33601 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -1,5 +1,5 @@
perf-sched(1)
-==============
+=============
NAME
----
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index 142606c..5a1f681 100644
--- a/tools/perf/Documentation/perf-script-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -1,5 +1,5 @@
perf-script-perl(1)
-==================
+===================
NAME
----
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2b38e22..f15b306 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -49,6 +49,13 @@ report::
parameters are defined by corresponding entries in
/sys/bus/event_source/devices/<pmu>/format/*
+ Note that the last two syntaxes support prefix and glob matching in
+ the PMU name to simplify creation of events across multiple instances
+ of the same type of PMU in large systems (e.g. memory controller PMUs).
+ Multiple PMU instances are typical for uncore PMUs, so the prefix
+ 'uncore_' is also ignored when performing this match.
+
+
-i::
--no-inherit::
child tasks do not inherit counters
@@ -260,6 +267,16 @@ taskset.
--no-merge::
Do not merge results from same PMUs.
+When multiple events are created from a single event specification,
+stat will, by default, aggregate the event counts and show the result
+in a single row. This option disables that behavior and shows
+the individual events and counts.
+
+Multiple events are created from a single event specification when:
+1. Prefix or glob matching is used for the PMU name.
+2. Aliases, which are listed immediately after the Kernel PMU events
+ by perf list, are used.
+
--smi-cost::
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 33a88e9..5a7035c 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -63,6 +63,31 @@ filter out the startup phase of the program, which is often very different.
--uid=::
Record events in threads owned by uid. Name or number.
+-G::
+--cgroup::
+ Record events in threads in a cgroup.
+
+ Look for cgroups to set at the /sys/fs/cgroup/perf_event directory, then
+ remove the /sys/fs/cgroup/perf_event/ part and try:
+
+ perf trace -G A -e sched:*switch
+
+ Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+ _and_ sched:sched_switch to the 'A' cgroup, while:
+
+ perf trace -e sched:*switch -G A
+
+ will only set the sched:sched_switch event to the 'A' cgroup, all the
+ other events (raw_syscalls:sys_{enter,exit}, etc) are left "without"
+ a cgroup (on the root cgroup, sys wide, etc).
+
+ Multiple cgroups:
+
+ perf trace -G A -e sched:*switch -G B
+
+ the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+ to the 'B' cgroup.
+
--filter-pids=::
Filter out events for these pids and for 'trace' itself (comma separated list).
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index f7d85e8..d00f0d5 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -485,10 +485,5 @@ in pmu-tools parser. This allows to read perf.data from python and dump it.
quipper
The quipper C++ parser is available at
-https://chromium.googlesource.com/chromiumos/platform2
+http://github.com/google/perf_data_converter/tree/master/src/quipper
-It is under the chromiumos-wide-profiling/ subdirectory. This library can
-convert a perf data file to a protobuf and vice versa.
-
-Unfortunately this parser tends to be many versions behind and may not be able
-to parse data files generated by recent perf.
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4679e23..f7517e1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -708,15 +708,15 @@ TAG_FILES= ../../include/uapi/linux/perf_event.h
TAGS:
$(QUIET_GEN)$(RM) TAGS; \
- $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs etags -a $(TAG_FILES)
tags:
$(QUIET_GEN)$(RM) tags; \
- $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs ctags -a $(TAG_FILES)
cscope:
$(QUIET_GEN)$(RM) cscope*; \
- $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs cscope -b $(TAG_FILES)
### Testing rules
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index 01df9d8..46c2183 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -1,6 +1,112 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
+static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
+ struct map *map)
+{
+ char *endptr, *tok, *name;
+ struct addr_map_symbol target = {
+ .map = map,
+ };
+
+ tok = strchr(ops->raw, ',');
+ if (!tok)
+ return -1;
+
+ ops->target.addr = strtoull(tok + 1, &endptr, 16);
+
+ name = strchr(endptr, '<');
+ if (name == NULL)
+ return -1;
+
+ name++;
+
+ if (arch->objdump.skip_functions_char &&
+ strchr(name, arch->objdump.skip_functions_char))
+ return -1;
+
+ tok = strchr(name, '>');
+ if (tok == NULL)
+ return -1;
+
+ *tok = '\0';
+ ops->target.name = strdup(name);
+ *tok = '>';
+
+ if (ops->target.name == NULL)
+ return -1;
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+
+ if (map_groups__find_ams(&target) == 0 &&
+ map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.sym;
+
+ return 0;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops);
+
+static struct ins_ops s390_call_ops = {
+ .parse = s390_call__parse,
+ .scnprintf = call__scnprintf,
+};
+
+static int s390_mov__parse(struct arch *arch __maybe_unused,
+ struct ins_operands *ops,
+ struct map *map __maybe_unused)
+{
+ char *s = strchr(ops->raw, ','), *target, *endptr;
+
+ if (s == NULL)
+ return -1;
+
+ *s = '\0';
+ ops->source.raw = strdup(ops->raw);
+ *s = ',';
+
+ if (ops->source.raw == NULL)
+ return -1;
+
+ target = ++s;
+ ops->target.raw = strdup(target);
+ if (ops->target.raw == NULL)
+ goto out_free_source;
+
+ ops->target.addr = strtoull(target, &endptr, 16);
+ if (endptr == target)
+ goto out_free_target;
+
+ s = strchr(endptr, '<');
+ if (s == NULL)
+ goto out_free_target;
+ endptr = strchr(s + 1, '>');
+ if (endptr == NULL)
+ goto out_free_target;
+
+ *endptr = '\0';
+ ops->target.name = strdup(s + 1);
+ *endptr = '>';
+ if (ops->target.name == NULL)
+ goto out_free_target;
+
+ return 0;
+
+out_free_target:
+ zfree(&ops->target.raw);
+out_free_source:
+ zfree(&ops->source.raw);
+ return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops);
+
+static struct ins_ops s390_mov_ops = {
+ .parse = s390_mov__parse,
+ .scnprintf = mov__scnprintf,
+};
+
static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
{
struct ins_ops *ops = NULL;
@@ -14,9 +120,17 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
if (!strcmp(name, "bras") ||
!strcmp(name, "brasl") ||
!strcmp(name, "basr"))
- ops = &call_ops;
+ ops = &s390_call_ops;
if (!strcmp(name, "br"))
ops = &ret_ops;
+ /* override load/store relative to PC */
+ if (!strcmp(name, "lrl") ||
+ !strcmp(name, "lgrl") ||
+ !strcmp(name, "lgfrl") ||
+ !strcmp(name, "llgfrl") ||
+ !strcmp(name, "strl") ||
+ !strcmp(name, "stgrl"))
+ ops = &s390_mov_ops;
if (ops)
arch__associate_ins_ops(arch, name, ops);
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 7f82d91..7a77216 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -61,7 +61,6 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
u64 test_tsc, comm1_tsc, comm2_tsc;
u64 test_time, comm1_time = 0, comm2_time = 0;
struct perf_mmap *md;
- u64 end, start;
threads = thread_map__new(-1, getpid(), UINT_MAX);
CHECK_NOT_NULL__(threads);
@@ -112,10 +111,10 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
for (i = 0; i < evlist->nr_mmaps; i++) {
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
continue;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
struct perf_sample sample;
if (event->header.type != PERF_RECORD_COMM ||
@@ -134,7 +133,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
comm2_time = sample.time;
}
next_event:
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
}
perf_mmap__read_done(md);
}
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 6aa3f2a..b135af6 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -37,15 +37,11 @@ struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
- if (evlist) {
- evlist__for_each_entry(evlist, evsel) {
- if (intel_pt_pmu &&
- evsel->attr.type == intel_pt_pmu->type)
- found_pt = true;
- if (intel_bts_pmu &&
- evsel->attr.type == intel_bts_pmu->type)
- found_bts = true;
- }
+ evlist__for_each_entry(evlist, evsel) {
+ if (intel_pt_pmu && evsel->attr.type == intel_pt_pmu->type)
+ found_pt = true;
+ if (intel_bts_pmu && evsel->attr.type == intel_bts_pmu->type)
+ found_bts = true;
}
if (found_pt && found_bts) {
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f15731a..ead6ae4 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -44,6 +44,7 @@ struct perf_annotate {
bool full_paths;
bool print_line;
bool skip_missing;
+ bool has_br_stack;
const char *sym_hist_filter;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -146,16 +147,73 @@ static void process_branch_stack(struct branch_stack *bs, struct addr_location *
free(bi);
}
+static int hist_iter__branch_callback(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused,
+ bool single __maybe_unused,
+ void *arg __maybe_unused)
+{
+ struct hist_entry *he = iter->he;
+ struct branch_info *bi;
+ struct perf_sample *sample = iter->sample;
+ struct perf_evsel *evsel = iter->evsel;
+ int err;
+
+ hist__account_cycles(sample->branch_stack, al, sample, false);
+
+ bi = he->branch_info;
+ err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+
+ if (err)
+ goto out;
+
+ err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+
+out:
+ return err;
+}
+
+static int process_branch_callback(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct addr_location *al __maybe_unused,
+ struct perf_annotate *ann,
+ struct machine *machine)
+{
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = sample,
+ .add_entry_cb = hist_iter__branch_callback,
+ .hide_unresolved = symbol_conf.hide_unresolved,
+ .ops = &hist_iter_branch,
+ };
+
+ struct addr_location a;
+ int ret;
+
+ if (machine__resolve(machine, &a, sample) < 0)
+ return -1;
+
+ if (a.sym == NULL)
+ return 0;
+
+ if (a.map != NULL)
+ a.map->dso->hit = 1;
+
+ ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
+ return ret;
+}
+
static int perf_evsel__add_sample(struct perf_evsel *evsel,
struct perf_sample *sample,
struct addr_location *al,
- struct perf_annotate *ann)
+ struct perf_annotate *ann,
+ struct machine *machine)
{
struct hists *hists = evsel__hists(evsel);
struct hist_entry *he;
int ret;
- if (ann->sym_hist_filter != NULL &&
+ if ((!ann->has_br_stack || !ui__has_annotation()) &&
+ ann->sym_hist_filter != NULL &&
(al->sym == NULL ||
strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
/* We're only interested in a symbol named sym_hist_filter */
@@ -178,6 +236,9 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
*/
process_branch_stack(sample->branch_stack, al, sample);
+ if (ann->has_br_stack && ui__has_annotation())
+ return process_branch_callback(evsel, sample, al, ann, machine);
+
he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
if (he == NULL)
return -ENOMEM;
@@ -206,7 +267,8 @@ static int process_sample_event(struct perf_tool *tool,
if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
goto out_put;
- if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
+ if (!al.filtered &&
+ perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
ret = -1;
@@ -238,6 +300,10 @@ static void hists__find_annotations(struct hists *hists,
if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
goto find_next;
+ if (ann->sym_hist_filter &&
+ (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
+ goto find_next;
+
notes = symbol__annotation(he->ms.sym);
if (notes->src == NULL) {
find_next:
@@ -269,6 +335,7 @@ find_next:
nd = rb_next(nd);
} else if (use_browser == 1) {
key = hist_entry__tui_annotate(he, evsel, NULL);
+
switch (key) {
case -1:
if (!ann->skip_missing)
@@ -489,6 +556,9 @@ int cmd_annotate(int argc, const char **argv)
if (annotate.session == NULL)
return -1;
+ annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
+ HEADER_BRANCH_STACK);
+
ret = symbol__annotation_init();
if (ret < 0)
goto out_delete;
@@ -499,9 +569,6 @@ int cmd_annotate(int argc, const char **argv)
if (ret < 0)
goto out_delete;
- if (setup_sorting(NULL) < 0)
- usage_with_options(annotate_usage, options);
-
if (annotate.use_stdio)
use_browser = 0;
else if (annotate.use_tui)
@@ -511,6 +578,15 @@ int cmd_annotate(int argc, const char **argv)
setup_browser(true);
+ if (use_browser == 1 && annotate.has_br_stack) {
+ sort__mode = SORT_MODE__BRANCH;
+ if (setup_sorting(annotate.session->evlist) < 0)
+ usage_with_options(annotate_usage, options);
+ } else {
+ if (setup_sorting(NULL) < 0)
+ usage_with_options(annotate_usage, options);
+ }
+
ret = __cmd_annotate(&annotate);
out_delete:
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 539c3d4..98d243f 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -237,9 +237,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (mi == NULL)
return -ENOMEM;
- mi_dup = memdup(mi, sizeof(*mi));
- if (!mi_dup)
- goto free_mi;
+ /*
+ * The mi object is released in hists__add_entry_ops,
+ * if it gets sorted out into existing data, so we need
+ * to take the copy now.
+ */
+ mi_dup = mem_info__get(mi);
c2c_decode_stats(&stats, mi);
@@ -247,7 +250,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
&al, NULL, NULL, mi,
sample, true);
if (he == NULL)
- goto free_mi_dup;
+ goto free_mi;
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_add_stats(&c2c_he->stats, &stats);
@@ -272,19 +275,15 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
mi = mi_dup;
- mi_dup = memdup(mi, sizeof(*mi));
- if (!mi_dup)
- goto free_mi;
-
c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2);
if (!c2c_hists)
- goto free_mi_dup;
+ goto free_mi;
he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
&al, NULL, NULL, mi,
sample, true);
if (he == NULL)
- goto free_mi_dup;
+ goto free_mi;
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_add_stats(&c2c_he->stats, &stats);
@@ -303,10 +302,9 @@ out:
addr_location__put(&al);
return ret;
-free_mi_dup:
- free(mi_dup);
free_mi:
- free(mi);
+ mem_info__put(mi_dup);
+ mem_info__put(mi);
ret = -ENOMEM;
goto out;
}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index d2703d3b..72e2ca0 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -746,21 +746,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
struct perf_evlist *evlist = kvm->evlist;
union perf_event *event;
struct perf_mmap *md;
- u64 end, start;
u64 timestamp;
s64 n = 0;
int err;
*mmap_time = ULLONG_MAX;
md = &evlist->mmap[idx];
- err = perf_mmap__read_init(md, false, &start, &end);
+ err = perf_mmap__read_init(md);
if (err < 0)
return (err == -EAGAIN) ? 0 : -1;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
if (err) {
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
pr_err("Failed to parse sample\n");
return -1;
}
@@ -770,7 +769,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
* FIXME: Here we can't consume the event, as perf_session__queue_event will
* point to it, and it'll get possibly overwritten by the kernel.
*/
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
if (err) {
pr_err("Failed to enqueue sample: %d\n", err);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 12230dd..b814945 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -71,7 +71,6 @@ struct record {
struct auxtrace_record *itr;
struct perf_evlist *evlist;
struct perf_session *session;
- const char *progname;
int realtime_prio;
bool no_buildid;
bool no_buildid_set;
@@ -274,6 +273,24 @@ static void record__read_auxtrace_snapshot(struct record *rec)
}
}
+static int record__auxtrace_init(struct record *rec)
+{
+ int err;
+
+ if (!rec->itr) {
+ rec->itr = auxtrace_record__init(rec->evlist, &err);
+ if (err)
+ return err;
+ }
+
+ err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
+ rec->opts.auxtrace_snapshot_opts);
+ if (err)
+ return err;
+
+ return auxtrace_parse_filters(rec->evlist);
+}
+
#else
static inline
@@ -294,6 +311,11 @@ int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
return 0;
}
+static int record__auxtrace_init(struct record *rec __maybe_unused)
+{
+ return 0;
+}
+
#endif
static int record__mmap_evlist(struct record *rec,
@@ -510,7 +532,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
if (maps[i].base) {
- if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
rc = -1;
goto out;
}
@@ -831,7 +853,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
int status = 0;
unsigned long waking = 0;
const bool forks = argc > 0;
- struct machine *machine;
struct perf_tool *tool = &rec->tool;
struct record_opts *opts = &rec->opts;
struct perf_data *data = &rec->data;
@@ -839,8 +860,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
bool disabled = false, draining = false;
int fd;
- rec->progname = argv[0];
-
atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
@@ -936,8 +955,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
goto out_child;
}
- machine = &session->machines.host;
-
err = record__synthesize(rec, false);
if (err < 0)
goto out_child;
@@ -965,6 +982,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* Let the child rip
*/
if (forks) {
+ struct machine *machine = &session->machines.host;
union perf_event *event;
pid_t tgid;
@@ -1727,17 +1745,6 @@ int cmd_record(int argc, const char **argv)
alarm(rec->switch_output.time);
}
- if (!rec->itr) {
- rec->itr = auxtrace_record__init(rec->evlist, &err);
- if (err)
- goto out;
- }
-
- err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
- rec->opts.auxtrace_snapshot_opts);
- if (err)
- goto out;
-
/*
* Allow aliases to facilitate the lookup of symbols for address
* filters. Refer to auxtrace_parse_filters().
@@ -1746,7 +1753,7 @@ int cmd_record(int argc, const char **argv)
symbol__init(NULL);
- err = auxtrace_parse_filters(rec->evlist);
+ err = record__auxtrace_init(rec);
if (err)
goto out;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1eedb18..971ccba 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -400,8 +400,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
nr_samples = convert_unit(nr_samples, &unit);
ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
- if (evname != NULL)
- ret += fprintf(fp, " of event '%s'", evname);
+ if (evname != NULL) {
+ ret += fprintf(fp, " of event%s '%s'",
+ evsel->nr_members > 1 ? "s" : "", evname);
+ }
if (rep->time_str)
ret += fprintf(fp, " (time slices: %s)", rep->time_str);
@@ -1175,8 +1177,17 @@ repeat:
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
- if (group_set && !session->evlist->nr_groups)
+ /*
+ * Events in data file are not collected in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+ if (group_set && !session->evlist->nr_groups) {
+ struct perf_evsel *leader = perf_evlist__first(session->evlist);
+
perf_evlist__set_leader(session->evlist);
+ leader->forced_leader = true;
+ }
if (itrace_synth_opts.last_branch)
has_br_stack = true;
@@ -1337,6 +1348,15 @@ repeat:
report.range_num = 1;
}
+ if (session->tevent.pevent &&
+ pevent_set_function_resolver(session->tevent.pevent,
+ machine__resolve_kernel_addr,
+ &session->machines.host) < 0) {
+ pr_err("%s: failed to set libtraceevent function resolver\n",
+ __func__);
+ return -1;
+ }
+
sort__setup_elide(stdout);
ret = __cmd_report(&report);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 83283fe..4dfdee6 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -254,6 +254,10 @@ struct thread_runtime {
u64 total_delay_time;
int last_state;
+
+ char shortname[3];
+ bool comm_changed;
+
u64 migrations;
};
@@ -897,6 +901,37 @@ struct sort_dimension {
struct list_head list;
};
+/*
+ * handle runtime stats saved per thread
+ */
+static struct thread_runtime *thread__init_runtime(struct thread *thread)
+{
+ struct thread_runtime *r;
+
+ r = zalloc(sizeof(struct thread_runtime));
+ if (!r)
+ return NULL;
+
+ init_stats(&r->run_stats);
+ thread__set_priv(thread, r);
+
+ return r;
+}
+
+static struct thread_runtime *thread__get_runtime(struct thread *thread)
+{
+ struct thread_runtime *tr;
+
+ tr = thread__priv(thread);
+ if (tr == NULL) {
+ tr = thread__init_runtime(thread);
+ if (tr == NULL)
+ pr_debug("Failed to malloc memory for runtime data.\n");
+ }
+
+ return tr;
+}
+
static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
@@ -1480,6 +1515,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
{
const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
struct thread *sched_in;
+ struct thread_runtime *tr;
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
@@ -1519,22 +1555,28 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
if (sched_in == NULL)
return -1;
+ tr = thread__get_runtime(sched_in);
+ if (tr == NULL) {
+ thread__put(sched_in);
+ return -1;
+ }
+
sched->curr_thread[this_cpu] = thread__get(sched_in);
printf(" ");
new_shortname = 0;
- if (!sched_in->shortname[0]) {
+ if (!tr->shortname[0]) {
if (!strcmp(thread__comm_str(sched_in), "swapper")) {
/*
* Don't allocate a letter-number for swapper:0
* as a shortname. Instead, we use '.' for it.
*/
- sched_in->shortname[0] = '.';
- sched_in->shortname[1] = ' ';
+ tr->shortname[0] = '.';
+ tr->shortname[1] = ' ';
} else {
- sched_in->shortname[0] = sched->next_shortname1;
- sched_in->shortname[1] = sched->next_shortname2;
+ tr->shortname[0] = sched->next_shortname1;
+ tr->shortname[1] = sched->next_shortname2;
if (sched->next_shortname1 < 'Z') {
sched->next_shortname1++;
@@ -1552,6 +1594,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
for (i = 0; i < cpus_nr; i++) {
int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
struct thread *curr_thread = sched->curr_thread[cpu];
+ struct thread_runtime *curr_tr;
const char *pid_color = color;
const char *cpu_color = color;
@@ -1569,9 +1612,14 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
else
color_fprintf(stdout, cpu_color, "*");
- if (sched->curr_thread[cpu])
- color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
- else
+ if (sched->curr_thread[cpu]) {
+ curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
+ if (curr_tr == NULL) {
+ thread__put(sched_in);
+ return -1;
+ }
+ color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname);
+ } else
color_fprintf(stdout, color, " ");
}
@@ -1580,14 +1628,15 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
color_fprintf(stdout, color, " %12s secs ", stimestamp);
- if (new_shortname || (verbose > 0 && sched_in->tid)) {
+ if (new_shortname || tr->comm_changed || (verbose > 0 && sched_in->tid)) {
const char *pid_color = color;
if (thread__has_color(sched_in))
pid_color = COLOR_PIDS;
color_fprintf(stdout, pid_color, "%s => %s:%d",
- sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
+ tr->shortname, thread__comm_str(sched_in), sched_in->tid);
+ tr->comm_changed = false;
}
if (sched->map.comp && new_cpu)
@@ -1691,6 +1740,37 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
return err;
}
+static int perf_sched__process_comm(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct thread_runtime *tr;
+ int err;
+
+ err = perf_event__process_comm(tool, event, sample, machine);
+ if (err)
+ return err;
+
+ thread = machine__find_thread(machine, sample->pid, sample->tid);
+ if (!thread) {
+ pr_err("Internal error: can't find thread\n");
+ return -1;
+ }
+
+ tr = thread__get_runtime(thread);
+ if (tr == NULL) {
+ thread__put(thread);
+ return -1;
+ }
+
+ tr->comm_changed = true;
+ thread__put(thread);
+
+ return 0;
+}
+
static int perf_sched__read_events(struct perf_sched *sched)
{
const struct perf_evsel_str_handler handlers[] = {
@@ -2200,37 +2280,6 @@ static void save_idle_callchain(struct idle_thread_runtime *itr,
callchain_cursor__copy(&itr->cursor, &callchain_cursor);
}
-/*
- * handle runtime stats saved per thread
- */
-static struct thread_runtime *thread__init_runtime(struct thread *thread)
-{
- struct thread_runtime *r;
-
- r = zalloc(sizeof(struct thread_runtime));
- if (!r)
- return NULL;
-
- init_stats(&r->run_stats);
- thread__set_priv(thread, r);
-
- return r;
-}
-
-static struct thread_runtime *thread__get_runtime(struct thread *thread)
-{
- struct thread_runtime *tr;
-
- tr = thread__priv(thread);
- if (tr == NULL) {
- tr = thread__init_runtime(thread);
- if (tr == NULL)
- pr_debug("Failed to malloc memory for runtime data.\n");
- }
-
- return tr;
-}
-
static struct thread *timehist_get_thread(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine,
@@ -3291,7 +3340,7 @@ int cmd_sched(int argc, const char **argv)
struct perf_sched sched = {
.tool = {
.sample = perf_sched__process_tracepoint_sample,
- .comm = perf_event__process_comm,
+ .comm = perf_sched__process_comm,
.namespaces = perf_event__process_namespaces,
.lost = perf_event__process_lost,
.fork = perf_sched__process_fork_event,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3a022b3..0fa9ea3 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1251,6 +1251,31 @@ static void aggr_update_shadow(void)
}
}
+static void uniquify_event_name(struct perf_evsel *counter)
+{
+ char *new_name;
+ char *config;
+
+ if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+ strlen(counter->pmu_name)))
+ return;
+
+ config = strchr(counter->name, '/');
+ if (config) {
+ if (asprintf(&new_name,
+ "%s%s", counter->pmu_name, config) > 0) {
+ free(counter->name);
+ counter->name = new_name;
+ }
+ } else {
+ if (asprintf(&new_name,
+ "%s [%s]", counter->name, counter->pmu_name) > 0) {
+ free(counter->name);
+ counter->name = new_name;
+ }
+ }
+}
+
static void collect_all_aliases(struct perf_evsel *counter,
void (*cb)(struct perf_evsel *counter, void *data,
bool first),
@@ -1279,7 +1304,9 @@ static bool collect_data(struct perf_evsel *counter,
if (counter->merged_stat)
return false;
cb(counter, data, true);
- if (!no_merge && counter->auto_merge_stats)
+ if (no_merge)
+ uniquify_event_name(counter);
+ else if (counter->auto_merge_stats)
collect_all_aliases(counter, cb, data);
return true;
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index bb4f9fa..0a26b56 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -817,14 +817,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
struct perf_session *session = top->session;
union perf_event *event;
struct machine *machine;
- u64 end, start;
int ret;
md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
- if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
return;
- while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
ret = perf_evlist__parse_sample(evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
@@ -879,7 +878,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
} else
++session->evlist->stats.nr_unknown_events;
next_event:
- perf_mmap__consume(md, opts->overwrite);
+ perf_mmap__consume(md);
}
perf_mmap__read_done(md);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1a93deb..87b95c9 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -19,6 +19,7 @@
#include <traceevent/event-parse.h>
#include <api/fs/tracing_path.h>
#include "builtin.h"
+#include "util/cgroup.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/env.h"
@@ -83,6 +84,7 @@ struct trace {
struct perf_evlist *evlist;
struct machine *host;
struct thread *current;
+ struct cgroup *cgroup;
u64 base_time;
FILE *output;
unsigned long nr_events;
@@ -2370,6 +2372,34 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
+ /*
+ * If a global cgroup was set, apply it to all the events without an
+ * explicit cgroup. I.e.:
+ *
+ * trace -G A -e sched:*switch
+ *
+ * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+ * _and_ sched:sched_switch to the 'A' cgroup, while:
+ *
+ * trace -e sched:*switch -G A
+ *
+ * will only set the sched:sched_switch event to the 'A' cgroup, all the
+ * other events (raw_syscalls:sys_{enter,exit}, etc) are left "without"
+ * a cgroup (on the root cgroup, sys wide, etc).
+ *
+ * Multiple cgroups:
+ *
+ * trace -G A -e sched:*switch -G B
+ *
+ * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+ * to the 'B' cgroup.
+ *
+ * evlist__set_default_cgroup() grabs a reference of the passed cgroup
+ * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
+ */
+ if (trace->cgroup)
+ evlist__set_default_cgroup(trace->evlist, trace->cgroup);
+
err = perf_evlist__create_maps(evlist, &trace->opts.target);
if (err < 0) {
fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
@@ -2473,13 +2503,12 @@ again:
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;
struct perf_mmap *md;
- u64 end, start;
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
continue;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
struct perf_sample sample;
++trace->nr_events;
@@ -2492,7 +2521,7 @@ again:
trace__handle_event(trace, event, &sample);
next_event:
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
if (interrupted)
goto out_disable;
@@ -2540,6 +2569,7 @@ out_delete_evlist:
trace__symbols__exit(trace);
perf_evlist__delete(evlist);
+ cgroup__put(trace->cgroup);
trace->evlist = NULL;
trace->live = false;
return err;
@@ -2979,6 +3009,18 @@ out:
return err;
}
+static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
+{
+ struct trace *trace = opt->value;
+
+ if (!list_empty(&trace->evlist->entries))
+ return parse_cgroups(opt, str, unset);
+
+ trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
+
+ return 0;
+}
+
int cmd_trace(int argc, const char **argv)
{
const char *trace_usage[] = {
@@ -3069,6 +3111,8 @@ int cmd_trace(int argc, const char **argv)
"print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
+ OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
+ trace__parse_cgroups),
OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
"ms to wait before starting measurement after program "
"start"),
@@ -3095,6 +3139,11 @@ int cmd_trace(int argc, const char **argv)
argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
+ usage_with_options_msg(trace_usage, trace_options,
+ "cgroup monitoring only available in system-wide mode");
+ }
+
err = bpf__setup_stdout(trace.evlist);
if (err) {
bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index e0b1b41..6d598cc 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -33,10 +33,9 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
for (i = 0; i < evlist->nr_mmaps; i++) {
struct perf_mmap *map = &evlist->overwrite_mmap[i];
union perf_event *event;
- u64 start, end;
- perf_mmap__read_init(map, true, &start, &end);
- while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
+ perf_mmap__read_init(map);
+ while ((event = perf_mmap__read_event(map)) != NULL) {
const u32 type = event->header.type;
switch (type) {
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 09c9c9f..79b54f8 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -177,13 +177,12 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;
struct perf_mmap *md;
- u64 end, start;
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
continue;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
const u32 type = event->header.type;
if (type == PERF_RECORD_SAMPLE)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 03ed8c7..9993635 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -410,17 +410,16 @@ static int process_events(struct machine *machine, struct perf_evlist *evlist,
{
union perf_event *event;
struct perf_mmap *md;
- u64 end, start;
int i, ret;
for (i = 0; i < evlist->nr_mmaps; i++) {
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
continue;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
ret = process_event(machine, evlist, event, state);
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
if (ret < 0)
return ret;
}
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 4590d8f..17c46f3 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -28,21 +28,20 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
{
union perf_event *event;
struct perf_mmap *md;
- u64 end, start;
int i, found;
found = 0;
for (i = 0; i < evlist->nr_mmaps; i++) {
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
continue;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
if (event->header.type == PERF_RECORD_COMM &&
(pid_t)event->comm.pid == getpid() &&
(pid_t)event->comm.tid == getpid() &&
strcmp(event->comm.comm, comm) == 0)
found += 1;
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
}
perf_mmap__read_done(md);
}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 44c58d6..bb8e6bc 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -39,7 +39,6 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
struct perf_evsel *evsels[nsyscalls], *evsel;
char sbuf[STRERR_BUFSIZE];
struct perf_mmap *md;
- u64 end, start;
threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
@@ -109,10 +108,10 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
}
md = &evlist->mmap[0];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
goto out_init;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
struct perf_sample sample;
if (event->header.type != PERF_RECORD_SAMPLE) {
@@ -135,7 +134,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
goto out_delete_evlist;
}
nr_events[evsel->idx]++;
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
}
perf_mmap__read_done(md);
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 620b210..344dc3a 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -87,13 +87,12 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;
struct perf_mmap *md;
- u64 end, start;
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
continue;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
const u32 type = event->header.type;
int tp_flags;
struct perf_sample sample;
@@ -101,7 +100,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
++nr_events;
if (type != PERF_RECORD_SAMPLE) {
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
continue;
}
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 31f3f70..34394cc 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -165,13 +165,12 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;
struct perf_mmap *md;
- u64 end, start;
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
continue;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
const u32 type = event->header.type;
const char *name = perf_event__name(type);
@@ -272,7 +271,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
++errs;
}
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
}
perf_mmap__read_done(md);
}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index e6320e2..f9490b2 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -40,7 +40,6 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
struct cpu_map *cpus;
struct thread_map *threads;
struct perf_mmap *md;
- u64 end, start;
attr.sample_freq = 500;
@@ -96,10 +95,10 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
perf_evlist__disable(evlist);
md = &evlist->mmap[0];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
goto out_init;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
struct perf_sample sample;
if (event->header.type != PERF_RECORD_SAMPLE)
@@ -114,7 +113,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
total_periods += sample.period;
nr_samples++;
next_event:
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
}
perf_mmap__read_done(md);
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 10c4dcd..9b5be51 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -259,18 +259,17 @@ static int process_events(struct perf_evlist *evlist,
LIST_HEAD(events);
struct event_node *events_array, *node;
struct perf_mmap *md;
- u64 end, start;
int i, ret;
for (i = 0; i < evlist->nr_mmaps; i++) {
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
continue;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
cnt += 1;
ret = add_event(evlist, &events, event);
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
if (ret < 0)
goto out_free_nodes;
}
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 02b0888..e92fa60 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -48,7 +48,6 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
struct cpu_map *cpus;
struct thread_map *threads;
struct perf_mmap *md;
- u64 end, start;
signal(SIGCHLD, sig_handler);
@@ -113,14 +112,14 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
retry:
md = &evlist->mmap[0];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
goto out_init;
- while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) {
+ while ((event = perf_mmap__read_event(md)) != NULL) {
if (event->header.type == PERF_RECORD_EXIT)
nr_exit++;
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
}
perf_mmap__read_done(md);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index de2bde2..8b4e825 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2261,8 +2261,9 @@ static int perf_evsel_browser_title(struct hist_browser *browser,
nr_samples = convert_unit(nr_samples, &unit);
printed = scnprintf(bf, size,
- "Samples: %lu%c of event '%s',%s%sEvent count (approx.): %" PRIu64,
- nr_samples, unit, ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+ "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+ nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+ ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
if (hists->uid_filter_str)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 49ff825..bc3302d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -248,7 +248,7 @@ static struct ins_ops call_ops = {
bool ins__is_call(const struct ins *ins)
{
- return ins->ops == &call_ops;
+ return ins->ops == &call_ops || ins->ops == &s390_call_ops;
}
static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 6470ea2..fb357a0 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -233,9 +233,9 @@ static void *auxtrace_copy_data(u64 size, struct perf_session *session)
return p;
}
-static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
- unsigned int idx,
- struct auxtrace_buffer *buffer)
+static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer)
{
struct auxtrace_queue *queue;
int err;
@@ -286,7 +286,7 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
return -ENOMEM;
b->size = BUFFER_LIMIT_FOR_32_BIT;
b->consecutive = consecutive;
- err = auxtrace_queues__add_buffer(queues, idx, b);
+ err = auxtrace_queues__queue_buffer(queues, idx, b);
if (err) {
auxtrace_buffer__free(b);
return err;
@@ -302,11 +302,14 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
return 0;
}
-static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
- struct perf_session *session,
- unsigned int idx,
- struct auxtrace_buffer *buffer)
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer,
+ struct auxtrace_buffer **buffer_ptr)
{
+ int err;
+
if (session->one_mmap) {
buffer->data = buffer->data_offset - session->one_mmap_offset +
session->one_mmap_addr;
@@ -317,14 +320,20 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
buffer->data_needs_freeing = true;
} else if (BITS_PER_LONG == 32 &&
buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
- int err;
-
err = auxtrace_queues__split_buffer(queues, idx, buffer);
if (err)
return err;
}
- return auxtrace_queues__add_buffer(queues, idx, buffer);
+ err = auxtrace_queues__queue_buffer(queues, idx, buffer);
+ if (err)
+ return err;
+
+ /* FIXME: Doesn't work for split buffer */
+ if (buffer_ptr)
+ *buffer_ptr = buffer;
+
+ return 0;
}
static bool filter_cpu(struct perf_session *session, int cpu)
@@ -359,13 +368,11 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
buffer->size = event->auxtrace.size;
idx = event->auxtrace.idx;
- err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer);
+ err = auxtrace_queues__add_buffer(queues, session, idx, buffer,
+ buffer_ptr);
if (err)
goto out_err;
- if (buffer_ptr)
- *buffer_ptr = buffer;
-
return 0;
out_err:
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 453c148..e731f55 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -130,6 +130,7 @@ struct auxtrace_index {
/**
* struct auxtrace - session callbacks to allow AUX area data decoding.
* @process_event: lets the decoder see all session events
+ * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event
* @flush_events: process any remaining data
* @free_events: free resources associated with event processing
* @free: free resources associated with the session
@@ -301,6 +302,7 @@ struct auxtrace_mmap_params {
* @parse_snapshot_options: parse snapshot options
* @reference: provide a 64-bit reference number for auxtrace_event
* @read_finish: called after reading from an auxtrace mmap
+ * @alignment: alignment (if any) for AUX area data
*/
struct auxtrace_record {
int (*recording_options)(struct auxtrace_record *itr,
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 5dd9b5e..78408f5c 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -71,7 +71,7 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
return -1;
}
-static int open_cgroup(char *name)
+static int open_cgroup(const char *name)
{
char path[PATH_MAX + 1];
char mnt[PATH_MAX + 1];
@@ -90,41 +90,64 @@ static int open_cgroup(char *name)
return fd;
}
-static int add_cgroup(struct perf_evlist *evlist, char *str)
+static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str)
{
struct perf_evsel *counter;
- struct cgroup_sel *cgrp = NULL;
- int n;
+ struct cgroup *cgrp = NULL;
/*
* check if cgrp is already defined, if so we reuse it
*/
evlist__for_each_entry(evlist, counter) {
- cgrp = counter->cgrp;
- if (!cgrp)
+ if (!counter->cgrp)
continue;
- if (!strcmp(cgrp->name, str)) {
- refcount_inc(&cgrp->refcnt);
+ if (!strcmp(counter->cgrp->name, str)) {
+ cgrp = cgroup__get(counter->cgrp);
break;
}
-
- cgrp = NULL;
}
- if (!cgrp) {
- cgrp = zalloc(sizeof(*cgrp));
- if (!cgrp)
- return -1;
+ return cgrp;
+}
+
+static struct cgroup *cgroup__new(const char *name)
+{
+ struct cgroup *cgroup = zalloc(sizeof(*cgroup));
- cgrp->name = str;
- refcount_set(&cgrp->refcnt, 1);
+ if (cgroup != NULL) {
+ refcount_set(&cgroup->refcnt, 1);
- cgrp->fd = open_cgroup(str);
- if (cgrp->fd == -1) {
- free(cgrp);
- return -1;
- }
+ cgroup->name = strdup(name);
+ if (!cgroup->name)
+ goto out_err;
+ cgroup->fd = open_cgroup(name);
+ if (cgroup->fd == -1)
+ goto out_free_name;
}
+ return cgroup;
+
+out_free_name:
+ free(cgroup->name);
+out_err:
+ free(cgroup);
+ return NULL;
+}
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name)
+{
+ struct cgroup *cgroup = evlist__find_cgroup(evlist, name);
+
+ return cgroup ?: cgroup__new(name);
+}
+
+static int add_cgroup(struct perf_evlist *evlist, const char *str)
+{
+ struct perf_evsel *counter;
+ struct cgroup *cgrp = evlist__findnew_cgroup(evlist, str);
+ int n;
+
+ if (!cgrp)
+ return -1;
/*
* find corresponding event
* if add cgroup N, then need to find event N
@@ -135,30 +158,55 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
goto found;
n++;
}
- if (refcount_dec_and_test(&cgrp->refcnt))
- free(cgrp);
+ cgroup__put(cgrp);
return -1;
found:
counter->cgrp = cgrp;
return 0;
}
-void close_cgroup(struct cgroup_sel *cgrp)
+static void cgroup__delete(struct cgroup *cgroup)
+{
+ close(cgroup->fd);
+ zfree(&cgroup->name);
+ free(cgroup);
+}
+
+void cgroup__put(struct cgroup *cgrp)
{
if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) {
- close(cgrp->fd);
- zfree(&cgrp->name);
- free(cgrp);
+ cgroup__delete(cgrp);
}
}
-int parse_cgroups(const struct option *opt __maybe_unused, const char *str,
+struct cgroup *cgroup__get(struct cgroup *cgroup)
+{
+ if (cgroup)
+ refcount_inc(&cgroup->refcnt);
+ return cgroup;
+}
+
+static void evsel__set_default_cgroup(struct perf_evsel *evsel, struct cgroup *cgroup)
+{
+ if (evsel->cgrp == NULL)
+ evsel->cgrp = cgroup__get(cgroup);
+}
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__set_default_cgroup(evsel, cgroup);
+}
+
+int parse_cgroups(const struct option *opt, const char *str,
int unset __maybe_unused)
{
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_evsel *counter;
- struct cgroup_sel *cgrp = NULL;
+ struct cgroup *cgrp = NULL;
const char *p, *e, *eos = str + strlen(str);
char *s;
int ret, i;
@@ -179,10 +227,9 @@ int parse_cgroups(const struct option *opt __maybe_unused, const char *str,
if (!s)
return -1;
ret = add_cgroup(evlist, s);
- if (ret) {
- free(s);
+ free(s);
+ if (ret)
return -1;
- }
}
/* nr_cgroups is increased een for empty cgroups */
nr_cgroups++;
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index afafc87..f033a80 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -6,7 +6,7 @@
struct option;
-struct cgroup_sel {
+struct cgroup {
char *name;
int fd;
refcount_t refcnt;
@@ -14,7 +14,16 @@ struct cgroup_sel {
extern int nr_cgroups; /* number of explicit cgroups defined */
-void close_cgroup(struct cgroup_sel *cgrp);
+
+struct cgroup *cgroup__get(struct cgroup *cgroup);
+void cgroup__put(struct cgroup *cgroup);
+
+struct perf_evlist;
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name);
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup);
+
int parse_cgroups(const struct option *opt, const char *str, int unset);
#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index bf970f5..c4ef2e5 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -27,6 +27,12 @@ struct numa_node {
struct cpu_map *map;
};
+struct memory_node {
+ u64 node;
+ u64 size;
+ unsigned long *set;
+};
+
struct perf_env {
char *hostname;
char *os_release;
@@ -43,6 +49,7 @@ struct perf_env {
int nr_sibling_cores;
int nr_sibling_threads;
int nr_numa_nodes;
+ int nr_memory_nodes;
int nr_pmu_mappings;
int nr_groups;
char *cmdline;
@@ -54,6 +61,8 @@ struct perf_env {
struct cpu_cache_level *caches;
int caches_cnt;
struct numa_node *numa_nodes;
+ struct memory_node *memory_nodes;
+ unsigned long long memory_bsize;
};
extern struct perf_env perf_env;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 41a4666..a59281d 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -722,7 +722,8 @@ void perf_evlist__munmap(struct perf_evlist *evlist)
zfree(&evlist->overwrite_mmap);
}
-static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
+static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
+ bool overwrite)
{
int i;
struct perf_mmap *map;
@@ -736,6 +737,7 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
for (i = 0; i < evlist->nr_mmaps; i++) {
map[i].fd = -1;
+ map[i].overwrite = overwrite;
/*
* When the perf_mmap() call is made we grab one refcount, plus
* one extra to let perf_mmap__consume() get the last
@@ -779,7 +781,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
maps = evlist->overwrite_mmap;
if (!maps) {
- maps = perf_evlist__alloc_mmap(evlist);
+ maps = perf_evlist__alloc_mmap(evlist, true);
if (!maps)
return -1;
evlist->overwrite_mmap = maps;
@@ -1029,7 +1031,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
struct mmap_params mp;
if (!evlist->mmap)
- evlist->mmap = perf_evlist__alloc_mmap(evlist);
+ evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
if (!evlist->mmap)
return -ENOMEM;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index b56e1c2..1ac8d92 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -244,6 +244,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->metric_name = NULL;
evsel->metric_events = NULL;
evsel->collect_stat = false;
+ evsel->pmu_name = NULL;
}
struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -621,22 +622,34 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel)
return evsel->group_name ?: "anon group";
}
+/*
+ * Returns the group details for the specified leader,
+ * with following rules.
+ *
+ * For record -e '{cycles,instructions}'
+ * 'anon group { cycles:u, instructions:u }'
+ *
+ * For record -e 'cycles,instructions' and report --group
+ * 'cycles:u, instructions:u'
+ */
int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
{
- int ret;
+ int ret = 0;
struct perf_evsel *pos;
const char *group_name = perf_evsel__group_name(evsel);
- ret = scnprintf(buf, size, "%s", group_name);
+ if (!evsel->forced_leader)
+ ret = scnprintf(buf, size, "%s { ", group_name);
- ret += scnprintf(buf + ret, size - ret, " { %s",
+ ret += scnprintf(buf + ret, size - ret, "%s",
perf_evsel__name(evsel));
for_each_group_member(pos, evsel)
ret += scnprintf(buf + ret, size - ret, ", %s",
perf_evsel__name(pos));
- ret += scnprintf(buf + ret, size - ret, " }");
+ if (!evsel->forced_leader)
+ ret += scnprintf(buf + ret, size - ret, " }");
return ret;
}
@@ -1233,7 +1246,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
perf_evsel__free_id(evsel);
perf_evsel__free_config_terms(evsel);
- close_cgroup(evsel->cgrp);
+ cgroup__put(evsel->cgrp);
cpu_map__put(evsel->cpus);
cpu_map__put(evsel->own_cpus);
thread_map__put(evsel->threads);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a7487c6..d3ee3af 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -30,7 +30,7 @@ struct perf_sample_id {
u64 period;
};
-struct cgroup_sel;
+struct cgroup;
/*
* The 'struct perf_evsel_config_term' is used to pass event
@@ -107,7 +107,7 @@ struct perf_evsel {
struct perf_stat_evsel *stats;
void *priv;
u64 db_id;
- struct cgroup_sel *cgrp;
+ struct cgroup *cgrp;
void *handler;
struct cpu_map *cpus;
struct cpu_map *own_cpus;
@@ -125,6 +125,7 @@ struct perf_evsel {
bool per_pkg;
bool precise_max;
bool ignore_missing_thread;
+ bool forced_leader;
/* parse modifier helper */
int exclude_GH;
int nr_members;
@@ -142,6 +143,7 @@ struct perf_evsel {
struct perf_evsel **metric_events;
bool collect_stat;
bool weak_group;
+ const char *pmu_name;
};
union u64_swap {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a326e0d..e14b3f7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -17,6 +17,7 @@
#include <sys/stat.h>
#include <sys/utsname.h>
#include <linux/time64.h>
+#include <dirent.h>
#include "evlist.h"
#include "evsel.h"
@@ -37,6 +38,7 @@
#include "asm/bug.h"
#include "tool.h"
#include "time-utils.h"
+#include "units.h"
#include "sane_ctype.h"
@@ -132,6 +134,25 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size)
}
/* Return: 0 if succeded, -ERR if failed. */
+/*
+ * Write a bitmap: first @size itself as a u64, then the bitmap contents
+ * as BITS_TO_U64(@size) consecutive 64-bit words taken from @set.
+ */
+static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
+{
+	u64 *word = (u64 *) set;
+	int idx = 0, err;
+
+	err = do_write(ff, &size, sizeof(size));
+	if (err < 0)
+		return err;
+
+	while ((u64) idx < BITS_TO_U64(size)) {
+		err = do_write(ff, &word[idx], sizeof(*word));
+		if (err < 0)
+			return err;
+		idx++;
+	}
+
+	return 0;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
int write_padded(struct feat_fd *ff, const void *bf,
size_t count, size_t count_aligned)
{
@@ -243,6 +264,38 @@ static char *do_read_string(struct feat_fd *ff)
return NULL;
}
+/*
+ * Read a bitmap stored as a u64 size followed by BITS_TO_U64(size) 64-bit
+ * words. On success the freshly allocated bitmap is handed back through
+ * @pset and its size through @psize; on failure neither is written and
+ * the bitmap is freed here.
+ *
+ * NOTE(review): the size is taken from the input as-is and is not
+ * range-checked before bitmap_alloc() - confirm that is safe for
+ * corrupted or hostile files.
+ *
+ * Return: 0 if succeeded, -ERR if failed.
+ */
+static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
+{
+	unsigned long *bits;
+	u64 nbits, *word;
+	int idx, err;
+
+	err = do_read_u64(ff, &nbits);
+	if (err)
+		return err;
+
+	bits = bitmap_alloc(nbits);
+	if (bits == NULL)
+		return -ENOMEM;
+
+	bitmap_zero(bits, nbits);
+
+	word = (u64 *) bits;
+
+	for (idx = 0; (u64) idx < BITS_TO_U64(nbits); idx++, word++) {
+		err = do_read_u64(ff, word);
+		if (err < 0) {
+			free(bits);
+			return err;
+		}
+	}
+
+	*pset = bits;
+	*psize = nbits;
+	return 0;
+}
+
static int write_tracing_data(struct feat_fd *ff,
struct perf_evlist *evlist)
{
@@ -1196,6 +1249,176 @@ static int write_sample_time(struct feat_fd *ff,
sizeof(evlist->last_sample_time));
}
+
+/*
+ * Fill @n with the memory blocks of node @idx.
+ *
+ * The directory <sysfs>/devices/system/node/node<idx> is scanned twice for
+ * "memory<N>" entries: the first pass finds the highest N so the bitmap can
+ * be sized (highest N + 1 bits), the second pass sets bit N for every entry.
+ *
+ * Return: 0 on success, -1 if the directory cannot be opened, -ENOMEM if
+ * the bitmap cannot be allocated.
+ */
+static int memory_node__read(struct memory_node *n, unsigned long idx)
+{
+	unsigned int phys, size = 0;
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+
+	scnprintf(path, PATH_MAX,
+		  "%s/devices/system/node/node%lu",
+		  sysfs__mountpoint(), idx);
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: cant' open memory sysfs data\n");
+		return -1;
+	}
+
+	/*
+	 * First pass: highest memory index present. "." and ".." need no
+	 * special casing, they simply fail the sscanf() match.
+	 */
+	while ((ent = readdir(dir)) != NULL) {
+		if (sscanf(ent->d_name, "memory%u", &phys) != 1)
+			continue;
+		if (phys > size)
+			size = phys;
+	}
+
+	size++;
+
+	n->set = bitmap_alloc(size);
+	if (!n->set) {
+		closedir(dir);
+		return -ENOMEM;
+	}
+
+	bitmap_zero(n->set, size);
+	n->node = idx;
+	n->size = size;
+
+	rewinddir(dir);
+
+	/*
+	 * Second pass: mark the entries. The directory may have gained an
+	 * entry since the first pass, so never set a bit the bitmap was not
+	 * sized for.
+	 */
+	while ((ent = readdir(dir)) != NULL) {
+		if (sscanf(ent->d_name, "memory%u", &phys) == 1 && phys < size)
+			set_bit(phys, n->set);
+	}
+
+	closedir(dir);
+	return 0;
+}
+
+/*
+ * qsort() comparator: orders memory nodes by ascending node index.
+ *
+ * The indexes are u64, so they are compared explicitly instead of being
+ * subtracted: a u64 difference truncated to int can yield the wrong sign
+ * (or 0 for distinct values).
+ */
+static int memory_node__sort(const void *a, const void *b)
+{
+	const struct memory_node *na = a;
+	const struct memory_node *nb = b;
+
+	if (na->node < nb->node)
+		return -1;
+
+	return na->node > nb->node;
+}
+
+/*
+ * Read the memory map of every "node<N>" directory found under
+ * <sysfs>/devices/system/node/ into @nodes (capacity @size entries) and
+ * sort the result by node index.
+ *
+ * *@cntp is always set to the number of entries of @nodes that were read
+ * successfully, also on failure; their bitmaps are not freed here.
+ *
+ * Return: 0 on success, non-zero if the directory cannot be opened, if
+ * there are more nodes than @size, or if reading a node fails.
+ */
+static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
+{
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	u64 cnt = 0;
+	int ret = 0;
+
+	*cntp = 0;
+
+	scnprintf(path, PATH_MAX, "%s/devices/system/node/",
+		  sysfs__mountpoint());
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: can't open node sysfs data\n");
+		return -1;
+	}
+
+	while (!ret && (ent = readdir(dir))) {
+		unsigned int idx;
+		int r;
+
+		if (!strcmp(ent->d_name, ".") ||
+		    !strcmp(ent->d_name, ".."))
+			continue;
+
+		r = sscanf(ent->d_name, "node%u", &idx);
+		if (r != 1)
+			continue;
+
+		/* Leave through the common exit so the directory is closed. */
+		if (WARN_ONCE(cnt >= size,
+			      "failed to write MEM_TOPOLOGY, way too many nodes\n")) {
+			ret = -1;
+			break;
+		}
+
+		/* Count the entry only once it has been filled in. */
+		ret = memory_node__read(&nodes[cnt], idx);
+		if (!ret)
+			cnt++;
+	}
+
+	*cntp = cnt;
+	closedir(dir);
+
+	if (!ret)
+		qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+
+	return ret;
+}
+
+#define MAX_MEMORY_NODES 2000
+
+/*
+ * The MEM_TOPOLOGY holds physical memory map for every
+ * node in system. The format of data is as follows:
+ *
+ *  0 - version          | for future changes
+ *  8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ * 16 - count            | number of nodes
+ *
+ * For each node we store map of physical indexes for
+ * each node (offsets shown for the first node):
+ *
+ * 24 - node id          | node index
+ * 32 - size             | size of bitmap
+ * 40 - size             | size of bitmap, repeated by do_write_bitmap()
+ * 48 - bitmap           | bitmap of memory indexes that belongs to node
+ *
+ * The per-node bitmaps built for writing are released before returning,
+ * whether or not the write succeeded.
+ */
+static int write_mem_topology(struct feat_fd *ff __maybe_unused,
+			      struct perf_evlist *evlist __maybe_unused)
+{
+	static struct memory_node nodes[MAX_MEMORY_NODES];
+	u64 bsize, version = 1, i, nr = 0;
+	int ret;
+
+	ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
+			      (unsigned long long *) &bsize);
+	if (ret)
+		return ret;
+
+	/* On failure 'nr' still says how many bitmaps need freeing. */
+	ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr);
+	if (ret)
+		goto out;
+
+	ret = do_write(ff, &version, sizeof(version));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &bsize, sizeof(bsize));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &nr, sizeof(nr));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node *n = &nodes[i];
+
+		ret = do_write(ff, &n->node, sizeof(n->node));
+		if (ret < 0)
+			goto out;
+
+		ret = do_write(ff, &n->size, sizeof(n->size));
+		if (ret < 0)
+			goto out;
+
+		ret = do_write_bitmap(ff, n->set, n->size);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	/* 'nodes' is static: drop the bitmaps and leave no stale pointers. */
+	for (i = 0; i < nr; i++) {
+		free(nodes[i].set);
+		nodes[i].set = NULL;
+	}
+
+	return ret;
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1543,6 +1766,35 @@ static void print_sample_time(struct feat_fd *ff, FILE *fp)
fprintf(fp, "# sample duration : %10.3f ms\n", d);
}
+/*
+ * Print one line describing node @n to @fp: the node index, the amount of
+ * memory it holds (@bsize times the number of bits set in its bitmap,
+ * formatted by unit_number__scnprintf()) and the bitmap itself as
+ * formatted by bitmap_scnprintf().
+ */
+static void memory_node__fprintf(struct memory_node *n,
+				 unsigned long long bsize, FILE *fp)
+{
+	char map_str[100], size_str[50];
+	unsigned long long bytes = bsize * bitmap_weight(n->set, n->size);
+
+	unit_number__scnprintf(size_str, sizeof(size_str), bytes);
+	bitmap_scnprintf(n->set, n->size, map_str, sizeof(map_str));
+
+	fprintf(fp, "# %3" PRIu64 " [%s]: %s\n", n->node, size_str, map_str);
+}
+
+/*
+ * Print the memory topology kept in the header's env: a summary line with
+ * the number of nodes and the block size, then one line per node.
+ */
+static void print_mem_topology(struct feat_fd *ff, FILE *fp)
+{
+	unsigned long long bsize = ff->ph->env.memory_bsize;
+	struct memory_node *node = ff->ph->env.memory_nodes;
+	int left = ff->ph->env.nr_memory_nodes;
+
+	fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
+		left, bsize);
+
+	for (; left > 0; left--, node++)
+		memory_node__fprintf(node, bsize, fp);
+}
+
static int __event_process_build_id(struct build_id_event *bev,
char *filename,
struct perf_session *session)
@@ -2205,6 +2457,58 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
return 0;
}
+/*
+ * Parse the MEM_TOPOLOGY feature section into the header's env
+ * (memory_bsize, memory_nodes, nr_memory_nodes).
+ *
+ * Only version 1 of the format is accepted. Nothing is stored in env
+ * unless the whole section was read; on failure everything allocated
+ * here, including the bitmaps already read, is freed.
+ *
+ * NOTE(review): the node count is a u64 in the file but is stored in an
+ * int (nr_memory_nodes) - confirm oversized counts cannot occur.
+ *
+ * Return: 0 on success, -1 on failure.
+ */
+static int process_mem_topology(struct feat_fd *ff,
+				void *data __maybe_unused)
+{
+	struct memory_node *nodes;
+	u64 version, i, nr, bsize;
+
+	if (do_read_u64(ff, &version))
+		return -1;
+
+	if (version != 1)
+		return -1;
+
+	if (do_read_u64(ff, &bsize))
+		return -1;
+
+	if (do_read_u64(ff, &nr))
+		return -1;
+
+	/*
+	 * 'nr' comes from the file: let calloc() catch a count whose byte
+	 * size would overflow. The zeroed array also keeps every 'set'
+	 * NULL until its bitmap has been read.
+	 */
+	nodes = calloc(nr, sizeof(*nodes));
+	if (!nodes)
+		return -1;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node *n = &nodes[i];
+
+		if (do_read_u64(ff, &n->node))
+			goto out_free;
+
+		if (do_read_u64(ff, &n->size))
+			goto out_free;
+
+		/* The bitmap record carries its own copy of the size. */
+		if (do_read_bitmap(ff, &n->set, &n->size))
+			goto out_free;
+	}
+
+	ff->ph->env.memory_bsize = bsize;
+	ff->ph->env.memory_nodes = nodes;
+	ff->ph->env.nr_memory_nodes = nr;
+	return 0;
+
+out_free:
+	/* Entries [0, i) own a bitmap; entry i failed and owns none. */
+	while (i > 0)
+		free(nodes[--i].set);
+	free(nodes);
+	return -1;
+}
+
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2263,6 +2567,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPN(STAT, stat, false),
FEAT_OPN(CACHE, cache, true),
FEAT_OPR(SAMPLE_TIME, sample_time, false),
+ FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
};
struct header_print_data {
@@ -2318,7 +2623,12 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
if (ret == -1)
return -1;
- fprintf(fp, "# captured on: %s", ctime(&st.st_ctime));
+ fprintf(fp, "# captured on : %s", ctime(&st.st_ctime));
+
+ fprintf(fp, "# header version : %u\n", header->version);
+ fprintf(fp, "# data offset : %" PRIu64 "\n", header->data_offset);
+ fprintf(fp, "# data size : %" PRIu64 "\n", header->data_size);
+ fprintf(fp, "# feat offset : %" PRIu64 "\n", header->feat_offset);
perf_header__process_sections(header, fd, &hd,
perf_file_section__fprintf_info);
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 942bdec..90d4577 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -36,6 +36,7 @@ enum {
HEADER_STAT,
HEADER_CACHE,
HEADER_SAMPLE_TIME,
+ HEADER_MEM_TOPOLOGY,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 44a8456..7d96889 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -536,7 +536,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
* This mem info was allocated from sample__resolve_mem
* and will not be used anymore.
*/
- zfree(&entry->mem_info);
+ mem_info__zput(entry->mem_info);
/* If the map of an existing hist_entry has
* become out-of-date due to an exec() or
@@ -1139,7 +1139,7 @@ void hist_entry__delete(struct hist_entry *he)
if (he->mem_info) {
map__zput(he->mem_info->iaddr.map);
map__zput(he->mem_info->daddr.map);
- zfree(&he->mem_info);
+ mem_info__zput(he->mem_info);
}
zfree(&he->stat_acc);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index aa1593c..f9157ae 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1378,6 +1378,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
intel_pt_clear_tx_flags(decoder);
decoder->have_tma = false;
decoder->cbr = 0;
+ decoder->timestamp_insn_cnt = 0;
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
decoder->overflow = true;
return -EOVERFLOW;
@@ -1616,6 +1617,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_PWRX:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
return -ENOENT;
case INTEL_PT_OVF:
@@ -2390,14 +2392,6 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
return &decoder->state;
}
-static bool intel_pt_at_psb(unsigned char *buf, size_t len)
-{
- if (len < INTEL_PT_PSB_LEN)
- return false;
- return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
- INTEL_PT_PSB_LEN);
-}
-
/**
* intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
* @buf: pointer to buffer pointer
@@ -2486,6 +2480,7 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
* @buf: buffer
* @len: size of buffer
* @tsc: TSC value returned
+ * @rem: returns remaining size when TSC is found
*
* Find a TSC packet in @buf and return the TSC value. This function assumes
* that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
@@ -2493,7 +2488,8 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
*
* Return: %true if TSC is found, false otherwise.
*/
-static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
+static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
+ size_t *rem)
{
struct intel_pt_pkt packet;
int ret;
@@ -2504,6 +2500,7 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
return false;
if (packet.type == INTEL_PT_TSC) {
*tsc = packet.payload;
+ *rem = len;
return true;
}
if (packet.type == INTEL_PT_PSBEND)
@@ -2554,6 +2551,8 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
* @len_a: size of first buffer
* @buf_b: second buffer
* @len_b: size of second buffer
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ * to buf_a
*
* If the trace contains TSC we can look at the last TSC of @buf_a and the
* first TSC of @buf_b in order to determine if the buffers overlap, and then
@@ -2566,33 +2565,41 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
size_t len_a,
unsigned char *buf_b,
- size_t len_b)
+ size_t len_b, bool *consecutive)
{
uint64_t tsc_a, tsc_b;
unsigned char *p;
- size_t len;
+ size_t len, rem_a, rem_b;
p = intel_pt_last_psb(buf_a, len_a);
if (!p)
return buf_b; /* No PSB in buf_a => no overlap */
len = len_a - (p - buf_a);
- if (!intel_pt_next_tsc(p, len, &tsc_a)) {
+ if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
/* The last PSB+ in buf_a is incomplete, so go back one more */
len_a -= len;
p = intel_pt_last_psb(buf_a, len_a);
if (!p)
return buf_b; /* No full PSB+ => assume no overlap */
len = len_a - (p - buf_a);
- if (!intel_pt_next_tsc(p, len, &tsc_a))
+ if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
return buf_b; /* No TSC in buf_a => assume no overlap */
}
while (1) {
/* Ignore PSB+ with no TSC */
- if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
- intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
- return buf_b; /* tsc_a < tsc_b => no overlap */
+ if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
+ int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
+
+ /* Same TSC, so buffers are consecutive */
+ if (!cmp && rem_b >= rem_a) {
+ *consecutive = true;
+ return buf_b + len_b - (rem_b - rem_a);
+ }
+ if (cmp < 0)
+ return buf_b; /* tsc_a < tsc_b => no overlap */
+ }
if (!intel_pt_step_psb(&buf_b, &len_b))
return buf_b + len_b; /* No PSB in buf_b => no data */
@@ -2606,6 +2613,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
* @buf_b: second buffer
* @len_b: size of second buffer
* @have_tsc: can use TSC packets to detect overlap
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ * to buf_a
*
* When trace samples or snapshots are recorded there is the possibility that
* the data overlaps. Note that, for the purposes of decoding, data is only
@@ -2616,7 +2625,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
*/
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
unsigned char *buf_b, size_t len_b,
- bool have_tsc)
+ bool have_tsc, bool *consecutive)
{
unsigned char *found;
@@ -2628,7 +2637,8 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
return buf_b; /* No overlap */
if (have_tsc) {
- found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
+ found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
+ consecutive);
if (found)
return found;
}
@@ -2643,28 +2653,16 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
}
/* Now len_b >= len_a */
- if (len_b > len_a) {
- /* The leftover buffer 'b' must start at a PSB */
- while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
- if (!intel_pt_step_psb(&buf_a, &len_a))
- return buf_b; /* No overlap */
- }
- }
-
while (1) {
/* Potential overlap so check the bytes */
found = memmem(buf_a, len_a, buf_b, len_a);
- if (found)
+ if (found) {
+ *consecutive = true;
return buf_b + len_a;
+ }
/* Try again at next PSB in buffer 'a' */
if (!intel_pt_step_psb(&buf_a, &len_a))
return buf_b; /* No overlap */
-
- /* The leftover buffer 'b' must start at a PSB */
- while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
- if (!intel_pt_step_psb(&buf_a, &len_a))
- return buf_b; /* No overlap */
- }
}
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 921b22e..fc1752d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -117,7 +117,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
unsigned char *buf_b, size_t len_b,
- bool have_tsc);
+ bool have_tsc, bool *consecutive);
int intel_pt__strerror(int code, char *buf, size_t buflen);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 3773d9c..0effaff 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -132,6 +132,7 @@ struct intel_pt_queue {
struct intel_pt *pt;
unsigned int queue_nr;
struct auxtrace_buffer *buffer;
+ struct auxtrace_buffer *old_buffer;
void *decoder;
const struct intel_pt_state *state;
struct ip_callchain *chain;
@@ -143,6 +144,7 @@ struct intel_pt_queue {
bool stop;
bool step_through_buffers;
bool use_buffer_pid_tid;
+ bool sync_switch;
pid_t pid, tid;
int cpu;
int switch_state;
@@ -207,49 +209,28 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
struct auxtrace_buffer *b)
{
+ bool consecutive = false;
void *start;
start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
- pt->have_tsc);
+ pt->have_tsc, &consecutive);
if (!start)
return -EINVAL;
b->use_size = b->data + b->size - start;
b->use_data = start;
+ if (b->use_size && consecutive)
+ b->consecutive = true;
return 0;
}
-static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
- struct auxtrace_queue *queue,
- struct auxtrace_buffer *buffer)
-{
- if (queue->cpu == -1 && buffer->cpu != -1)
- ptq->cpu = buffer->cpu;
-
- ptq->pid = buffer->pid;
- ptq->tid = buffer->tid;
-
- intel_pt_log("queue %u cpu %d pid %d tid %d\n",
- ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
-
- thread__zput(ptq->thread);
-
- if (ptq->tid != -1) {
- if (ptq->pid != -1)
- ptq->thread = machine__findnew_thread(ptq->pt->machine,
- ptq->pid,
- ptq->tid);
- else
- ptq->thread = machine__find_thread(ptq->pt->machine, -1,
- ptq->tid);
- }
-}
-
/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
struct intel_pt_queue *ptq = data;
- struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
struct auxtrace_queue *queue;
+ bool might_overlap;
if (ptq->stop) {
b->len = 0;
@@ -257,7 +238,7 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
}
queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-next:
+
buffer = auxtrace_buffer__next(queue, buffer);
if (!buffer) {
if (old_buffer)
@@ -276,7 +257,8 @@ next:
return -ENOMEM;
}
- if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
+ might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
+ if (might_overlap && !buffer->consecutive && old_buffer &&
intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
return -ENOMEM;
@@ -289,33 +271,24 @@ next:
}
b->ref_timestamp = buffer->reference;
- /*
- * If in snapshot mode and the buffer has no usable data, get next
- * buffer and again check overlap against old_buffer.
- */
- if (ptq->pt->snapshot_mode && !b->len)
- goto next;
-
- if (old_buffer)
- auxtrace_buffer__drop_data(old_buffer);
-
- if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
- !buffer->consecutive)) {
+ if (!old_buffer || (might_overlap && !buffer->consecutive)) {
b->consecutive = false;
b->trace_nr = buffer->buffer_nr + 1;
} else {
b->consecutive = true;
}
- if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
- ptq->tid != buffer->tid))
- intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
-
if (ptq->step_through_buffers)
ptq->stop = true;
- if (!b->len)
+ if (b->len) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ ptq->old_buffer = buffer;
+ } else {
+ auxtrace_buffer__drop_data(buffer);
return intel_pt_get_trace(b, data);
+ }
return 0;
}
@@ -954,16 +927,15 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
ptq->cpu = queue->cpu;
ptq->tid = queue->tid;
- if (pt->sampling_mode) {
- if (pt->timeless_decoding)
- ptq->step_through_buffers = true;
- if (pt->timeless_decoding || !pt->have_sched_switch)
- ptq->use_buffer_pid_tid = true;
- }
+ if (pt->sampling_mode && !pt->snapshot_mode &&
+ pt->timeless_decoding)
+ ptq->step_through_buffers = true;
+
+ ptq->sync_switch = pt->sync_switch;
}
if (!ptq->on_heap &&
- (!pt->sync_switch ||
+ (!ptq->sync_switch ||
ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
const struct intel_pt_state *state;
int ret;
@@ -1546,7 +1518,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
if (pt->synth_opts.last_branch)
intel_pt_update_last_branch_rb(ptq);
- if (!pt->sync_switch)
+ if (!ptq->sync_switch)
return 0;
if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
@@ -1627,6 +1599,21 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
return switch_ip;
}
+/*
+ * Set sync_switch on @pt and on every queue of @pt that has a private
+ * intel_pt_queue attached (queues whose priv is NULL are skipped).
+ */
+static void intel_pt_enable_sync_switch(struct intel_pt *pt)
+{
+	unsigned int nr;
+
+	pt->sync_switch = true;
+
+	for (nr = 0; nr < pt->queues.nr_queues; nr++) {
+		struct intel_pt_queue *ptq = pt->queues.queue_array[nr].priv;
+
+		if (!ptq)
+			continue;
+
+		ptq->sync_switch = true;
+	}
+}
+
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
const struct intel_pt_state *state = ptq->state;
@@ -1643,7 +1630,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
if (pt->switch_ip) {
intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
pt->switch_ip, pt->ptss_ip);
- pt->sync_switch = true;
+ intel_pt_enable_sync_switch(pt);
}
}
}
@@ -1659,9 +1646,9 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
if (state->err) {
if (state->err == INTEL_PT_ERR_NODATA)
return 1;
- if (pt->sync_switch &&
+ if (ptq->sync_switch &&
state->from_ip >= pt->kernel_start) {
- pt->sync_switch = false;
+ ptq->sync_switch = false;
intel_pt_next_tid(pt, ptq);
}
if (pt->synth_opts.errors) {
@@ -1687,7 +1674,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
state->timestamp, state->est_timestamp);
ptq->timestamp = state->est_timestamp;
/* Use estimated TSC in unknown switch state */
- } else if (pt->sync_switch &&
+ } else if (ptq->sync_switch &&
ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
intel_pt_is_switch_ip(ptq, state->to_ip) &&
ptq->next_tid == -1) {
@@ -1834,7 +1821,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
return 1;
ptq = intel_pt_cpu_to_ptq(pt, cpu);
- if (!ptq)
+ if (!ptq || !ptq->sync_switch)
return 1;
switch (ptq->switch_state) {
@@ -2075,9 +2062,6 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session,
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
- if (pt->sampling_mode)
- return 0;
-
if (!pt->data_queued) {
struct auxtrace_buffer *buffer;
off_t data_offset;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 12b7427..43fbbee 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1697,7 +1697,7 @@ static void ip__resolve_data(struct thread *thread,
struct mem_info *sample__resolve_mem(struct perf_sample *sample,
struct addr_location *al)
{
- struct mem_info *mi = zalloc(sizeof(*mi));
+ struct mem_info *mi = mem_info__new();
if (!mi)
return NULL;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 4f27c46..074c4fd 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -75,9 +75,7 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
* }
* perf_mmap__read_done()
*/
-union perf_event *perf_mmap__read_event(struct perf_mmap *map,
- bool overwrite,
- u64 *startp, u64 end)
+union perf_event *perf_mmap__read_event(struct perf_mmap *map)
{
union perf_event *event;
@@ -87,17 +85,14 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map,
if (!refcount_read(&map->refcnt))
return NULL;
- if (startp == NULL)
- return NULL;
-
/* non-overwirte doesn't pause the ringbuffer */
- if (!overwrite)
- end = perf_mmap__read_head(map);
+ if (!map->overwrite)
+ map->end = perf_mmap__read_head(map);
- event = perf_mmap__read(map, startp, end);
+ event = perf_mmap__read(map, &map->start, map->end);
- if (!overwrite)
- map->prev = *startp;
+ if (!map->overwrite)
+ map->prev = map->start;
return event;
}
@@ -120,9 +115,9 @@ void perf_mmap__put(struct perf_mmap *map)
perf_mmap__munmap(map);
}
-void perf_mmap__consume(struct perf_mmap *map, bool overwrite)
+void perf_mmap__consume(struct perf_mmap *map)
{
- if (!overwrite) {
+ if (!map->overwrite) {
u64 old = map->prev;
perf_mmap__write_tail(map, old);
@@ -240,27 +235,26 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6
/*
* Report the start and end of the available data in ringbuffer
*/
-int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
- u64 *startp, u64 *endp)
+int perf_mmap__read_init(struct perf_mmap *md)
{
u64 head = perf_mmap__read_head(md);
u64 old = md->prev;
unsigned char *data = md->base + page_size;
unsigned long size;
- *startp = overwrite ? head : old;
- *endp = overwrite ? old : head;
+ md->start = md->overwrite ? head : old;
+ md->end = md->overwrite ? old : head;
- if (*startp == *endp)
+ if (md->start == md->end)
return -EAGAIN;
- size = *endp - *startp;
+ size = md->end - md->start;
if (size > (unsigned long)(md->mask) + 1) {
- if (!overwrite) {
+ if (!md->overwrite) {
WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
md->prev = head;
- perf_mmap__consume(md, overwrite);
+ perf_mmap__consume(md);
return -EAGAIN;
}
@@ -268,33 +262,32 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
* Backward ring buffer is full. We still have a chance to read
* most of data from it.
*/
- if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
+ if (overwrite_rb_find_range(data, md->mask, head, &md->start, &md->end))
return -EINVAL;
}
return 0;
}
-int perf_mmap__push(struct perf_mmap *md, bool overwrite,
- void *to, int push(void *to, void *buf, size_t size))
+int perf_mmap__push(struct perf_mmap *md, void *to,
+ int push(void *to, void *buf, size_t size))
{
u64 head = perf_mmap__read_head(md);
- u64 end, start;
unsigned char *data = md->base + page_size;
unsigned long size;
void *buf;
int rc = 0;
- rc = perf_mmap__read_init(md, overwrite, &start, &end);
+ rc = perf_mmap__read_init(md);
if (rc < 0)
return (rc == -EAGAIN) ? 0 : -1;
- size = end - start;
+ size = md->end - md->start;
- if ((start & md->mask) + size != (end & md->mask)) {
- buf = &data[start & md->mask];
- size = md->mask + 1 - (start & md->mask);
- start += size;
+ if ((md->start & md->mask) + size != (md->end & md->mask)) {
+ buf = &data[md->start & md->mask];
+ size = md->mask + 1 - (md->start & md->mask);
+ md->start += size;
if (push(to, buf, size) < 0) {
rc = -1;
@@ -302,9 +295,9 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
}
}
- buf = &data[start & md->mask];
- size = end - start;
- start += size;
+ buf = &data[md->start & md->mask];
+ size = md->end - md->start;
+ md->start += size;
if (push(to, buf, size) < 0) {
rc = -1;
@@ -312,7 +305,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
}
md->prev = head;
- perf_mmap__consume(md, overwrite);
+ perf_mmap__consume(md);
out:
return rc;
}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index ec7d3a24..d82294d 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -20,6 +20,9 @@ struct perf_mmap {
int fd;
refcount_t refcnt;
u64 prev;
+ u64 start;
+ u64 end;
+ bool overwrite;
struct auxtrace_mmap auxtrace_mmap;
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
};
@@ -63,7 +66,7 @@ void perf_mmap__munmap(struct perf_mmap *map);
void perf_mmap__get(struct perf_mmap *map);
void perf_mmap__put(struct perf_mmap *map);
-void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
+void perf_mmap__consume(struct perf_mmap *map);
static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
{
@@ -86,16 +89,13 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
-union perf_event *perf_mmap__read_event(struct perf_mmap *map,
- bool overwrite,
- u64 *startp, u64 end);
+union perf_event *perf_mmap__read_event(struct perf_mmap *map);
-int perf_mmap__push(struct perf_mmap *md, bool backward,
- void *to, int push(void *to, void *buf, size_t size));
+int perf_mmap__push(struct perf_mmap *md, void *to,
+ int push(void *to, void *buf, size_t size));
size_t perf_mmap__mmap_len(struct perf_mmap *map);
-int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
- u64 *startp, u64 *endp);
+int perf_mmap__read_init(struct perf_mmap *md);
void perf_mmap__read_done(struct perf_mmap *map);
#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 34589c4..4e80ca3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1217,7 +1217,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
get_config_name(head_config), &config_terms);
}
-static int __parse_events_add_pmu(struct parse_events_state *parse_state,
+int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
struct list_head *head_config, bool auto_merge_stats)
{
@@ -1247,7 +1247,12 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
if (!head_config) {
attr.type = pmu->type;
evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
- return evsel ? 0 : -ENOMEM;
+ if (evsel) {
+ evsel->pmu_name = name;
+ return 0;
+ } else {
+ return -ENOMEM;
+ }
}
if (perf_pmu__check_alias(pmu, head_config, &info))
@@ -1276,18 +1281,12 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->snapshot = info.snapshot;
evsel->metric_expr = info.metric_expr;
evsel->metric_name = info.metric_name;
+ evsel->pmu_name = name;
}
return evsel ? 0 : -ENOMEM;
}
-int parse_events_add_pmu(struct parse_events_state *parse_state,
- struct list_head *list, char *name,
- struct list_head *head_config)
-{
- return __parse_events_add_pmu(parse_state, list, name, head_config, false);
-}
-
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str, struct list_head **listp)
{
@@ -1317,8 +1316,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
return -1;
list_add_tail(&term->list, head);
- if (!__parse_events_add_pmu(parse_state, list,
- pmu->name, head, true)) {
+ if (!parse_events_add_pmu(parse_state, list,
+ pmu->name, head, true)) {
pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 88108cd..5015cfd 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -167,7 +167,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
void *ptr, char *type, u64 len);
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
- struct list_head *head_config);
+ struct list_head *head_config, bool auto_merge_stats);
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 655ecff..a1a01b1 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -175,7 +175,7 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]*
num_dec [0-9]+
num_hex 0x[a-fA-F0-9]+
num_raw_hex [a-fA-F0-9]+
-name [a-zA-Z_*?][a-zA-Z0-9_*?.]*
+name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]*
name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/* If you add a modifier you need to update check_modifier() */
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index e81a20e..7afeb80 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -8,6 +8,7 @@
#define YYDEBUG 1
+#include <fnmatch.h>
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/types.h>
@@ -231,9 +232,13 @@ PE_NAME opt_event_config
YYABORT;
ALLOC_LIST(list);
- if (parse_events_add_pmu(_parse_state, list, $1, $2)) {
+ if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) {
struct perf_pmu *pmu = NULL;
int ok = 0;
+ char *pattern;
+
+ if (asprintf(&pattern, "%s*", $1) < 0)
+ YYABORT;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
char *name = pmu->name;
@@ -241,14 +246,19 @@ PE_NAME opt_event_config
if (!strncmp(name, "uncore_", 7) &&
strncmp($1, "uncore_", 7))
name += 7;
- if (!strncmp($1, name, strlen($1))) {
- if (parse_events_copy_term_list(orig_terms, &terms))
+ if (!fnmatch(pattern, name, 0)) {
+ if (parse_events_copy_term_list(orig_terms, &terms)) {
+ free(pattern);
YYABORT;
- if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms))
+ }
+ if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true))
ok++;
parse_events_terms__delete(terms);
}
}
+
+ free(pattern);
+
if (!ok)
YYABORT;
}
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 35fb5ef..b956868 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -984,7 +984,6 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
int sample_id_all = 1, cpu;
static char *kwlist[] = { "cpu", "sample_id_all", NULL };
struct perf_mmap *md;
- u64 end, start;
int err;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
@@ -992,10 +991,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
return NULL;
md = &evlist->mmap[cpu];
- if (perf_mmap__read_init(md, false, &start, &end) < 0)
+ if (perf_mmap__read_init(md) < 0)
goto end;
- event = perf_mmap__read_event(md, false, &start, end);
+ event = perf_mmap__read_event(md);
if (event != NULL) {
PyObject *pyevent = pyrf_event__new(event);
struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
@@ -1013,7 +1012,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
err = perf_evsel__parse_sample(evsel, event, &pevent->sample);
/* Consume the event only after we parsed it out. */
- perf_mmap__consume(md, false);
+ perf_mmap__consume(md);
if (err)
return PyErr_Format(PyExc_OSError,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a1a312d..62b2dd2 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -2221,3 +2221,25 @@ int symbol__config_symfs(const struct option *opt __maybe_unused,
free(bf);
return 0;
}
+
+struct mem_info *mem_info__get(struct mem_info *mi)
+{
+ if (mi)
+ refcount_inc(&mi->refcnt);
+ return mi;
+}
+
+void mem_info__put(struct mem_info *mi)
+{
+ if (mi && refcount_dec_and_test(&mi->refcnt))
+ free(mi);
+}
+
+struct mem_info *mem_info__new(void)
+{
+ struct mem_info *mi = zalloc(sizeof(*mi));
+
+ if (mi)
+ refcount_set(&mi->refcnt, 1);
+ return mi;
+}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 0563f33..70c16741 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -200,9 +200,10 @@ struct branch_info {
};
struct mem_info {
- struct addr_map_symbol iaddr;
- struct addr_map_symbol daddr;
- union perf_mem_data_src data_src;
+ struct addr_map_symbol iaddr;
+ struct addr_map_symbol daddr;
+ union perf_mem_data_src data_src;
+ refcount_t refcnt;
};
struct addr_location {
@@ -389,4 +390,16 @@ int sdt_notes__get_count(struct list_head *start);
#define SDT_NOTE_NAME "stapsdt"
#define NR_ADDR 3
+struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__get(struct mem_info *mi);
+void mem_info__put(struct mem_info *mi);
+
+static inline void __mem_info__zput(struct mem_info **mi)
+{
+ mem_info__put(*mi);
+ *mi = NULL;
+}
+
+#define mem_info__zput(mi) __mem_info__zput(&mi)
+
#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 40cfa36..14d44c3 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -26,7 +26,6 @@ struct thread {
pid_t ppid;
int cpu;
refcount_t refcnt;
- char shortname[3];
bool comm_set;
int comm_len;
bool dead; /* if set thread has exited */
OpenPOWER on IntegriCloud