summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile40
-rw-r--r--tools/arch/alpha/include/asm/barrier.h8
-rw-r--r--tools/arch/arm/include/asm/barrier.h12
-rw-r--r--tools/arch/arm64/include/asm/barrier.h16
-rw-r--r--tools/arch/ia64/include/asm/barrier.h48
-rw-r--r--tools/arch/mips/include/asm/barrier.h20
-rw-r--r--tools/arch/powerpc/include/asm/barrier.h29
-rw-r--r--tools/arch/s390/include/asm/barrier.h30
-rw-r--r--tools/arch/sh/include/asm/barrier.h32
-rw-r--r--tools/arch/sparc/include/asm/barrier.h8
-rw-r--r--tools/arch/sparc/include/asm/barrier_32.h6
-rw-r--r--tools/arch/sparc/include/asm/barrier_64.h42
-rw-r--r--tools/arch/tile/include/asm/barrier.h15
-rw-r--r--tools/arch/x86/include/asm/atomic.h65
-rw-r--r--tools/arch/x86/include/asm/barrier.h28
-rw-r--r--tools/arch/x86/include/asm/rmwcc.h41
-rw-r--r--tools/arch/xtensa/include/asm/barrier.h18
-rw-r--r--tools/build/Documentation/Build.txt1
-rw-r--r--tools/build/Makefile.build16
-rw-r--r--tools/build/Makefile.feature4
-rw-r--r--tools/build/feature/Makefile13
-rw-r--r--tools/build/feature/test-bpf.c18
-rw-r--r--tools/build/feature/test-glibc.c11
-rw-r--r--tools/build/tests/ex/Build2
-rw-r--r--tools/build/tests/ex/empty2/README2
-rw-r--r--tools/hv/hv_fcopy_daemon.c15
-rw-r--r--tools/hv/hv_kvp_daemon.c166
-rw-r--r--tools/hv/hv_vss_daemon.c149
-rw-r--r--tools/hv/lsvmbus101
-rw-r--r--tools/iio/Makefile4
-rw-r--r--tools/iio/generic_buffer.c289
-rw-r--r--tools/iio/iio_event_monitor.c86
-rw-r--r--tools/iio/iio_utils.c688
-rw-r--r--tools/iio/iio_utils.h39
-rw-r--r--tools/iio/lsiio.c106
-rw-r--r--tools/include/asm-generic/atomic-gcc.h63
-rw-r--r--tools/include/asm-generic/barrier.h44
-rw-r--r--tools/include/asm/atomic.h10
-rw-r--r--tools/include/asm/barrier.h27
-rw-r--r--tools/include/linux/atomic.h6
-rw-r--r--tools/include/linux/compiler.h62
-rw-r--r--tools/include/linux/kernel.h (renamed from tools/perf/util/include/linux/kernel.h)4
-rw-r--r--tools/include/linux/list.h (renamed from tools/perf/util/include/linux/list.h)6
-rw-r--r--tools/include/linux/poison.h1
-rw-r--r--tools/include/linux/rbtree.h104
-rw-r--r--tools/include/linux/rbtree_augmented.h245
-rw-r--r--tools/include/linux/types.h8
-rw-r--r--tools/laptop/freefall/Makefile17
-rw-r--r--tools/laptop/freefall/freefall.c174
-rw-r--r--tools/lguest/.gitignore1
-rw-r--r--tools/lguest/Makefile1
-rw-r--r--tools/lguest/lguest.c10
-rw-r--r--tools/lib/api/Makefile4
-rw-r--r--tools/lib/api/fs/debugfs.c15
-rw-r--r--tools/lib/bpf/.gitignore2
-rw-r--r--tools/lib/bpf/Build1
-rw-r--r--tools/lib/bpf/Makefile195
-rw-r--r--tools/lib/bpf/bpf.c85
-rw-r--r--tools/lib/bpf/bpf.h23
-rw-r--r--tools/lib/bpf/libbpf.c1056
-rw-r--r--tools/lib/bpf/libbpf.h83
-rw-r--r--tools/lib/hweight.c62
-rw-r--r--tools/lib/lockdep/Makefile3
-rw-r--r--tools/lib/lockdep/preload.c2
-rw-r--r--tools/lib/lockdep/uinclude/linux/kernel.h5
-rw-r--r--tools/lib/lockdep/uinclude/linux/rbtree.h1
-rw-r--r--tools/lib/rbtree.c548
-rw-r--r--tools/lib/traceevent/.gitignore1
-rw-r--r--tools/lib/traceevent/Makefile36
-rw-r--r--tools/lib/traceevent/event-parse.c86
-rw-r--r--tools/lib/traceevent/event-parse.h9
-rw-r--r--tools/lib/traceevent/plugin_cfg80211.c13
-rw-r--r--tools/net/bpf_jit_disasm.c111
-rw-r--r--tools/perf/.gitignore2
-rw-r--r--tools/perf/Build1
-rw-r--r--tools/perf/Documentation/callchain-overhead-calculation.txt108
-rw-r--r--tools/perf/Documentation/intel-bts.txt86
-rw-r--r--tools/perf/Documentation/intel-pt.txt766
-rw-r--r--tools/perf/Documentation/itrace.txt22
-rw-r--r--tools/perf/Documentation/perf-bench.txt7
-rw-r--r--tools/perf/Documentation/perf-inject.txt6
-rw-r--r--tools/perf/Documentation/perf-kmem.txt11
-rw-r--r--tools/perf/Documentation/perf-kvm.txt6
-rw-r--r--tools/perf/Documentation/perf-probe.txt17
-rw-r--r--tools/perf/Documentation/perf-record.txt63
-rw-r--r--tools/perf/Documentation/perf-report.txt30
-rw-r--r--tools/perf/Documentation/perf-script.txt30
-rw-r--r--tools/perf/Documentation/perf-stat.txt4
-rw-r--r--tools/perf/Documentation/perf-top.txt30
-rw-r--r--tools/perf/Documentation/perf-trace.txt7
-rw-r--r--tools/perf/MANIFEST36
-rw-r--r--tools/perf/Makefile6
-rw-r--r--tools/perf/Makefile.perf68
-rw-r--r--tools/perf/arch/alpha/Build1
-rw-r--r--tools/perf/arch/arm64/Build1
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h3
-rw-r--r--tools/perf/arch/arm64/tests/Build2
-rw-r--r--tools/perf/arch/arm64/tests/dwarf-unwind.c61
-rw-r--r--tools/perf/arch/arm64/tests/regs_load.S46
-rw-r--r--tools/perf/arch/common.c6
-rw-r--r--tools/perf/arch/common.h2
-rw-r--r--tools/perf/arch/mips/Build1
-rw-r--r--tools/perf/arch/parisc/Build1
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c82
-rw-r--r--tools/perf/arch/sh/util/dwarf-regs.c2
-rw-r--r--tools/perf/arch/sparc/util/dwarf-regs.c2
-rw-r--r--tools/perf/arch/x86/util/Build6
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c83
-rw-r--r--tools/perf/arch/x86/util/dwarf-regs.c2
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c458
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c1007
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c28
-rw-r--r--tools/perf/arch/x86/util/pmu.c18
-rw-r--r--tools/perf/arch/xtensa/Build1
-rw-r--r--tools/perf/arch/xtensa/Makefile3
-rw-r--r--tools/perf/arch/xtensa/util/Build1
-rw-r--r--tools/perf/arch/xtensa/util/dwarf-regs.c25
-rw-r--r--tools/perf/bench/Build2
-rw-r--r--tools/perf/bench/bench.h4
-rw-r--r--tools/perf/bench/futex-lock-pi.c219
-rw-r--r--tools/perf/bench/futex-requeue.c15
-rw-r--r--tools/perf/bench/futex-wake-parallel.c294
-rw-r--r--tools/perf/bench/futex-wake.c7
-rw-r--r--tools/perf/bench/futex.h20
-rw-r--r--tools/perf/bench/numa.c45
-rw-r--r--tools/perf/builtin-annotate.c23
-rw-r--r--tools/perf/builtin-bench.c3
-rw-r--r--tools/perf/builtin-buildid-cache.c30
-rw-r--r--tools/perf/builtin-buildid-list.c37
-rw-r--r--tools/perf/builtin-diff.c12
-rw-r--r--tools/perf/builtin-inject.c183
-rw-r--r--tools/perf/builtin-kmem.c1028
-rw-r--r--tools/perf/builtin-kvm.c25
-rw-r--r--tools/perf/builtin-lock.c8
-rw-r--r--tools/perf/builtin-mem.c21
-rw-r--r--tools/perf/builtin-probe.c198
-rw-r--r--tools/perf/builtin-record.c425
-rw-r--r--tools/perf/builtin-report.c85
-rw-r--r--tools/perf/builtin-sched.c159
-rw-r--r--tools/perf/builtin-script.c194
-rw-r--r--tools/perf/builtin-stat.c1014
-rw-r--r--tools/perf/builtin-timechart.c9
-rw-r--r--tools/perf/builtin-top.c67
-rw-r--r--tools/perf/builtin-trace.c648
-rw-r--r--tools/perf/config/Makefile33
-rw-r--r--tools/perf/config/utilities.mak19
-rw-r--r--tools/perf/perf-sys.h73
-rw-r--r--tools/perf/perf-with-kcore.sh28
-rw-r--r--tools/perf/perf.c2
-rw-r--r--tools/perf/perf.h11
-rwxr-xr-xtools/perf/python/twatch.py12
-rw-r--r--tools/perf/scripts/python/bin/compaction-times-record2
-rw-r--r--tools/perf/scripts/python/bin/compaction-times-report4
-rw-r--r--tools/perf/scripts/python/call-graph-from-postgresql.py327
-rw-r--r--tools/perf/scripts/python/compaction-times.py311
-rw-r--r--tools/perf/scripts/python/export-to-postgresql.py47
-rw-r--r--tools/perf/tests/Build10
-rw-r--r--tools/perf/tests/builtin-test.c24
-rw-r--r--tools/perf/tests/code-reading.c30
-rw-r--r--tools/perf/tests/dso-data.c15
-rw-r--r--tools/perf/tests/dwarf-unwind.c3
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c4
-rw-r--r--tools/perf/tests/hists_common.c10
-rw-r--r--tools/perf/tests/hists_cumulate.c14
-rw-r--r--tools/perf/tests/hists_filter.c12
-rw-r--r--tools/perf/tests/hists_link.c12
-rw-r--r--tools/perf/tests/hists_output.c10
-rw-r--r--tools/perf/tests/keep-tracking.c8
-rw-r--r--tools/perf/tests/kmod-path.c72
-rw-r--r--tools/perf/tests/llvm.c98
-rw-r--r--tools/perf/tests/make50
-rw-r--r--tools/perf/tests/mmap-basic.c10
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c8
-rw-r--r--tools/perf/tests/openat-syscall-all-cpus.c (renamed from tools/perf/tests/open-syscall-all-cpus.c)21
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c (renamed from tools/perf/tests/open-syscall-tp-fields.c)8
-rw-r--r--tools/perf/tests/openat-syscall.c (renamed from tools/perf/tests/open-syscall.c)16
-rw-r--r--tools/perf/tests/parse-events.c64
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c2
-rw-r--r--tools/perf/tests/pmu.c3
-rw-r--r--tools/perf/tests/switch-tracking.c12
-rw-r--r--tools/perf/tests/tests.h19
-rw-r--r--tools/perf/tests/thread-map.c42
-rw-r--r--tools/perf/tests/thread-mg-share.c41
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c34
-rw-r--r--tools/perf/trace/strace/groups/file18
-rw-r--r--tools/perf/ui/browser.c17
-rw-r--r--tools/perf/ui/browser.h7
-rw-r--r--tools/perf/ui/browsers/annotate.c199
-rw-r--r--tools/perf/ui/browsers/header.c4
-rw-r--r--tools/perf/ui/browsers/hists.c725
-rw-r--r--tools/perf/ui/browsers/map.c11
-rw-r--r--tools/perf/ui/browsers/scripts.c2
-rw-r--r--tools/perf/ui/libslang.h3
-rw-r--r--tools/perf/ui/tui/progress.c19
-rw-r--r--tools/perf/ui/tui/setup.c2
-rw-r--r--tools/perf/ui/tui/util.c2
-rw-r--r--tools/perf/util/Build25
-rw-r--r--tools/perf/util/annotate.c193
-rw-r--r--tools/perf/util/annotate.h26
-rw-r--r--tools/perf/util/auxtrace.c1370
-rw-r--r--tools/perf/util/auxtrace.h646
-rw-r--r--tools/perf/util/build-id.c102
-rw-r--r--tools/perf/util/build-id.h6
-rw-r--r--tools/perf/util/cache.h1
-rw-r--r--tools/perf/util/callchain.c93
-rw-r--r--tools/perf/util/callchain.h7
-rw-r--r--tools/perf/util/cgroup.c10
-rw-r--r--tools/perf/util/cgroup.h4
-rw-r--r--tools/perf/util/cloexec.c4
-rw-r--r--tools/perf/util/cloexec.h2
-rw-r--r--tools/perf/util/color.c21
-rw-r--r--tools/perf/util/color.h1
-rw-r--r--tools/perf/util/comm.c13
-rw-r--r--tools/perf/util/config.c4
-rw-r--r--tools/perf/util/counts.c52
-rw-r--r--tools/perf/util/counts.h37
-rw-r--r--tools/perf/util/cpumap.c26
-rw-r--r--tools/perf/util/cpumap.h6
-rw-r--r--tools/perf/util/data-convert-bt.c410
-rw-r--r--tools/perf/util/db-export.c31
-rw-r--r--tools/perf/util/debug.c5
-rw-r--r--tools/perf/util/debug.h1
-rw-r--r--tools/perf/util/dso.c334
-rw-r--r--tools/perf/util/dso.h53
-rw-r--r--tools/perf/util/dwarf-aux.c241
-rw-r--r--tools/perf/util/dwarf-aux.h13
-rw-r--r--tools/perf/util/environment.c1
-rw-r--r--tools/perf/util/event.c153
-rw-r--r--tools/perf/util/event.h114
-rw-r--r--tools/perf/util/evlist.c184
-rw-r--r--tools/perf/util/evlist.h29
-rw-r--r--tools/perf/util/evsel.c229
-rw-r--r--tools/perf/util/evsel.h87
-rw-r--r--tools/perf/util/header.c91
-rw-r--r--tools/perf/util/header.h6
-rw-r--r--tools/perf/util/hist.c122
-rw-r--r--tools/perf/util/hist.h10
-rw-r--r--tools/perf/util/include/linux/poison.h1
-rw-r--r--tools/perf/util/include/linux/rbtree.h2
-rw-r--r--tools/perf/util/include/linux/rbtree_augmented.h2
-rw-r--r--tools/perf/util/intel-bts.c933
-rw-r--r--tools/perf/util/intel-bts.h43
-rw-r--r--tools/perf/util/intel-pt-decoder/Build12
-rw-r--r--tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk386
-rw-r--r--tools/perf/util/intel-pt-decoder/inat.c96
-rw-r--r--tools/perf/util/intel-pt-decoder/inat.h221
-rw-r--r--tools/perf/util/intel-pt-decoder/inat_types.h29
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.c594
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.h201
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c2345
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h109
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c249
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h65
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.c155
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.h52
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c518
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h70
-rw-r--r--tools/perf/util/intel-pt-decoder/x86-opcode-map.txt970
-rw-r--r--tools/perf/util/intel-pt.c1956
-rw-r--r--tools/perf/util/intel-pt.h56
-rw-r--r--tools/perf/util/llvm-utils.c408
-rw-r--r--tools/perf/util/llvm-utils.h49
-rw-r--r--tools/perf/util/machine.c327
-rw-r--r--tools/perf/util/machine.h38
-rw-r--r--tools/perf/util/map.c294
-rw-r--r--tools/perf/util/map.h57
-rw-r--r--tools/perf/util/ordered-events.c3
-rw-r--r--tools/perf/util/pager.c5
-rw-r--r--tools/perf/util/parse-branch-options.c94
-rw-r--r--tools/perf/util/parse-branch-options.h5
-rw-r--r--tools/perf/util/parse-events.c375
-rw-r--r--tools/perf/util/parse-events.h47
-rw-r--r--tools/perf/util/parse-events.l50
-rw-r--r--tools/perf/util/parse-events.y50
-rw-r--r--tools/perf/util/parse-options.h4
-rw-r--r--tools/perf/util/parse-regs-options.c71
-rw-r--r--tools/perf/util/parse-regs-options.h5
-rw-r--r--tools/perf/util/perf_regs.c4
-rw-r--r--tools/perf/util/perf_regs.h9
-rw-r--r--tools/perf/util/pmu.c162
-rw-r--r--tools/perf/util/pmu.h7
-rw-r--r--tools/perf/util/probe-event.c1217
-rw-r--r--tools/perf/util/probe-event.h39
-rw-r--r--tools/perf/util/probe-file.c301
-rw-r--r--tools/perf/util/probe-file.h18
-rw-r--r--tools/perf/util/probe-finder.c192
-rw-r--r--tools/perf/util/probe-finder.h10
-rw-r--r--tools/perf/util/pstack.c7
-rw-r--r--tools/perf/util/pstack.h1
-rw-r--r--tools/perf/util/python-ext-sources6
-rw-r--r--tools/perf/util/python.c144
-rw-r--r--tools/perf/util/record.c53
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c4
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c4
-rw-r--r--tools/perf/util/session.c333
-rw-r--r--tools/perf/util/session.h6
-rw-r--r--tools/perf/util/sort.c92
-rw-r--r--tools/perf/util/sort.h41
-rw-r--r--tools/perf/util/srcline.c6
-rw-r--r--tools/perf/util/stat-shadow.c432
-rw-r--r--tools/perf/util/stat.c269
-rw-r--r--tools/perf/util/stat.h67
-rw-r--r--tools/perf/util/strfilter.c107
-rw-r--r--tools/perf/util/strfilter.h35
-rw-r--r--tools/perf/util/string.c39
-rw-r--r--tools/perf/util/strlist.c43
-rw-r--r--tools/perf/util/strlist.h9
-rw-r--r--tools/perf/util/svghelper.c2
-rw-r--r--tools/perf/util/symbol-elf.c36
-rw-r--r--tools/perf/util/symbol.c147
-rw-r--r--tools/perf/util/symbol.h20
-rw-r--r--tools/perf/util/thread-stack.c18
-rw-r--r--tools/perf/util/thread-stack.h1
-rw-r--r--tools/perf/util/thread.c18
-rw-r--r--tools/perf/util/thread.h5
-rw-r--r--tools/perf/util/thread_map.c161
-rw-r--r--tools/perf/util/thread_map.h31
-rw-r--r--tools/perf/util/tool.h14
-rw-r--r--tools/perf/util/trace-event-info.c22
-rw-r--r--tools/perf/util/trace-event-parse.c32
-rw-r--r--tools/perf/util/trace-event-read.c28
-rw-r--r--tools/perf/util/trace-event.c44
-rw-r--r--tools/perf/util/trace-event.h2
-rw-r--r--tools/perf/util/unwind-libunwind.c11
-rw-r--r--tools/perf/util/util.c268
-rw-r--r--tools/perf/util/util.h23
-rw-r--r--tools/perf/util/vdso.c58
-rw-r--r--tools/perf/util/vdso.h4
-rw-r--r--tools/perf/util/xyarray.c8
-rw-r--r--tools/perf/util/xyarray.h2
-rw-r--r--tools/power/acpi/Makefile168
-rw-r--r--tools/power/acpi/Makefile.config92
-rw-r--r--tools/power/acpi/Makefile.rules37
-rw-r--r--tools/power/acpi/common/getopt.c4
-rw-r--r--tools/power/acpi/man/acpidump.817
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c95
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixmap.c2
-rw-r--r--tools/power/acpi/tools/acpidump/Makefile53
-rw-r--r--tools/power/acpi/tools/acpidump/acpidump.h2
-rw-r--r--tools/power/acpi/tools/acpidump/apdump.c8
-rw-r--r--tools/power/acpi/tools/acpidump/apfiles.c12
-rw-r--r--tools/power/acpi/tools/acpidump/apmain.c15
-rw-r--r--tools/power/acpi/tools/ec/Makefile33
-rw-r--r--tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c4
-rw-r--r--tools/power/cpupower/utils/cpufreq-set.c4
-rw-r--r--tools/power/cpupower/utils/helpers/topology.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/mperf_monitor.c5
-rw-r--r--tools/power/x86/turbostat/Makefile2
-rw-r--r--tools/power/x86/turbostat/turbostat.85
-rw-r--r--tools/power/x86/turbostat/turbostat.c318
-rw-r--r--tools/testing/nvdimm/Kbuild43
-rw-r--r--tools/testing/nvdimm/Makefile7
-rw-r--r--tools/testing/nvdimm/config_check.c15
-rw-r--r--tools/testing/nvdimm/test/Kbuild8
-rw-r--r--tools/testing/nvdimm/test/iomap.c178
-rw-r--r--tools/testing/nvdimm/test/nfit.c1162
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h29
-rw-r--r--tools/testing/selftests/Makefile9
-rw-r--r--tools/testing/selftests/capabilities/.gitignore2
-rw-r--r--tools/testing/selftests/capabilities/Makefile18
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c427
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c73
-rw-r--r--tools/testing/selftests/exec/Makefile2
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh25
-rwxr-xr-xtools/testing/selftests/firmware/fw_userhelper.sh12
-rw-r--r--tools/testing/selftests/ftrace/Makefile1
-rw-r--r--tools/testing/selftests/futex/Makefile29
-rw-r--r--tools/testing/selftests/futex/README62
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore7
-rw-r--r--tools/testing/selftests/futex/functional/Makefile25
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c409
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c135
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c223
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c125
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c86
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c124
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c79
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh79
-rw-r--r--tools/testing/selftests/futex/include/atomic.h83
-rw-r--r--tools/testing/selftests/futex/include/futextest.h266
-rw-r--r--tools/testing/selftests/futex/include/logging.h153
-rwxr-xr-xtools/testing/selftests/futex/run.sh33
-rw-r--r--tools/testing/selftests/kselftest.h17
-rw-r--r--tools/testing/selftests/lib.mk3
-rw-r--r--tools/testing/selftests/mount/Makefile7
-rw-r--r--tools/testing/selftests/net/psock_fanout.c71
-rw-r--r--tools/testing/selftests/net/psock_lib.h29
-rw-r--r--tools/testing/selftests/powerpc/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/dscr/.gitignore7
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile14
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr.h127
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_default_test.c127
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c71
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c117
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c95
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c97
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c80
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_user_test.c61
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/Makefile14
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall.c3
-rw-r--r--tools/testing/selftests/powerpc/vphn/Makefile13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh25
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS015
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS021
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS032
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY022
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE023
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE02-T2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE039
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE047
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE055
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE065
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE073
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE087
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08-T2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE092
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt35
-rw-r--r--tools/testing/selftests/seccomp/.gitignore1
-rw-r--r--tools/testing/selftests/seccomp/Makefile10
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c2122
-rw-r--r--tools/testing/selftests/seccomp/test_harness.h537
-rw-r--r--tools/testing/selftests/static_keys/Makefile8
-rw-r--r--tools/testing/selftests/static_keys/test_static_keys.sh16
-rw-r--r--tools/testing/selftests/timers/.gitignore18
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c10
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c77
-rw-r--r--tools/testing/selftests/vm/Makefile10
-rw-r--r--tools/testing/selftests/vm/compaction_test.c225
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests23
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c636
-rw-r--r--tools/testing/selftests/x86/Makefile64
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh16
-rw-r--r--tools/testing/selftests/x86/entry_from_vm86.c233
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c576
-rw-r--r--tools/testing/selftests/x86/run_x86_tests.sh13
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c130
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c54
-rw-r--r--tools/testing/selftests/x86/sysret_ss_attrs.c112
-rw-r--r--tools/testing/selftests/x86/thunks.S67
-rw-r--r--tools/testing/selftests/x86/trivial_32bit_program.c4
-rw-r--r--tools/testing/selftests/x86/trivial_64bit_program.c18
-rw-r--r--tools/thermal/tmon/Makefile8
-rw-r--r--tools/vm/Makefile2
459 files changed, 41761 insertions, 5200 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 9a617ad..d6f307d 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -1,3 +1,8 @@
+# Some of the tools (perf) use same make variables
+# as in kernel build.
+export srctree=
+export objtree=
+
include scripts/Makefile.include
help:
@@ -8,6 +13,7 @@ help:
@echo ' cpupower - a tool for all things x86 CPU power'
@echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer'
@echo ' hv - tools used when in Hyper-V clients'
+ @echo ' iio - IIO tools'
@echo ' lguest - a minimal 32-bit x86 hypervisor'
@echo ' perf - Linux performance measurement and analysis tool'
@echo ' selftests - various kernel selftests'
@@ -18,6 +24,7 @@ help:
@echo ' vm - misc vm tools'
@echo ' x86_energy_perf_policy - Intel energy policy tool'
@echo ' tmon - thermal monitoring and tuning tool'
+ @echo ' freefall - laptop accelerometer program for disk protection'
@echo ''
@echo 'You can do:'
@echo ' $$ make -C tools/ <tool>_install'
@@ -41,17 +48,22 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-cgroup firewire hv guest usb virtio vm net: FORCE
+cgroup firewire hv guest usb virtio vm net iio: FORCE
$(call descend,$@)
liblockdep: FORCE
$(call descend,lib/lockdep)
-libapikfs: FORCE
+libapi: FORCE
$(call descend,lib/api)
-perf: libapikfs FORCE
- $(call descend,$@)
+# The perf build does not follow the descend function setup,
+# invoking it via it's own make rule.
+PERF_O = $(if $(O),$(O)/tools/perf,)
+
+perf: FORCE
+ $(Q)mkdir -p $(PERF_O) .
+ $(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir=
selftests: FORCE
$(call descend,testing/$@)
@@ -62,6 +74,9 @@ turbostat x86_energy_perf_policy: FORCE
tmon: FORCE
$(call descend,thermal/$@)
+freefall: FORCE
+ $(call descend,laptop/$@)
+
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -80,10 +95,13 @@ turbostat_install x86_energy_perf_policy_install:
tmon_install:
$(call descend,thermal/$(@:_install=),install)
+freefall_install:
+ $(call descend,laptop/$(@:_install=),install)
+
install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install vm_install net_install x86_energy_perf_policy_install \
- tmon
+ tmon freefall_install
acpi_clean:
$(call descend,power/acpi,clean)
@@ -91,16 +109,16 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean:
+cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean iio_clean:
$(call descend,$(@:_clean=),clean)
liblockdep_clean:
$(call descend,lib/lockdep,clean)
-libapikfs_clean:
+libapi_clean:
$(call descend,lib/api,clean)
-perf_clean: libapikfs_clean
+perf_clean:
$(call descend,$(@:_clean=),clean)
selftests_clean:
@@ -112,8 +130,12 @@ turbostat_clean x86_energy_perf_policy_clean:
tmon_clean:
$(call descend,thermal/tmon,clean)
+freefall_clean:
+ $(call descend,laptop/freefall,clean)
+
clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean lguest_clean \
perf_clean selftests_clean turbostat_clean usb_clean virtio_clean \
- vm_clean net_clean x86_energy_perf_policy_clean tmon_clean
+ vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
+ freefall_clean
.PHONY: FORCE
diff --git a/tools/arch/alpha/include/asm/barrier.h b/tools/arch/alpha/include/asm/barrier.h
new file mode 100644
index 0000000..95df19c
--- /dev/null
+++ b/tools/arch/alpha/include/asm/barrier.h
@@ -0,0 +1,8 @@
+#ifndef __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+#define __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+
+#define mb() __asm__ __volatile__("mb": : :"memory")
+#define rmb() __asm__ __volatile__("mb": : :"memory")
+#define wmb() __asm__ __volatile__("wmb": : :"memory")
+
+#endif /* __TOOLS_LINUX_ASM_ALPHA_BARRIER_H */
diff --git a/tools/arch/arm/include/asm/barrier.h b/tools/arch/arm/include/asm/barrier.h
new file mode 100644
index 0000000..005c618
--- /dev/null
+++ b/tools/arch/arm/include/asm/barrier.h
@@ -0,0 +1,12 @@
+#ifndef _TOOLS_LINUX_ASM_ARM_BARRIER_H
+#define _TOOLS_LINUX_ASM_ARM_BARRIER_H
+
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define mb() ((void(*)(void))0xffff0fa0)()
+#define wmb() ((void(*)(void))0xffff0fa0)()
+#define rmb() ((void(*)(void))0xffff0fa0)()
+
+#endif /* _TOOLS_LINUX_ASM_ARM_BARRIER_H */
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
new file mode 100644
index 0000000..a0483c8
--- /dev/null
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -0,0 +1,16 @@
+#ifndef _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+#define _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+
+/*
+ * From tools/perf/perf-sys.h, last modified in:
+ * f428ebd184c82a7914b2aa7e9f868918aaf7ea78 perf tools: Fix AAAAARGH64 memory barriers
+ *
+ * XXX: arch/arm64/include/asm/barrier.h in the kernel sources use dsb, is this
+ * a case like for arm32 where we do things differently in userspace?
+ */
+
+#define mb() asm volatile("dmb ish" ::: "memory")
+#define wmb() asm volatile("dmb ishst" ::: "memory")
+#define rmb() asm volatile("dmb ishld" ::: "memory")
+
+#endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h
new file mode 100644
index 0000000..e4422b4
--- /dev/null
+++ b/tools/arch/ia64/include/asm/barrier.h
@@ -0,0 +1,48 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * Memory barrier definitions. This is based on information published
+ * in the Processor Abstraction Layer and the System Abstraction Layer
+ * manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+#ifndef _TOOLS_LINUX_ASM_IA64_BARRIER_H
+#define _TOOLS_LINUX_ASM_IA64_BARRIER_H
+
+#include <linux/compiler.h>
+
+/*
+ * Macros to force memory ordering. In these descriptions, "previous"
+ * and "subsequent" refer to program order; "visible" means that all
+ * architecturally visible effects of a memory access have occurred
+ * (at a minimum, this means the memory has been read or written).
+ *
+ * wmb(): Guarantees that all preceding stores to memory-
+ * like regions are visible before any subsequent
+ * stores and that all following stores will be
+ * visible only after all previous stores.
+ * rmb(): Like wmb(), but for reads.
+ * mb(): wmb()/rmb() combo, i.e., all previous memory
+ * accesses are visible before all subsequent
+ * accesses and vice versa. This is also known as
+ * a "fence."
+ *
+ * Note: "mb()" and its variants cannot be used as a fence to order
+ * accesses to memory mapped I/O registers. For that, mf.a needs to
+ * be used. However, we don't want to always use mf.a because (a)
+ * it's (presumably) much slower than mf and (b) mf.a is supported for
+ * sequential memory pages only.
+ */
+
+/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */
+#define ia64_mf() asm volatile ("mf" ::: "memory")
+
+#define mb() ia64_mf()
+#define rmb() mb()
+#define wmb() mb()
+
+#endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
diff --git a/tools/arch/mips/include/asm/barrier.h b/tools/arch/mips/include/asm/barrier.h
new file mode 100644
index 0000000..80f96f7
--- /dev/null
+++ b/tools/arch/mips/include/asm/barrier.h
@@ -0,0 +1,20 @@
+#ifndef _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+#define _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in c1e028ef40b8d6943b767028ba17d4f2ba020edb, more work needed to make it
+ * more closely follow the Linux kernel arch/mips/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+#define mb() asm volatile( \
+ ".set mips2\n\t" \
+ "sync\n\t" \
+ ".set mips0" \
+ : /* no output */ \
+ : /* no input */ \
+ : "memory")
+#define wmb() mb()
+#define rmb() mb()
+
+#endif /* _TOOLS_LINUX_ASM_MIPS_BARRIER_H */
diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h
new file mode 100644
index 0000000..b23aee8
--- /dev/null
+++ b/tools/arch/powerpc/include/asm/barrier.h
@@ -0,0 +1,29 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+#define _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+
+/*
+ * Memory barrier.
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory). The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * mb() prevents loads and stores being reordered across this point.
+ * rmb() prevents loads being reordered across this point.
+ * wmb() prevents stores being reordered across this point.
+ *
+ * *mb() variants without smp_ prefix must order all types of memory
+ * operations with one another. sync is the only instruction sufficient
+ * to do this.
+ */
+#define mb() __asm__ __volatile__ ("sync" : : : "memory")
+#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
+#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
+
+#endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h
new file mode 100644
index 0000000..f851412
--- /dev/null
+++ b/tools/arch/s390/include/asm/barrier.h
@@ -0,0 +1,30 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __TOOLS_LINUX_ASM_BARRIER_H
+#define __TOOLS_LINUX_ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define __ASM_BARRIER "bcr 14,0\n"
+#else
+#define __ASM_BARRIER "bcr 15,0\n"
+#endif
+
+#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
+#define rmb() mb()
+#define wmb() mb()
+
+#endif /* __TOOLS_LIB_ASM_BARRIER_H */
diff --git a/tools/arch/sh/include/asm/barrier.h b/tools/arch/sh/include/asm/barrier.h
new file mode 100644
index 0000000..c18fd75
--- /dev/null
+++ b/tools/arch/sh/include/asm/barrier.h
@@ -0,0 +1,32 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima
+ * Copyright (C) 2002 Paul Mundt
+ */
+#ifndef __TOOLS_LINUX_ASM_SH_BARRIER_H
+#define __TOOLS_LINUX_ASM_SH_BARRIER_H
+
+/*
+ * A brief note on ctrl_barrier(), the control register write barrier.
+ *
+ * Legacy SH cores typically require a sequence of 8 nops after
+ * modification of a control register in order for the changes to take
+ * effect. On newer cores (like the sh4a and sh5) this is accomplished
+ * with icbi.
+ *
+ * Also note that on sh4a in the icbi case we can forego a synco for the
+ * write barrier, as it's not necessary for control registers.
+ *
+ * Historically we have only done this type of barrier for the MMUCR, but
+ * it's also necessary for the CCR, so we make it generic here instead.
+ */
+#if defined(__SH4A__) || defined(__SH5__)
+#define mb() __asm__ __volatile__ ("synco": : :"memory")
+#define rmb() mb()
+#define wmb() mb()
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif /* __TOOLS_LINUX_ASM_SH_BARRIER_H */
diff --git a/tools/arch/sparc/include/asm/barrier.h b/tools/arch/sparc/include/asm/barrier.h
new file mode 100644
index 0000000..8c017b3
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier.h
@@ -0,0 +1,8 @@
+#ifndef ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#define ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#if defined(__sparc__) && defined(__arch64__)
+#include "barrier_64.h"
+#else
+#include "barrier_32.h"
+#endif
+#endif
diff --git a/tools/arch/sparc/include/asm/barrier_32.h b/tools/arch/sparc/include/asm/barrier_32.h
new file mode 100644
index 0000000..c5eadd0
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier_32.h
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_PERF_SPARC_BARRIER_H
+#define __TOOLS_PERF_SPARC_BARRIER_H
+
+#include <asm-generic/barrier.h>
+
+#endif /* !(__TOOLS_PERF_SPARC_BARRIER_H) */
diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h
new file mode 100644
index 0000000..9a7d732
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier_64.h
@@ -0,0 +1,42 @@
+#ifndef __TOOLS_LINUX_SPARC64_BARRIER_H
+#define __TOOLS_LINUX_SPARC64_BARRIER_H
+
+/* Copied from the kernel sources to tools/:
+ *
+ * These are here in an effort to more fully work around Spitfire Errata
+ * #51. Essentially, if a memory barrier occurs soon after a mispredicted
+ * branch, the chip can stop executing instructions until a trap occurs.
+ * Therefore, if interrupts are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ * call sym_ccb_from_dsa, 0
+ * movge %icc, 0, %l0
+ * brz,pn %o0, .LL1303
+ * mov %o0, %l2
+ * membar #LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur. Therefore, we put
+ * the memory barrier explicitly into a "branch always, predicted taken"
+ * delay slot to avoid the problem case.
+ */
+#define membar_safe(type) \
+do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
+ " membar " type "\n" \
+ "1:\n" \
+ : : : "memory"); \
+} while (0)
+
+/* The kernel always executes in TSO memory model these days,
+ * and furthermore most sparc64 chips implement more stringent
+ * memory ordering than required by the specifications.
+ */
+#define mb() membar_safe("#StoreLoad")
+#define rmb() __asm__ __volatile__("":::"memory")
+#define wmb() __asm__ __volatile__("":::"memory")
+
+#endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
diff --git a/tools/arch/tile/include/asm/barrier.h b/tools/arch/tile/include/asm/barrier.h
new file mode 100644
index 0000000..7d3692c
--- /dev/null
+++ b/tools/arch/tile/include/asm/barrier.h
@@ -0,0 +1,15 @@
+#ifndef _TOOLS_LINUX_ASM_TILE_BARRIER_H
+#define _TOOLS_LINUX_ASM_TILE_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in 620830b6954913647b7c7f68920cf48eddf6ad92, more work needed to make it
+ * more closely follow the Linux kernel arch/tile/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+
+#define mb() asm volatile ("mf" ::: "memory")
+#define wmb() mb()
+#define rmb() mb()
+
+#endif /* _TOOLS_LINUX_ASM_TILE_BARRIER_H */
diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h
new file mode 100644
index 0000000..059e33e
--- /dev/null
+++ b/tools/arch/x86/include/asm/atomic.h
@@ -0,0 +1,65 @@
+#ifndef _TOOLS_LINUX_ASM_X86_ATOMIC_H
+#define _TOOLS_LINUX_ASM_X86_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "rmwcc.h"
+
+#define LOCK_PREFIX "\n\tlock; "
+
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+ return ACCESS_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+ v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "incl %0"
+ : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+ GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+}
+
+#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
new file mode 100644
index 0000000..f366d8e
--- /dev/null
+++ b/tools/arch/x86/include/asm/barrier.h
@@ -0,0 +1,28 @@
+#ifndef _TOOLS_LINUX_ASM_X86_BARRIER_H
+#define _TOOLS_LINUX_ASM_X86_BARRIER_H
+
+/*
+ * Copied from the Linux kernel sources, and also moving code
+ * out from tools/perf/perf-sys.h so as to make it be located
+ * in a place similar as in the kernel sources.
+ *
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define mb() asm volatile("mfence":::"memory")
+#define rmb() asm volatile("lfence":::"memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#endif
+
+#endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h
new file mode 100644
index 0000000..a6669bc
--- /dev/null
+++ b/tools/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,41 @@
+#ifndef _TOOLS_LINUX_ASM_X86_RMWcc
+#define _TOOLS_LINUX_ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...) \
+do { \
+ asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \
+ : : "m" (var), ## __VA_ARGS__ \
+ : "memory" : cc_label); \
+ return 0; \
+cc_label: \
+ return 1; \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+ __GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
+ __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...) \
+do { \
+ char c; \
+ asm volatile (fullop "; set" cc " %1" \
+ : "+m" (var), "=qm" (c) \
+ : __VA_ARGS__ : "memory"); \
+ return c != 0; \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+ __GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
+ __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _TOOLS_LINUX_ASM_X86_RMWcc */
diff --git a/tools/arch/xtensa/include/asm/barrier.h b/tools/arch/xtensa/include/asm/barrier.h
new file mode 100644
index 0000000..583800b
--- /dev/null
+++ b/tools/arch/xtensa/include/asm/barrier.h
@@ -0,0 +1,18 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001 - 2012 Tensilica Inc.
+ */
+
+#ifndef _TOOLS_LINUX_XTENSA_SYSTEM_H
+#define _TOOLS_LINUX_XTENSA_SYSTEM_H
+
+#define mb() ({ __asm__ __volatile__("memw" : : : "memory"); })
+#define rmb() barrier()
+#define wmb() mb()
+
+#endif /* _TOOLS_LINUX_XTENSA_SYSTEM_H */
diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt
index 00ad2d6..aa5e092 100644
--- a/tools/build/Documentation/Build.txt
+++ b/tools/build/Documentation/Build.txt
@@ -66,6 +66,7 @@ To follow the above example, the user provides following 'Build' files:
ex/Build:
ex-y += a.o
ex-y += b.o
+ ex-y += b.o # duplicates in the lists are allowed
libex-y += c.o
libex-y += d.o
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 10df572..0c5f485 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -25,7 +25,7 @@ build-dir := $(srctree)/tools/build
include $(build-dir)/Build.include
# do not force detected configuration
--include .config-detected
+-include $(OUTPUT).config-detected
# Init all relevant variables used in build files so
# 1) they have correct type
@@ -37,7 +37,7 @@ subdir-obj-y :=
# Build definitions
build-file := $(dir)/Build
-include $(build-file)
+-include $(build-file)
quiet_cmd_flex = FLEX $@
quiet_cmd_bison = BISON $@
@@ -57,11 +57,13 @@ quiet_cmd_cc_i_c = CPP $@
quiet_cmd_cc_s_c = AS $@
cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
+quiet_cmd_gen = GEN $@
+
# Link agregate command
# If there's nothing to link, create empty $@ object.
quiet_cmd_ld_multi = LD $@
cmd_ld_multi = $(if $(strip $(obj-y)),\
- $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@)
+ $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
# Build rules
$(OUTPUT)%.o: %.c FORCE
@@ -94,12 +96,12 @@ obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y))
subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y))
# '$(OUTPUT)/dir' prefix to all objects
-prefix := $(subst ./,,$(OUTPUT)$(dir)/)
-obj-y := $(addprefix $(prefix),$(obj-y))
-subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y))
+objprefix := $(subst ./,,$(OUTPUT)$(dir)/)
+obj-y := $(addprefix $(objprefix),$(obj-y))
+subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y))
# Final '$(obj)-in.o' object
-in-target := $(prefix)$(obj)-in.o
+in-target := $(objprefix)$(obj)-in.o
PHONY += $(subdir-y)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 3a0b0ca..2975632 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -27,7 +27,7 @@ endef
# the rule that uses them - an example for that is the 'bionic'
# feature check. ]
#
-FEATURE_TESTS = \
+FEATURE_TESTS ?= \
backtrace \
dwarf \
fortify-source \
@@ -53,7 +53,7 @@ FEATURE_TESTS = \
zlib \
lzma
-FEATURE_DISPLAY = \
+FEATURE_DISPLAY ?= \
dwarf \
glibc \
gtk2 \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 463ed8f..74ca420 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -33,7 +33,8 @@ FILES= \
test-compile-32.bin \
test-compile-x32.bin \
test-zlib.bin \
- test-lzma.bin
+ test-lzma.bin \
+ test-bpf.bin
CC := $(CROSS_COMPILE)gcc -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
@@ -69,8 +70,13 @@ test-libelf.bin:
test-glibc.bin:
$(BUILD)
+DWARFLIBS := -ldw
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
+endif
+
test-dwarf.bin:
- $(BUILD) -ldw
+ $(BUILD) $(DWARFLIBS)
test-libelf-mmap.bin:
$(BUILD) -lelf
@@ -156,6 +162,9 @@ test-zlib.bin:
test-lzma.bin:
$(BUILD) -llzma
+test-bpf.bin:
+ $(BUILD)
+
-include *.d
###############################
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
new file mode 100644
index 0000000..062bac8
--- /dev/null
+++ b/tools/build/feature/test-bpf.c
@@ -0,0 +1,18 @@
+#include <linux/bpf.h>
+
+int main(void)
+{
+ union bpf_attr attr;
+
+ attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ attr.insn_cnt = 0;
+ attr.insns = 0;
+ attr.license = 0;
+ attr.log_buf = 0;
+ attr.log_size = 0;
+ attr.log_level = 0;
+ attr.kern_version = 0;
+
+ attr = attr;
+ return 0;
+}
diff --git a/tools/build/feature/test-glibc.c b/tools/build/feature/test-glibc.c
index b082034..9367f758 100644
--- a/tools/build/feature/test-glibc.c
+++ b/tools/build/feature/test-glibc.c
@@ -1,8 +1,19 @@
+#include <stdlib.h>
+
+#if !defined(__UCLIBC__)
#include <gnu/libc-version.h>
+#else
+#define XSTR(s) STR(s)
+#define STR(s) #s
+#endif
int main(void)
{
+#if !defined(__UCLIBC__)
const char *version = gnu_get_libc_version();
+#else
+ const char *version = XSTR(__GLIBC__) "." XSTR(__GLIBC_MINOR__);
+#endif
return (long)version;
}
diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build
index 0e6c3e6..429c7d4 100644
--- a/tools/build/tests/ex/Build
+++ b/tools/build/tests/ex/Build
@@ -1,7 +1,9 @@
ex-y += ex.o
ex-y += a.o
ex-y += b.o
+ex-y += b.o
ex-y += empty/
+ex-y += empty2/
libex-y += c.o
libex-y += d.o
diff --git a/tools/build/tests/ex/empty2/README b/tools/build/tests/ex/empty2/README
new file mode 100644
index 0000000..2107cc5
--- /dev/null
+++ b/tools/build/tests/ex/empty2/README
@@ -0,0 +1,2 @@
+This directory is left intentionally without Build file
+to test proper nesting into Build-less directories.
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index 9445d8f..5480e4e 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -137,6 +137,8 @@ int main(int argc, char *argv[])
int version = FCOPY_CURRENT_VERSION;
char *buffer[4096 * 2];
struct hv_fcopy_hdr *in_msg;
+ int in_handshake = 1;
+ __u32 kernel_modver;
static struct option long_options[] = {
{"help", no_argument, 0, 'h' },
@@ -191,6 +193,19 @@ int main(int argc, char *argv[])
syslog(LOG_ERR, "pread failed: %s", strerror(errno));
exit(EXIT_FAILURE);
}
+
+ if (in_handshake) {
+ if (len != sizeof(kernel_modver)) {
+ syslog(LOG_ERR, "invalid version negotiation");
+ exit(EXIT_FAILURE);
+ }
+ kernel_modver = *(__u32 *)buffer;
+ in_handshake = 0;
+ syslog(LOG_INFO, "HV_FCOPY: kernel module version: %d",
+ kernel_modver);
+ continue;
+ }
+
in_msg = (struct hv_fcopy_hdr *)buffer;
switch (in_msg->operation) {
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 408bb07..0d9f48e 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -33,7 +33,6 @@
#include <ctype.h>
#include <errno.h>
#include <arpa/inet.h>
-#include <linux/connector.h>
#include <linux/hyperv.h>
#include <linux/netlink.h>
#include <ifaddrs.h>
@@ -79,7 +78,6 @@ enum {
DNS
};
-static struct sockaddr_nl addr;
static int in_hand_shake = 1;
static char *os_name = "";
@@ -1387,34 +1385,6 @@ kvp_get_domain_name(char *buffer, int length)
freeaddrinfo(info);
}
-static int
-netlink_send(int fd, struct cn_msg *msg)
-{
- struct nlmsghdr nlh = { .nlmsg_type = NLMSG_DONE };
- unsigned int size;
- struct msghdr message;
- struct iovec iov[2];
-
- size = sizeof(struct cn_msg) + msg->len;
-
- nlh.nlmsg_pid = getpid();
- nlh.nlmsg_len = NLMSG_LENGTH(size);
-
- iov[0].iov_base = &nlh;
- iov[0].iov_len = sizeof(nlh);
-
- iov[1].iov_base = msg;
- iov[1].iov_len = size;
-
- memset(&message, 0, sizeof(message));
- message.msg_name = &addr;
- message.msg_namelen = sizeof(addr);
- message.msg_iov = iov;
- message.msg_iovlen = 2;
-
- return sendmsg(fd, &message, 0);
-}
-
void print_usage(char *argv[])
{
fprintf(stderr, "Usage: %s [options]\n"
@@ -1425,22 +1395,17 @@ void print_usage(char *argv[])
int main(int argc, char *argv[])
{
- int fd, len, nl_group;
+ int kvp_fd, len;
int error;
- struct cn_msg *message;
struct pollfd pfd;
- struct nlmsghdr *incoming_msg;
- struct cn_msg *incoming_cn_msg;
- struct hv_kvp_msg *hv_msg;
- char *p;
+ char *p;
+ struct hv_kvp_msg hv_msg[1];
char *key_value;
char *key_name;
int op;
int pool;
char *if_name;
struct hv_kvp_ipaddr_value *kvp_ip_val;
- char *kvp_recv_buffer;
- size_t kvp_recv_buffer_len;
int daemonize = 1, long_index = 0, opt;
static struct option long_options[] = {
@@ -1468,12 +1433,14 @@ int main(int argc, char *argv[])
openlog("KVP", 0, LOG_USER);
syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
- kvp_recv_buffer_len = NLMSG_LENGTH(0) + sizeof(struct cn_msg) + sizeof(struct hv_kvp_msg);
- kvp_recv_buffer = calloc(1, kvp_recv_buffer_len);
- if (!kvp_recv_buffer) {
- syslog(LOG_ERR, "Failed to allocate netlink buffer");
+ kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR);
+
+ if (kvp_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
+ errno, strerror(errno));
exit(EXIT_FAILURE);
}
+
/*
* Retrieve OS release information.
*/
@@ -1489,100 +1456,44 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
- fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
- if (fd < 0) {
- syslog(LOG_ERR, "netlink socket creation failed; error: %d %s", errno,
- strerror(errno));
- exit(EXIT_FAILURE);
- }
- addr.nl_family = AF_NETLINK;
- addr.nl_pad = 0;
- addr.nl_pid = 0;
- addr.nl_groups = 0;
-
-
- error = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
- if (error < 0) {
- syslog(LOG_ERR, "bind failed; error: %d %s", errno, strerror(errno));
- close(fd);
- exit(EXIT_FAILURE);
- }
- nl_group = CN_KVP_IDX;
-
- if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &nl_group, sizeof(nl_group)) < 0) {
- syslog(LOG_ERR, "setsockopt failed; error: %d %s", errno, strerror(errno));
- close(fd);
- exit(EXIT_FAILURE);
- }
-
/*
* Register ourselves with the kernel.
*/
- message = (struct cn_msg *)kvp_recv_buffer;
- message->id.idx = CN_KVP_IDX;
- message->id.val = CN_KVP_VAL;
-
- hv_msg = (struct hv_kvp_msg *)message->data;
hv_msg->kvp_hdr.operation = KVP_OP_REGISTER1;
- message->ack = 0;
- message->len = sizeof(struct hv_kvp_msg);
-
- len = netlink_send(fd, message);
- if (len < 0) {
- syslog(LOG_ERR, "netlink_send failed; error: %d %s", errno, strerror(errno));
- close(fd);
+ len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
+ if (len != sizeof(struct hv_kvp_msg)) {
+ syslog(LOG_ERR, "registration to kernel failed; error: %d %s",
+ errno, strerror(errno));
+ close(kvp_fd);
exit(EXIT_FAILURE);
}
- pfd.fd = fd;
+ pfd.fd = kvp_fd;
while (1) {
- struct sockaddr *addr_p = (struct sockaddr *) &addr;
- socklen_t addr_l = sizeof(addr);
pfd.events = POLLIN;
pfd.revents = 0;
if (poll(&pfd, 1, -1) < 0) {
syslog(LOG_ERR, "poll failed; error: %d %s", errno, strerror(errno));
if (errno == EINVAL) {
- close(fd);
+ close(kvp_fd);
exit(EXIT_FAILURE);
}
else
continue;
}
- len = recvfrom(fd, kvp_recv_buffer, kvp_recv_buffer_len, 0,
- addr_p, &addr_l);
-
- if (len < 0) {
- int saved_errno = errno;
- syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s",
- addr.nl_pid, errno, strerror(errno));
+ len = read(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
- if (saved_errno == ENOBUFS) {
- syslog(LOG_ERR, "receive error: ignored");
- continue;
- }
+ if (len != sizeof(struct hv_kvp_msg)) {
+ syslog(LOG_ERR, "read failed; error:%d %s",
+ errno, strerror(errno));
- close(fd);
- return -1;
+ close(kvp_fd);
+ return EXIT_FAILURE;
}
- if (addr.nl_pid) {
- syslog(LOG_WARNING, "Received packet from untrusted pid:%u",
- addr.nl_pid);
- continue;
- }
-
- incoming_msg = (struct nlmsghdr *)kvp_recv_buffer;
-
- if (incoming_msg->nlmsg_type != NLMSG_DONE)
- continue;
-
- incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg);
- hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data;
-
/*
* We will use the KVP header information to pass back
* the error from this daemon. So, first copy the state
@@ -1603,7 +1514,7 @@ int main(int argc, char *argv[])
if (lic_version) {
strcpy(lic_version, p);
syslog(LOG_INFO, "KVP LIC Version: %s",
- lic_version);
+ lic_version);
} else {
syslog(LOG_ERR, "malloc failed");
}
@@ -1702,7 +1613,6 @@ int main(int argc, char *argv[])
goto kvp_done;
}
- hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data;
key_name = (char *)hv_msg->body.kvp_enum_data.data.key;
key_value = (char *)hv_msg->body.kvp_enum_data.data.value;
@@ -1753,31 +1663,17 @@ int main(int argc, char *argv[])
hv_msg->error = HV_S_CONT;
break;
}
- /*
- * Send the value back to the kernel. The response is
- * already in the receive buffer. Update the cn_msg header to
- * reflect the key value that has been added to the message
- */
-kvp_done:
-
- incoming_cn_msg->id.idx = CN_KVP_IDX;
- incoming_cn_msg->id.val = CN_KVP_VAL;
- incoming_cn_msg->ack = 0;
- incoming_cn_msg->len = sizeof(struct hv_kvp_msg);
-
- len = netlink_send(fd, incoming_cn_msg);
- if (len < 0) {
- int saved_errno = errno;
- syslog(LOG_ERR, "net_link send failed; error: %d %s", errno,
- strerror(errno));
-
- if (saved_errno == ENOMEM || saved_errno == ENOBUFS) {
- syslog(LOG_ERR, "send error: ignored");
- continue;
- }
+ /* Send the value back to the kernel. */
+kvp_done:
+ len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
+ if (len != sizeof(struct hv_kvp_msg)) {
+ syslog(LOG_ERR, "write failed; error: %d %s", errno,
+ strerror(errno));
exit(EXIT_FAILURE);
}
}
+ close(kvp_fd);
+ exit(0);
}
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 506dd01..96234b6 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -19,7 +19,6 @@
#include <sys/types.h>
-#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <fcntl.h>
@@ -30,21 +29,11 @@
#include <string.h>
#include <ctype.h>
#include <errno.h>
-#include <arpa/inet.h>
#include <linux/fs.h>
-#include <linux/connector.h>
#include <linux/hyperv.h>
-#include <linux/netlink.h>
#include <syslog.h>
#include <getopt.h>
-static struct sockaddr_nl addr;
-
-#ifndef SOL_NETLINK
-#define SOL_NETLINK 270
-#endif
-
-
/* Don't use syslog() in the function since that can cause write to disk */
static int vss_do_freeze(char *dir, unsigned int cmd)
{
@@ -143,33 +132,6 @@ out:
return error;
}
-static int netlink_send(int fd, struct cn_msg *msg)
-{
- struct nlmsghdr nlh = { .nlmsg_type = NLMSG_DONE };
- unsigned int size;
- struct msghdr message;
- struct iovec iov[2];
-
- size = sizeof(struct cn_msg) + msg->len;
-
- nlh.nlmsg_pid = getpid();
- nlh.nlmsg_len = NLMSG_LENGTH(size);
-
- iov[0].iov_base = &nlh;
- iov[0].iov_len = sizeof(nlh);
-
- iov[1].iov_base = msg;
- iov[1].iov_len = size;
-
- memset(&message, 0, sizeof(message));
- message.msg_name = &addr;
- message.msg_namelen = sizeof(addr);
- message.msg_iov = iov;
- message.msg_iovlen = 2;
-
- return sendmsg(fd, &message, 0);
-}
-
void print_usage(char *argv[])
{
fprintf(stderr, "Usage: %s [options]\n"
@@ -180,17 +142,14 @@ void print_usage(char *argv[])
int main(int argc, char *argv[])
{
- int fd, len, nl_group;
+ int vss_fd, len;
int error;
- struct cn_msg *message;
struct pollfd pfd;
- struct nlmsghdr *incoming_msg;
- struct cn_msg *incoming_cn_msg;
int op;
- struct hv_vss_msg *vss_msg;
- char *vss_recv_buffer;
- size_t vss_recv_buffer_len;
+ struct hv_vss_msg vss_msg[1];
int daemonize = 1, long_index = 0, opt;
+ int in_handshake = 1;
+ __u32 kernel_modver;
static struct option long_options[] = {
{"help", no_argument, 0, 'h' },
@@ -217,98 +176,62 @@ int main(int argc, char *argv[])
openlog("Hyper-V VSS", 0, LOG_USER);
syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
- vss_recv_buffer_len = NLMSG_LENGTH(0) + sizeof(struct cn_msg) + sizeof(struct hv_vss_msg);
- vss_recv_buffer = calloc(1, vss_recv_buffer_len);
- if (!vss_recv_buffer) {
- syslog(LOG_ERR, "Failed to allocate netlink buffers");
- exit(EXIT_FAILURE);
- }
-
- fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
- if (fd < 0) {
- syslog(LOG_ERR, "netlink socket creation failed; error:%d %s",
- errno, strerror(errno));
- exit(EXIT_FAILURE);
- }
- addr.nl_family = AF_NETLINK;
- addr.nl_pad = 0;
- addr.nl_pid = 0;
- addr.nl_groups = 0;
-
-
- error = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
- if (error < 0) {
- syslog(LOG_ERR, "bind failed; error:%d %s", errno, strerror(errno));
- close(fd);
- exit(EXIT_FAILURE);
- }
- nl_group = CN_VSS_IDX;
- if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &nl_group, sizeof(nl_group)) < 0) {
- syslog(LOG_ERR, "setsockopt failed; error:%d %s", errno, strerror(errno));
- close(fd);
+ vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
+ if (vss_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
+ errno, strerror(errno));
exit(EXIT_FAILURE);
}
/*
* Register ourselves with the kernel.
*/
- message = (struct cn_msg *)vss_recv_buffer;
- message->id.idx = CN_VSS_IDX;
- message->id.val = CN_VSS_VAL;
- message->ack = 0;
- vss_msg = (struct hv_vss_msg *)message->data;
- vss_msg->vss_hdr.operation = VSS_OP_REGISTER;
+ vss_msg->vss_hdr.operation = VSS_OP_REGISTER1;
- message->len = sizeof(struct hv_vss_msg);
-
- len = netlink_send(fd, message);
+ len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
if (len < 0) {
- syslog(LOG_ERR, "netlink_send failed; error:%d %s", errno, strerror(errno));
- close(fd);
+ syslog(LOG_ERR, "registration to kernel failed; error: %d %s",
+ errno, strerror(errno));
+ close(vss_fd);
exit(EXIT_FAILURE);
}
- pfd.fd = fd;
+ pfd.fd = vss_fd;
while (1) {
- struct sockaddr *addr_p = (struct sockaddr *) &addr;
- socklen_t addr_l = sizeof(addr);
pfd.events = POLLIN;
pfd.revents = 0;
if (poll(&pfd, 1, -1) < 0) {
syslog(LOG_ERR, "poll failed; error:%d %s", errno, strerror(errno));
if (errno == EINVAL) {
- close(fd);
+ close(vss_fd);
exit(EXIT_FAILURE);
}
else
continue;
}
- len = recvfrom(fd, vss_recv_buffer, vss_recv_buffer_len, 0,
- addr_p, &addr_l);
+ len = read(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
- if (len < 0) {
- syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s",
- addr.nl_pid, errno, strerror(errno));
- close(fd);
- return -1;
- }
-
- if (addr.nl_pid) {
- syslog(LOG_WARNING,
- "Received packet from untrusted pid:%u",
- addr.nl_pid);
+ if (in_handshake) {
+ if (len != sizeof(kernel_modver)) {
+ syslog(LOG_ERR, "invalid version negotiation");
+ exit(EXIT_FAILURE);
+ }
+ kernel_modver = *(__u32 *)vss_msg;
+ in_handshake = 0;
+ syslog(LOG_INFO, "VSS: kernel module version: %d",
+ kernel_modver);
continue;
}
- incoming_msg = (struct nlmsghdr *)vss_recv_buffer;
-
- if (incoming_msg->nlmsg_type != NLMSG_DONE)
- continue;
+ if (len != sizeof(struct hv_vss_msg)) {
+ syslog(LOG_ERR, "read failed; error:%d %s",
+ errno, strerror(errno));
+ close(vss_fd);
+ return EXIT_FAILURE;
+ }
- incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg);
- vss_msg = (struct hv_vss_msg *)incoming_cn_msg->data;
op = vss_msg->vss_hdr.operation;
error = HV_S_OK;
@@ -331,12 +254,14 @@ int main(int argc, char *argv[])
syslog(LOG_ERR, "Illegal op:%d\n", op);
}
vss_msg->error = error;
- len = netlink_send(fd, incoming_cn_msg);
- if (len < 0) {
- syslog(LOG_ERR, "net_link send failed; error:%d %s",
- errno, strerror(errno));
+ len = write(vss_fd, &error, sizeof(struct hv_vss_msg));
+ if (len != sizeof(struct hv_vss_msg)) {
+ syslog(LOG_ERR, "write failed; error: %d %s", errno,
+ strerror(errno));
exit(EXIT_FAILURE);
}
}
+ close(vss_fd);
+ exit(0);
}
diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus
new file mode 100644
index 0000000..162a378
--- /dev/null
+++ b/tools/hv/lsvmbus
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+import os
+from optparse import OptionParser
+
+parser = OptionParser()
+parser.add_option("-v", "--verbose", dest="verbose",
+ help="print verbose messages. Try -vv, -vvv for \
+ more verbose messages", action="count")
+
+(options, args) = parser.parse_args()
+
+verbose = 0
+if options.verbose is not None:
+ verbose = options.verbose
+
+vmbus_sys_path = '/sys/bus/vmbus/devices'
+if not os.path.isdir(vmbus_sys_path):
+ print "%s doesn't exist: exiting..." % vmbus_sys_path
+ exit(-1)
+
+vmbus_dev_dict = {
+ '{0e0b6031-5213-4934-818b-38d90ced39db}' : '[Operating system shutdown]',
+ '{9527e630-d0ae-497b-adce-e80ab0175caf}' : '[Time Synchronization]',
+ '{57164f39-9115-4e78-ab55-382f3bd5422d}' : '[Heartbeat]',
+ '{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}' : '[Data Exchange]',
+ '{35fa2e29-ea23-4236-96ae-3a6ebacba440}' : '[Backup (volume checkpoint)]',
+ '{34d14be3-dee4-41c8-9ae7-6b174977c192}' : '[Guest services]',
+ '{525074dc-8985-46e2-8057-a307dc18a502}' : '[Dynamic Memory]',
+ '{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}' : 'Synthetic mouse',
+ '{f912ad6d-2b17-48ea-bd65-f927a61c7684}' : 'Synthetic keyboard',
+ '{da0a7802-e377-4aac-8e77-0558eb1073f8}' : 'Synthetic framebuffer adapter',
+ '{f8615163-df3e-46c5-913f-f2d2f965ed0e}' : 'Synthetic network adapter',
+ '{32412632-86cb-44a2-9b5c-50d1417354f5}' : 'Synthetic IDE Controller',
+ '{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}' : 'Synthetic SCSI Controller',
+ '{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}' : 'Synthetic fiber channel adapter',
+ '{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}' : 'Synthetic RDMA adapter',
+ '{276aacf4-ac15-426c-98dd-7521ad3f01fe}' : '[Reserved system device]',
+ '{f8e65716-3cb3-4a06-9a60-1889c5cccab5}' : '[Reserved system device]',
+ '{3375baf4-9e15-4b30-b765-67acb10d607b}' : '[Reserved system device]',
+}
+
+def get_vmbus_dev_attr(dev_name, attr):
+ try:
+ f = open('%s/%s/%s' % (vmbus_sys_path, dev_name, attr), 'r')
+ lines = f.readlines()
+ f.close()
+ except IOError:
+ lines = []
+
+ return lines
+
+class VMBus_Dev:
+ pass
+
+
+vmbus_dev_list = []
+
+for f in os.listdir(vmbus_sys_path):
+ vmbus_id = get_vmbus_dev_attr(f, 'id')[0].strip()
+ class_id = get_vmbus_dev_attr(f, 'class_id')[0].strip()
+ device_id = get_vmbus_dev_attr(f, 'device_id')[0].strip()
+ dev_desc = vmbus_dev_dict.get(class_id, 'Unknown')
+
+ chn_vp_mapping = get_vmbus_dev_attr(f, 'channel_vp_mapping')
+ chn_vp_mapping = [c.strip() for c in chn_vp_mapping]
+ chn_vp_mapping = sorted(chn_vp_mapping,
+ key = lambda c : int(c.split(':')[0]))
+
+ chn_vp_mapping = ['\tRel_ID=%s, target_cpu=%s' %
+ (c.split(':')[0], c.split(':')[1])
+ for c in chn_vp_mapping]
+ d = VMBus_Dev()
+ d.sysfs_path = '%s/%s' % (vmbus_sys_path, f)
+ d.vmbus_id = vmbus_id
+ d.class_id = class_id
+ d.device_id = device_id
+ d.dev_desc = dev_desc
+ d.chn_vp_mapping = '\n'.join(chn_vp_mapping)
+ if d.chn_vp_mapping:
+ d.chn_vp_mapping += '\n'
+
+ vmbus_dev_list.append(d)
+
+
+vmbus_dev_list = sorted(vmbus_dev_list, key = lambda d : int(d.vmbus_id))
+
+format0 = '%2s: %s'
+format1 = '%2s: Class_ID = %s - %s\n%s'
+format2 = '%2s: Class_ID = %s - %s\n\tDevice_ID = %s\n\tSysfs path: %s\n%s'
+
+for d in vmbus_dev_list:
+ if verbose == 0:
+ print ('VMBUS ID ' + format0) % (d.vmbus_id, d.dev_desc)
+ elif verbose == 1:
+ print ('VMBUS ID ' + format1) % \
+ (d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping)
+ else:
+ print ('VMBUS ID ' + format2) % \
+ (d.vmbus_id, d.class_id, d.dev_desc, \
+ d.device_id, d.sysfs_path, d.chn_vp_mapping)
diff --git a/tools/iio/Makefile b/tools/iio/Makefile
index bf7ae6d..3a7a54f 100644
--- a/tools/iio/Makefile
+++ b/tools/iio/Makefile
@@ -1,5 +1,5 @@
-CC = gcc
-CFLAGS = -Wall -g -D_GNU_SOURCE
+CC = $(CROSS_COMPILE)gcc
+CFLAGS += -Wall -g -D_GNU_SOURCE
all: iio_event_monitor lsiio generic_buffer
diff --git a/tools/iio/generic_buffer.c b/tools/iio/generic_buffer.c
index f805493..9f7b85b 100644
--- a/tools/iio/generic_buffer.c
+++ b/tools/iio/generic_buffer.c
@@ -51,47 +51,113 @@ int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
if (bytes % channels[i].bytes == 0)
channels[i].location = bytes;
else
- channels[i].location = bytes - bytes%channels[i].bytes
- + channels[i].bytes;
+ channels[i].location = bytes - bytes % channels[i].bytes
+ + channels[i].bytes;
+
bytes = channels[i].location + channels[i].bytes;
i++;
}
+
return bytes;
}
-void print2byte(int input, struct iio_channel_info *info)
+void print1byte(uint8_t input, struct iio_channel_info *info)
+{
+ /*
+ * Shift before conversion to avoid sign extension
+ * of left aligned data
+ */
+ input >>= info->shift;
+ input &= info->mask;
+ if (info->is_signed) {
+ int8_t val = (int8_t)(input << (8 - info->bits_used)) >>
+ (8 - info->bits_used);
+ printf("%05f ", ((float)val + info->offset) * info->scale);
+ } else {
+ printf("%05f ", ((float)input + info->offset) * info->scale);
+ }
+}
+
+void print2byte(uint16_t input, struct iio_channel_info *info)
{
/* First swap if incorrect endian */
if (info->be)
- input = be16toh((uint16_t)input);
+ input = be16toh(input);
else
- input = le16toh((uint16_t)input);
+ input = le16toh(input);
/*
* Shift before conversion to avoid sign extension
* of left aligned data
*/
input >>= info->shift;
+ input &= info->mask;
if (info->is_signed) {
- int16_t val = input;
+ int16_t val = (int16_t)(input << (16 - info->bits_used)) >>
+ (16 - info->bits_used);
+ printf("%05f ", ((float)val + info->offset) * info->scale);
+ } else {
+ printf("%05f ", ((float)input + info->offset) * info->scale);
+ }
+}
- val &= (1 << info->bits_used) - 1;
- val = (int16_t)(val << (16 - info->bits_used)) >>
- (16 - info->bits_used);
- printf("%05f ", ((float)val + info->offset)*info->scale);
+void print4byte(uint32_t input, struct iio_channel_info *info)
+{
+ /* First swap if incorrect endian */
+ if (info->be)
+ input = be32toh(input);
+ else
+ input = le32toh(input);
+
+ /*
+ * Shift before conversion to avoid sign extension
+ * of left aligned data
+ */
+ input >>= info->shift;
+ input &= info->mask;
+ if (info->is_signed) {
+ int32_t val = (int32_t)(input << (32 - info->bits_used)) >>
+ (32 - info->bits_used);
+ printf("%05f ", ((float)val + info->offset) * info->scale);
} else {
- uint16_t val = input;
+ printf("%05f ", ((float)input + info->offset) * info->scale);
+ }
+}
- val &= (1 << info->bits_used) - 1;
- printf("%05f ", ((float)val + info->offset)*info->scale);
+void print8byte(uint64_t input, struct iio_channel_info *info)
+{
+ /* First swap if incorrect endian */
+ if (info->be)
+ input = be64toh(input);
+ else
+ input = le64toh(input);
+
+ /*
+ * Shift before conversion to avoid sign extension
+ * of left aligned data
+ */
+ input >>= info->shift;
+ input &= info->mask;
+ if (info->is_signed) {
+ int64_t val = (int64_t)(input << (64 - info->bits_used)) >>
+ (64 - info->bits_used);
+ /* special case for timestamp */
+ if (info->scale == 1.0f && info->offset == 0.0f)
+ printf("%" PRId64 " ", val);
+ else
+ printf("%05f ",
+ ((float)val + info->offset) * info->scale);
+ } else {
+ printf("%05f ", ((float)input + info->offset) * info->scale);
}
}
+
/**
* process_scan() - print out the values in SI units
* @data: pointer to the start of the scan
- * @channels: information about the channels. Note
- * size_from_channelarray must have been called first to fill the
- * location offsets.
+ * @channels: information about the channels.
+ * Note: size_from_channelarray must have been called first
+ * to fill the location offsets.
* @num_channels: number of channels
**/
void process_scan(char *data,
@@ -103,37 +169,21 @@ void process_scan(char *data,
for (k = 0; k < num_channels; k++)
switch (channels[k].bytes) {
/* only a few cases implemented so far */
+ case 1:
+ print1byte(*(uint8_t *)(data + channels[k].location),
+ &channels[k]);
+ break;
case 2:
print2byte(*(uint16_t *)(data + channels[k].location),
&channels[k]);
break;
case 4:
- if (!channels[k].is_signed) {
- uint32_t val = *(uint32_t *)
- (data + channels[k].location);
- printf("%05f ", ((float)val +
- channels[k].offset)*
- channels[k].scale);
-
- }
+ print4byte(*(uint32_t *)(data + channels[k].location),
+ &channels[k]);
break;
case 8:
- if (channels[k].is_signed) {
- int64_t val = *(int64_t *)
- (data +
- channels[k].location);
- if ((val >> channels[k].bits_used) & 1)
- val = (val & channels[k].mask) |
- ~channels[k].mask;
- /* special case for timestamp */
- if (channels[k].scale == 1.0f &&
- channels[k].offset == 0.0f)
- printf("%" PRId64 " ", val);
- else
- printf("%05f ", ((float)val +
- channels[k].offset)*
- channels[k].scale);
- }
+ print8byte(*(uint64_t *)(data + channels[k].location),
+ &channels[k]);
break;
default:
break;
@@ -141,6 +191,19 @@ void process_scan(char *data,
printf("\n");
}
+void print_usage(void)
+{
+ fprintf(stderr, "Usage: generic_buffer [options]...\n"
+ "Capture, convert and output data from IIO device buffer\n"
+ " -c <n> Do n conversions\n"
+ " -e Disable wait for event (new data)\n"
+ " -g Use trigger-less mode\n"
+ " -l <n> Set buffer length to n samples\n"
+ " -n <name> Set device name (mandatory)\n"
+ " -t <name> Set trigger name\n"
+ " -w <n> Set delay between reads in us (event-less mode)\n");
+}
+
int main(int argc, char **argv)
{
unsigned long num_loops = 2;
@@ -166,8 +229,28 @@ int main(int argc, char **argv)
struct iio_channel_info *channels;
- while ((c = getopt(argc, argv, "l:w:c:et:n:g")) != -1) {
+ while ((c = getopt(argc, argv, "c:egl:n:t:w:")) != -1) {
switch (c) {
+ case 'c':
+ errno = 0;
+ num_loops = strtoul(optarg, &dummy, 10);
+ if (errno)
+ return -errno;
+
+ break;
+ case 'e':
+ noevents = 1;
+ break;
+ case 'g':
+ notrigger = 1;
+ break;
+ case 'l':
+ errno = 0;
+ buf_len = strtoul(optarg, &dummy, 10);
+ if (errno)
+ return -errno;
+
+ break;
case 'n':
device_name = optarg;
break;
@@ -175,42 +258,39 @@ int main(int argc, char **argv)
trigger_name = optarg;
datardytrigger = 0;
break;
- case 'e':
- noevents = 1;
- break;
- case 'c':
- num_loops = strtoul(optarg, &dummy, 10);
- break;
case 'w':
+ errno = 0;
timedelay = strtoul(optarg, &dummy, 10);
- break;
- case 'l':
- buf_len = strtoul(optarg, &dummy, 10);
- break;
- case 'g':
- notrigger = 1;
+ if (errno)
+ return -errno;
break;
case '?':
+ print_usage();
return -1;
}
}
- if (device_name == NULL)
+ if (!device_name) {
+ fprintf(stderr, "Device name not set\n");
+ print_usage();
return -1;
+ }
/* Find the device requested */
dev_num = find_type_by_name(device_name, "iio:device");
if (dev_num < 0) {
- printf("Failed to find the %s\n", device_name);
- ret = -ENODEV;
- goto error_ret;
+ fprintf(stderr, "Failed to find the %s\n", device_name);
+ return dev_num;
}
+
printf("iio device number being used is %d\n", dev_num);
- asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num);
+ ret = asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num);
+ if (ret < 0)
+ return -ENOMEM;
if (!notrigger) {
- if (trigger_name == NULL) {
+ if (!trigger_name) {
/*
* Build the trigger name. If it is device associated
* its name is <device_name>_dev[n] where n matches
@@ -220,20 +300,23 @@ int main(int argc, char **argv)
"%s-dev%d", device_name, dev_num);
if (ret < 0) {
ret = -ENOMEM;
- goto error_ret;
+ goto error_free_dev_dir_name;
}
}
/* Verify the trigger exists */
trig_num = find_type_by_name(trigger_name, "trigger");
if (trig_num < 0) {
- printf("Failed to find the trigger %s\n", trigger_name);
- ret = -ENODEV;
+ fprintf(stderr, "Failed to find the trigger %s\n",
+ trigger_name);
+ ret = trig_num;
goto error_free_triggername;
}
+
printf("iio trigger number being used is %d\n", trig_num);
- } else
+ } else {
printf("trigger-less mode selected\n");
+ }
/*
* Parse the files in scan_elements to identify what channels are
@@ -241,8 +324,8 @@ int main(int argc, char **argv)
*/
ret = build_channel_array(dev_dir_name, &channels, &num_channels);
if (ret) {
- printf("Problem reading scan element information\n");
- printf("diag %s\n", dev_dir_name);
+ fprintf(stderr, "Problem reading scan element information\n"
+ "diag %s\n", dev_dir_name);
goto error_free_triggername;
}
@@ -255,18 +338,21 @@ int main(int argc, char **argv)
"%siio:device%d/buffer", iio_dir, dev_num);
if (ret < 0) {
ret = -ENOMEM;
- goto error_free_triggername;
+ goto error_free_channels;
}
if (!notrigger) {
printf("%s %s\n", dev_dir_name, trigger_name);
- /* Set the device trigger to be the data ready trigger found
- * above */
+ /*
+ * Set the device trigger to be the data ready trigger found
+ * above
+ */
ret = write_sysfs_string_and_verify("trigger/current_trigger",
dev_dir_name,
trigger_name);
if (ret < 0) {
- printf("Failed to write current_trigger file\n");
+ fprintf(stderr,
+ "Failed to write current_trigger file\n");
goto error_free_buf_dir_name;
}
}
@@ -278,10 +364,14 @@ int main(int argc, char **argv)
/* Enable the buffer */
ret = write_sysfs_int("enable", buf_dir_name, 1);
- if (ret < 0)
+ if (ret < 0) {
+ fprintf(stderr,
+ "Failed to enable buffer: %s\n", strerror(-ret));
goto error_free_buf_dir_name;
+ }
+
scan_size = size_from_channelarray(channels, num_channels);
- data = malloc(scan_size*buf_len);
+ data = malloc(scan_size * buf_len);
if (!data) {
ret = -ENOMEM;
goto error_free_buf_dir_name;
@@ -295,13 +385,12 @@ int main(int argc, char **argv)
/* Attempt to open non blocking the access dev */
fp = open(buffer_access, O_RDONLY | O_NONBLOCK);
- if (fp == -1) { /* If it isn't there make the node */
- printf("Failed to open %s\n", buffer_access);
+ if (fp == -1) { /* TODO: If it isn't there make the node */
ret = -errno;
+ fprintf(stderr, "Failed to open %s\n", buffer_access);
goto error_free_buffer_access;
}
- /* Wait for events 10 times */
for (j = 0; j < num_loops; j++) {
if (!noevents) {
struct pollfd pfd = {
@@ -309,27 +398,31 @@ int main(int argc, char **argv)
.events = POLLIN,
};
- poll(&pfd, 1, -1);
- toread = buf_len;
+ ret = poll(&pfd, 1, -1);
+ if (ret < 0) {
+ ret = -errno;
+ goto error_close_buffer_access;
+ } else if (ret == 0) {
+ continue;
+ }
+ toread = buf_len;
} else {
usleep(timedelay);
toread = 64;
}
- read_size = read(fp,
- data,
- toread*scan_size);
+ read_size = read(fp, data, toread * scan_size);
if (read_size < 0) {
- if (errno == -EAGAIN) {
- printf("nothing available\n");
+ if (errno == EAGAIN) {
+ fprintf(stderr, "nothing available\n");
continue;
- } else
+ } else {
break;
+ }
}
- for (i = 0; i < read_size/scan_size; i++)
- process_scan(data + scan_size*i,
- channels,
+ for (i = 0; i < read_size / scan_size; i++)
+ process_scan(data + scan_size * i, channels,
num_channels);
}
@@ -340,20 +433,34 @@ int main(int argc, char **argv)
if (!notrigger)
/* Disconnect the trigger - just write a dummy name. */
- write_sysfs_string("trigger/current_trigger",
- dev_dir_name, "NULL");
+ ret = write_sysfs_string("trigger/current_trigger",
+ dev_dir_name, "NULL");
+ if (ret < 0)
+ fprintf(stderr, "Failed to write to %s\n",
+ dev_dir_name);
error_close_buffer_access:
- close(fp);
-error_free_data:
- free(data);
+ if (close(fp) == -1)
+ perror("Failed to close buffer");
+
error_free_buffer_access:
free(buffer_access);
+error_free_data:
+ free(data);
error_free_buf_dir_name:
free(buf_dir_name);
+error_free_channels:
+ for (i = num_channels - 1; i >= 0; i--) {
+ free(channels[i].name);
+ free(channels[i].generic_name);
+ }
+ free(channels);
error_free_triggername:
if (datardytrigger)
free(trigger_name);
-error_ret:
+
+error_free_dev_dir_name:
+ free(dev_dir_name);
+
return ret;
}
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index 427c271..cd3fd41 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -13,7 +13,6 @@
*
* Usage:
* iio_event_monitor <device_name>
- *
*/
#include <unistd.h>
@@ -51,6 +50,9 @@ static const char * const iio_chan_type_name_spec[] = {
[IIO_HUMIDITYRELATIVE] = "humidityrelative",
[IIO_ACTIVITY] = "activity",
[IIO_STEPS] = "steps",
+ [IIO_ENERGY] = "energy",
+ [IIO_DISTANCE] = "distance",
+ [IIO_VELOCITY] = "velocity",
};
static const char * const iio_ev_type_text[] = {
@@ -99,6 +101,7 @@ static const char * const iio_modifier_names[] = {
[IIO_MOD_JOGGING] = "jogging",
[IIO_MOD_WALKING] = "walking",
[IIO_MOD_STILL] = "still",
+ [IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z] = "sqrt(x^2+y^2+z^2)",
};
static bool event_is_known(struct iio_event_data *event)
@@ -130,6 +133,9 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_HUMIDITYRELATIVE:
case IIO_ACTIVITY:
case IIO_STEPS:
+ case IIO_ENERGY:
+ case IIO_DISTANCE:
+ case IIO_VELOCITY:
break;
default:
return false;
@@ -167,6 +173,7 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_MOD_JOGGING:
case IIO_MOD_WALKING:
case IIO_MOD_STILL:
+ case IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z:
break;
default:
return false;
@@ -208,31 +215,29 @@ static void print_event(struct iio_event_data *event)
bool diff = IIO_EVENT_CODE_EXTRACT_DIFF(event->id);
if (!event_is_known(event)) {
- printf("Unknown event: time: %lld, id: %llx\n",
- event->timestamp, event->id);
+ fprintf(stderr, "Unknown event: time: %lld, id: %llx\n",
+ event->timestamp, event->id);
+
return;
}
- printf("Event: time: %lld, ", event->timestamp);
+ printf("Event: time: %lld, type: %s", event->timestamp,
+ iio_chan_type_name_spec[type]);
- if (mod != IIO_NO_MOD) {
- printf("type: %s(%s), ",
- iio_chan_type_name_spec[type],
- iio_modifier_names[mod]);
- } else {
- printf("type: %s, ",
- iio_chan_type_name_spec[type]);
- }
+ if (mod != IIO_NO_MOD)
+ printf("(%s)", iio_modifier_names[mod]);
- if (diff && chan >= 0 && chan2 >= 0)
- printf("channel: %d-%d, ", chan, chan2);
- else if (chan >= 0)
- printf("channel: %d, ", chan);
+ if (chan >= 0) {
+ printf(", channel: %d", chan);
+ if (diff && chan2 >= 0)
+ printf("-%d", chan2);
+ }
- printf("evtype: %s", iio_ev_type_text[ev_type]);
+ printf(", evtype: %s", iio_ev_type_text[ev_type]);
if (dir != IIO_EV_DIR_NONE)
printf(", direction: %s", iio_ev_dir_text[dir]);
+
printf("\n");
}
@@ -246,7 +251,7 @@ int main(int argc, char **argv)
int fd, event_fd;
if (argc <= 1) {
- printf("Usage: %s <device_name>\n", argv[0]);
+ fprintf(stderr, "Usage: %s <device_name>\n", argv[0]);
return -1;
}
@@ -255,31 +260,38 @@ int main(int argc, char **argv)
dev_num = find_type_by_name(device_name, "iio:device");
if (dev_num >= 0) {
printf("Found IIO device with name %s with device number %d\n",
- device_name, dev_num);
+ device_name, dev_num);
ret = asprintf(&chrdev_name, "/dev/iio:device%d", dev_num);
- if (ret < 0) {
- ret = -ENOMEM;
- goto error_ret;
- }
+ if (ret < 0)
+ return -ENOMEM;
} else {
- /* If we can't find a IIO device by name assume device_name is a
- IIO chrdev */
+ /*
+ * If we can't find an IIO device by name assume device_name is
+ * an IIO chrdev
+ */
chrdev_name = strdup(device_name);
+ if (!chrdev_name)
+ return -ENOMEM;
}
fd = open(chrdev_name, 0);
if (fd == -1) {
- fprintf(stdout, "Failed to open %s\n", chrdev_name);
ret = -errno;
+ fprintf(stderr, "Failed to open %s\n", chrdev_name);
goto error_free_chrdev_name;
}
ret = ioctl(fd, IIO_GET_EVENT_FD_IOCTL, &event_fd);
+ if (ret == -1 || event_fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to retrieve event fd\n");
+ if (close(fd) == -1)
+ perror("Failed to close character device file");
- close(fd);
+ goto error_free_chrdev_name;
+ }
- if (ret == -1 || event_fd == -1) {
- fprintf(stdout, "Failed to retrieve event fd\n");
+ if (close(fd) == -1) {
ret = -errno;
goto error_free_chrdev_name;
}
@@ -288,21 +300,29 @@ int main(int argc, char **argv)
ret = read(event_fd, &event, sizeof(event));
if (ret == -1) {
if (errno == EAGAIN) {
- printf("nothing available\n");
+ fprintf(stderr, "nothing available\n");
continue;
} else {
- perror("Failed to read event from device");
ret = -errno;
+ perror("Failed to read event from device");
break;
}
}
+ if (ret != sizeof(event)) {
+ fprintf(stderr, "Reading event failed!\n");
+ ret = -EIO;
+ break;
+ }
+
print_event(&event);
}
- close(event_fd);
+ if (close(event_fd) == -1)
+ perror("Failed to close event file");
+
error_free_chrdev_name:
free(chrdev_name);
-error_ret:
+
return ret;
}
diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c
index 6f64521..5eb6793 100644
--- a/tools/iio/iio_utils.c
+++ b/tools/iio/iio_utils.c
@@ -6,9 +6,6 @@
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
-#ifndef _IIO_UTILS_H
-#define _IIO_UTILS_H
-
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
@@ -29,16 +26,17 @@ static char * const iio_direction[] = {
* iioutils_break_up_name() - extract generic name from full channel name
* @full_name: the full channel name
* @generic_name: the output generic channel name
+ *
+ * Returns 0 on success, or a negative error code if string extraction failed.
**/
-int iioutils_break_up_name(const char *full_name,
- char **generic_name)
+int iioutils_break_up_name(const char *full_name, char **generic_name)
{
char *current;
char *w, *r;
char *working, *prefix = "";
- int i;
+ int i, ret;
- for (i = 0; i < sizeof(iio_direction) / sizeof(iio_direction[0]); i++)
+ for (i = 0; i < ARRAY_SIZE(iio_direction); i++)
if (!strncmp(full_name, iio_direction[i],
strlen(iio_direction[i]))) {
prefix = iio_direction[i];
@@ -46,7 +44,14 @@ int iioutils_break_up_name(const char *full_name,
}
current = strdup(full_name + strlen(prefix) + 1);
+ if (!current)
+ return -ENOMEM;
+
working = strtok(current, "_\0");
+ if (!working) {
+ free(current);
+ return -EINVAL;
+ }
w = working;
r = working;
@@ -56,34 +61,34 @@ int iioutils_break_up_name(const char *full_name,
*w = *r;
w++;
}
+
r++;
}
*w = '\0';
- asprintf(generic_name, "%s_%s", prefix, working);
+ ret = asprintf(generic_name, "%s_%s", prefix, working);
free(current);
- return 0;
+ return (ret == -1) ? -ENOMEM : 0;
}
/**
* iioutils_get_type() - find and process _type attribute data
* @is_signed: output whether channel is signed
* @bytes: output how many bytes the channel storage occupies
+ * @bits_used: output number of valid bits of data
+ * @shift: output amount of bits to shift right data before applying bit mask
* @mask: output a bit mask for the raw data
- * @be: big endian
- * @device_dir: the iio device directory
+ * @be: output if data in big endian
+ * @device_dir: the IIO device directory
* @name: the channel name
* @generic_name: the channel type name
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
**/
-int iioutils_get_type(unsigned *is_signed,
- unsigned *bytes,
- unsigned *bits_used,
- unsigned *shift,
- uint64_t *mask,
- unsigned *be,
- const char *device_dir,
- const char *name,
- const char *generic_name)
+int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
+ unsigned *shift, uint64_t *mask, unsigned *be,
+ const char *device_dir, const char *name,
+ const char *generic_name)
{
FILE *sysfsfp;
int ret;
@@ -94,10 +99,9 @@ int iioutils_get_type(unsigned *is_signed,
const struct dirent *ent;
ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir);
- if (ret < 0) {
- ret = -ENOMEM;
- goto error_ret;
- }
+ if (ret < 0)
+ return -ENOMEM;
+
ret = asprintf(&builtname, FORMAT_TYPE_FILE, name);
if (ret < 0) {
ret = -ENOMEM;
@@ -110,11 +114,13 @@ int iioutils_get_type(unsigned *is_signed,
}
dp = opendir(scan_el_dir);
- if (dp == NULL) {
+ if (!dp) {
ret = -errno;
goto error_free_builtname_generic;
}
- while (ent = readdir(dp), ent != NULL)
+
+ ret = -ENOENT;
+ while (ent = readdir(dp), ent)
/*
* Do we allow devices to override a generic name with
* a specific one?
@@ -127,10 +133,12 @@ int iioutils_get_type(unsigned *is_signed,
ret = -ENOMEM;
goto error_closedir;
}
+
sysfsfp = fopen(filename, "r");
- if (sysfsfp == NULL) {
- printf("failed to open %s\n", filename);
+ if (!sysfsfp) {
ret = -errno;
+ fprintf(stderr, "failed to open %s\n",
+ filename);
goto error_free_filename;
}
@@ -141,49 +149,73 @@ int iioutils_get_type(unsigned *is_signed,
bits_used,
&padint, shift);
if (ret < 0) {
- printf("failed to pass scan type description\n");
ret = -errno;
+ fprintf(stderr,
+ "failed to pass scan type description\n");
+ goto error_close_sysfsfp;
+ } else if (ret != 5) {
+ ret = -EIO;
+ fprintf(stderr,
+ "scan type description didn't match\n");
goto error_close_sysfsfp;
}
+
*be = (endianchar == 'b');
*bytes = padint / 8;
if (*bits_used == 64)
*mask = ~0;
else
- *mask = (1 << *bits_used) - 1;
- if (signchar == 's')
- *is_signed = 1;
- else
- *is_signed = 0;
- fclose(sysfsfp);
- free(filename);
+ *mask = (1ULL << *bits_used) - 1;
+
+ *is_signed = (signchar == 's');
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ fprintf(stderr, "Failed to close %s\n",
+ filename);
+ goto error_free_filename;
+ }
- filename = 0;
sysfsfp = 0;
+ free(filename);
+ filename = 0;
}
+
error_close_sysfsfp:
if (sysfsfp)
- fclose(sysfsfp);
+ if (fclose(sysfsfp))
+ perror("iioutils_get_type(): Failed to close file");
+
error_free_filename:
if (filename)
free(filename);
+
error_closedir:
- closedir(dp);
+ if (closedir(dp) == -1)
+ perror("iioutils_get_type(): Failed to close directory");
+
error_free_builtname_generic:
free(builtname_generic);
error_free_builtname:
free(builtname);
error_free_scan_el_dir:
free(scan_el_dir);
-error_ret:
+
return ret;
}
-int iioutils_get_param_float(float *output,
- const char *param_name,
- const char *device_dir,
- const char *name,
- const char *generic_name)
+/**
+ * iioutils_get_param_float() - read a float value from a channel parameter
+ * @output: output the float value
+ * @param_name: the parameter name to read
+ * @device_dir: the IIO device directory in sysfs
+ * @name: the channel name
+ * @generic_name: the channel type name
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int iioutils_get_param_float(float *output, const char *param_name,
+ const char *device_dir, const char *name,
+ const char *generic_name)
{
FILE *sysfsfp;
int ret;
@@ -193,22 +225,24 @@ int iioutils_get_param_float(float *output,
const struct dirent *ent;
ret = asprintf(&builtname, "%s_%s", name, param_name);
- if (ret < 0) {
- ret = -ENOMEM;
- goto error_ret;
- }
+ if (ret < 0)
+ return -ENOMEM;
+
ret = asprintf(&builtname_generic,
"%s_%s", generic_name, param_name);
if (ret < 0) {
ret = -ENOMEM;
goto error_free_builtname;
}
+
dp = opendir(device_dir);
- if (dp == NULL) {
+ if (!dp) {
ret = -errno;
goto error_free_builtname_generic;
}
- while (ent = readdir(dp), ent != NULL)
+
+ ret = -ENOENT;
+ while (ent = readdir(dp), ent)
if ((strcmp(builtname, ent->d_name) == 0) ||
(strcmp(builtname_generic, ent->d_name) == 0)) {
ret = asprintf(&filename,
@@ -217,60 +251,69 @@ int iioutils_get_param_float(float *output,
ret = -ENOMEM;
goto error_closedir;
}
+
sysfsfp = fopen(filename, "r");
if (!sysfsfp) {
ret = -errno;
goto error_free_filename;
}
- fscanf(sysfsfp, "%f", output);
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%f", output) != 1)
+ ret = errno ? -errno : -ENODATA;
+
break;
}
error_free_filename:
if (filename)
free(filename);
+
error_closedir:
- closedir(dp);
+ if (closedir(dp) == -1)
+ perror("iioutils_get_param_float(): Failed to close directory");
+
error_free_builtname_generic:
free(builtname_generic);
error_free_builtname:
free(builtname);
-error_ret:
+
return ret;
}
/**
- * bsort_channel_array_by_index() - reorder so that the array is in index order
- *
+ * bsort_channel_array_by_index() - sort the array in index order
+ * @ci_array: the iio_channel_info array to be sorted
+ * @cnt: the amount of array elements
**/
-void bsort_channel_array_by_index(struct iio_channel_info **ci_array,
- int cnt)
+void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt)
{
-
struct iio_channel_info temp;
int x, y;
for (x = 0; x < cnt; x++)
for (y = 0; y < (cnt - 1); y++)
- if ((*ci_array)[y].index > (*ci_array)[y+1].index) {
- temp = (*ci_array)[y + 1];
- (*ci_array)[y + 1] = (*ci_array)[y];
- (*ci_array)[y] = temp;
+ if (ci_array[y].index > ci_array[y + 1].index) {
+ temp = ci_array[y + 1];
+ ci_array[y + 1] = ci_array[y];
+ ci_array[y] = temp;
}
}
/**
* build_channel_array() - function to figure out what channels are present
* @device_dir: the IIO device directory in sysfs
- * @
+ * @ci_array: output the resulting array of iio_channel_info
+ * @counter: output the amount of array elements
+ *
+ * Returns 0 on success, otherwise a negative error code.
**/
int build_channel_array(const char *device_dir,
- struct iio_channel_info **ci_array,
- int *counter)
+ struct iio_channel_info **ci_array, int *counter)
{
DIR *dp;
FILE *sysfsfp;
- int count, i;
+ int count = 0, i;
struct iio_channel_info *current;
int ret;
const struct dirent *ent;
@@ -279,16 +322,16 @@ int build_channel_array(const char *device_dir,
*counter = 0;
ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir);
- if (ret < 0) {
- ret = -ENOMEM;
- goto error_ret;
- }
+ if (ret < 0)
+ return -ENOMEM;
+
dp = opendir(scan_el_dir);
- if (dp == NULL) {
+ if (!dp) {
ret = -errno;
goto error_free_name;
}
- while (ent = readdir(dp), ent != NULL)
+
+ while (ent = readdir(dp), ent)
if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"),
"_en") == 0) {
ret = asprintf(&filename,
@@ -297,26 +340,43 @@ int build_channel_array(const char *device_dir,
ret = -ENOMEM;
goto error_close_dir;
}
+
sysfsfp = fopen(filename, "r");
- if (sysfsfp == NULL) {
+ if (!sysfsfp) {
ret = -errno;
free(filename);
goto error_close_dir;
}
- fscanf(sysfsfp, "%i", &ret);
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%i", &ret) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("build_channel_array(): Failed to close file");
+
+ free(filename);
+ goto error_close_dir;
+ }
if (ret == 1)
(*counter)++;
- fclose(sysfsfp);
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ free(filename);
+ goto error_close_dir;
+ }
+
free(filename);
}
+
*ci_array = malloc(sizeof(**ci_array) * (*counter));
- if (*ci_array == NULL) {
+ if (!*ci_array) {
ret = -ENOMEM;
goto error_close_dir;
}
+
seekdir(dp, 0);
- count = 0;
- while (ent = readdir(dp), ent != NULL) {
+ while (ent = readdir(dp), ent) {
if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"),
"_en") == 0) {
int current_enabled = 0;
@@ -330,14 +390,29 @@ int build_channel_array(const char *device_dir,
count--;
goto error_cleanup_array;
}
+
sysfsfp = fopen(filename, "r");
- if (sysfsfp == NULL) {
+ if (!sysfsfp) {
+ ret = -errno;
free(filename);
+ count--;
+ goto error_cleanup_array;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%i", &current_enabled) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ free(filename);
+ count--;
+ goto error_cleanup_array;
+ }
+
+ if (fclose(sysfsfp)) {
ret = -errno;
+ free(filename);
+ count--;
goto error_cleanup_array;
}
- fscanf(sysfsfp, "%i", &current_enabled);
- fclose(sysfsfp);
if (!current_enabled) {
free(filename);
@@ -350,18 +425,23 @@ int build_channel_array(const char *device_dir,
current->name = strndup(ent->d_name,
strlen(ent->d_name) -
strlen("_en"));
- if (current->name == NULL) {
+ if (!current->name) {
free(filename);
ret = -ENOMEM;
+ count--;
goto error_cleanup_array;
}
+
/* Get the generic and specific name elements */
ret = iioutils_break_up_name(current->name,
&current->generic_name);
if (ret) {
free(filename);
+ free(current->name);
+ count--;
goto error_cleanup_array;
}
+
ret = asprintf(&filename,
"%s/%s_index",
scan_el_dir,
@@ -371,9 +451,32 @@ int build_channel_array(const char *device_dir,
ret = -ENOMEM;
goto error_cleanup_array;
}
+
sysfsfp = fopen(filename, "r");
- fscanf(sysfsfp, "%u", &current->index);
- fclose(sysfsfp);
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "failed to open %s\n",
+ filename);
+ free(filename);
+ goto error_cleanup_array;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%u", &current->index) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("build_channel_array(): Failed to close file");
+
+ free(filename);
+ goto error_cleanup_array;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ free(filename);
+ goto error_cleanup_array;
+ }
+
free(filename);
/* Find the scale */
ret = iioutils_get_param_float(&current->scale,
@@ -381,15 +484,17 @@ int build_channel_array(const char *device_dir,
device_dir,
current->name,
current->generic_name);
- if (ret < 0)
+ if ((ret < 0) && (ret != -ENOENT))
goto error_cleanup_array;
+
ret = iioutils_get_param_float(&current->offset,
"offset",
device_dir,
current->name,
current->generic_name);
- if (ret < 0)
+ if ((ret < 0) && (ret != -ENOENT))
goto error_cleanup_array;
+
ret = iioutils_get_type(&current->is_signed,
&current->bytes,
&current->bits_used,
@@ -399,182 +504,317 @@ int build_channel_array(const char *device_dir,
device_dir,
current->name,
current->generic_name);
+ if (ret < 0)
+ goto error_cleanup_array;
}
}
- closedir(dp);
+ if (closedir(dp) == -1) {
+ ret = -errno;
+ goto error_cleanup_array;
+ }
+
+ free(scan_el_dir);
/* reorder so that the array is in index order */
- bsort_channel_array_by_index(ci_array, *counter);
+ bsort_channel_array_by_index(*ci_array, *counter);
return 0;
error_cleanup_array:
- for (i = count - 1; i >= 0; i--)
+ for (i = count - 1; i >= 0; i--) {
free((*ci_array)[i].name);
+ free((*ci_array)[i].generic_name);
+ }
free(*ci_array);
+ *ci_array = NULL;
+ *counter = 0;
error_close_dir:
- closedir(dp);
+ if (dp)
+ if (closedir(dp) == -1)
+ perror("build_channel_array(): Failed to close dir");
+
error_free_name:
free(scan_el_dir);
-error_ret:
+
return ret;
}
+static int calc_digits(int num)
+{
+ int count = 0;
+
+ while (num != 0) {
+ num /= 10;
+ count++;
+ }
+
+ return count;
+}
+
/**
* find_type_by_name() - function to match top level types by name
* @name: top level type instance name
- * @type: the type of top level instance being sort
+ * @type: the type of top level instance being searched
*
+ * Returns the device number of a matched IIO device on success, otherwise a
+ * negative error code.
* Typical types this is used for are device and trigger.
**/
int find_type_by_name(const char *name, const char *type)
{
const struct dirent *ent;
- int number, numstrlen;
+ int number, numstrlen, ret;
- FILE *nameFile;
+ FILE *namefp;
DIR *dp;
char thisname[IIO_MAX_NAME_LENGTH];
char *filename;
dp = opendir(iio_dir);
- if (dp == NULL) {
- printf("No industrialio devices available\n");
+ if (!dp) {
+ fprintf(stderr, "No industrialio devices available\n");
return -ENODEV;
}
- while (ent = readdir(dp), ent != NULL) {
+ while (ent = readdir(dp), ent) {
if (strcmp(ent->d_name, ".") != 0 &&
- strcmp(ent->d_name, "..") != 0 &&
- strlen(ent->d_name) > strlen(type) &&
- strncmp(ent->d_name, type, strlen(type)) == 0) {
- numstrlen = sscanf(ent->d_name + strlen(type),
- "%d",
- &number);
+ strcmp(ent->d_name, "..") != 0 &&
+ strlen(ent->d_name) > strlen(type) &&
+ strncmp(ent->d_name, type, strlen(type)) == 0) {
+ errno = 0;
+ ret = sscanf(ent->d_name + strlen(type), "%d", &number);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "failed to read element number\n");
+ goto error_close_dir;
+ } else if (ret != 1) {
+ ret = -EIO;
+ fprintf(stderr,
+ "failed to match element number\n");
+ goto error_close_dir;
+ }
+
+ numstrlen = calc_digits(number);
/* verify the next character is not a colon */
if (strncmp(ent->d_name + strlen(type) + numstrlen,
- ":",
- 1) != 0) {
- filename = malloc(strlen(iio_dir)
- + strlen(type)
- + numstrlen
- + 6);
- if (filename == NULL) {
- closedir(dp);
- return -ENOMEM;
+ ":", 1) != 0) {
+ filename = malloc(strlen(iio_dir) + strlen(type)
+ + numstrlen + 6);
+ if (!filename) {
+ ret = -ENOMEM;
+ goto error_close_dir;
}
- sprintf(filename, "%s%s%d/name",
- iio_dir,
- type,
- number);
- nameFile = fopen(filename, "r");
- if (!nameFile) {
+
+ ret = sprintf(filename, "%s%s%d/name", iio_dir,
+ type, number);
+ if (ret < 0) {
+ free(filename);
+ goto error_close_dir;
+ }
+
+ namefp = fopen(filename, "r");
+ if (!namefp) {
free(filename);
continue;
}
+
free(filename);
- fscanf(nameFile, "%s", thisname);
- fclose(nameFile);
+ errno = 0;
+ if (fscanf(namefp, "%s", thisname) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ goto error_close_dir;
+ }
+
+ if (fclose(namefp)) {
+ ret = -errno;
+ goto error_close_dir;
+ }
+
if (strcmp(name, thisname) == 0) {
- closedir(dp);
+ if (closedir(dp) == -1)
+ return -errno;
+
return number;
}
}
}
}
- closedir(dp);
+ if (closedir(dp) == -1)
+ return -errno;
+
return -ENODEV;
+
+error_close_dir:
+ if (closedir(dp) == -1)
+ perror("find_type_by_name(): Failed to close directory");
+
+ return ret;
}
-int _write_sysfs_int(char *filename, char *basedir, int val, int verify)
+static int _write_sysfs_int(const char *filename, const char *basedir, int val,
+ int verify)
{
int ret = 0;
FILE *sysfsfp;
int test;
char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
- if (temp == NULL)
+ if (!temp)
return -ENOMEM;
- sprintf(temp, "%s/%s", basedir, filename);
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
sysfsfp = fopen(temp, "w");
- if (sysfsfp == NULL) {
- printf("failed to open %s\n", temp);
+ if (!sysfsfp) {
ret = -errno;
+ fprintf(stderr, "failed to open %s\n", temp);
+ goto error_free;
+ }
+
+ ret = fprintf(sysfsfp, "%d", val);
+ if (ret < 0) {
+ if (fclose(sysfsfp))
+ perror("_write_sysfs_int(): Failed to close dir");
+
goto error_free;
}
- fprintf(sysfsfp, "%d", val);
- fclose(sysfsfp);
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ goto error_free;
+ }
+
if (verify) {
sysfsfp = fopen(temp, "r");
- if (sysfsfp == NULL) {
- printf("failed to open %s\n", temp);
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "failed to open %s\n", temp);
+ goto error_free;
+ }
+
+ if (fscanf(sysfsfp, "%d", &test) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("_write_sysfs_int(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp)) {
ret = -errno;
goto error_free;
}
- fscanf(sysfsfp, "%d", &test);
- fclose(sysfsfp);
+
if (test != val) {
- printf("Possible failure in int write %d to %s%s\n",
- val,
- basedir,
- filename);
+ fprintf(stderr,
+ "Possible failure in int write %d to %s/%s\n",
+ val, basedir, filename);
ret = -1;
}
}
+
error_free:
free(temp);
return ret;
}
-int write_sysfs_int(char *filename, char *basedir, int val)
+/**
+ * write_sysfs_int() - write an integer value to a sysfs file
+ * @filename: name of the file to write to
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: integer value to write to file
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int write_sysfs_int(const char *filename, const char *basedir, int val)
{
return _write_sysfs_int(filename, basedir, val, 0);
}
-int write_sysfs_int_and_verify(char *filename, char *basedir, int val)
+/**
+ * write_sysfs_int_and_verify() - write an integer value to a sysfs file
+ * and verify
+ * @filename: name of the file to write to
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: integer value to write to file
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int write_sysfs_int_and_verify(const char *filename, const char *basedir,
+ int val)
{
return _write_sysfs_int(filename, basedir, val, 1);
}
-int _write_sysfs_string(char *filename, char *basedir, char *val, int verify)
+static int _write_sysfs_string(const char *filename, const char *basedir,
+ const char *val, int verify)
{
int ret = 0;
FILE *sysfsfp;
char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
- if (temp == NULL) {
- printf("Memory allocation failed\n");
+ if (!temp) {
+ fprintf(stderr, "Memory allocation failed\n");
return -ENOMEM;
}
- sprintf(temp, "%s/%s", basedir, filename);
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
sysfsfp = fopen(temp, "w");
- if (sysfsfp == NULL) {
- printf("Could not open %s\n", temp);
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "Could not open %s\n", temp);
+ goto error_free;
+ }
+
+ ret = fprintf(sysfsfp, "%s", val);
+ if (ret < 0) {
+ if (fclose(sysfsfp))
+ perror("_write_sysfs_string(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp)) {
ret = -errno;
goto error_free;
}
- fprintf(sysfsfp, "%s", val);
- fclose(sysfsfp);
+
if (verify) {
sysfsfp = fopen(temp, "r");
- if (sysfsfp == NULL) {
- printf("could not open file to verify\n");
+ if (!sysfsfp) {
ret = -errno;
+ fprintf(stderr, "Could not open file to verify\n");
goto error_free;
}
- fscanf(sysfsfp, "%s", temp);
- fclose(sysfsfp);
+
+ if (fscanf(sysfsfp, "%s", temp) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("_write_sysfs_string(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ goto error_free;
+ }
+
if (strcmp(temp, val) != 0) {
- printf("Possible failure in string write of %s "
- "Should be %s "
- "written to %s\%s\n",
- temp,
- val,
- basedir,
- filename);
+ fprintf(stderr,
+ "Possible failure in string write of %s "
+ "Should be %s written to %s/%s\n", temp, val,
+ basedir, filename);
ret = -1;
}
}
+
error_free:
free(temp);
@@ -586,84 +826,166 @@ error_free:
* @filename: name of file to write to
* @basedir: the sysfs directory in which the file is to be found
* @val: the string to write
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
**/
-int write_sysfs_string_and_verify(char *filename, char *basedir, char *val)
+int write_sysfs_string_and_verify(const char *filename, const char *basedir,
+ const char *val)
{
return _write_sysfs_string(filename, basedir, val, 1);
}
-int write_sysfs_string(char *filename, char *basedir, char *val)
+/**
+ * write_sysfs_string() - write string to a sysfs file
+ * @filename: name of file to write to
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: the string to write
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int write_sysfs_string(const char *filename, const char *basedir,
+ const char *val)
{
return _write_sysfs_string(filename, basedir, val, 0);
}
-int read_sysfs_posint(char *filename, char *basedir)
+/**
+ * read_sysfs_posint() - read an integer value from file
+ * @filename: name of file to read from
+ * @basedir: the sysfs directory in which the file is to be found
+ *
+ * Returns the read integer value >= 0 on success, otherwise a negative error
+ * code.
+ **/
+int read_sysfs_posint(const char *filename, const char *basedir)
{
int ret;
FILE *sysfsfp;
char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
- if (temp == NULL) {
- printf("Memory allocation failed");
+ if (!temp) {
+ fprintf(stderr, "Memory allocation failed");
return -ENOMEM;
}
- sprintf(temp, "%s/%s", basedir, filename);
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
sysfsfp = fopen(temp, "r");
- if (sysfsfp == NULL) {
+ if (!sysfsfp) {
ret = -errno;
goto error_free;
}
- fscanf(sysfsfp, "%d\n", &ret);
- fclose(sysfsfp);
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%d\n", &ret) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("read_sysfs_posint(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp))
+ ret = -errno;
+
error_free:
free(temp);
+
return ret;
}
-int read_sysfs_float(char *filename, char *basedir, float *val)
+/**
+ * read_sysfs_float() - read a float value from file
+ * @filename: name of file to read from
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: output the read float value
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int read_sysfs_float(const char *filename, const char *basedir, float *val)
{
int ret = 0;
FILE *sysfsfp;
char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
- if (temp == NULL) {
- printf("Memory allocation failed");
+ if (!temp) {
+ fprintf(stderr, "Memory allocation failed");
return -ENOMEM;
}
- sprintf(temp, "%s/%s", basedir, filename);
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
sysfsfp = fopen(temp, "r");
- if (sysfsfp == NULL) {
+ if (!sysfsfp) {
ret = -errno;
goto error_free;
}
- fscanf(sysfsfp, "%f\n", val);
- fclose(sysfsfp);
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%f\n", val) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("read_sysfs_float(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp))
+ ret = -errno;
+
error_free:
free(temp);
+
return ret;
}
+/**
+ * read_sysfs_string() - read a string from file
+ * @filename: name of file to read from
+ * @basedir: the sysfs directory in which the file is to be found
+ * @str: output the read string
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
int read_sysfs_string(const char *filename, const char *basedir, char *str)
{
int ret = 0;
FILE *sysfsfp;
char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
- if (temp == NULL) {
- printf("Memory allocation failed");
+ if (!temp) {
+ fprintf(stderr, "Memory allocation failed");
return -ENOMEM;
}
- sprintf(temp, "%s/%s", basedir, filename);
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
sysfsfp = fopen(temp, "r");
- if (sysfsfp == NULL) {
+ if (!sysfsfp) {
ret = -errno;
goto error_free;
}
- fscanf(sysfsfp, "%s\n", str);
- fclose(sysfsfp);
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%s\n", str) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("read_sysfs_string(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp))
+ ret = -errno;
+
error_free:
free(temp);
+
return ret;
}
-
-#endif /* _IIO_UTILS_H */
diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h
index 1bc837b..e3503bf 100644
--- a/tools/iio/iio_utils.h
+++ b/tools/iio/iio_utils.h
@@ -18,6 +18,8 @@
#define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements"
#define FORMAT_TYPE_FILE "%s_type"
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
extern const char *iio_dir;
/**
@@ -28,9 +30,12 @@ extern const char *iio_dir;
* @offset: offset to be applied for conversion to si units
* @index: the channel index in the buffer output
* @bytes: number of bytes occupied in buffer output
+ * @bits_used: number of valid bits of data
+ * @shift: amount of bits to shift right data before applying bit mask
* @mask: a bit mask for the raw output
+ * @be: flag if data is big endian
* @is_signed: is the raw value stored signed
- * @enabled: is this channel enabled
+ * @location: data offset for this channel inside the buffer (in bytes)
**/
struct iio_channel_info {
char *name;
@@ -48,24 +53,26 @@ struct iio_channel_info {
};
int iioutils_break_up_name(const char *full_name, char **generic_name);
-int iioutils_get_type(unsigned *is_signed, unsigned *bytes,
- unsigned *bits_used, unsigned *shift,
- uint64_t *mask, unsigned *be,
- const char *device_dir, const char *name,
- const char *generic_name);
+int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
+ unsigned *shift, uint64_t *mask, unsigned *be,
+ const char *device_dir, const char *name,
+ const char *generic_name);
int iioutils_get_param_float(float *output, const char *param_name,
- const char *device_dir, const char *name,
- const char *generic_name);
-void bsort_channel_array_by_index(struct iio_channel_info **ci_array, int cnt);
+ const char *device_dir, const char *name,
+ const char *generic_name);
+void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt);
int build_channel_array(const char *device_dir,
- struct iio_channel_info **ci_array, int *counter);
+ struct iio_channel_info **ci_array, int *counter);
int find_type_by_name(const char *name, const char *type);
-int write_sysfs_int(char *filename, char *basedir, int val);
-int write_sysfs_int_and_verify(char *filename, char *basedir, int val);
-int write_sysfs_string_and_verify(char *filename, char *basedir, char *val);
-int write_sysfs_string(char *filename, char *basedir, char *val);
-int read_sysfs_posint(char *filename, char *basedir);
-int read_sysfs_float(char *filename, char *basedir, float *val);
+int write_sysfs_int(const char *filename, const char *basedir, int val);
+int write_sysfs_int_and_verify(const char *filename, const char *basedir,
+ int val);
+int write_sysfs_string_and_verify(const char *filename, const char *basedir,
+ const char *val);
+int write_sysfs_string(const char *filename, const char *basedir,
+ const char *val);
+int read_sysfs_posint(const char *filename, const char *basedir);
+int read_sysfs_float(const char *filename, const char *basedir, float *val);
int read_sysfs_string(const char *filename, const char *basedir, char *str);
#endif /* _IIO_UTILS_H_ */
diff --git a/tools/iio/lsiio.c b/tools/iio/lsiio.c
index c585440..3d650e6 100644
--- a/tools/iio/lsiio.c
+++ b/tools/iio/lsiio.c
@@ -20,7 +20,6 @@
#include <sys/dir.h>
#include "iio_utils.h"
-
static enum verbosity {
VERBLEVEL_DEFAULT, /* 0 gives lspci behaviour */
VERBLEVEL_SENSORS, /* 1 lists sensors */
@@ -29,17 +28,16 @@ static enum verbosity {
const char *type_device = "iio:device";
const char *type_trigger = "trigger";
-
static inline int check_prefix(const char *str, const char *prefix)
{
return strlen(str) > strlen(prefix) &&
- strncmp(str, prefix, strlen(prefix)) == 0;
+ strncmp(str, prefix, strlen(prefix)) == 0;
}
static inline int check_postfix(const char *str, const char *postfix)
{
return strlen(str) > strlen(postfix) &&
- strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
+ strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
}
static int dump_channels(const char *dev_dir_name)
@@ -48,32 +46,37 @@ static int dump_channels(const char *dev_dir_name)
const struct dirent *ent;
dp = opendir(dev_dir_name);
- if (dp == NULL)
+ if (!dp)
return -errno;
- while (ent = readdir(dp), ent != NULL)
+
+ while (ent = readdir(dp), ent)
if (check_prefix(ent->d_name, "in_") &&
- check_postfix(ent->d_name, "_raw")) {
+ check_postfix(ent->d_name, "_raw"))
printf(" %-10s\n", ent->d_name);
- }
- return 0;
+ return (closedir(dp) == -1) ? -errno : 0;
}
static int dump_one_device(const char *dev_dir_name)
{
char name[IIO_MAX_NAME_LENGTH];
int dev_idx;
- int retval;
+ int ret;
- retval = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_device),
- "%i", &dev_idx);
- if (retval != 1)
+ ret = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_device), "%i",
+ &dev_idx);
+ if (ret != 1)
return -EINVAL;
- read_sysfs_string("name", dev_dir_name, name);
+
+ ret = read_sysfs_string("name", dev_dir_name, name);
+ if (ret < 0)
+ return ret;
+
printf("Device %03d: %s\n", dev_idx, name);
if (verblevel >= VERBLEVEL_SENSORS)
return dump_channels(dev_dir_name);
+
return 0;
}
@@ -81,57 +84,90 @@ static int dump_one_trigger(const char *dev_dir_name)
{
char name[IIO_MAX_NAME_LENGTH];
int dev_idx;
- int retval;
+ int ret;
- retval = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_trigger),
- "%i", &dev_idx);
- if (retval != 1)
+ ret = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_trigger),
+ "%i", &dev_idx);
+ if (ret != 1)
return -EINVAL;
- read_sysfs_string("name", dev_dir_name, name);
+
+ ret = read_sysfs_string("name", dev_dir_name, name);
+ if (ret < 0)
+ return ret;
+
printf("Trigger %03d: %s\n", dev_idx, name);
+
return 0;
}
-static void dump_devices(void)
+static int dump_devices(void)
{
const struct dirent *ent;
+ int ret;
DIR *dp;
dp = opendir(iio_dir);
- if (dp == NULL) {
- printf("No industrial I/O devices available\n");
- return;
+ if (!dp) {
+ fprintf(stderr, "No industrial I/O devices available\n");
+ return -ENODEV;
}
- while (ent = readdir(dp), ent != NULL) {
+ while (ent = readdir(dp), ent) {
if (check_prefix(ent->d_name, type_device)) {
char *dev_dir_name;
- asprintf(&dev_dir_name, "%s%s", iio_dir, ent->d_name);
- dump_one_device(dev_dir_name);
+ if (asprintf(&dev_dir_name, "%s%s", iio_dir,
+ ent->d_name) < 0) {
+ ret = -ENOMEM;
+ goto error_close_dir;
+ }
+
+ ret = dump_one_device(dev_dir_name);
+ if (ret) {
+ free(dev_dir_name);
+ goto error_close_dir;
+ }
+
free(dev_dir_name);
if (verblevel >= VERBLEVEL_SENSORS)
printf("\n");
}
}
rewinddir(dp);
- while (ent = readdir(dp), ent != NULL) {
+ while (ent = readdir(dp), ent) {
if (check_prefix(ent->d_name, type_trigger)) {
char *dev_dir_name;
- asprintf(&dev_dir_name, "%s%s", iio_dir, ent->d_name);
- dump_one_trigger(dev_dir_name);
+ if (asprintf(&dev_dir_name, "%s%s", iio_dir,
+ ent->d_name) < 0) {
+ ret = -ENOMEM;
+ goto error_close_dir;
+ }
+
+ ret = dump_one_trigger(dev_dir_name);
+ if (ret) {
+ free(dev_dir_name);
+ goto error_close_dir;
+ }
+
free(dev_dir_name);
}
}
- closedir(dp);
+
+ return (closedir(dp) == -1) ? -errno : 0;
+
+error_close_dir:
+ if (closedir(dp) == -1)
+ perror("dump_devices(): Failed to close directory");
+
+ return ret;
}
int main(int argc, char **argv)
{
int c, err = 0;
- while ((c = getopt(argc, argv, "d:D:v")) != EOF) {
+ while ((c = getopt(argc, argv, "v")) != EOF) {
switch (c) {
case 'v':
verblevel++;
@@ -146,13 +182,9 @@ int main(int argc, char **argv)
if (err || argc > optind) {
fprintf(stderr, "Usage: lsiio [options]...\n"
"List industrial I/O devices\n"
- " -v, --verbose\n"
- " Increase verbosity (may be given multiple times)\n"
- );
+ " -v Increase verbosity (may be given multiple times)\n");
exit(1);
}
- dump_devices();
-
- return 0;
+ return dump_devices();
}
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
new file mode 100644
index 0000000..2ba78c9
--- /dev/null
+++ b/tools/include/asm-generic/atomic-gcc.h
@@ -0,0 +1,63 @@
+#ifndef __TOOLS_ASM_GENERIC_ATOMIC_H
+#define __TOOLS_ASM_GENERIC_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ *
+ * Excerpts obtained from the Linux kernel sources.
+ */
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+ return ACCESS_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+ v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+ __sync_add_and_fetch(&v->counter, 1);
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+ return __sync_sub_and_fetch(&v->counter, 1) == 0;
+}
+
+#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
diff --git a/tools/include/asm-generic/barrier.h b/tools/include/asm-generic/barrier.h
new file mode 100644
index 0000000..47b9339
--- /dev/null
+++ b/tools/include/asm-generic/barrier.h
@@ -0,0 +1,44 @@
+/*
+ * Copied from the kernel sources to tools/perf/:
+ *
+ * Generic barrier definitions, originally based on MN10300 definitions.
+ *
+ * It should be possible to use these on really simple architectures,
+ * but it serves more as a starting point for new ports.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+#define __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+
+/*
+ * Force strict CPU ordering. And yes, this is required on UP too when we're
+ * talking to devices.
+ *
+ * Fall back to compiler barriers if nothing better is provided.
+ */
+
+#ifndef mb
+#define mb() barrier()
+#endif
+
+#ifndef rmb
+#define rmb() mb()
+#endif
+
+#ifndef wmb
+#define wmb() mb()
+#endif
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __TOOLS_LINUX_ASM_GENERIC_BARRIER_H */
diff --git a/tools/include/asm/atomic.h b/tools/include/asm/atomic.h
new file mode 100644
index 0000000..70794f5
--- /dev/null
+++ b/tools/include/asm/atomic.h
@@ -0,0 +1,10 @@
+#ifndef __TOOLS_LINUX_ASM_ATOMIC_H
+#define __TOOLS_LINUX_ASM_ATOMIC_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/atomic.h"
+#else
+#include <asm-generic/atomic-gcc.h>
+#endif
+
+#endif /* __TOOLS_LINUX_ASM_ATOMIC_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
new file mode 100644
index 0000000..ac66ac5
--- /dev/null
+++ b/tools/include/asm/barrier.h
@@ -0,0 +1,27 @@
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/barrier.h"
+#elif defined(__arm__)
+#include "../../arch/arm/include/asm/barrier.h"
+#elif defined(__aarch64__)
+#include "../../arch/arm64/include/asm/barrier.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/asm/barrier.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/asm/barrier.h"
+#elif defined(__sh__)
+#include "../../arch/sh/include/asm/barrier.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/asm/barrier.h"
+#elif defined(__tile__)
+#include "../../arch/tile/include/asm/barrier.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/asm/barrier.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/asm/barrier.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/asm/barrier.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/asm/barrier.h"
+#else
+#include <asm-generic/barrier.h>
+#endif
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
new file mode 100644
index 0000000..4e3d3d1
--- /dev/null
+++ b/tools/include/linux/atomic.h
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_LINUX_ATOMIC_H
+#define __TOOLS_LINUX_ATOMIC_H
+
+#include <asm/atomic.h>
+
+#endif /* __TOOLS_LINUX_ATOMIC_H */
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 88461f0..9098083 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -1,6 +1,10 @@
#ifndef _TOOLS_LINUX_COMPILER_H_
#define _TOOLS_LINUX_COMPILER_H_
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
#ifndef __always_inline
# define __always_inline inline __attribute__((always_inline))
#endif
@@ -37,4 +41,62 @@
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+#include <linux/types.h>
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
+ case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
+ case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
+ case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ barrier();
+ }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
+ case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
+ case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
+ case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ barrier();
+ }
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering. One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
+ * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+ ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+#define WRITE_ONCE(x, val) \
+ ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/include/linux/kernel.h
index 09e8e7a..76df535 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -1,5 +1,5 @@
-#ifndef PERF_LINUX_KERNEL_H_
-#define PERF_LINUX_KERNEL_H_
+#ifndef __TOOLS_LINUX_KERNEL_H
+#define __TOOLS_LINUX_KERNEL_H
#include <stdarg.h>
#include <stdio.h>
diff --git a/tools/perf/util/include/linux/list.h b/tools/include/linux/list.h
index 76ddbc7..76b014c 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/include/linux/list.h
@@ -1,10 +1,10 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include "../../../../include/linux/list.h"
+#include "../../../include/linux/list.h"
-#ifndef PERF_LIST_H
-#define PERF_LIST_H
+#ifndef TOOLS_LIST_H
+#define TOOLS_LIST_H
/**
* list_del_range - deletes range of entries from list.
* @begin: first element in the range to delete from the list.
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
new file mode 100644
index 0000000..0c27bdf
--- /dev/null
+++ b/tools/include/linux/poison.h
@@ -0,0 +1 @@
+#include "../../../include/linux/poison.h"
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
new file mode 100644
index 0000000..1125822
--- /dev/null
+++ b/tools/include/linux/rbtree.h
@@ -0,0 +1,104 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/include/linux/rbtree.h
+
+ To use rbtrees you'll have to implement your own insert and search cores.
+ This will avoid us to use callbacks and to drop drammatically performances.
+ I know it's not the cleaner way, but in C (not in C++) to get
+ performances and genericity...
+
+ See Documentation/rbtree.txt for documentation and samples.
+*/
+
+#ifndef __TOOLS_LINUX_PERF_RBTREE_H
+#define __TOOLS_LINUX_PERF_RBTREE_H
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+
+struct rb_node {
+ unsigned long __rb_parent_color;
+ struct rb_node *rb_right;
+ struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+ /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root {
+ struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
+
+#define RB_ROOT (struct rb_root) { NULL, }
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
+
+/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
+#define RB_EMPTY_NODE(node) \
+ ((node)->__rb_parent_color == (unsigned long)(node))
+#define RB_CLEAR_NODE(node) \
+ ((node)->__rb_parent_color = (unsigned long)(node))
+
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Postorder iteration - always visit the parent after its children */
+extern struct rb_node *rb_first_postorder(const struct rb_root *);
+extern struct rb_node *rb_next_postorder(const struct rb_node *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
+ struct rb_node **rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ *rb_link = node;
+}
+
+#define rb_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ ____ptr ? rb_entry(____ptr, type, member) : NULL; \
+ })
+
+
+/*
+ * Handy for checking that we are not deleting an entry that is
+ * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
+ * probably should be moved to lib/rbtree.c...
+ */
+static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+ rb_erase(n, root);
+ RB_CLEAR_NODE(n);
+}
+#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
new file mode 100644
index 0000000..43be941
--- /dev/null
+++ b/tools/include/linux/rbtree_augmented.h
@@ -0,0 +1,245 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ tools/linux/include/linux/rbtree_augmented.h
+
+ Copied from:
+ linux/include/linux/rbtree_augmented.h
+*/
+
+#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H
+#define _TOOLS_LINUX_RBTREE_AUGMENTED_H
+
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+
+/*
+ * Please note - only struct rb_augment_callbacks and the prototypes for
+ * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
+ * The rest are implementation details you are not expected to depend on.
+ *
+ * See Documentation/rbtree.txt for documentation and samples.
+ */
+
+struct rb_augment_callbacks {
+ void (*propagate)(struct rb_node *node, struct rb_node *stop);
+ void (*copy)(struct rb_node *old, struct rb_node *new);
+ void (*rotate)(struct rb_node *old, struct rb_node *new);
+};
+
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+/*
+ * Fixup the rbtree and update the augmented information when rebalancing.
+ *
+ * On insertion, the user must update the augmented information on the path
+ * leading to the inserted node, then call rb_link_node() as usual and
+ * rb_augment_inserted() instead of the usual rb_insert_color() call.
+ * If rb_augment_inserted() rebalances the rbtree, it will callback into
+ * a user provided function to update the augmented information on the
+ * affected subtrees.
+ */
+static inline void
+rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ __rb_insert_augmented(node, root, augment->rotate);
+}
+
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \
+ rbtype, rbaugmented, rbcompute) \
+static inline void \
+rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \
+{ \
+ while (rb != stop) { \
+ rbstruct *node = rb_entry(rb, rbstruct, rbfield); \
+ rbtype augmented = rbcompute(node); \
+ if (node->rbaugmented == augmented) \
+ break; \
+ node->rbaugmented = augmented; \
+ rb = rb_parent(&node->rbfield); \
+ } \
+} \
+static inline void \
+rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \
+{ \
+ rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
+ rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
+ new->rbaugmented = old->rbaugmented; \
+} \
+static void \
+rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
+{ \
+ rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
+ rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
+ new->rbaugmented = old->rbaugmented; \
+ old->rbaugmented = rbcompute(old); \
+} \
+rbstatic const struct rb_augment_callbacks rbname = { \
+ rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
+};
+
+
+#define RB_RED 0
+#define RB_BLACK 1
+
+#define __rb_parent(pc) ((struct rb_node *)(pc & ~3))
+
+#define __rb_color(pc) ((pc) & 1)
+#define __rb_is_black(pc) __rb_color(pc)
+#define __rb_is_red(pc) (!__rb_color(pc))
+#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+ rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+}
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+ struct rb_node *p, int color)
+{
+ rb->__rb_parent_color = (unsigned long)p | color;
+}
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+ struct rb_node *parent, struct rb_root *root)
+{
+ if (parent) {
+ if (parent->rb_left == old)
+ parent->rb_left = new;
+ else
+ parent->rb_right = new;
+ } else
+ root->rb_node = new;
+}
+
+extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
+static __always_inline struct rb_node *
+__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *parent, *rebalance;
+ unsigned long pc;
+
+ if (!tmp) {
+ /*
+ * Case 1: node to erase has no more than 1 child (easy!)
+ *
+ * Note that if there is one child it must be red due to 5)
+ * and node must be black due to 4). We adjust colors locally
+ * so as to bypass __rb_erase_color() later on.
+ */
+ pc = node->__rb_parent_color;
+ parent = __rb_parent(pc);
+ __rb_change_child(node, child, parent, root);
+ if (child) {
+ child->__rb_parent_color = pc;
+ rebalance = NULL;
+ } else
+ rebalance = __rb_is_black(pc) ? parent : NULL;
+ tmp = parent;
+ } else if (!child) {
+ /* Still case 1, but this time the child is node->rb_left */
+ tmp->__rb_parent_color = pc = node->__rb_parent_color;
+ parent = __rb_parent(pc);
+ __rb_change_child(node, tmp, parent, root);
+ rebalance = NULL;
+ tmp = parent;
+ } else {
+ struct rb_node *successor = child, *child2;
+ tmp = child->rb_left;
+ if (!tmp) {
+ /*
+ * Case 2: node's successor is its right child
+ *
+ * (n) (s)
+ * / \ / \
+ * (x) (s) -> (x) (c)
+ * \
+ * (c)
+ */
+ parent = successor;
+ child2 = successor->rb_right;
+ augment->copy(node, successor);
+ } else {
+ /*
+ * Case 3: node's successor is leftmost under
+ * node's right child subtree
+ *
+ * (n) (s)
+ * / \ / \
+ * (x) (y) -> (x) (y)
+ * / /
+ * (p) (p)
+ * / /
+ * (s) (c)
+ * \
+ * (c)
+ */
+ do {
+ parent = successor;
+ successor = tmp;
+ tmp = tmp->rb_left;
+ } while (tmp);
+ parent->rb_left = child2 = successor->rb_right;
+ successor->rb_right = child;
+ rb_set_parent(child, successor);
+ augment->copy(node, successor);
+ augment->propagate(parent, successor);
+ }
+
+ successor->rb_left = tmp = node->rb_left;
+ rb_set_parent(tmp, successor);
+
+ pc = node->__rb_parent_color;
+ tmp = __rb_parent(pc);
+ __rb_change_child(node, successor, tmp, root);
+ if (child2) {
+ successor->__rb_parent_color = pc;
+ rb_set_parent_color(child2, parent, RB_BLACK);
+ rebalance = NULL;
+ } else {
+ unsigned long pc2 = successor->__rb_parent_color;
+ successor->__rb_parent_color = pc;
+ rebalance = __rb_is_black(pc2) ? parent : NULL;
+ }
+ tmp = successor;
+ }
+
+ augment->propagate(tmp, NULL);
+ return rebalance;
+}
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
+ if (rebalance)
+ __rb_erase_color(rebalance, root, augment->rotate);
+}
+
+#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index b5cf25e..8ebf627 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -60,6 +60,14 @@ typedef __u32 __bitwise __be32;
typedef __u64 __bitwise __le64;
typedef __u64 __bitwise __be64;
+typedef struct {
+ int counter;
+} atomic_t;
+
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
struct list_head {
struct list_head *next, *prev;
};
diff --git a/tools/laptop/freefall/Makefile b/tools/laptop/freefall/Makefile
new file mode 100644
index 0000000..48c6c93
--- /dev/null
+++ b/tools/laptop/freefall/Makefile
@@ -0,0 +1,17 @@
+PREFIX ?= /usr
+SBINDIR ?= sbin
+INSTALL ?= install
+CC = $(CROSS_COMPILE)gcc
+
+TARGET = freefall
+
+all: $(TARGET)
+
+%: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+clean:
+ $(RM) $(TARGET)
+
+install: freefall
+ $(INSTALL) -D -m 755 $(TARGET) $(DESTDIR)$(PREFIX)/$(SBINDIR)/$(TARGET)
diff --git a/tools/laptop/freefall/freefall.c b/tools/laptop/freefall/freefall.c
new file mode 100644
index 0000000..5e44b20
--- /dev/null
+++ b/tools/laptop/freefall/freefall.c
@@ -0,0 +1,174 @@
+/* Disk protection for HP/DELL machines.
+ *
+ * Copyright 2008 Eric Piel
+ * Copyright 2009 Pavel Machek <pavel@ucw.cz>
+ * Copyright 2012 Sonal Santan
+ * Copyright 2014 Pali Rohár <pali.rohar@gmail.com>
+ *
+ * GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sched.h>
+#include <syslog.h>
+
+static int noled;
+static char unload_heads_path[64];
+static char device_path[32];
+static const char app_name[] = "FREE FALL";
+
+static int set_unload_heads_path(char *device)
+{
+ if (strlen(device) <= 5 || strncmp(device, "/dev/", 5) != 0)
+ return -EINVAL;
+ strncpy(device_path, device, sizeof(device_path) - 1);
+
+ snprintf(unload_heads_path, sizeof(unload_heads_path) - 1,
+ "/sys/block/%s/device/unload_heads", device+5);
+ return 0;
+}
+
+static int valid_disk(void)
+{
+ int fd = open(unload_heads_path, O_RDONLY);
+
+ if (fd < 0) {
+ perror(unload_heads_path);
+ return 0;
+ }
+
+ close(fd);
+ return 1;
+}
+
+static void write_int(char *path, int i)
+{
+ char buf[1024];
+ int fd = open(path, O_RDWR);
+
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+
+ sprintf(buf, "%d", i);
+
+ if (write(fd, buf, strlen(buf)) != strlen(buf)) {
+ perror("write");
+ exit(1);
+ }
+
+ close(fd);
+}
+
+static void set_led(int on)
+{
+ if (noled)
+ return;
+ write_int("/sys/class/leds/hp::hddprotect/brightness", on);
+}
+
+static void protect(int seconds)
+{
+ const char *str = (seconds == 0) ? "Unparked" : "Parked";
+
+ write_int(unload_heads_path, seconds*1000);
+ syslog(LOG_INFO, "%s %s disk head\n", str, device_path);
+}
+
+static int on_ac(void)
+{
+ /* /sys/class/power_supply/AC0/online */
+ return 1;
+}
+
+static int lid_open(void)
+{
+ /* /proc/acpi/button/lid/LID/state */
+ return 1;
+}
+
+static void ignore_me(int signum)
+{
+ protect(0);
+ set_led(0);
+}
+
+int main(int argc, char **argv)
+{
+ int fd, ret;
+ struct stat st;
+ struct sched_param param;
+
+ if (argc == 1)
+ ret = set_unload_heads_path("/dev/sda");
+ else if (argc == 2)
+ ret = set_unload_heads_path(argv[1]);
+ else
+ ret = -EINVAL;
+
+ if (ret || !valid_disk()) {
+ fprintf(stderr, "usage: %s <device> (default: /dev/sda)\n",
+ argv[0]);
+ exit(1);
+ }
+
+ fd = open("/dev/freefall", O_RDONLY);
+ if (fd < 0) {
+ perror("/dev/freefall");
+ return EXIT_FAILURE;
+ }
+
+ if (stat("/sys/class/leds/hp::hddprotect/brightness", &st))
+ noled = 1;
+
+ if (daemon(0, 0) != 0) {
+ perror("daemon");
+ return EXIT_FAILURE;
+ }
+
+ openlog(app_name, LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1);
+
+ param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+ sched_setscheduler(0, SCHED_FIFO, &param);
+ mlockall(MCL_CURRENT|MCL_FUTURE);
+
+ signal(SIGALRM, ignore_me);
+
+ for (;;) {
+ unsigned char count;
+
+ ret = read(fd, &count, sizeof(count));
+ alarm(0);
+ if ((ret == -1) && (errno == EINTR)) {
+ /* Alarm expired, time to unpark the heads */
+ continue;
+ }
+
+ if (ret != sizeof(count)) {
+ perror("read");
+ break;
+ }
+
+ protect(21);
+ set_led(1);
+ if (1 || on_ac() || lid_open())
+ alarm(2);
+ else
+ alarm(20);
+ }
+
+ closelog();
+ close(fd);
+ return EXIT_SUCCESS;
+}
diff --git a/tools/lguest/.gitignore b/tools/lguest/.gitignore
index 115587f..8d9a838 100644
--- a/tools/lguest/.gitignore
+++ b/tools/lguest/.gitignore
@@ -1 +1,2 @@
lguest
+include
diff --git a/tools/lguest/Makefile b/tools/lguest/Makefile
index a107b5e..d04599a 100644
--- a/tools/lguest/Makefile
+++ b/tools/lguest/Makefile
@@ -11,3 +11,4 @@ lguest: include/linux/virtio_types.h
clean:
rm -f lguest
+ rm -rf include
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index e440524..80159e6 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -125,7 +125,11 @@ struct device_list {
/* The list of Guest devices, based on command line arguments. */
static struct device_list devices;
-struct virtio_pci_cfg_cap {
+/*
+ * Just like struct virtio_pci_cfg_cap in uapi/linux/virtio_pci.h,
+ * but uses a u32 explicitly for the data.
+ */
+struct virtio_pci_cfg_cap_u32 {
struct virtio_pci_cap cap;
u32 pci_cfg_data; /* Data for BAR access. */
};
@@ -157,7 +161,7 @@ struct pci_config {
struct virtio_pci_notify_cap notify;
struct virtio_pci_cap isr;
struct virtio_pci_cap device;
- struct virtio_pci_cfg_cap cfg_access;
+ struct virtio_pci_cfg_cap_u32 cfg_access;
};
/* The device structure describes a single device. */
@@ -1291,7 +1295,7 @@ static struct device *dev_and_reg(u32 *reg)
* only fault if they try to write with some invalid bar/offset/length.
*/
static bool valid_bar_access(struct device *d,
- struct virtio_pci_cfg_cap *cfg_access)
+ struct virtio_pci_cfg_cap_u32 *cfg_access)
{
/* We only have 1 bar (BAR0) */
if (cfg_access->cap.bar != 0)
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index d8fe29f..fe1b02c 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -16,7 +16,7 @@ MAKEFLAGS += --no-print-directory
LIBFILE = $(OUTPUT)libapi.a
CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
-CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
RM = rm -f
@@ -36,7 +36,7 @@ $(LIBFILE): $(API_IN)
clean:
$(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \
- find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM)
+ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
FORCE:
diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c
index 8305b3e..eb7cf4d 100644
--- a/tools/lib/api/fs/debugfs.c
+++ b/tools/lib/api/fs/debugfs.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include "debugfs.h"
+#include "tracefs.h"
#ifndef DEBUGFS_DEFAULT_PATH
#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug"
@@ -94,11 +95,21 @@ int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename
"Hint:\tIs the debugfs filesystem mounted?\n"
"Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'");
break;
- case EACCES:
+ case EACCES: {
+ const char *mountpoint = debugfs_mountpoint;
+
+ if (!access(debugfs_mountpoint, R_OK) && strncmp(filename, "tracing/", 8) == 0) {
+ const char *tracefs_mntpoint = tracefs_find_mountpoint();
+
+ if (tracefs_mntpoint)
+ mountpoint = tracefs_mntpoint;
+ }
+
snprintf(buf, size,
"Error:\tNo permissions to read %s/%s\n"
"Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
- debugfs_mountpoint, filename, debugfs_mountpoint);
+ debugfs_mountpoint, filename, mountpoint);
+ }
break;
default:
snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf)));
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
new file mode 100644
index 0000000..812aeed
--- /dev/null
+++ b/tools/lib/bpf/.gitignore
@@ -0,0 +1,2 @@
+libbpf_version.h
+FEATURE-DUMP
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
new file mode 100644
index 0000000..d874975
--- /dev/null
+++ b/tools/lib/bpf/Build
@@ -0,0 +1 @@
+libbpf-y := libbpf.o bpf.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
new file mode 100644
index 0000000..f68d23a
--- /dev/null
+++ b/tools/lib/bpf/Makefile
@@ -0,0 +1,195 @@
+# Most of this file is copied from tools/lib/traceevent/Makefile
+
+BPF_VERSION = 0
+BPF_PATCHLEVEL = 0
+BPF_EXTRAVERSION = 1
+
+MAKEFLAGS += --no-print-directory
+
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?= /usr/local
+libdir = $(prefix)/$(libdir_relative)
+man_dir = $(prefix)/share/man
+man_dir_SQ = '$(subst ','\'',$(man_dir))'
+
+export man_dir man_dir_SQ INSTALL
+export DESTDIR DESTDIR_SQ
+
+include ../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+FEATURE_DISPLAY = libelf libelf-getphdrnum libelf-mmap bpf
+FEATURE_TESTS = libelf bpf
+
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
+
+include $(srctree)/tools/build/Makefile.feature
+
+export prefix libdir src obj
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
+
+LIB_FILE = libbpf.a libbpf.so
+
+VERSION = $(BPF_VERSION)
+PATCHLEVEL = $(BPF_PATCHLEVEL)
+EXTRAVERSION = $(BPF_EXTRAVERSION)
+
+OBJ = $@
+N =
+
+LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+ CFLAGS := $(EXTRA_CFLAGS)
+else
+ CFLAGS := -g -Wall
+endif
+
+ifeq ($(feature-libelf-mmap), 1)
+ override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
+endif
+
+ifeq ($(feature-libelf-getphdrnum), 1)
+ override CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
+endif
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += -Werror -Wall
+override CFLAGS += -fPIC
+override CFLAGS += $(INCLUDES)
+
+ifeq ($(VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+# Disable command line variables (CFLAGS) overide from top
+# level Makefile (perf), otherwise build Makefile will get
+# the same command line setup.
+MAKEOVERRIDES=
+
+export srctree OUTPUT CC LD CFLAGS V
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+BPF_IN := $(OUTPUT)libbpf-in.o
+LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+
+CMD_TARGETS = $(LIB_FILE)
+
+TARGETS = $(CMD_TARGETS)
+
+all: $(VERSION_FILES) all_cmd
+
+all_cmd: $(CMD_TARGETS)
+
+$(BPF_IN): force elfdep bpfdep
+ $(Q)$(MAKE) $(build)=libbpf
+
+$(OUTPUT)libbpf.so: $(BPF_IN)
+ $(QUIET_LINK)$(CC) --shared $^ -o $@
+
+$(OUTPUT)libbpf.a: $(BPF_IN)
+ $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+
+define update_dir
+ (echo $1 > $@.tmp; \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ echo ' UPDATE $@'; \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: all_cmd
+ $(call QUIET_INSTALL, $(LIB_FILE)) \
+ $(call do_install,$(LIB_FILE),$(libdir_SQ))
+
+install: install_lib
+
+### Cleaning rules
+
+config-clean:
+ $(call QUIET_CLEAN, config)
+ $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
+
+clean:
+ $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \
+ $(RM) LIBBPF-CFLAGS
+ $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP
+
+
+
+PHONY += force elfdep bpfdep
+force:
+
+elfdep:
+ @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi
+
+bpfdep:
+ @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
new file mode 100644
index 0000000..a633105
--- /dev/null
+++ b/tools/lib/bpf/bpf.c
@@ -0,0 +1,85 @@
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <linux/bpf.h>
+#include "bpf.h"
+
+/*
+ * When building perf, unistd.h is override. Define __NR_bpf is
+ * required to be defined.
+ */
+#ifndef __NR_bpf
+# if defined(__i386__)
+# define __NR_bpf 357
+# elif defined(__x86_64__)
+# define __NR_bpf 321
+# elif defined(__aarch64__)
+# define __NR_bpf 280
+# else
+# error __NR_bpf not defined. libbpf does not support your arch.
+# endif
+#endif
+
+static __u64 ptr_to_u64(void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+ return syscall(__NR_bpf, cmd, attr, size);
+}
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries)
+{
+ union bpf_attr attr;
+
+ memset(&attr, '\0', sizeof(attr));
+
+ attr.map_type = map_type;
+ attr.key_size = key_size;
+ attr.value_size = value_size;
+ attr.max_entries = max_entries;
+
+ return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
+ size_t insns_cnt, char *license,
+ u32 kern_version, char *log_buf, size_t log_buf_sz)
+{
+ int fd;
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = type;
+ attr.insn_cnt = (__u32)insns_cnt;
+ attr.insns = ptr_to_u64(insns);
+ attr.license = ptr_to_u64(license);
+ attr.log_buf = ptr_to_u64(NULL);
+ attr.log_size = 0;
+ attr.log_level = 0;
+ attr.kern_version = kern_version;
+
+ fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (fd >= 0 || !log_buf || !log_buf_sz)
+ return fd;
+
+ /* Try again with log */
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_buf_sz;
+ attr.log_level = 1;
+ log_buf[0] = 0;
+ return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
new file mode 100644
index 0000000..854b736
--- /dev/null
+++ b/tools/lib/bpf/bpf.h
@@ -0,0 +1,23 @@
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+#ifndef __BPF_BPF_H
+#define __BPF_BPF_H
+
+#include <linux/bpf.h>
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
+ int max_entries);
+
+/* Recommend log buffer size */
+#define BPF_LOG_BUF_SIZE 65536
+int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
+ size_t insns_cnt, char *license,
+ u32 kern_version, char *log_buf,
+ size_t log_buf_sz);
+
+#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
new file mode 100644
index 0000000..4252fc2
--- /dev/null
+++ b/tools/lib/bpf/libbpf.c
@@ -0,0 +1,1056 @@
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <asm/unistd.h>
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <libelf.h>
+#include <gelf.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+__printf(1, 2)
+static int __base_pr(const char *format, ...)
+{
+ va_list args;
+ int err;
+
+ va_start(args, format);
+ err = vfprintf(stderr, format, args);
+ va_end(args);
+ return err;
+}
+
+static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr;
+static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr;
+static __printf(1, 2) libbpf_print_fn_t __pr_debug;
+
+#define __pr(func, fmt, ...) \
+do { \
+ if ((func)) \
+ (func)("libbpf: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__)
+
+void libbpf_set_print(libbpf_print_fn_t warn,
+ libbpf_print_fn_t info,
+ libbpf_print_fn_t debug)
+{
+ __pr_warning = warn;
+ __pr_info = info;
+ __pr_debug = debug;
+}
+
+/* Copied from tools/perf/util/util.h */
+#ifndef zfree
+# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
+#endif
+
+#ifndef zclose
+# define zclose(fd) ({ \
+ int ___err = 0; \
+ if ((fd) >= 0) \
+ ___err = close((fd)); \
+ fd = -1; \
+ ___err; })
+#endif
+
+#ifdef HAVE_LIBELF_MMAP_SUPPORT
+# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
+#else
+# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+/*
+ * bpf_prog should be a better name but it has been used in
+ * linux/filter.h.
+ */
+struct bpf_program {
+ /* Index in elf obj file, for relocation use. */
+ int idx;
+ char *section_name;
+ struct bpf_insn *insns;
+ size_t insns_cnt;
+
+ struct {
+ int insn_idx;
+ int map_idx;
+ } *reloc_desc;
+ int nr_reloc;
+
+ int fd;
+
+ struct bpf_object *obj;
+ void *priv;
+ bpf_program_clear_priv_t clear_priv;
+};
+
+static LIST_HEAD(bpf_objects_list);
+
+struct bpf_object {
+ char license[64];
+ u32 kern_version;
+ void *maps_buf;
+ size_t maps_buf_sz;
+
+ struct bpf_program *programs;
+ size_t nr_programs;
+ int *map_fds;
+ /*
+ * This field is required because maps_buf will be freed and
+ * maps_buf_sz will be set to 0 after loaded.
+ */
+ size_t nr_map_fds;
+ bool loaded;
+
+ /*
+ * Information when doing elf related work. Only valid if fd
+ * is valid.
+ */
+ struct {
+ int fd;
+ void *obj_buf;
+ size_t obj_buf_sz;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ Elf_Data *symbols;
+ struct {
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ } *reloc;
+ int nr_reloc;
+ } efile;
+ /*
+ * All loaded bpf_object is linked in a list, which is
+ * hidden to caller. bpf_objects__<func> handlers deal with
+ * all objects.
+ */
+ struct list_head list;
+ char path[];
+};
+#define obj_elf_valid(o) ((o)->efile.elf)
+
+static void bpf_program__unload(struct bpf_program *prog)
+{
+ if (!prog)
+ return;
+
+ zclose(prog->fd);
+}
+
+static void bpf_program__exit(struct bpf_program *prog)
+{
+ if (!prog)
+ return;
+
+ if (prog->clear_priv)
+ prog->clear_priv(prog, prog->priv);
+
+ prog->priv = NULL;
+ prog->clear_priv = NULL;
+
+ bpf_program__unload(prog);
+ zfree(&prog->section_name);
+ zfree(&prog->insns);
+ zfree(&prog->reloc_desc);
+
+ prog->nr_reloc = 0;
+ prog->insns_cnt = 0;
+ prog->idx = -1;
+}
+
+static int
+bpf_program__init(void *data, size_t size, char *name, int idx,
+ struct bpf_program *prog)
+{
+ if (size < sizeof(struct bpf_insn)) {
+ pr_warning("corrupted section '%s'\n", name);
+ return -EINVAL;
+ }
+
+ bzero(prog, sizeof(*prog));
+
+ prog->section_name = strdup(name);
+ if (!prog->section_name) {
+ pr_warning("failed to alloc name for prog %s\n",
+ name);
+ goto errout;
+ }
+
+ prog->insns = malloc(size);
+ if (!prog->insns) {
+ pr_warning("failed to alloc insns for %s\n", name);
+ goto errout;
+ }
+ prog->insns_cnt = size / sizeof(struct bpf_insn);
+ memcpy(prog->insns, data,
+ prog->insns_cnt * sizeof(struct bpf_insn));
+ prog->idx = idx;
+ prog->fd = -1;
+
+ return 0;
+errout:
+ bpf_program__exit(prog);
+ return -ENOMEM;
+}
+
+static int
+bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
+ char *name, int idx)
+{
+ struct bpf_program prog, *progs;
+ int nr_progs, err;
+
+ err = bpf_program__init(data, size, name, idx, &prog);
+ if (err)
+ return err;
+
+ progs = obj->programs;
+ nr_progs = obj->nr_programs;
+
+ progs = realloc(progs, sizeof(progs[0]) * (nr_progs + 1));
+ if (!progs) {
+ /*
+ * In this case the original obj->programs
+ * is still valid, so don't need special treat for
+ * bpf_close_object().
+ */
+ pr_warning("failed to alloc a new program '%s'\n",
+ name);
+ bpf_program__exit(&prog);
+ return -ENOMEM;
+ }
+
+ pr_debug("found program %s\n", prog.section_name);
+ obj->programs = progs;
+ obj->nr_programs = nr_progs + 1;
+ prog.obj = obj;
+ progs[nr_progs] = prog;
+ return 0;
+}
+
+static struct bpf_object *bpf_object__new(const char *path,
+ void *obj_buf,
+ size_t obj_buf_sz)
+{
+ struct bpf_object *obj;
+
+ obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
+ if (!obj) {
+ pr_warning("alloc memory failed for %s\n", path);
+ return NULL;
+ }
+
+ strcpy(obj->path, path);
+ obj->efile.fd = -1;
+
+ /*
+ * Caller of this function should also calls
+ * bpf_object__elf_finish() after data collection to return
+ * obj_buf to user. If not, we should duplicate the buffer to
+ * avoid user freeing them before elf finish.
+ */
+ obj->efile.obj_buf = obj_buf;
+ obj->efile.obj_buf_sz = obj_buf_sz;
+
+ obj->loaded = false;
+
+ INIT_LIST_HEAD(&obj->list);
+ list_add(&obj->list, &bpf_objects_list);
+ return obj;
+}
+
+static void bpf_object__elf_finish(struct bpf_object *obj)
+{
+ if (!obj_elf_valid(obj))
+ return;
+
+ if (obj->efile.elf) {
+ elf_end(obj->efile.elf);
+ obj->efile.elf = NULL;
+ }
+ obj->efile.symbols = NULL;
+
+ zfree(&obj->efile.reloc);
+ obj->efile.nr_reloc = 0;
+ zclose(obj->efile.fd);
+ obj->efile.obj_buf = NULL;
+ obj->efile.obj_buf_sz = 0;
+}
+
+static int bpf_object__elf_init(struct bpf_object *obj)
+{
+ int err = 0;
+ GElf_Ehdr *ep;
+
+ if (obj_elf_valid(obj)) {
+ pr_warning("elf init: internal error\n");
+ return -EEXIST;
+ }
+
+ if (obj->efile.obj_buf_sz > 0) {
+ /*
+ * obj_buf should have been validated by
+ * bpf_object__open_buffer().
+ */
+ obj->efile.elf = elf_memory(obj->efile.obj_buf,
+ obj->efile.obj_buf_sz);
+ } else {
+ obj->efile.fd = open(obj->path, O_RDONLY);
+ if (obj->efile.fd < 0) {
+ pr_warning("failed to open %s: %s\n", obj->path,
+ strerror(errno));
+ return -errno;
+ }
+
+ obj->efile.elf = elf_begin(obj->efile.fd,
+ LIBBPF_ELF_C_READ_MMAP,
+ NULL);
+ }
+
+ if (!obj->efile.elf) {
+ pr_warning("failed to open %s as ELF file\n",
+ obj->path);
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
+ pr_warning("failed to get EHDR from %s\n",
+ obj->path);
+ err = -EINVAL;
+ goto errout;
+ }
+ ep = &obj->efile.ehdr;
+
+ if ((ep->e_type != ET_REL) || (ep->e_machine != 0)) {
+ pr_warning("%s is not an eBPF object file\n",
+ obj->path);
+ err = -EINVAL;
+ goto errout;
+ }
+
+ return 0;
+errout:
+ bpf_object__elf_finish(obj);
+ return err;
+}
+
+static int
+bpf_object__check_endianness(struct bpf_object *obj)
+{
+ static unsigned int const endian = 1;
+
+ switch (obj->efile.ehdr.e_ident[EI_DATA]) {
+ case ELFDATA2LSB:
+ /* We are big endian, BPF obj is little endian. */
+ if (*(unsigned char const *)&endian != 1)
+ goto mismatch;
+ break;
+
+ case ELFDATA2MSB:
+ /* We are little endian, BPF obj is big endian. */
+ if (*(unsigned char const *)&endian != 0)
+ goto mismatch;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+
+mismatch:
+ pr_warning("Error: endianness mismatch.\n");
+ return -EINVAL;
+}
+
+static int
+bpf_object__init_license(struct bpf_object *obj,
+ void *data, size_t size)
+{
+ memcpy(obj->license, data,
+ min(size, sizeof(obj->license) - 1));
+ pr_debug("license of %s is %s\n", obj->path, obj->license);
+ return 0;
+}
+
+static int
+bpf_object__init_kversion(struct bpf_object *obj,
+ void *data, size_t size)
+{
+ u32 kver;
+
+ if (size != sizeof(kver)) {
+ pr_warning("invalid kver section in %s\n", obj->path);
+ return -EINVAL;
+ }
+ memcpy(&kver, data, sizeof(kver));
+ obj->kern_version = kver;
+ pr_debug("kernel version of %s is %x\n", obj->path,
+ obj->kern_version);
+ return 0;
+}
+
+static int
+bpf_object__init_maps(struct bpf_object *obj, void *data,
+ size_t size)
+{
+ if (size == 0) {
+ pr_debug("%s doesn't need map definition\n",
+ obj->path);
+ return 0;
+ }
+
+ obj->maps_buf = malloc(size);
+ if (!obj->maps_buf) {
+ pr_warning("malloc maps failed: %s\n", obj->path);
+ return -ENOMEM;
+ }
+
+ obj->maps_buf_sz = size;
+ memcpy(obj->maps_buf, data, size);
+ pr_debug("maps in %s: %ld bytes\n", obj->path, (long)size);
+ return 0;
+}
+
+static int bpf_object__elf_collect(struct bpf_object *obj)
+{
+ Elf *elf = obj->efile.elf;
+ GElf_Ehdr *ep = &obj->efile.ehdr;
+ Elf_Scn *scn = NULL;
+ int idx = 0, err = 0;
+
+ /* Elf is corrupted/truncated, avoid calling elf_strptr. */
+ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
+ pr_warning("failed to get e_shstrndx from %s\n",
+ obj->path);
+ return -EINVAL;
+ }
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ char *name;
+ GElf_Shdr sh;
+ Elf_Data *data;
+
+ idx++;
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_warning("failed to get section header from %s\n",
+ obj->path);
+ err = -EINVAL;
+ goto out;
+ }
+
+ name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
+ if (!name) {
+ pr_warning("failed to get section name from %s\n",
+ obj->path);
+ err = -EINVAL;
+ goto out;
+ }
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warning("failed to get section data from %s(%s)\n",
+ name, obj->path);
+ err = -EINVAL;
+ goto out;
+ }
+ pr_debug("section %s, size %ld, link %d, flags %lx, type=%d\n",
+ name, (unsigned long)data->d_size,
+ (int)sh.sh_link, (unsigned long)sh.sh_flags,
+ (int)sh.sh_type);
+
+ if (strcmp(name, "license") == 0)
+ err = bpf_object__init_license(obj,
+ data->d_buf,
+ data->d_size);
+ else if (strcmp(name, "version") == 0)
+ err = bpf_object__init_kversion(obj,
+ data->d_buf,
+ data->d_size);
+ else if (strcmp(name, "maps") == 0)
+ err = bpf_object__init_maps(obj, data->d_buf,
+ data->d_size);
+ else if (sh.sh_type == SHT_SYMTAB) {
+ if (obj->efile.symbols) {
+ pr_warning("bpf: multiple SYMTAB in %s\n",
+ obj->path);
+ err = -EEXIST;
+ } else
+ obj->efile.symbols = data;
+ } else if ((sh.sh_type == SHT_PROGBITS) &&
+ (sh.sh_flags & SHF_EXECINSTR) &&
+ (data->d_size > 0)) {
+ err = bpf_object__add_program(obj, data->d_buf,
+ data->d_size, name, idx);
+ if (err) {
+ char errmsg[128];
+ strerror_r(-err, errmsg, sizeof(errmsg));
+ pr_warning("failed to alloc program %s (%s): %s",
+ name, obj->path, errmsg);
+ }
+ } else if (sh.sh_type == SHT_REL) {
+ void *reloc = obj->efile.reloc;
+ int nr_reloc = obj->efile.nr_reloc + 1;
+
+ reloc = realloc(reloc,
+ sizeof(*obj->efile.reloc) * nr_reloc);
+ if (!reloc) {
+ pr_warning("realloc failed\n");
+ err = -ENOMEM;
+ } else {
+ int n = nr_reloc - 1;
+
+ obj->efile.reloc = reloc;
+ obj->efile.nr_reloc = nr_reloc;
+
+ obj->efile.reloc[n].shdr = sh;
+ obj->efile.reloc[n].data = data;
+ }
+ }
+ if (err)
+ goto out;
+ }
+out:
+ return err;
+}
+
+static struct bpf_program *
+bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
+{
+ struct bpf_program *prog;
+ size_t i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ if (prog->idx == idx)
+ return prog;
+ }
+ return NULL;
+}
+
+static int
+bpf_program__collect_reloc(struct bpf_program *prog,
+ size_t nr_maps, GElf_Shdr *shdr,
+ Elf_Data *data, Elf_Data *symbols)
+{
+ int i, nrels;
+
+ pr_debug("collecting relocating info for: '%s'\n",
+ prog->section_name);
+ nrels = shdr->sh_size / shdr->sh_entsize;
+
+ prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
+ if (!prog->reloc_desc) {
+ pr_warning("failed to alloc memory in relocation\n");
+ return -ENOMEM;
+ }
+ prog->nr_reloc = nrels;
+
+ for (i = 0; i < nrels; i++) {
+ GElf_Sym sym;
+ GElf_Rel rel;
+ unsigned int insn_idx;
+ struct bpf_insn *insns = prog->insns;
+ size_t map_idx;
+
+ if (!gelf_getrel(data, i, &rel)) {
+ pr_warning("relocation: failed to get %d reloc\n", i);
+ return -EINVAL;
+ }
+
+ insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+ pr_debug("relocation: insn_idx=%u\n", insn_idx);
+
+ if (!gelf_getsym(symbols,
+ GELF_R_SYM(rel.r_info),
+ &sym)) {
+ pr_warning("relocation: symbol %"PRIx64" not found\n",
+ GELF_R_SYM(rel.r_info));
+ return -EINVAL;
+ }
+
+ if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+ pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
+ insn_idx, insns[insn_idx].code);
+ return -EINVAL;
+ }
+
+ map_idx = sym.st_value / sizeof(struct bpf_map_def);
+ if (map_idx >= nr_maps) {
+ pr_warning("bpf relocation: map_idx %d large than %d\n",
+ (int)map_idx, (int)nr_maps - 1);
+ return -EINVAL;
+ }
+
+ prog->reloc_desc[i].insn_idx = insn_idx;
+ prog->reloc_desc[i].map_idx = map_idx;
+ }
+ return 0;
+}
+
+static int
+bpf_object__create_maps(struct bpf_object *obj)
+{
+ unsigned int i;
+ size_t nr_maps;
+ int *pfd;
+
+ nr_maps = obj->maps_buf_sz / sizeof(struct bpf_map_def);
+ if (!obj->maps_buf || !nr_maps) {
+ pr_debug("don't need create maps for %s\n",
+ obj->path);
+ return 0;
+ }
+
+ obj->map_fds = malloc(sizeof(int) * nr_maps);
+ if (!obj->map_fds) {
+ pr_warning("realloc perf_bpf_map_fds failed\n");
+ return -ENOMEM;
+ }
+ obj->nr_map_fds = nr_maps;
+
+ /* fill all fd with -1 */
+ memset(obj->map_fds, -1, sizeof(int) * nr_maps);
+
+ pfd = obj->map_fds;
+ for (i = 0; i < nr_maps; i++) {
+ struct bpf_map_def def;
+
+ def = *(struct bpf_map_def *)(obj->maps_buf +
+ i * sizeof(struct bpf_map_def));
+
+ *pfd = bpf_create_map(def.type,
+ def.key_size,
+ def.value_size,
+ def.max_entries);
+ if (*pfd < 0) {
+ size_t j;
+ int err = *pfd;
+
+ pr_warning("failed to create map: %s\n",
+ strerror(errno));
+ for (j = 0; j < i; j++)
+ zclose(obj->map_fds[j]);
+ obj->nr_map_fds = 0;
+ zfree(&obj->map_fds);
+ return err;
+ }
+ pr_debug("create map: fd=%d\n", *pfd);
+ pfd++;
+ }
+
+ zfree(&obj->maps_buf);
+ obj->maps_buf_sz = 0;
+ return 0;
+}
+
+static int
+bpf_program__relocate(struct bpf_program *prog, int *map_fds)
+{
+ int i;
+
+ if (!prog || !prog->reloc_desc)
+ return 0;
+
+ for (i = 0; i < prog->nr_reloc; i++) {
+ int insn_idx, map_idx;
+ struct bpf_insn *insns = prog->insns;
+
+ insn_idx = prog->reloc_desc[i].insn_idx;
+ map_idx = prog->reloc_desc[i].map_idx;
+
+ if (insn_idx >= (int)prog->insns_cnt) {
+ pr_warning("relocation out of range: '%s'\n",
+ prog->section_name);
+ return -ERANGE;
+ }
+ insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+ insns[insn_idx].imm = map_fds[map_idx];
+ }
+
+ zfree(&prog->reloc_desc);
+ prog->nr_reloc = 0;
+ return 0;
+}
+
+
+static int
+bpf_object__relocate(struct bpf_object *obj)
+{
+ struct bpf_program *prog;
+ size_t i;
+ int err;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+
+ err = bpf_program__relocate(prog, obj->map_fds);
+ if (err) {
+ pr_warning("failed to relocate '%s'\n",
+ prog->section_name);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int bpf_object__collect_reloc(struct bpf_object *obj)
+{
+ int i, err;
+
+ if (!obj_elf_valid(obj)) {
+ pr_warning("Internal error: elf object is closed\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < obj->efile.nr_reloc; i++) {
+ GElf_Shdr *shdr = &obj->efile.reloc[i].shdr;
+ Elf_Data *data = obj->efile.reloc[i].data;
+ int idx = shdr->sh_info;
+ struct bpf_program *prog;
+ size_t nr_maps = obj->maps_buf_sz /
+ sizeof(struct bpf_map_def);
+
+ if (shdr->sh_type != SHT_REL) {
+ pr_warning("internal error at %d\n", __LINE__);
+ return -EINVAL;
+ }
+
+ prog = bpf_object__find_prog_by_idx(obj, idx);
+ if (!prog) {
+ pr_warning("relocation failed: no %d section\n",
+ idx);
+ return -ENOENT;
+ }
+
+ err = bpf_program__collect_reloc(prog, nr_maps,
+ shdr, data,
+ obj->efile.symbols);
+ if (err)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
+load_program(struct bpf_insn *insns, int insns_cnt,
+ char *license, u32 kern_version, int *pfd)
+{
+ int ret;
+ char *log_buf;
+
+ if (!insns || !insns_cnt)
+ return -EINVAL;
+
+ log_buf = malloc(BPF_LOG_BUF_SIZE);
+ if (!log_buf)
+ pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
+
+ ret = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
+ insns_cnt, license, kern_version,
+ log_buf, BPF_LOG_BUF_SIZE);
+
+ if (ret >= 0) {
+ *pfd = ret;
+ ret = 0;
+ goto out;
+ }
+
+ ret = -EINVAL;
+ pr_warning("load bpf program failed: %s\n", strerror(errno));
+
+ if (log_buf) {
+ pr_warning("-- BEGIN DUMP LOG ---\n");
+ pr_warning("\n%s\n", log_buf);
+ pr_warning("-- END LOG --\n");
+ }
+
+out:
+ free(log_buf);
+ return ret;
+}
+
+static int
+bpf_program__load(struct bpf_program *prog,
+ char *license, u32 kern_version)
+{
+ int err, fd;
+
+ err = load_program(prog->insns, prog->insns_cnt,
+ license, kern_version, &fd);
+ if (!err)
+ prog->fd = fd;
+
+ if (err)
+ pr_warning("failed to load program '%s'\n",
+ prog->section_name);
+ zfree(&prog->insns);
+ prog->insns_cnt = 0;
+ return err;
+}
+
+static int
+bpf_object__load_progs(struct bpf_object *obj)
+{
+ size_t i;
+ int err;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ err = bpf_program__load(&obj->programs[i],
+ obj->license,
+ obj->kern_version);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int bpf_object__validate(struct bpf_object *obj)
+{
+ if (obj->kern_version == 0) {
+ pr_warning("%s doesn't provide kernel version\n",
+ obj->path);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct bpf_object *
+__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
+{
+ struct bpf_object *obj;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warning("failed to init libelf for %s\n", path);
+ return NULL;
+ }
+
+ obj = bpf_object__new(path, obj_buf, obj_buf_sz);
+ if (!obj)
+ return NULL;
+
+ if (bpf_object__elf_init(obj))
+ goto out;
+ if (bpf_object__check_endianness(obj))
+ goto out;
+ if (bpf_object__elf_collect(obj))
+ goto out;
+ if (bpf_object__collect_reloc(obj))
+ goto out;
+ if (bpf_object__validate(obj))
+ goto out;
+
+ bpf_object__elf_finish(obj);
+ return obj;
+out:
+ bpf_object__close(obj);
+ return NULL;
+}
+
+struct bpf_object *bpf_object__open(const char *path)
+{
+ /* param validation */
+ if (!path)
+ return NULL;
+
+ pr_debug("loading %s\n", path);
+
+ return __bpf_object__open(path, NULL, 0);
+}
+
+struct bpf_object *bpf_object__open_buffer(void *obj_buf,
+ size_t obj_buf_sz,
+ const char *name)
+{
+ char tmp_name[64];
+
+ /* param validation */
+ if (!obj_buf || obj_buf_sz <= 0)
+ return NULL;
+
+ if (!name) {
+ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
+ (unsigned long)obj_buf,
+ (unsigned long)obj_buf_sz);
+ tmp_name[sizeof(tmp_name) - 1] = '\0';
+ name = tmp_name;
+ }
+ pr_debug("loading object '%s' from buffer\n",
+ name);
+
+ return __bpf_object__open(name, obj_buf, obj_buf_sz);
+}
+
+int bpf_object__unload(struct bpf_object *obj)
+{
+ size_t i;
+
+ if (!obj)
+ return -EINVAL;
+
+ for (i = 0; i < obj->nr_map_fds; i++)
+ zclose(obj->map_fds[i]);
+ zfree(&obj->map_fds);
+ obj->nr_map_fds = 0;
+
+ for (i = 0; i < obj->nr_programs; i++)
+ bpf_program__unload(&obj->programs[i]);
+
+ return 0;
+}
+
+int bpf_object__load(struct bpf_object *obj)
+{
+ if (!obj)
+ return -EINVAL;
+
+ if (obj->loaded) {
+ pr_warning("object should not be loaded twice\n");
+ return -EINVAL;
+ }
+
+ obj->loaded = true;
+ if (bpf_object__create_maps(obj))
+ goto out;
+ if (bpf_object__relocate(obj))
+ goto out;
+ if (bpf_object__load_progs(obj))
+ goto out;
+
+ return 0;
+out:
+ bpf_object__unload(obj);
+ pr_warning("failed to load object '%s'\n", obj->path);
+ return -EINVAL;
+}
+
+void bpf_object__close(struct bpf_object *obj)
+{
+ size_t i;
+
+ if (!obj)
+ return;
+
+ bpf_object__elf_finish(obj);
+ bpf_object__unload(obj);
+
+ zfree(&obj->maps_buf);
+
+ if (obj->programs && obj->nr_programs) {
+ for (i = 0; i < obj->nr_programs; i++)
+ bpf_program__exit(&obj->programs[i]);
+ }
+ zfree(&obj->programs);
+
+ list_del(&obj->list);
+ free(obj);
+}
+
+struct bpf_object *
+bpf_object__next(struct bpf_object *prev)
+{
+ struct bpf_object *next;
+
+ if (!prev)
+ next = list_first_entry(&bpf_objects_list,
+ struct bpf_object,
+ list);
+ else
+ next = list_next_entry(prev, list);
+
+ /* Empty list is noticed here so don't need checking on entry. */
+ if (&next->list == &bpf_objects_list)
+ return NULL;
+
+ return next;
+}
+
+const char *
+bpf_object__get_name(struct bpf_object *obj)
+{
+ if (!obj)
+ return NULL;
+ return obj->path;
+}
+
+struct bpf_program *
+bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+{
+ size_t idx;
+
+ if (!obj->programs)
+ return NULL;
+ /* First handler */
+ if (prev == NULL)
+ return &obj->programs[0];
+
+ if (prev->obj != obj) {
+ pr_warning("error: program handler doesn't match object\n");
+ return NULL;
+ }
+
+ idx = (prev - obj->programs) + 1;
+ if (idx >= obj->nr_programs)
+ return NULL;
+ return &obj->programs[idx];
+}
+
+int bpf_program__set_private(struct bpf_program *prog,
+ void *priv,
+ bpf_program_clear_priv_t clear_priv)
+{
+ if (prog->priv && prog->clear_priv)
+ prog->clear_priv(prog, prog->priv);
+
+ prog->priv = priv;
+ prog->clear_priv = clear_priv;
+ return 0;
+}
+
+int bpf_program__get_private(struct bpf_program *prog, void **ppriv)
+{
+ *ppriv = prog->priv;
+ return 0;
+}
+
+const char *bpf_program__title(struct bpf_program *prog, bool dup)
+{
+ const char *title;
+
+ title = prog->section_name;
+ if (dup) {
+ title = strdup(title);
+ if (!title) {
+ pr_warning("failed to strdup program title\n");
+ return NULL;
+ }
+ }
+
+ return title;
+}
+
+int bpf_program__fd(struct bpf_program *prog)
+{
+ return prog->fd;
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
new file mode 100644
index 0000000..f16170c
--- /dev/null
+++ b/tools/lib/bpf/libbpf.h
@@ -0,0 +1,83 @@
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+#ifndef __BPF_LIBBPF_H
+#define __BPF_LIBBPF_H
+
+#include <stdio.h>
+#include <stdbool.h>
+
+/*
+ * In include/linux/compiler-gcc.h, __printf is defined. However
+ * it should be better if libbpf.h doesn't depend on Linux header file.
+ * So instead of __printf, here we use gcc attribute directly.
+ */
+typedef int (*libbpf_print_fn_t)(const char *, ...)
+ __attribute__((format(printf, 1, 2)));
+
+void libbpf_set_print(libbpf_print_fn_t warn,
+ libbpf_print_fn_t info,
+ libbpf_print_fn_t debug);
+
+/* Hide internal to user */
+struct bpf_object;
+
+struct bpf_object *bpf_object__open(const char *path);
+struct bpf_object *bpf_object__open_buffer(void *obj_buf,
+ size_t obj_buf_sz,
+ const char *name);
+void bpf_object__close(struct bpf_object *object);
+
+/* Load/unload object into/from kernel */
+int bpf_object__load(struct bpf_object *obj);
+int bpf_object__unload(struct bpf_object *obj);
+const char *bpf_object__get_name(struct bpf_object *obj);
+
+struct bpf_object *bpf_object__next(struct bpf_object *prev);
+#define bpf_object__for_each_safe(pos, tmp) \
+ for ((pos) = bpf_object__next(NULL), \
+ (tmp) = bpf_object__next(pos); \
+ (pos) != NULL; \
+ (pos) = (tmp), (tmp) = bpf_object__next(tmp))
+
+/* Accessors of bpf_program. */
+struct bpf_program;
+struct bpf_program *bpf_program__next(struct bpf_program *prog,
+ struct bpf_object *obj);
+
+#define bpf_object__for_each_program(pos, obj) \
+ for ((pos) = bpf_program__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_program__next((pos), (obj)))
+
+typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
+ void *);
+
+int bpf_program__set_private(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv);
+
+int bpf_program__get_private(struct bpf_program *prog,
+ void **ppriv);
+
+const char *bpf_program__title(struct bpf_program *prog, bool dup);
+
+int bpf_program__fd(struct bpf_program *prog);
+
+/*
+ * We don't need __attribute__((packed)) now since it is
+ * unnecessary for 'bpf_map_def' because they are all aligned.
+ * In addition, using it will trigger -Wpacked warning message,
+ * and will be treated as an error due to -Werror.
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+};
+
+#endif
diff --git a/tools/lib/hweight.c b/tools/lib/hweight.c
new file mode 100644
index 0000000..0b859b8
--- /dev/null
+++ b/tools/lib/hweight.c
@@ -0,0 +1,62 @@
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+/**
+ * hweightN - returns the hamming weight of a N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+unsigned int __sw_hweight32(unsigned int w)
+{
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
+ w -= (w >> 1) & 0x55555555;
+ w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
+ w = (w + (w >> 4)) & 0x0f0f0f0f;
+ return (w * 0x01010101) >> 24;
+#else
+ unsigned int res = w - ((w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res + (res >> 4)) & 0x0F0F0F0F;
+ res = res + (res >> 8);
+ return (res + (res >> 16)) & 0x000000FF;
+#endif
+}
+
+unsigned int __sw_hweight16(unsigned int w)
+{
+ unsigned int res = w - ((w >> 1) & 0x5555);
+ res = (res & 0x3333) + ((res >> 2) & 0x3333);
+ res = (res + (res >> 4)) & 0x0F0F;
+ return (res + (res >> 8)) & 0x00FF;
+}
+
+unsigned int __sw_hweight8(unsigned int w)
+{
+ unsigned int res = w - ((w >> 1) & 0x55);
+ res = (res & 0x33) + ((res >> 2) & 0x33);
+ return (res + (res >> 4)) & 0x0F;
+}
+
+unsigned long __sw_hweight64(__u64 w)
+{
+#if BITS_PER_LONG == 32
+ return __sw_hweight32((unsigned int)(w >> 32)) +
+ __sw_hweight32((unsigned int)w);
+#elif BITS_PER_LONG == 64
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
+ w -= (w >> 1) & 0x5555555555555555ul;
+ w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
+ w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
+ return (w * 0x0101010101010101ul) >> 56;
+#else
+ __u64 res = w - ((w >> 1) & 0x5555555555555555ul);
+ res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+ res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
+ res = res + (res >> 8);
+ res = res + (res >> 16);
+ return (res + (res >> 32)) & 0x00000000000000FFul;
+#endif
+#endif
+}
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
index 0c356fb..18ffccf 100644
--- a/tools/lib/lockdep/Makefile
+++ b/tools/lib/lockdep/Makefile
@@ -14,9 +14,10 @@ define allow-override
$(eval $(1) = $(2)))
endef
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
INSTALL = install
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
index 0b0112c..21cdf86 100644
--- a/tools/lib/lockdep/preload.c
+++ b/tools/lib/lockdep/preload.c
@@ -5,7 +5,7 @@
#include <stdlib.h>
#include <sysexits.h>
#include "include/liblockdep/mutex.h"
-#include "../../../include/linux/rbtree.h"
+#include "../../include/linux/rbtree.h"
/**
* struct lock_lookup - liblockdep's view of a single unique lock
diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h
index a11e3c3..276c7a8 100644
--- a/tools/lib/lockdep/uinclude/linux/kernel.h
+++ b/tools/lib/lockdep/uinclude/linux/kernel.h
@@ -23,11 +23,14 @@
#define WARN_ON(x) (x)
#define WARN_ON_ONCE(x) (x)
#define likely(x) (x)
-#define WARN(x, y, z) (x)
+#define WARN(x, y...) (x)
#define uninitialized_var(x) x
#define __init
#define noinline
#define list_add_tail_rcu list_add_tail
+#define list_for_each_entry_rcu list_for_each_entry
+#define barrier()
+#define synchronize_sched()
#ifndef CALLER_ADDR0
#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
diff --git a/tools/lib/lockdep/uinclude/linux/rbtree.h b/tools/lib/lockdep/uinclude/linux/rbtree.h
deleted file mode 100644
index 965901d..0000000
--- a/tools/lib/lockdep/uinclude/linux/rbtree.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../include/linux/rbtree.h"
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
new file mode 100644
index 0000000..17c2b59
--- /dev/null
+++ b/tools/lib/rbtree.c
@@ -0,0 +1,548 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/lib/rbtree.c
+*/
+
+#include <linux/rbtree_augmented.h>
+
+/*
+ * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
+ *
+ * 1) A node is either red or black
+ * 2) The root is black
+ * 3) All leaves (NULL) are black
+ * 4) Both children of every red node are black
+ * 5) Every simple path from root to leaves contains the same number
+ * of black nodes.
+ *
+ * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
+ * consecutive red nodes in a path and every red node is therefore followed by
+ * a black. So if B is the number of black nodes on every simple path (as per
+ * 5), then the longest possible path due to 4 is 2B.
+ *
+ * We shall indicate color with case, where black nodes are uppercase and red
+ * nodes will be lowercase. Unknown color nodes shall be drawn as red within
+ * parentheses and have some accompanying text comment.
+ */
+
+static inline void rb_set_black(struct rb_node *rb)
+{
+ rb->__rb_parent_color |= RB_BLACK;
+}
+
+static inline struct rb_node *rb_red_parent(struct rb_node *red)
+{
+ return (struct rb_node *)red->__rb_parent_color;
+}
+
+/*
+ * Helper function for rotations:
+ * - old's parent and color get assigned to new
+ * - old gets assigned new as a parent and 'color' as a color.
+ */
+static inline void
+__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
+ struct rb_root *root, int color)
+{
+ struct rb_node *parent = rb_parent(old);
+ new->__rb_parent_color = old->__rb_parent_color;
+ rb_set_parent_color(old, new, color);
+ __rb_change_child(old, new, parent, root);
+}
+
+static __always_inline void
+__rb_insert(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
+
+ while (true) {
+ /*
+ * Loop invariant: node is red
+ *
+ * If there is a black parent, we are done.
+ * Otherwise, take some corrective action as we don't
+ * want a red root or two consecutive red nodes.
+ */
+ if (!parent) {
+ rb_set_parent_color(node, NULL, RB_BLACK);
+ break;
+ } else if (rb_is_black(parent))
+ break;
+
+ gparent = rb_red_parent(parent);
+
+ tmp = gparent->rb_right;
+ if (parent != tmp) { /* parent == gparent->rb_left */
+ if (tmp && rb_is_red(tmp)) {
+ /*
+ * Case 1 - color flips
+ *
+ * G g
+ * / \ / \
+ * p u --> P U
+ * / /
+ * n n
+ *
+ * However, since g's parent might be red, and
+ * 4) does not allow this, we need to recurse
+ * at g.
+ */
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ rb_set_parent_color(parent, gparent, RB_BLACK);
+ node = gparent;
+ parent = rb_parent(node);
+ rb_set_parent_color(node, parent, RB_RED);
+ continue;
+ }
+
+ tmp = parent->rb_right;
+ if (node == tmp) {
+ /*
+ * Case 2 - left rotate at parent
+ *
+ * G G
+ * / \ / \
+ * p U --> n U
+ * \ /
+ * n p
+ *
+ * This still leaves us in violation of 4), the
+ * continuation into Case 3 will fix that.
+ */
+ parent->rb_right = tmp = node->rb_left;
+ node->rb_left = parent;
+ if (tmp)
+ rb_set_parent_color(tmp, parent,
+ RB_BLACK);
+ rb_set_parent_color(parent, node, RB_RED);
+ augment_rotate(parent, node);
+ parent = node;
+ tmp = node->rb_right;
+ }
+
+ /*
+ * Case 3 - right rotate at gparent
+ *
+ * G P
+ * / \ / \
+ * p U --> n g
+ * / \
+ * n U
+ */
+ gparent->rb_left = tmp; /* == parent->rb_right */
+ parent->rb_right = gparent;
+ if (tmp)
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ __rb_rotate_set_parents(gparent, parent, root, RB_RED);
+ augment_rotate(gparent, parent);
+ break;
+ } else {
+ tmp = gparent->rb_left;
+ if (tmp && rb_is_red(tmp)) {
+ /* Case 1 - color flips */
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ rb_set_parent_color(parent, gparent, RB_BLACK);
+ node = gparent;
+ parent = rb_parent(node);
+ rb_set_parent_color(node, parent, RB_RED);
+ continue;
+ }
+
+ tmp = parent->rb_left;
+ if (node == tmp) {
+ /* Case 2 - right rotate at parent */
+ parent->rb_left = tmp = node->rb_right;
+ node->rb_right = parent;
+ if (tmp)
+ rb_set_parent_color(tmp, parent,
+ RB_BLACK);
+ rb_set_parent_color(parent, node, RB_RED);
+ augment_rotate(parent, node);
+ parent = node;
+ tmp = node->rb_left;
+ }
+
+ /* Case 3 - left rotate at gparent */
+ gparent->rb_right = tmp; /* == parent->rb_left */
+ parent->rb_left = gparent;
+ if (tmp)
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ __rb_rotate_set_parents(gparent, parent, root, RB_RED);
+ augment_rotate(gparent, parent);
+ break;
+ }
+ }
+}
+
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
+
+ while (true) {
+ /*
+ * Loop invariants:
+ * - node is black (or NULL on first iteration)
+ * - node is not the root (parent is not NULL)
+ * - All leaf paths going through parent and node have a
+ * black node count that is 1 lower than other leaf paths.
+ */
+ sibling = parent->rb_right;
+ if (node != sibling) { /* node == parent->rb_left */
+ if (rb_is_red(sibling)) {
+ /*
+ * Case 1 - left rotate at parent
+ *
+ * P S
+ * / \ / \
+ * N s --> p Sr
+ * / \ / \
+ * Sl Sr N Sl
+ */
+ parent->rb_right = tmp1 = sibling->rb_left;
+ sibling->rb_left = parent;
+ rb_set_parent_color(tmp1, parent, RB_BLACK);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_RED);
+ augment_rotate(parent, sibling);
+ sibling = tmp1;
+ }
+ tmp1 = sibling->rb_right;
+ if (!tmp1 || rb_is_black(tmp1)) {
+ tmp2 = sibling->rb_left;
+ if (!tmp2 || rb_is_black(tmp2)) {
+ /*
+ * Case 2 - sibling color flip
+ * (p could be either color here)
+ *
+ * (p) (p)
+ * / \ / \
+ * N S --> N s
+ * / \ / \
+ * Sl Sr Sl Sr
+ *
+ * This leaves us violating 5) which
+ * can be fixed by flipping p to black
+ * if it was red, or by recursing at p.
+ * p is red when coming from Case 1.
+ */
+ rb_set_parent_color(sibling, parent,
+ RB_RED);
+ if (rb_is_red(parent))
+ rb_set_black(parent);
+ else {
+ node = parent;
+ parent = rb_parent(node);
+ if (parent)
+ continue;
+ }
+ break;
+ }
+ /*
+ * Case 3 - right rotate at sibling
+ * (p could be either color here)
+ *
+ * (p) (p)
+ * / \ / \
+ * N S --> N Sl
+ * / \ \
+ * sl Sr s
+ * \
+ * Sr
+ */
+ sibling->rb_left = tmp1 = tmp2->rb_right;
+ tmp2->rb_right = sibling;
+ parent->rb_right = tmp2;
+ if (tmp1)
+ rb_set_parent_color(tmp1, sibling,
+ RB_BLACK);
+ augment_rotate(sibling, tmp2);
+ tmp1 = sibling;
+ sibling = tmp2;
+ }
+ /*
+ * Case 4 - left rotate at parent + color flips
+ * (p and sl could be either color here.
+ * After rotation, p becomes black, s acquires
+ * p's color, and sl keeps its color)
+ *
+ * (p) (s)
+ * / \ / \
+ * N S --> P Sr
+ * / \ / \
+ * (sl) sr N (sl)
+ */
+ parent->rb_right = tmp2 = sibling->rb_left;
+ sibling->rb_left = parent;
+ rb_set_parent_color(tmp1, sibling, RB_BLACK);
+ if (tmp2)
+ rb_set_parent(tmp2, parent);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_BLACK);
+ augment_rotate(parent, sibling);
+ break;
+ } else {
+ sibling = parent->rb_left;
+ if (rb_is_red(sibling)) {
+ /* Case 1 - right rotate at parent */
+ parent->rb_left = tmp1 = sibling->rb_right;
+ sibling->rb_right = parent;
+ rb_set_parent_color(tmp1, parent, RB_BLACK);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_RED);
+ augment_rotate(parent, sibling);
+ sibling = tmp1;
+ }
+ tmp1 = sibling->rb_left;
+ if (!tmp1 || rb_is_black(tmp1)) {
+ tmp2 = sibling->rb_right;
+ if (!tmp2 || rb_is_black(tmp2)) {
+ /* Case 2 - sibling color flip */
+ rb_set_parent_color(sibling, parent,
+ RB_RED);
+ if (rb_is_red(parent))
+ rb_set_black(parent);
+ else {
+ node = parent;
+ parent = rb_parent(node);
+ if (parent)
+ continue;
+ }
+ break;
+ }
+ /* Case 3 - right rotate at sibling */
+ sibling->rb_right = tmp1 = tmp2->rb_left;
+ tmp2->rb_left = sibling;
+ parent->rb_left = tmp2;
+ if (tmp1)
+ rb_set_parent_color(tmp1, sibling,
+ RB_BLACK);
+ augment_rotate(sibling, tmp2);
+ tmp1 = sibling;
+ sibling = tmp2;
+ }
+ /* Case 4 - left rotate at parent + color flips */
+ parent->rb_left = tmp2 = sibling->rb_right;
+ sibling->rb_right = parent;
+ rb_set_parent_color(tmp1, sibling, RB_BLACK);
+ if (tmp2)
+ rb_set_parent(tmp2, parent);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_BLACK);
+ augment_rotate(parent, sibling);
+ break;
+ }
+ }
+}
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ ____rb_erase_color(parent, root, augment_rotate);
+}
+
+/*
+ * Non-augmented rbtree manipulation functions.
+ *
+ * We use dummy augmented callbacks here, and have the compiler optimize them
+ * out of the rb_insert_color() and rb_erase() function definitions.
+ */
+
+static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
+static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
+static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
+
+static const struct rb_augment_callbacks dummy_callbacks = {
+ dummy_propagate, dummy_copy, dummy_rotate
+};
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+ __rb_insert(node, root, dummy_rotate);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *rebalance;
+ rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+ if (rebalance)
+ ____rb_erase_color(rebalance, root, dummy_rotate);
+}
+
+/*
+ * Augmented rbtree manipulation functions.
+ *
+ * This instantiates the same __always_inline functions as in the non-augmented
+ * case, but this time with user-defined callbacks.
+ */
+
+void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ __rb_insert(node, root, augment_rotate);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_left)
+ n = n->rb_left;
+ return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_right)
+ n = n->rb_right;
+ return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+ struct rb_node *parent;
+
+ if (RB_EMPTY_NODE(node))
+ return NULL;
+
+ /*
+ * If we have a right-hand child, go down and then left as far
+ * as we can.
+ */
+ if (node->rb_right) {
+ node = node->rb_right;
+ while (node->rb_left)
+ node=node->rb_left;
+ return (struct rb_node *)node;
+ }
+
+ /*
+ * No right-hand children. Everything down and left is smaller than us,
+ * so any 'next' node must be in the general direction of our parent.
+ * Go up the tree; any time the ancestor is a right-hand child of its
+ * parent, keep going up. First time it's a left-hand child of its
+ * parent, said parent is our 'next' node.
+ */
+ while ((parent = rb_parent(node)) && node == parent->rb_right)
+ node = parent;
+
+ return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+ struct rb_node *parent;
+
+ if (RB_EMPTY_NODE(node))
+ return NULL;
+
+ /*
+ * If we have a left-hand child, go down and then right as far
+ * as we can.
+ */
+ if (node->rb_left) {
+ node = node->rb_left;
+ while (node->rb_right)
+ node=node->rb_right;
+ return (struct rb_node *)node;
+ }
+
+ /*
+ * No left-hand children. Go up till we find an ancestor which
+ * is a right-hand child of its parent.
+ */
+ while ((parent = rb_parent(node)) && node == parent->rb_left)
+ node = parent;
+
+ return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root)
+{
+ struct rb_node *parent = rb_parent(victim);
+
+ /* Set the surrounding nodes to point to the replacement */
+ __rb_change_child(victim, new, parent, root);
+ if (victim->rb_left)
+ rb_set_parent(victim->rb_left, new);
+ if (victim->rb_right)
+ rb_set_parent(victim->rb_right, new);
+
+ /* Copy the pointers/colour from the victim to the replacement */
+ *new = *victim;
+}
+
+static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
+{
+ for (;;) {
+ if (node->rb_left)
+ node = node->rb_left;
+ else if (node->rb_right)
+ node = node->rb_right;
+ else
+ return (struct rb_node *)node;
+ }
+}
+
+struct rb_node *rb_next_postorder(const struct rb_node *node)
+{
+ const struct rb_node *parent;
+ if (!node)
+ return NULL;
+ parent = rb_parent(node);
+
+ /* If we're sitting on node, we've already seen our children */
+ if (parent && node == parent->rb_left && parent->rb_right) {
+ /* If we are the parent's left node, go to the parent's right
+ * node then all the way down to the left */
+ return rb_left_deepest_node(parent->rb_right);
+ } else
+ /* Otherwise we are the parent's right node, and the parent
+ * should be next */
+ return (struct rb_node *)parent;
+}
+
+struct rb_node *rb_first_postorder(const struct rb_root *root)
+{
+ if (!root->rb_node)
+ return NULL;
+
+ return rb_left_deepest_node(root->rb_node);
+}
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
index 35f56be..3c60335 100644
--- a/tools/lib/traceevent/.gitignore
+++ b/tools/lib/traceevent/.gitignore
@@ -1 +1,2 @@
TRACEEVENT-CFLAGS
+libtraceevent-dynamic-list
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index d410da3..7851df1 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -23,6 +23,7 @@ endef
# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
EXT = -std=gnu99
INSTALL = install
@@ -34,9 +35,15 @@ INSTALL = install
DESTDIR ?=
DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
prefix ?= /usr/local
-bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
+libdir = $(prefix)/$(libdir_relative)
man_dir = $(prefix)/share/man
man_dir_SQ = '$(subst ','\'',$(man_dir))'
@@ -58,7 +65,7 @@ ifeq ($(prefix),$(HOME))
override plugin_dir = $(HOME)/.traceevent/plugins
set_plugin_dir := 0
else
-override plugin_dir = $(prefix)/lib/traceevent/plugins
+override plugin_dir = $(libdir)/traceevent/plugins
endif
endif
@@ -85,11 +92,11 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
#$(info Determined 'srctree' to be $(srctree))
endif
-export prefix bindir src obj
+export prefix libdir src obj
# Shell quotes
-bindir_SQ = $(subst ','\'',$(bindir))
-bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
LIB_FILE = libtraceevent.a libtraceevent.so
@@ -151,8 +158,9 @@ PLUGINS_IN := $(PLUGINS:.so=-in.o)
TE_IN := $(OUTPUT)libtraceevent-in.o
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
-CMD_TARGETS = $(LIB_FILE) $(PLUGINS)
+CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE)
TARGETS = $(CMD_TARGETS)
@@ -169,6 +177,9 @@ $(OUTPUT)libtraceevent.so: $(TE_IN)
$(OUTPUT)libtraceevent.a: $(TE_IN)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+ $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
plugins: $(PLUGINS)
__plugin_obj = $(notdir $@)
@@ -238,9 +249,16 @@ define do_install_plugins
done
endef
+define do_generate_dynamic_list_file
+ (echo '{'; \
+ $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u; \
+ echo '};'; \
+ ) > $2
+endef
+
install_lib: all_cmd install_plugins
$(call QUIET_INSTALL, $(LIB_FILE)) \
- $(call do_install,$(LIB_FILE),$(bindir_SQ))
+ $(call do_install,$(LIB_FILE),$(libdir_SQ))
install_plugins: $(PLUGINS)
$(call QUIET_INSTALL, trace_plugins) \
@@ -250,7 +268,7 @@ install: install_lib
clean:
$(call QUIET_CLEAN, libtraceevent) \
- $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \
+ $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \
$(RM) TRACEEVENT-CFLAGS tags TAGS
PHONY += force plugins
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index e0917c0..4d88593 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -418,7 +418,7 @@ static int func_map_init(struct pevent *pevent)
}
static struct func_map *
-find_func(struct pevent *pevent, unsigned long long addr)
+__find_func(struct pevent *pevent, unsigned long long addr)
{
struct func_map *func;
struct func_map key;
@@ -434,6 +434,71 @@ find_func(struct pevent *pevent, unsigned long long addr)
return func;
}
+struct func_resolver {
+ pevent_func_resolver_t *func;
+ void *priv;
+ struct func_map map;
+};
+
+/**
+ * pevent_set_function_resolver - set an alternative function resolver
+ * @pevent: handle for the pevent
+ * @resolver: function to be used
+ * @priv: resolver function private state.
+ *
+ * Some tools may have already a way to resolve kernel functions, allow them to
+ * keep using it instead of duplicating all the entries inside
+ * pevent->funclist.
+ */
+int pevent_set_function_resolver(struct pevent *pevent,
+ pevent_func_resolver_t *func, void *priv)
+{
+ struct func_resolver *resolver = malloc(sizeof(*resolver));
+
+ if (resolver == NULL)
+ return -1;
+
+ resolver->func = func;
+ resolver->priv = priv;
+
+ free(pevent->func_resolver);
+ pevent->func_resolver = resolver;
+
+ return 0;
+}
+
+/**
+ * pevent_reset_function_resolver - reset alternative function resolver
+ * @pevent: handle for the pevent
+ *
+ * Stop using whatever alternative resolver was set, use the default
+ * one instead.
+ */
+void pevent_reset_function_resolver(struct pevent *pevent)
+{
+ free(pevent->func_resolver);
+ pevent->func_resolver = NULL;
+}
+
+static struct func_map *
+find_func(struct pevent *pevent, unsigned long long addr)
+{
+ struct func_map *map;
+
+ if (!pevent->func_resolver)
+ return __find_func(pevent, addr);
+
+ map = &pevent->func_resolver->map;
+ map->mod = NULL;
+ map->addr = addr;
+ map->func = pevent->func_resolver->func(pevent->func_resolver->priv,
+ &map->addr, &map->mod);
+ if (map->func == NULL)
+ return NULL;
+
+ return map;
+}
+
/**
* pevent_find_function - find a function by a given address
* @pevent: handle for the pevent
@@ -1387,7 +1452,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
do_warning_event(event, "%s: no type found", __func__);
goto fail;
}
- field->name = last_token;
+ field->name = field->alias = last_token;
if (test_type(type, EVENT_OP))
goto fail;
@@ -1469,7 +1534,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
size_dynamic = type_size(field->name);
free_token(field->name);
strcat(field->type, brackets);
- field->name = token;
+ field->name = field->alias = token;
type = read_token(&token);
} else {
char *new_type;
@@ -1680,6 +1745,9 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok)
type = process_arg(event, left, &token);
again:
+ if (type == EVENT_ERROR)
+ goto out_free;
+
/* Handle other operations in the arguments */
if (type == EVENT_OP && strcmp(token, ":") != 0) {
type = process_op(event, left, &token);
@@ -1939,6 +2007,12 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
goto out_warn_free;
type = process_arg_token(event, right, tok, type);
+ if (type == EVENT_ERROR) {
+ free_arg(right);
+ /* token was freed in process_arg_token() via *tok */
+ token = NULL;
+ goto out_free;
+ }
if (right->type == PRINT_OP &&
get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
@@ -3865,7 +3939,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
} else if (el_size == 4) {
trace_seq_printf(s, "%u", *(uint32_t *)num);
} else if (el_size == 8) {
- trace_seq_printf(s, "%lu", *(uint64_t *)num);
+ trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
} else {
trace_seq_printf(s, "BAD SIZE:%d 0x%x",
el_size, *(uint8_t *)num);
@@ -4754,6 +4828,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
case 'z':
case 'Z':
case '0' ... '9':
+ case '-':
goto cont_process;
case 'p':
if (pevent->long_size == 4)
@@ -6444,6 +6519,8 @@ void pevent_ref(struct pevent *pevent)
void pevent_free_format_field(struct format_field *field)
{
free(field->type);
+ if (field->alias != field->name)
+ free(field->alias);
free(field->name);
free(field);
}
@@ -6562,6 +6639,7 @@ void pevent_free(struct pevent *pevent)
free(pevent->trace_clock);
free(pevent->events);
free(pevent->sort_events);
+ free(pevent->func_resolver);
free(pevent);
}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 86a5839..204befb 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -191,6 +191,7 @@ struct format_field {
struct event_format *event;
char *type;
char *name;
+ char *alias;
int offset;
int size;
unsigned int arraylen;
@@ -452,6 +453,10 @@ struct cmdline_list;
struct func_map;
struct func_list;
struct event_handler;
+struct func_resolver;
+
+typedef char *(pevent_func_resolver_t)(void *priv,
+ unsigned long long *addrp, char **modp);
struct pevent {
int ref_count;
@@ -480,6 +485,7 @@ struct pevent {
int cmdline_count;
struct func_map *func_map;
+ struct func_resolver *func_resolver;
struct func_list *funclist;
unsigned int func_count;
@@ -610,6 +616,9 @@ enum trace_flag_type {
TRACE_FLAG_SOFTIRQ = 0x10,
};
+int pevent_set_function_resolver(struct pevent *pevent,
+ pevent_func_resolver_t *func, void *priv);
+void pevent_reset_function_resolver(struct pevent *pevent);
int pevent_register_comm(struct pevent *pevent, const char *comm, int pid);
int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock);
int pevent_register_function(struct pevent *pevent, char *name,
diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c
index 4592d84..ec57d0c 100644
--- a/tools/lib/traceevent/plugin_cfg80211.c
+++ b/tools/lib/traceevent/plugin_cfg80211.c
@@ -4,6 +4,19 @@
#include <endian.h>
#include "event-parse.h"
+/*
+ * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5,
+ * Fedora6.
+ */
+#ifndef le16toh
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+# define le16toh(x) (x)
+# else
+# define le16toh(x) __bswap_16 (x)
+# endif
+#endif
+
+
static unsigned long long
process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
{
diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c
index c5baf9c..2cd3d4c 100644
--- a/tools/net/bpf_jit_disasm.c
+++ b/tools/net/bpf_jit_disasm.c
@@ -22,9 +22,14 @@
#include <string.h>
#include <bfd.h>
#include <dis-asm.h>
+#include <regex.h>
+#include <fcntl.h>
#include <sys/klog.h>
#include <sys/types.h>
-#include <regex.h>
+#include <sys/stat.h>
+
+#define CMD_ACTION_SIZE_BUFFER 10
+#define CMD_ACTION_READ_ALL 3
static void get_exec_path(char *tpath, size_t size)
{
@@ -87,20 +92,66 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
bfd_close(bfdf);
}
-static char *get_klog_buff(int *klen)
+static char *get_klog_buff(unsigned int *klen)
{
- int ret, len = klogctl(10, NULL, 0);
- char *buff = malloc(len);
+ int ret, len;
+ char *buff;
+
+ len = klogctl(CMD_ACTION_SIZE_BUFFER, NULL, 0);
+ buff = malloc(len);
+ if (!buff)
+ return NULL;
+
+ ret = klogctl(CMD_ACTION_READ_ALL, buff, len);
+ if (ret < 0) {
+ free(buff);
+ return NULL;
+ }
- assert(buff && klen);
- ret = klogctl(3, buff, len);
- assert(ret >= 0);
*klen = ret;
+ return buff;
+}
+static char *get_flog_buff(const char *file, unsigned int *klen)
+{
+ int fd, ret, len;
+ struct stat fi;
+ char *buff;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+
+ ret = fstat(fd, &fi);
+ if (ret < 0 || !S_ISREG(fi.st_mode))
+ goto out;
+
+ len = fi.st_size + 1;
+ buff = malloc(len);
+ if (!buff)
+ goto out;
+
+ memset(buff, 0, len);
+ ret = read(fd, buff, len - 1);
+ if (ret <= 0)
+ goto out_free;
+
+ close(fd);
+ *klen = ret;
return buff;
+out_free:
+ free(buff);
+out:
+ close(fd);
+ return NULL;
+}
+
+static char *get_log_buff(const char *file, unsigned int *klen)
+{
+ return file ? get_flog_buff(file, klen) : get_klog_buff(klen);
}
-static void put_klog_buff(char *buff)
+static void put_log_buff(char *buff)
{
free(buff);
}
@@ -123,6 +174,8 @@ static int get_last_jit_image(char *haystack, size_t hlen,
assert(ret == 0);
ptr = haystack;
+ memset(pmatch, 0, sizeof(pmatch));
+
while (1) {
ret = regexec(&regex, ptr, 1, pmatch, 0);
if (ret == 0) {
@@ -136,8 +189,10 @@ static int get_last_jit_image(char *haystack, size_t hlen,
ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so);
ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx",
&flen, &proglen, &pass, &base);
- if (ret != 4)
+ if (ret != 4) {
+ regfree(&regex);
return 0;
+ }
tmp = ptr = haystack + off;
while ((ptr = strtok(tmp, "\n")) != NULL && ulen < ilen) {
@@ -167,31 +222,49 @@ static int get_last_jit_image(char *haystack, size_t hlen,
return ulen;
}
+static void usage(void)
+{
+ printf("Usage: bpf_jit_disasm [...]\n");
+ printf(" -o Also display related opcodes (default: off).\n");
+ printf(" -f <file> Read last image dump from file or stdin (default: klog).\n");
+ printf(" -h Display this help.\n");
+}
+
int main(int argc, char **argv)
{
- int len, klen, opcodes = 0;
- char *kbuff;
+ unsigned int len, klen, opt, opcodes = 0;
static uint8_t image[32768];
+ char *kbuff, *file = NULL;
- if (argc > 1) {
- if (!strncmp("-o", argv[argc - 1], 2)) {
+ while ((opt = getopt(argc, argv, "of:")) != -1) {
+ switch (opt) {
+ case 'o':
opcodes = 1;
- } else {
- printf("usage: bpf_jit_disasm [-o: show opcodes]\n");
- exit(0);
+ break;
+ case 'f':
+ file = optarg;
+ break;
+ default:
+ usage();
+ return -1;
}
}
bfd_init();
memset(image, 0, sizeof(image));
- kbuff = get_klog_buff(&klen);
+ kbuff = get_log_buff(file, &klen);
+ if (!kbuff) {
+ fprintf(stderr, "Could not retrieve log buffer!\n");
+ return -1;
+ }
len = get_last_jit_image(kbuff, klen, image, sizeof(image));
if (len > 0)
get_asm_insns(image, len, opcodes);
+ else
+ fprintf(stderr, "No JIT image found!\n");
- put_klog_buff(kbuff);
-
+ put_log_buff(kbuff);
return 0;
}
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 812f904..3d1bb80 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -28,3 +28,5 @@ config.mak.autogen
*-flex.*
*.pyc
*.pyo
+.config-detected
+util/intel-pt-decoder/inat-tables.c
diff --git a/tools/perf/Build b/tools/perf/Build
index b77370e..7223745 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -35,6 +35,7 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))"
CFLAGS_builtin-help.o += $(paths)
CFLAGS_builtin-timechart.o += $(paths)
CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE
+CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))"
libperf-y += util/
libperf-y += arch/
diff --git a/tools/perf/Documentation/callchain-overhead-calculation.txt b/tools/perf/Documentation/callchain-overhead-calculation.txt
new file mode 100644
index 0000000..1a75792
--- /dev/null
+++ b/tools/perf/Documentation/callchain-overhead-calculation.txt
@@ -0,0 +1,108 @@
+Overhead calculation
+--------------------
+The overhead can be shown in two columns as 'Children' and 'Self' when
+perf collects callchains. The 'self' overhead is simply calculated by
+adding all period values of the entry - usually a function (symbol).
+This is the value that perf shows traditionally and sum of all the
+'self' overhead values should be 100%.
+
+The 'children' overhead is calculated by adding all period values of
+the child functions so that it can show the total overhead of the
+higher level functions even if they don't directly execute much.
+'Children' here means functions that are called from another (parent)
+function.
+
+It might be confusing that the sum of all the 'children' overhead
+values exceeds 100% since each of them is already an accumulation of
+'self' overhead of its child functions. But with this enabled, users
+can find which function has the most overhead even if samples are
+spread over the children.
+
+Consider the following example; there are three functions like below.
+
+-----------------------
+void foo(void) {
+ /* do something */
+}
+
+void bar(void) {
+ /* do something */
+ foo();
+}
+
+int main(void) {
+ bar()
+ return 0;
+}
+-----------------------
+
+In this case 'foo' is a child of 'bar', and 'bar' is an immediate
+child of 'main' so 'foo' also is a child of 'main'. In other words,
+'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'.
+
+Suppose all samples are recorded in 'foo' and 'bar' only. When it's
+recorded with callchains the output will show something like below
+in the usual (self-overhead-only) output of perf report:
+
+----------------------------------
+Overhead Symbol
+........ .....................
+ 60.00% foo
+ |
+ --- foo
+ bar
+ main
+ __libc_start_main
+
+ 40.00% bar
+ |
+ --- bar
+ main
+ __libc_start_main
+----------------------------------
+
+When the --children option is enabled, the 'self' overhead values of
+child functions (i.e. 'foo' and 'bar') are added to the parents to
+calculate the 'children' overhead. In this case the report could be
+displayed as:
+
+-------------------------------------------
+Children Self Symbol
+........ ........ ....................
+ 100.00% 0.00% __libc_start_main
+ |
+ --- __libc_start_main
+
+ 100.00% 0.00% main
+ |
+ --- main
+ __libc_start_main
+
+ 100.00% 40.00% bar
+ |
+ --- bar
+ main
+ __libc_start_main
+
+ 60.00% 60.00% foo
+ |
+ --- foo
+ bar
+ main
+ __libc_start_main
+-------------------------------------------
+
+In the above output, the 'self' overhead of 'foo' (60%) was add to the
+'children' overhead of 'bar', 'main' and '\_\_libc_start_main'.
+Likewise, the 'self' overhead of 'bar' (40%) was added to the
+'children' overhead of 'main' and '\_\_libc_start_main'.
+
+So '\_\_libc_start_main' and 'main' are shown first since they have
+same (100%) 'children' overhead (even though they have zero 'self'
+overhead) and they are the parents of 'foo' and 'bar'.
+
+Since v3.16 the 'children' overhead is shown by default and the output
+is sorted by its values. The 'children' overhead is disabled by
+specifying --no-children option on the command line or by adding
+'report.children = false' or 'top.children = false' in the perf config
+file.
diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt
new file mode 100644
index 0000000..8bdc93b
--- /dev/null
+++ b/tools/perf/Documentation/intel-bts.txt
@@ -0,0 +1,86 @@
+Intel Branch Trace Store
+========================
+
+Overview
+========
+
+Intel BTS could be regarded as a predecessor to Intel PT and has some
+similarities because it can also identify every branch a program takes. A
+notable difference is that Intel BTS has no timing information and as a
+consequence the present implementation is limited to per-thread recording.
+
+While decoding Intel BTS does not require walking the object code, the object
+code is still needed to pair up calls and returns correctly, consequently much
+of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT
+documentation and consider that the PMU 'intel_bts' can usually be used in
+place of 'intel_pt' in the examples provided, with the proviso that per-thread
+recording must also be stipulated i.e. the --per-thread option for
+'perf record'.
+
+
+perf record
+===========
+
+new event
+---------
+
+The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record
+option is:
+
+ -e intel_bts//
+
+Currently Intel BTS is limited to per-thread tracing so the --per-thread option
+is also needed.
+
+
+snapshot option
+---------------
+
+The snapshot option is the same as Intel PT (refer Intel PT documentation).
+
+
+auxtrace mmap size option
+-----------------------
+
+The mmap size option is the same as Intel PT (refer Intel PT documentation).
+
+
+perf script
+===========
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by option --itrace. The --itrace option is
+the same as Intel PT (refer Intel PT documentation) except that neither
+"instructions" events nor "transactions" events (and consequently call
+chains) are supported.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+
+dump option
+-----------
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel BTS packets are displayed.
+
+To disable the display of Intel BTS packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+===========
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script.
+
+
+perf inject
+===========
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+ perf inject --itrace -i perf.data -o perf.data.new
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
new file mode 100644
index 0000000..4a0501d
--- /dev/null
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -0,0 +1,766 @@
+Intel Processor Trace
+=====================
+
+Overview
+========
+
+Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
+collects information about software execution such as control flow, execution
+modes and timings and formats it into highly compressed binary packets.
+Technical details are documented in the Intel 64 and IA-32 Architectures
+Software Developer Manuals, Chapter 36 Intel Processor Trace.
+
+Intel PT is first supported in Intel Core M and 5th generation Intel Core
+processors that are based on the Intel micro-architecture code name Broadwell.
+
+Trace data is collected by 'perf record' and stored within the perf.data file.
+See below for options to 'perf record'.
+
+Trace data must be 'decoded' which involves walking the object code and matching
+the trace data packets. For example a TNT packet only tells whether a
+conditional branch was taken or not taken, so to make use of that packet the
+decoder must know precisely which instruction was being executed.
+
+Decoding is done on-the-fly. The decoder outputs samples in the same format as
+samples output by perf hardware events, for example as though the "instructions"
+or "branches" events had been recorded. Presently 3 tools support this:
+'perf script', 'perf report' and 'perf inject'. See below for more information
+on using those tools.
+
+The main distinguishing feature of Intel PT is that the decoder can determine
+the exact flow of software execution. Intel PT can be used to understand why
+and how did software get to a certain point, or behave a certain way. The
+software does not have to be recompiled, so Intel PT works with debug or release
+builds, however the executed images are needed - which makes use in JIT-compiled
+environments, or with self-modified code, a challenge. Also symbols need to be
+provided to make sense of addresses.
+
+A limitation of Intel PT is that it produces huge amounts of trace data
+(hundreds of megabytes per second per core) which takes a long time to decode,
+for example two or three orders of magnitude longer than it took to collect.
+Another limitation is the performance impact of tracing, something that will
+vary depending on the use-case and architecture.
+
+
+Quickstart
+==========
+
+It is important to start small. That is because it is easy to capture vastly
+more data than can possibly be processed.
+
+The simplest thing to do with Intel PT is userspace profiling of small programs.
+Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
+
+ perf record -e intel_pt//u ls
+
+And profiled with 'perf report' e.g.
+
+ perf report
+
+To also trace kernel space presents a problem, namely kernel self-modifying
+code. A fairly good kernel image is available in /proc/kcore but to get an
+accurate image a copy of /proc/kcore needs to be made under the same conditions
+as the data capture. A script perf-with-kcore can do that, but beware that the
+script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
+locally from the source tree you can do:
+
+ ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
+
+which will create a directory named 'pt_ls' and put the perf.data file and
+copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
+'perf report' becomes:
+
+ ~/libexec/perf-core/perf-with-kcore report pt_ls
+
+Because samples are synthesized after-the-fact, the sampling period can be
+selected for reporting. e.g. sample every microsecond
+
+ ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
+
+See the sections below for more information about the --itrace option.
+
+Beware the smaller the period, the more samples that are produced, and the
+longer it takes to process them.
+
+Also note that the coarseness of Intel PT timing information will start to
+distort the statistical value of the sampling as the sampling period becomes
+smaller.
+
+To represent software control flow, "branches" samples are produced. By default
+a branch sample is synthesized for every single branch. To get an idea what
+data is available you can use the 'perf script' tool with no parameters, which
+will list all the samples.
+
+ perf record -e intel_pt//u ls
+ perf script
+
+An interesting field that is not printed by default is 'flags' which can be
+displayed as follows:
+
+ perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags
+
+The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
+in transaction, respectively.
+
+While it is possible to create scripts to analyze the data, an alternative
+approach is available to export the data to a postgresql database. Refer to
+script export-to-postgresql.py for more details, and to script
+call-graph-from-postgresql.py for an example of using the database.
+
+As mentioned above, it is easy to capture too much data. One way to limit the
+data captured is to use 'snapshot' mode which is explained further below.
+Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
+
+Another problem that will be experienced is decoder errors. They can be caused
+by inability to access the executed image, self-modified or JIT-ed code, or the
+inability to match side-band information (such as context switches and mmaps)
+which results in the decoder not knowing what code was executed.
+
+There is also the problem of perf not being able to copy the data fast enough,
+resulting in data lost because the buffer was full. See 'Buffer handling' below
+for more details.
+
+
+perf record
+===========
+
+new event
+---------
+
+The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
+selected by providing the PMU name followed by the "config" separated by slashes.
+An enhancement has been made to allow default "config" e.g. the option
+
+ -e intel_pt//
+
+will use a default config value. Currently that is the same as
+
+ -e intel_pt/tsc,noretcomp=0/
+
+which is the same as
+
+ -e intel_pt/tsc=1,noretcomp=0/
+
+Note there are now new config terms - see section 'config terms' further below.
+
+The config terms are listed in /sys/devices/intel_pt/format. They are bit
+fields within the config member of the struct perf_event_attr which is
+passed to the kernel by the perf_event_open system call. They correspond to bit
+fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
+
+ $ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
+ /sys/bus/event_source/devices/intel_pt/format/cyc:config:1
+ /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
+ /sys/bus/event_source/devices/intel_pt/format/mtc:config:9
+ /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
+ /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
+ /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
+ /sys/bus/event_source/devices/intel_pt/format/tsc:config:10
+
+Note that the default config must be overridden for each term i.e.
+
+ -e intel_pt/noretcomp=0/
+
+is the same as:
+
+ -e intel_pt/tsc=1,noretcomp=0/
+
+So, to disable TSC packets use:
+
+ -e intel_pt/tsc=0/
+
+It is also possible to specify the config value explicitly:
+
+ -e intel_pt/config=0x400/
+
+Note that, as with all events, the event is suffixed with event modifiers:
+
+ u userspace
+ k kernel
+ h hypervisor
+ G guest
+ H host
+ p precise ip
+
+'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
+'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
+meaningful for Intel PT.
+
+perf_event_attr is displayed if the -vv option is used e.g.
+
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 6
+ size 112
+ config 0x400
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ enable_on_exec 1
+ sample_id_all 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+
+
+config terms
+------------
+
+The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
+Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
+Some of the features are reflect in new config terms. All the config terms are
+described below.
+
+tsc Always supported. Produces TSC timestamp packets to provide
+ timing information. In some cases it is possible to decode
+ without timing information, for example a per-thread context
+ that does not overlap executable memory maps.
+
+ The default config selects tsc (i.e. tsc=1).
+
+noretcomp Always supported. Disables "return compression" so a TIP packet
+ is produced when a function returns. Causes more packets to be
+ produced but might make decoding more reliable.
+
+ The default config does not select noretcomp (i.e. noretcomp=0).
+
+psb_period Allows the frequency of PSB packets to be specified.
+
+ The PSB packet is a synchronization packet that provides a
+ starting point for decoding or recovery from errors.
+
+ Support for psb_period is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and "0"
+ otherwise.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The psb_period value is converted to the approximate number of
+ trace bytes between PSB packets as:
+
+ 2 ^ (value + 11)
+
+ e.g. value 3 means 16KiB bytes between PSBs
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/psb_period=15/u uname
+ Invalid psb_period for intel_pt. Valid values are: 0-5
+
+ If MTC packets are selected, the default config selects a value
+ of 3 (i.e. psb_period=3) or the nearest lower value that is
+ supported (0 is always supported). Otherwise the default is 0.
+
+ If decoding is expected to be reliable and the buffer is large
+ then a large PSB period can be used.
+
+ Because a TSC packet is produced with PSB, the PSB period can
+ also affect the granularity to timing information in the absence
+ of MTC or CYC.
+
+mtc Produces MTC timing packets.
+
+ MTC packets provide finer grain timestamp information than TSC
+ packets. MTC packets record time using the hardware crystal
+ clock (CTC) which is related to TSC packets using a TMA packet.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The frequency of MTC packets can also be specified - see
+ mtc_period below.
+
+mtc_period Specifies how frequently MTC packets are produced - see mtc
+ above for how to determine if MTC packets are supported.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The mtc_period value is converted to the MTC frequency as:
+
+ CTC-frequency / (2 ^ value)
+
+ e.g. value 3 means one eighth of CTC-frequency
+
+ Where CTC is the hardware crystal clock, the frequency of which
+ can be related to TSC via values provided in cpuid leaf 0x15.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/mtc_period=15/u uname
+ Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
+
+ The default value is 3 or the nearest lower value
+ that is supported (0 is always supported).
+
+cyc Produces CYC timing packets.
+
+ CYC packets provide even finer grain timestamp information than
+ MTC and TSC packets. A CYC packet contains the number of CPU
+ cycles since the last CYC packet. Unlike MTC and TSC packets,
+ CYC packets are only sent when another packet is also sent.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The number of CYC packets produced can be reduced by specifying
+ a threshold - see cyc_thresh below.
+
+cyc_thresh Specifies how frequently CYC packets are produced - see cyc
+ above for how to determine if CYC packets are supported.
+
+ Valid cyc_thresh values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The cyc_thresh value represents the minimum number of CPU cycles
+ that must have passed before a CYC packet can be sent. The
+ number of CPU cycles is:
+
+ 2 ^ (value - 1)
+
+ e.g. value 4 means 8 CPU cycles must pass before a CYC packet
+ can be sent. Note a CYC packet is still only sent when another
+ packet is sent, not at, e.g. every 8 CPU cycles.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
+ Invalid cyc_thresh for intel_pt. Valid values are: 0-12
+
+ CYC packets are not requested by default.
+
+no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR.
+
+ It stops the driver resetting the byte count to zero whenever
+ enabling the trace (for example on context switches) which in
+ turn results in no PSB being forced. However some processors
+ will produce a PSB anyway.
+
+ In any case, there is still a PSB when the trace is enabled for
+ the first time.
+
+ no_force_psb can be used to slightly decrease the trace size but
+ may make it harder for the decoder to recover from errors.
+
+ no_force_psb is not selected by default.
+
+
+new snapshot option
+-------------------
+
+The difference between full trace and snapshot from the kernel's perspective is
+that in full trace we don't overwrite trace data that the user hasn't collected
+yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
+the trace run and overwrite older data in the buffer so that whenever something
+interesting happens, we can stop it and grab a snapshot of what was going on
+around that interesting moment.
+
+To select snapshot mode a new option has been added:
+
+ -S
+
+Optionally it can be followed by the snapshot size e.g.
+
+ -S0x100000
+
+The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
+nor snapshot size is specified, then the default is 4MiB for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced as described in the 'new auxtrace mmap size option' section below.
+
+The snapshot size is displayed if the option -vv is used e.g.
+
+ Intel PT snapshot size: %zu
+
+
+new auxtrace mmap size option
+---------------------------
+
+Intel PT buffer size is specified by an addition to the -m option e.g.
+
+ -m,16
+
+selects a buffer size of 16 pages i.e. 64KiB.
+
+Note that the existing functionality of -m is unchanged. The auxtrace mmap size
+is specified by the optional addition of a comma and the value.
+
+The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+user is likely to get an error as they exceed their mlock limit (Max locked
+memory as shown in /proc/self/limits). Note that perf does not count the first
+512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
+against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
+their mlock limit (which defaults to 64KiB but is not multiplied by the number
+of cpus).
+
+In full-trace mode, powers of two are allowed for buffer size, with a minimum
+size of 2 pages. In snapshot mode, it is the same but the minimum size is
+1 page.
+
+The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
+
+ mmap length 528384
+ auxtrace mmap length 4198400
+
+
+Intel PT modes of operation
+---------------------------
+
+Intel PT can be used in 2 modes:
+ full-trace mode
+ snapshot mode
+
+Full-trace mode traces continuously e.g.
+
+ perf record -e intel_pt//u uname
+
+Snapshot mode captures the available data when a signal is sent e.g.
+
+ perf record -v -e intel_pt//u -S ./loopy 1000000000 &
+ [1] 11435
+ kill -USR2 11435
+ Recording AUX area tracing snapshot
+
+Note that the signal sent is SIGUSR2.
+Note that "Recording AUX area tracing snapshot" is displayed because the -v
+option is used.
+
+The 2 modes cannot be used together.
+
+
+Buffer handling
+---------------
+
+There may be buffer limitations (i.e. single ToPa entry) which means that actual
+buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
+provide other sizes, and in particular an arbitrarily large size, multiple
+buffers are logically concatenated. However an interrupt must be used to switch
+between buffers. That has two potential problems:
+ a) the interrupt may not be handled in time so that the current buffer
+ becomes full and some trace data is lost.
+ b) the interrupts may slow the system and affect the performance
+ results.
+
+If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
+which the tools report as an error.
+
+In full-trace mode, the driver waits for data to be copied out before allowing
+the (logical) buffer to wrap-around. If data is not copied out quickly enough,
+again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
+wait, the intel_pt event gets disabled. Because it is difficult to know when
+that happens, perf tools always re-enable the intel_pt event after copying out
+data.
+
+
+Intel PT and build ids
+----------------------
+
+By default "perf record" post-processes the event stream to find all build ids
+for executables for all addresses sampled. Deliberately, Intel PT is not
+decoded for that purpose (it would take too long). Instead the build ids for
+all executables encountered (due to mmap, comm or task events) are included
+in the perf.data file.
+
+To see buildids included in the perf.data file use the command:
+
+ perf buildid-list
+
+If the perf.data file contains Intel PT data, that is the same as:
+
+ perf buildid-list --with-hits
+
+
+Snapshot mode and event disabling
+---------------------------------
+
+In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
+namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
+collection of side-band information. In order to prevent that, a dummy
+software event has been introduced that permits tracking events (like mmaps) to
+continue to be recorded while intel_pt is disabled. That is important to ensure
+there is complete side-band information to allow the decoding of subsequent
+snapshots.
+
+A test has been created for that. To find the test:
+
+ perf test list
+ ...
+ 23: Test using a dummy software event to keep tracking
+
+To run the test:
+
+ perf test 23
+ 23: Test using a dummy software event to keep tracking : Ok
+
+
+perf record modes (nothing new here)
+------------------------------------
+
+perf record essentially operates in one of three modes:
+ per thread
+ per cpu
+ workload only
+
+"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
+workload).
+"per cpu" is selected by -C or -a.
+"workload only" mode is selected by not using the other options but providing a
+command to run (i.e. the workload).
+
+In per-thread mode an exact list of threads is traced. There is no inheritance.
+Each thread has its own event buffer.
+
+In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
+option, or processes selected with -p or -u) are traced. Each cpu has its own
+buffer. Inheritance is allowed.
+
+In workload-only mode, the workload is traced but with per-cpu buffers.
+Inheritance is allowed. Note that you can now trace a workload in per-thread
+mode by using the --per-thread option.
+
+
+Privileged vs non-privileged users
+----------------------------------
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
+have memory limits imposed upon them. That affects what buffer sizes they can
+have as outlined above.
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
+not permitted to use tracepoints which means there is insufficient side-band
+information to decode Intel PT in per-cpu mode, and potentially workload-only
+mode too if the workload creates new processes.
+
+Note also, that to use tracepoints, read-access to debugfs is required. So if
+debugfs is not mounted or the user does not have read-access, it will again not
+be possible to decode Intel PT in per-cpu mode.
+
+
+sched_switch tracepoint
+-----------------------
+
+The sched_switch tracepoint is used to provide side-band data for Intel PT
+decoding. sched_switch events are automatically added. e.g. the second event
+shown below
+
+ $ perf record -vv -e intel_pt//u uname
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 6
+ size 112
+ config 0x400
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ enable_on_exec 1
+ sample_id_all 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 2
+ size 112
+ config 0x108
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
+ read_format ID
+ inherit 1
+ sample_id_all 1
+ exclude_guest 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 1
+ size 112
+ config 0x9
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ mmap 1
+ comm 1
+ enable_on_exec 1
+ task 1
+ sample_id_all 1
+ mmap2 1
+ comm_exec 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ mmap size 528384B
+ AUX area mmap length 4194304
+ perf event ring buffer mmapped per cpu
+ Synthesizing auxtrace information
+ Linux
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.042 MB perf.data ]
+
+Note, the sched_switch event is only added if the user is permitted to use it
+and only in per-cpu mode.
+
+Note also, the sched_switch event is only added if TSC packets are requested.
+That is because, in the absence of timing information, the sched_switch events
+cannot be matched against the Intel PT trace.
+
+
+perf script
+===========
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace.
+
+
+New --itrace option
+-------------------
+
+Having no option is the same as
+
+ --itrace
+
+which, in turn, is the same as
+
+ --itrace=ibxe
+
+The letters are:
+
+ i synthesize "instructions" events
+ b synthesize "branches" events
+ x synthesize "transactions" events
+ c synthesize branches events (calls only)
+ r synthesize branches events (returns only)
+ e synthesize tracing error events
+ d create a debug log
+ g synthesize a call chain (use with i or x)
+
+"Instructions" events look like they were recorded by "perf record -e
+instructions".
+
+"Branches" events look like they were recorded by "perf record -e branches". "c"
+and "r" can be combined to get calls and returns.
+
+"Transactions" events correspond to the start or end of transactions. The
+'flags' field can be used in perf script to determine whether the event is a
+tranasaction start, commit or abort.
+
+Error events are new. They show where the decoder lost the trace. Error events
+are quite important. Users must know if what they are seeing is a complete
+picture or not.
+
+The "d" option will cause the creation of a file "intel_pt.log" containing all
+decoded packets and instructions. Note that this option slows down the decoder
+and that the resulting file may be very large.
+
+In addition, the period of the "instructions" event can be specified. e.g.
+
+ --itrace=i10us
+
+sets the period to 10us i.e. one instruction sample is synthesized for each 10
+microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
+"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
+
+"ms", "us" and "ns" are converted to TSC ticks.
+
+The timing information included with Intel PT does not give the time of every
+instruction. Consequently, for the purpose of sampling, the decoder estimates
+the time since the last timing packet based on 1 tick per instruction. The time
+on the sample is *not* adjusted and reflects the last known value of TSC.
+
+For Intel PT, the default period is 100us.
+
+Also the call chain size (default 16, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+ --itrace=ig32
+ --itrace=xg32
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+
+dump option
+-----------
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel PT packets are displayed. The packet decoder does not
+pay attention to PSB packets, but just decodes the bytes - so the packets seen
+by the actual decoder may not be identical in places where the data is corrupt.
+One example of that would be when the buffer-switching interrupt has been too
+slow, and the buffer has been filled completely. In that case, the last packet
+in the buffer might be truncated and immediately followed by a PSB as the trace
+continues in the next buffer.
+
+To disable the display of Intel PT packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+===========
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script, with the exception that the default is --itrace=igxe.
+
+
+perf inject
+===========
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+ perf inject --itrace -i perf.data -o perf.data.new
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
new file mode 100644
index 0000000..2ff9466
--- /dev/null
+++ b/tools/perf/Documentation/itrace.txt
@@ -0,0 +1,22 @@
+ i synthesize instructions events
+ b synthesize branches events
+ c synthesize branches events (calls only)
+ r synthesize branches events (returns only)
+ x synthesize transactions events
+ e synthesize error events
+ d create a debug log
+ g synthesize a call chain (use with i or x)
+
+ The default is all events i.e. the same as --itrace=ibxe
+
+ In addition, the period (default 100000) for instructions events
+ can be specified in units of:
+
+ i instructions
+ t ticks
+ ms milliseconds
+ us microseconds
+ ns nanoseconds (default)
+
+ Also the call chain size (default 16, max. 1024) for instructions or
+ transactions events can be specified.
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index f6480cb..ab632d9 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -210,9 +210,16 @@ Suite for evaluating hash tables.
*wake*::
Suite for evaluating wake calls.
+*wake-parallel*::
+Suite for evaluating parallel wake calls.
+
*requeue*::
Suite for evaluating requeue calls.
+*lock-pi*::
+Suite for evaluating futex lock_pi calls.
+
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index dc7442c..0c721c3 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -44,6 +44,12 @@ OPTIONS
--kallsyms=<file>::
kallsyms pathname
+--itrace::
+ Decode Instruction Tracing data, replacing it with synthesized events.
+ Options are:
+
+include::itrace.txt[]
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 23219c6..ff0f433 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -37,7 +37,11 @@ OPTIONS
-s <key[,key2...]>::
--sort=<key[,key2...]>::
- Sort the output (default: frag,hit,bytes)
+ Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit'
+ for page). Available sort keys are 'ptr, callsite, bytes, hit,
+ pingpong, frag' for slab and 'page, callsite, bytes, hit, order,
+ migtype, gfp' for page. This option should be preceded by one of the
+ mode selection options - i.e. --slab, --page, --alloc and/or --caller.
-l <num>::
--line=<num>::
@@ -52,6 +56,11 @@ OPTIONS
--page::
Analyze page allocator events
+--live::
+ Show live page stat. The perf kmem shows total allocation stat by
+ default, but this option shows live (currently allocated) pages
+ instead. (This option works with --page option only)
+
SEE ALSO
--------
linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index 6252e77..6a5bb2b 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -151,6 +151,12 @@ STAT LIVE OPTIONS
Show events other than HLT (x86 only) or Wait state (s390 only)
that take longer than duration usecs.
+--proc-map-timeout::
+ When processing pre-existing threads /proc/XXX/mmap, it may take
+ a long time, because the file may be huge. A time out is needed
+ in such cases.
+ This option sets the time out limit. The default value is 500 ms.
+
SEE ALSO
--------
linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 239609c..3a8a9ba 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -14,11 +14,13 @@ or
or
'perf probe' [options] --del='[GROUP:]EVENT' [...]
or
-'perf probe' --list
+'perf probe' --list[=[GROUP:]EVENT]
or
'perf probe' [options] --line='LINE'
or
'perf probe' [options] --vars='PROBEPOINT'
+or
+'perf probe' [options] --funcs
DESCRIPTION
-----------
@@ -64,8 +66,8 @@ OPTIONS
classes(e.g. [a-z], [!A-Z]).
-l::
---list::
- List up current probe events.
+--list[=[GROUP:]EVENT]::
+ List up current probe events. This can also accept filtering patterns of event names.
-L::
--line=::
@@ -81,10 +83,15 @@ OPTIONS
(Only for --vars) Show external defined variables in addition to local
variables.
+--no-inlines::
+ (Only for --add) Search only for non-inlined functions. The functions
+ which do not have instances are ignored.
+
-F::
---funcs::
+--funcs[=FILTER]::
Show available functions in given module or kernel. With -x/--exec,
can also list functions in a user space executable / shared library.
+ This also can accept a FILTER rule argument.
--filter=FILTER::
(Only for --vars and --funcs) Set filter. FILTER is a combination of glob
@@ -148,7 +155,7 @@ Each probe argument follows below syntax.
[NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
-'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point.
+'$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 4847a79..2e9ce77 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -45,6 +45,21 @@ OPTIONS
param1 and param2 are defined as formats for the PMU in:
/sys/bus/event_sources/devices/<pmu>/format/*
+ There are also some params which are not defined in .../<pmu>/format/*.
+ These params can be used to overload default config values per event.
+ Here is a list of the params.
+ - 'period': Set event sampling period
+ - 'freq': Set event sampling frequency
+ - 'time': Disable/enable time stamping. Acceptable values are 1 for
+ enabling time stamping. 0 for disabling time stamping.
+ The default is 1.
+ - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
+ FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+ "no" for disable callgraph.
+ - 'stack-size': user stack size for dwarf mode
+ Note: If user explicitly sets options which conflict with the params,
+ the value set by the params will be overridden.
+
- a hardware breakpoint event in the form of '\mem:addr[/len][:access]'
where addr is the address in memory you want to break in.
Access is the memory access type (read, write, execute) it can
@@ -61,7 +76,16 @@ OPTIONS
"perf report" to view group events together.
--filter=<filter>::
- Event filter.
+ Event filter. This option should follow a event selector (-e) which
+ selects tracepoint event(s). Multiple '--filter' options are combined
+ using '&&'.
+
+--exclude-perf::
+ Don't record events issued by perf itself. This option should follow
+ a event selector (-e) which selects tracepoint event(s). It adds a
+ filter expression 'common_pid != $PERFPID' to filters. If other
+ '--filter' exists, the new filter expression will be combined with
+ them by '&&'.
-a::
--all-cpus::
@@ -108,6 +132,8 @@ OPTIONS
Number of mmap data pages (must be a power of two) or size
specification with appended unit character - B/K/M/G. The
size is rounded up to have nearest pages power of two value.
+ Also, by adding a comma, the number of mmap pages for AUX
+ area tracing can be specified.
--group::
Put all events in a single event group. This precedes the --event
@@ -145,16 +171,21 @@ OPTIONS
-s::
--stat::
- Per thread counts.
+ Record per-thread event counts. Use it with 'perf report -T' to see
+ the values.
-d::
--data::
- Sample addresses.
+ Record the sample addresses.
-T::
--timestamp::
- Sample timestamps. Use it with 'perf report -D' to see the timestamps,
- for instance.
+ Record the sample timestamps. Use it with 'perf report -D' to see the
+ timestamps, for instance.
+
+-P::
+--period::
+ Record the sample period.
-n::
--no-samples::
@@ -245,7 +276,11 @@ filter out the startup phase of the program, which is often very different.
--intr-regs::
Capture machine state (registers) at interrupt, i.e., on counter overflows for
each sample. List of captured registers depends on the architecture. This option
-is off by default.
+is off by default. It is possible to select the registers to sample using their
+symbolic names, e.g. on x86, ax, si. To list the available registers use
+--intr-regs=\?. To name registers, pass a comma separated list such as
+--intr-regs=ax,bx. The list of register is architecture dependent.
+
--running-time::
Record running and enabled time for read events (:S)
@@ -257,6 +292,22 @@ records. See clock_gettime(). In particular CLOCK_MONOTONIC and
CLOCK_MONOTONIC_RAW are supported, some events might also allow
CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
+-S::
+--snapshot::
+Select AUX area tracing Snapshot Mode. This option is valid only with an
+AUX area tracing event. Optionally the number of bytes to capture per
+snapshot can be specified. In Snapshot Mode, trace data is captured only when
+signal SIGUSR2 is received.
+
+--proc-map-timeout::
+When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
+because the file may be huge. A time out is needed in such cases.
+This option sets the time out limit. The default value is 500 ms.
+
+--switch-events::
+Record context switch events i.e. events of type PERF_RECORD_SWITCH or
+PERF_RECORD_SWITCH_CPU_WIDE.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 4879cf6..9c7981b 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -34,7 +34,8 @@ OPTIONS
-T::
--threads::
- Show per-thread event counters
+ Show per-thread event counters. The input data file should be recorded
+ with -s option.
-c::
--comms=::
Only consider symbols in these comms. CSV that understands
@@ -80,6 +81,8 @@ OPTIONS
- cpu: cpu number the task ran at the time of sample
- srcline: filename and line number executed at the time of sample. The
DWARF debugging info must be provided.
+ - srcfile: file name of the source file of the same. Requires dwarf
+ information.
- weight: Event specific weight, e.g. memory latency or transaction
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
@@ -108,6 +111,7 @@ OPTIONS
- mispredict: "N" for predicted branch, "Y" for mispredicted branch
- in_tx: branch in TSX transaction
- abort: TSX transaction abort.
+ - cycles: Cycles in basic block
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
@@ -193,6 +197,7 @@ OPTIONS
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires callchains are recorded.
+ See the `overhead calculation' section for more details.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
@@ -323,6 +328,29 @@ OPTIONS
--header-only::
Show only perf.data header (forces --stdio).
+--itrace::
+ Options for decoding instruction tracing data. The options are:
+
+include::itrace.txt[]
+
+ To disable decoding entirely, use --no-itrace.
+
+--full-source-path::
+ Show the full path for source files for srcline output.
+
+--show-ref-call-graph::
+ When multiple events are sampled, it may not be needed to collect
+ callgraphs for all of them. The sample sites are usually nearby,
+ and it's enough to collect the callgraphs on a reference event.
+ So user can use "call-graph=no" event modifier to disable callgraph
+ for other events to reduce the overhead.
+ However, perf report cannot show callgraphs for the event which
+ disable the callgraph.
+ This option extends the perf report to show reference callgraphs,
+ which collected by reference event, in no callgraph event.
+
+include::callchain-overhead-calculation.txt[]
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 7944575..dc3ec78 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,8 @@ OPTIONS
-f::
--fields::
Comma separated list of fields to print. Options are:
- comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period.
+ comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
+ srcline, period, iregs, flags.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -165,6 +166,12 @@ OPTIONS
At this point usage is displayed, and perf-script exits.
+ The flags field is synthesized and may have a value when Instruction
+ Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+ call, return, conditional, system, asynchronous, interrupt,
+ transaction abort, trace begin, trace end, and in transaction,
+ respectively.
+
Finally, a user may not set fields to none for all event types.
i.e., -f "" is not allowed.
@@ -215,12 +222,33 @@ OPTIONS
--show-mmap-events
Display mmap related events (e.g. MMAP, MMAP2).
+--show-switch-events
+ Display context switch events i.e. events of type PERF_RECORD_SWITCH or
+ PERF_RECORD_SWITCH_CPU_WIDE.
+
+--demangle::
+ Demangle symbol names to human readable form. It's enabled by default,
+ disable with --no-demangle.
+
+--demangle-kernel::
+ Demangle kernel symbol names to human readable form (for C++ kernels).
+
--header
Show perf.data header.
--header-only
Show only perf.data header.
+--itrace::
+ Options for decoding instruction tracing data. The options are:
+
+include::itrace.txt[]
+
+ To disable decoding entirely, use --no-itrace.
+
+--full-source-path::
+ Show the full path for source files for srcline output.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04e150d..47469ab 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod
use --per-core in addition to -a. (system-wide). The output includes the
core number and the number of online logical processors on that physical processor.
+--per-thread::
+Aggregate counts per monitored threads, when monitoring threads (-t option)
+or processes (-p option).
+
-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 3265b10..f6a23eb 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -168,7 +168,7 @@ Default is to monitor all CPUS.
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires -g/--call-graph option
- enabled.
+ enabled. See the `overhead calculation' section for more details.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
@@ -201,6 +201,33 @@ Default is to monitor all CPUS.
Force each column width to the provided list, for large terminal
readability. 0 means no limit (default behavior).
+--proc-map-timeout::
+ When processing pre-existing threads /proc/XXX/mmap, it may take
+ a long time, because the file may be huge. A time out is needed
+ in such cases.
+ This option sets the time out limit. The default value is 500 ms.
+
+
+-b::
+--branch-any::
+ Enable taken branch stack sampling. Any type of taken branch may be sampled.
+ This is a shortcut for --branch-filter any. See --branch-filter for more infos.
+
+-j::
+--branch-filter::
+ Enable taken branch stack sampling. Each sample captures a series of consecutive
+ taken branches. The number of branches captured with each sample depends on the
+ underlying hardware, the type of branches of interest, and the executed code.
+ It is possible to select the types of branches captured by enabling filters.
+ For a full list of modifiers please see the perf record manpage.
+
+ The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+ The privilege levels may be omitted, in which case, the privilege levels of the associated
+ event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+ levels are subject to permissions. When sampling on multiple events, branch stack sampling
+ is enabled for all the sampling events. The sampled branch type is the same for all events.
+ The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+ Note that this feature may not be available on all processors.
INTERACTIVE PROMPTING KEYS
--------------------------
@@ -234,6 +261,7 @@ INTERACTIVE PROMPTING KEYS
Pressing any unmapped key displays a menu, and prompts for input.
+include::callchain-overhead-calculation.txt[]
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index ba03fd5..7ea0786 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -35,7 +35,7 @@ OPTIONS
-e::
--expr::
- List of events to show, currently only syscall names.
+ List of syscalls to show, currently only syscall names.
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it.
@@ -121,6 +121,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--event::
Trace other events, see 'perf list' for a complete list.
+--proc-map-timeout::
+ When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
+ because the file may be huge. A time out is needed in such cases.
+ This option sets the time out limit. The default value is 500 ms.
+
PAGEFAULTS
----------
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 11ccbb2..af009bd 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,12 +1,33 @@
tools/perf
+tools/arch/alpha/include/asm/barrier.h
+tools/arch/arm/include/asm/barrier.h
+tools/arch/ia64/include/asm/barrier.h
+tools/arch/mips/include/asm/barrier.h
+tools/arch/powerpc/include/asm/barrier.h
+tools/arch/s390/include/asm/barrier.h
+tools/arch/sh/include/asm/barrier.h
+tools/arch/sparc/include/asm/barrier.h
+tools/arch/sparc/include/asm/barrier_32.h
+tools/arch/sparc/include/asm/barrier_64.h
+tools/arch/tile/include/asm/barrier.h
+tools/arch/x86/include/asm/barrier.h
+tools/arch/xtensa/include/asm/barrier.h
tools/scripts
tools/build
+tools/arch/x86/include/asm/atomic.h
+tools/arch/x86/include/asm/rmwcc.h
tools/lib/traceevent
tools/lib/api
+tools/lib/bpf
+tools/lib/hweight.c
+tools/lib/rbtree.c
tools/lib/symbol/kallsyms.c
tools/lib/symbol/kallsyms.h
tools/lib/util/find_next_bit.c
+tools/include/asm/atomic.h
+tools/include/asm/barrier.h
tools/include/asm/bug.h
+tools/include/asm-generic/barrier.h
tools/include/asm-generic/bitops/arch_hweight.h
tools/include/asm-generic/bitops/atomic.h
tools/include/asm-generic/bitops/const_hweight.h
@@ -17,37 +38,34 @@ tools/include/asm-generic/bitops/fls64.h
tools/include/asm-generic/bitops/fls.h
tools/include/asm-generic/bitops/hweight.h
tools/include/asm-generic/bitops.h
+tools/include/linux/atomic.h
tools/include/linux/bitops.h
tools/include/linux/compiler.h
-tools/include/linux/export.h
tools/include/linux/hash.h
+tools/include/linux/kernel.h
+tools/include/linux/list.h
tools/include/linux/log2.h
+tools/include/linux/poison.h
+tools/include/linux/rbtree.h
+tools/include/linux/rbtree_augmented.h
tools/include/linux/types.h
include/asm-generic/bitops/arch_hweight.h
include/asm-generic/bitops/const_hweight.h
include/asm-generic/bitops/fls64.h
include/asm-generic/bitops/__fls.h
include/asm-generic/bitops/fls.h
-include/linux/const.h
include/linux/perf_event.h
-include/linux/rbtree.h
include/linux/list.h
include/linux/hash.h
include/linux/stringify.h
-lib/find_next_bit.c
-lib/hweight.c
-lib/rbtree.c
include/linux/swab.h
arch/*/include/asm/unistd*.h
-arch/*/include/asm/perf_regs.h
arch/*/include/uapi/asm/unistd*.h
arch/*/include/uapi/asm/perf_regs.h
arch/*/lib/memcpy*.S
arch/*/lib/memset*.S
include/linux/poison.h
-include/linux/magic.h
include/linux/hw_breakpoint.h
-include/linux/rbtree_augmented.h
include/uapi/linux/perf_event.h
include/uapi/linux/const.h
include/uapi/linux/swab.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index c699dc3..480546d 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -24,7 +24,7 @@ unexport MAKEFLAGS
# (To override it, run 'make JOBS=1' and similar.)
#
ifeq ($(JOBS),)
- JOBS := $(shell egrep -c '^processor|^CPU' /proc/cpuinfo 2>/dev/null)
+ JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
ifeq ($(JOBS),0)
JOBS := 1
endif
@@ -83,8 +83,8 @@ build-test:
#
# All other targets get passed through:
#
-%:
+%: FORCE
$(print_msg)
$(make)
-.PHONY: tags TAGS
+.PHONY: tags TAGS FORCE Makefile
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index c43a205..d9863cb 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -73,6 +73,14 @@ include config/utilities.mak
# for CTF data format.
#
# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
+#
+# Define NO_AUXTRACE if you do not want AUX area tracing support
+
+# As per kernel Makefile, avoid funny character set dependencies
+unexport LC_ALL
+LC_COLLATE=C
+LC_NUMERIC=C
+export LC_COLLATE LC_NUMERIC
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(shell pwd)))
@@ -107,9 +115,22 @@ $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
$(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
$(Q)touch $(OUTPUT)PERF-VERSION-FILE
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
-AR = $(CROSS_COMPILE)ar
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
PKG_CONFIG = $(CROSS_COMPILE)pkg-config
RM = rm -f
@@ -120,6 +141,7 @@ INSTALL = install
FLEX = flex
BISON = bison
STRIP = strip
+AWK = awk
LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
@@ -171,6 +193,9 @@ endif
LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
export LIBTRACEEVENT
+LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
+LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
+
LIBAPI = $(LIB_PATH)libapi.a
export LIBAPI
@@ -185,8 +210,9 @@ python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT
PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
- $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
+ $(QUIET_GEN)CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
+ $(PYTHON_WORD) util/setup.py \
--quiet build_ext; \
mkdir -p $(OUTPUT)python && \
cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
@@ -270,14 +296,15 @@ strip: $(PROGRAMS) $(OUTPUT)perf
PERF_IN := $(OUTPUT)perf-in.o
-export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX
+export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
$(Q)$(MAKE) $(build)=perf
-$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
+ $(PERF_IN) $(LIBS) -o $@
$(GTK_IN): FORCE
$(Q)$(MAKE) $(build)=gtk
@@ -371,7 +398,13 @@ $(LIB_FILE): $(LIBPERF_IN)
LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
$(LIBTRACEEVENT): FORCE
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
+
+libtraceevent_plugins: FORCE
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+
+$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
$(LIBTRACEEVENT)-clean:
$(call QUIET_CLEAN, libtraceevent)
@@ -462,7 +495,7 @@ check: $(OUTPUT)common-cmds.h
install-gtk:
-install-bin: all install-gtk
+install-tools: all install-gtk
$(call QUIET_INSTALL, binaries) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
@@ -481,6 +514,11 @@ endif
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-with-kcore) \
$(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBAUDIT
+ $(call QUIET_INSTALL, strace/groups) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \
+ $(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
+endif
ifndef NO_LIBPERL
$(call QUIET_INSTALL, perl-scripts) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
@@ -500,12 +538,16 @@ endif
$(call QUIET_INSTALL, perf_completion-script) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
$(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+
+install-tests: all install-gtk
$(call QUIET_INSTALL, tests) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
+install-bin: install-tools install-tests
+
install: install-bin try-install-man install-traceevent-plugins
install-python_ext:
@@ -528,9 +570,10 @@ config-clean:
clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
- $(Q)$(RM) .config-detected
+ $(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
- $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
+ $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
+ $(OUTPUT)util/intel-pt-decoder/inat-tables.c
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
$(python-clean)
@@ -549,4 +592,5 @@ FORCE:
.PHONY: all install clean config-clean strip install-gtk
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep
+.PHONY: libtraceevent_plugins
diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build
new file mode 100644
index 0000000..1bb8bf6
--- /dev/null
+++ b/tools/perf/arch/alpha/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build
index 54afe4a..41bf61d 100644
--- a/tools/perf/arch/arm64/Build
+++ b/tools/perf/arch/arm64/Build
@@ -1 +1,2 @@
libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index 1d3f39c..4e5af27e3 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -5,8 +5,11 @@
#include <linux/types.h>
#include <asm/perf_regs.h>
+void perf_regs_load(u64 *regs);
+
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_ARM64_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build
new file mode 100644
index 0000000..b30eff9
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/Build
@@ -0,0 +1,2 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
new file mode 100644
index 0000000..cf04a4c9
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -0,0 +1,61 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_ARM64_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
new file mode 100644
index 0000000..025b46e
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/regs_load.S
@@ -0,0 +1,46 @@
+#include <linux/linkage.h>
+
+.text
+.type perf_regs_load,%function
+#define STR_REG(r) str x##r, [x0, 8 * r]
+#define LDR_REG(r) ldr x##r, [x0, 8 * r]
+#define SP (8 * 31)
+#define PC (8 * 32)
+ENTRY(perf_regs_load)
+ STR_REG(0)
+ STR_REG(1)
+ STR_REG(2)
+ STR_REG(3)
+ STR_REG(4)
+ STR_REG(5)
+ STR_REG(6)
+ STR_REG(7)
+ STR_REG(8)
+ STR_REG(9)
+ STR_REG(10)
+ STR_REG(11)
+ STR_REG(12)
+ STR_REG(13)
+ STR_REG(14)
+ STR_REG(15)
+ STR_REG(16)
+ STR_REG(17)
+ STR_REG(18)
+ STR_REG(19)
+ STR_REG(20)
+ STR_REG(21)
+ STR_REG(22)
+ STR_REG(23)
+ STR_REG(24)
+ STR_REG(25)
+ STR_REG(26)
+ STR_REG(27)
+ STR_REG(28)
+ STR_REG(29)
+ STR_REG(30)
+ mov x1, sp
+ str x1, [x0, #SP]
+ str x30, [x0, #PC]
+ LDR_REG(1)
+ ret
+ENDPROC(perf_regs_load)
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 49776f1..b00dfd92 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -61,7 +61,7 @@ const char *const mips_triplets[] = {
static bool lookup_path(char *name)
{
bool found = false;
- char *path, *tmp;
+ char *path, *tmp = NULL;
char buf[PATH_MAX];
char *env = getenv("PATH");
@@ -128,7 +128,7 @@ static const char *normalize_arch(char *arch)
return arch;
}
-static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
+static int perf_session_env__lookup_binutils_path(struct perf_env *env,
const char *name,
const char **path)
{
@@ -206,7 +206,7 @@ out_error:
return -1;
}
-int perf_session_env__lookup_objdump(struct perf_session_env *env)
+int perf_session_env__lookup_objdump(struct perf_env *env)
{
/*
* For live mode, env->arch will be NULL and we can use
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index ede246e..20176df 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,6 @@
extern const char *objdump_path;
-int perf_session_env__lookup_objdump(struct perf_session_env *env);
+int perf_session_env__lookup_objdump(struct perf_env *env);
#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
new file mode 100644
index 0000000..1bb8bf6
--- /dev/null
+++ b/tools/perf/arch/mips/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build
new file mode 100644
index 0000000..1bb8bf6
--- /dev/null
+++ b/tools/perf/arch/parisc/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 0af6e9b..7b8b0d1 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,4 +1,5 @@
libperf-y += header.o
+libperf-y += sym-handling.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
new file mode 100644
index 0000000..bbc1a50
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -0,0 +1,82 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include "debug.h"
+#include "symbol.h"
+#include "map.h"
+#include "probe-event.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+ return ehdr.e_type == ET_EXEC ||
+ ehdr.e_type == ET_REL ||
+ ehdr.e_type == ET_DYN;
+}
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+void arch__elf_sym_adjust(GElf_Sym *sym)
+{
+ sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
+}
+#endif
+#endif
+
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+int arch__choose_best_symbol(struct symbol *syma,
+ struct symbol *symb __maybe_unused)
+{
+ char *sym = syma->name;
+
+ /* Skip over any initial dot */
+ if (*sym == '.')
+ sym++;
+
+ /* Avoid "SyS" kernel syscall aliases */
+ if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
+ return SYMBOL_B;
+ if (strlen(sym) >= 10 && !strncmp(sym, "compat_SyS", 10))
+ return SYMBOL_B;
+
+ return SYMBOL_A;
+}
+
+/* Allow matching against dot variants */
+int arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+ /* Skip over initial dot */
+ if (*namea == '.')
+ namea++;
+ if (*nameb == '.')
+ nameb++;
+
+ return strcmp(namea, nameb);
+}
+#endif
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+bool arch__prefers_symtab(void)
+{
+ return true;
+}
+
+#define PPC64LE_LEP_OFFSET 8
+
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+ struct probe_trace_event *tev, struct map *map)
+{
+ /*
+ * ppc64 ABIv2 local entry point is currently always 2 instructions
+ * (8 bytes) after the global entry point.
+ */
+ if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+ tev->point.address += PPC64LE_LEP_OFFSET;
+ tev->point.offset += PPC64LE_LEP_OFFSET;
+ }
+}
+#endif
diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c
index 0d0897f..f8dfa89 100644
--- a/tools/perf/arch/sh/util/dwarf-regs.c
+++ b/tools/perf/arch/sh/util/dwarf-regs.c
@@ -51,5 +51,5 @@ const char *sh_regs_table[SH_MAX_REGS] = {
/* Return architecture dependent register string (for kprobe-tracer) */
const char *get_arch_regstr(unsigned int n)
{
- return (n <= SH_MAX_REGS) ? sh_regs_table[n] : NULL;
+ return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL;
}
diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c
index 92eda41..b704fdb 100644
--- a/tools/perf/arch/sparc/util/dwarf-regs.c
+++ b/tools/perf/arch/sparc/util/dwarf-regs.c
@@ -39,5 +39,5 @@ const char *sparc_regs_table[SPARC_MAX_REGS] = {
*/
const char *get_arch_regstr(unsigned int n)
{
- return (n <= SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL;
+ return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL;
}
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index cfbccc4..ff63649 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -1,8 +1,14 @@
libperf-y += header.o
libperf-y += tsc.o
+libperf-y += pmu.o
libperf-y += kvm-stat.o
+libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
new file mode 100644
index 0000000..7a78055
--- /dev/null
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -0,0 +1,83 @@
+/*
+ * auxtrace.c: AUX area tracing support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdbool.h>
+
+#include "../../util/header.h"
+#include "../../util/debug.h"
+#include "../../util/pmu.h"
+#include "../../util/auxtrace.h"
+#include "../../util/intel-pt.h"
+#include "../../util/intel-bts.h"
+#include "../../util/evlist.h"
+
+static
+struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
+ int *err)
+{
+ struct perf_pmu *intel_pt_pmu;
+ struct perf_pmu *intel_bts_pmu;
+ struct perf_evsel *evsel;
+ bool found_pt = false;
+ bool found_bts = false;
+
+ intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+ intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+
+ if (evlist) {
+ evlist__for_each(evlist, evsel) {
+ if (intel_pt_pmu &&
+ evsel->attr.type == intel_pt_pmu->type)
+ found_pt = true;
+ if (intel_bts_pmu &&
+ evsel->attr.type == intel_bts_pmu->type)
+ found_bts = true;
+ }
+ }
+
+ if (found_pt && found_bts) {
+ pr_err("intel_pt and intel_bts may not be used together\n");
+ *err = -EINVAL;
+ return NULL;
+ }
+
+ if (found_pt)
+ return intel_pt_recording_init(err);
+
+ if (found_bts)
+ return intel_bts_recording_init(err);
+
+ return NULL;
+}
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+ int *err)
+{
+ char buffer[64];
+ int ret;
+
+ *err = 0;
+
+ ret = get_cpuid(buffer, sizeof(buffer));
+ if (ret) {
+ *err = ret;
+ return NULL;
+ }
+
+ if (!strncmp(buffer, "GenuineIntel,", 13))
+ return auxtrace_record__init_intel(evlist, err);
+
+ return NULL;
+}
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c
index be22dd4..a08de0a 100644
--- a/tools/perf/arch/x86/util/dwarf-regs.c
+++ b/tools/perf/arch/x86/util/dwarf-regs.c
@@ -71,5 +71,5 @@ const char *x86_64_regs_table[X86_64_MAX_REGS] = {
/* Return architecture dependent register string (for kprobe-tracer) */
const char *get_arch_regstr(unsigned int n)
{
- return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL;
+ return (n < ARCH_MAX_REGS) ? arch_regs_table[n] : NULL;
}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
new file mode 100644
index 0000000..9b94ce5
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -0,0 +1,458 @@
+/*
+ * intel-bts.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "../../util/cpumap.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/session.h"
+#include "../../util/util.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/tsc.h"
+#include "../../util/auxtrace.h"
+#include "../../util/intel-bts.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+#define KiB_MASK(x) (KiB(x) - 1)
+#define MiB_MASK(x) (MiB(x) - 1)
+
+#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4)
+
+#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60)
+
+struct intel_bts_snapshot_ref {
+ void *ref_buf;
+ size_t ref_offset;
+ bool wrapped;
+};
+
+struct intel_bts_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *intel_bts_pmu;
+ struct perf_evlist *evlist;
+ bool snapshot_mode;
+ size_t snapshot_size;
+ int snapshot_ref_cnt;
+ struct intel_bts_snapshot_ref *snapshot_refs;
+};
+
+struct branch {
+ u64 from;
+ u64 to;
+ u64 misc;
+};
+
+static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+{
+ return INTEL_BTS_AUXTRACE_PRIV_SIZE;
+}
+
+static int intel_bts_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
+ struct perf_event_mmap_page *pc;
+ struct perf_tsc_conversion tc = { .time_mult = 0, };
+ bool cap_user_time_zero = false;
+ int err;
+
+ if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
+ return -EINVAL;
+
+ if (!session->evlist->nr_mmaps)
+ return -EINVAL;
+
+ pc = session->evlist->mmap[0].base;
+ if (pc) {
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err) {
+ if (err != -EOPNOTSUPP)
+ return err;
+ } else {
+ cap_user_time_zero = tc.time_mult != 0;
+ }
+ if (!cap_user_time_zero)
+ ui__warning("Intel BTS: TSC not available\n");
+ }
+
+ auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
+ auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type;
+ auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
+ auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
+ auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
+ auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
+ auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;
+
+ return 0;
+}
+
+static int intel_bts_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
+ struct perf_evsel *evsel, *intel_bts_evsel = NULL;
+ const struct cpu_map *cpus = evlist->cpus;
+ bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+
+ btsr->evlist = evlist;
+ btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type == intel_bts_pmu->type) {
+ if (intel_bts_evsel) {
+ pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ intel_bts_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
+ pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
+ return -EINVAL;
+ }
+
+ if (!opts->full_auxtrace)
+ return 0;
+
+ if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
+ pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
+ return -EINVAL;
+ }
+
+ /* Set default sizes for snapshot mode */
+ if (opts->auxtrace_snapshot_mode) {
+ if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ } else if (!opts->auxtrace_mmap_pages && !privileged &&
+ opts->mmap_pages == UINT_MAX) {
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ if (!opts->auxtrace_snapshot_size)
+ opts->auxtrace_snapshot_size =
+ opts->auxtrace_mmap_pages * (size_t)page_size;
+ if (!opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_snapshot_size;
+
+ sz = round_up(sz, page_size) / page_size;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+ }
+ if (opts->auxtrace_snapshot_size >
+ opts->auxtrace_mmap_pages * (size_t)page_size) {
+ pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+ opts->auxtrace_snapshot_size,
+ opts->auxtrace_mmap_pages * (size_t)page_size);
+ return -EINVAL;
+ }
+ if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+ pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+ return -EINVAL;
+ }
+ pr_debug2("Intel BTS snapshot size: %zu\n",
+ opts->auxtrace_snapshot_size);
+ }
+
+ /* Set default sizes for full trace mode */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ }
+
+ /* Validate auxtrace_mmap_pages */
+ if (opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+ size_t min_sz;
+
+ if (opts->auxtrace_snapshot_mode)
+ min_sz = KiB(4);
+ else
+ min_sz = KiB(8);
+
+ if (sz < min_sz || !is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
+ min_sz / 1024);
+ return -EINVAL;
+ }
+ }
+
+ if (intel_bts_evsel) {
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace event
+ * must come first.
+ */
+ perf_evlist__to_front(evlist, intel_bts_evsel);
+ /*
+ * In the case of per-cpu mmaps, we need the CPU on the
+ * AUX event.
+ */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
+ }
+
+ /* Add dummy event to keep tracking */
+ if (opts->full_auxtrace) {
+ struct perf_evsel *tracking_evsel;
+ int err;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ tracking_evsel = perf_evlist__last(evlist);
+
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+ }
+
+ return 0;
+}
+
+static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ unsigned long long snapshot_size = 0;
+ char *endptr;
+
+ if (str) {
+ snapshot_size = strtoull(str, &endptr, 0);
+ if (*endptr || snapshot_size > SIZE_MAX)
+ return -1;
+ }
+
+ opts->auxtrace_snapshot_mode = true;
+ opts->auxtrace_snapshot_size = snapshot_size;
+
+ btsr->snapshot_size = snapshot_size;
+
+ return 0;
+}
+
+static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ return rdtsc();
+}
+
+static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
+ int idx)
+{
+ const size_t sz = sizeof(struct intel_bts_snapshot_ref);
+ int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
+ struct intel_bts_snapshot_ref *refs;
+
+ if (!new_cnt)
+ new_cnt = 16;
+
+ while (new_cnt <= idx)
+ new_cnt *= 2;
+
+ refs = calloc(new_cnt, sz);
+ if (!refs)
+ return -ENOMEM;
+
+ memcpy(refs, btsr->snapshot_refs, cnt * sz);
+
+ btsr->snapshot_refs = refs;
+ btsr->snapshot_ref_cnt = new_cnt;
+
+ return 0;
+}
+
+static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
+{
+ int i;
+
+ for (i = 0; i < btsr->snapshot_ref_cnt; i++)
+ zfree(&btsr->snapshot_refs[i].ref_buf);
+ zfree(&btsr->snapshot_refs);
+}
+
+static void intel_bts_recording_free(struct auxtrace_record *itr)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+
+ intel_bts_free_snapshot_refs(btsr);
+ free(btsr);
+}
+
+static int intel_bts_snapshot_start(struct auxtrace_record *itr)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each(btsr->evlist, evsel) {
+ if (evsel->attr.type == btsr->intel_bts_pmu->type)
+ return perf_evlist__disable_event(btsr->evlist, evsel);
+ }
+ return -EINVAL;
+}
+
+static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each(btsr->evlist, evsel) {
+ if (evsel->attr.type == btsr->intel_bts_pmu->type)
+ return perf_evlist__enable_event(btsr->evlist, evsel);
+ }
+ return -EINVAL;
+}
+
+static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
+{
+ int i, a, b;
+
+ b = buf_size >> 3;
+ a = b - 512;
+ if (a < 0)
+ a = 0;
+
+ for (i = a; i < b; i++) {
+ if (data[i])
+ return true;
+ }
+
+ return false;
+}
+
+static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ bool wrapped;
+ int err;
+
+ pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
+ __func__, idx, (size_t)*old, (size_t)*head);
+
+ if (idx >= btsr->snapshot_ref_cnt) {
+ err = intel_bts_alloc_snapshot_refs(btsr, idx);
+ if (err)
+ goto out_err;
+ }
+
+ wrapped = btsr->snapshot_refs[idx].wrapped;
+ if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
+ btsr->snapshot_refs[idx].wrapped = true;
+ wrapped = true;
+ }
+
+ /*
+ * In full trace mode 'head' continually increases. However in snapshot
+ * mode 'head' is an offset within the buffer. Here 'old' and 'head'
+ * are adjusted to match the full trace case which expects that 'old' is
+ * always less than 'head'.
+ */
+ if (wrapped) {
+ *old = *head;
+ *head += mm->len;
+ } else {
+ if (mm->mask)
+ *old &= mm->mask;
+ else
+ *old %= mm->len;
+ if (*old > *head)
+ *head += mm->len;
+ }
+
+ pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
+ __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
+
+ return 0;
+
+out_err:
+ pr_err("%s: failed, error %d\n", __func__, err);
+ return err;
+}
+
+static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each(btsr->evlist, evsel) {
+ if (evsel->attr.type == btsr->intel_bts_pmu->type)
+ return perf_evlist__enable_event_idx(btsr->evlist,
+ evsel, idx);
+ }
+ return -EINVAL;
+}
+
+struct auxtrace_record *intel_bts_recording_init(int *err)
+{
+ struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+ struct intel_bts_recording *btsr;
+
+ if (!intel_bts_pmu)
+ return NULL;
+
+ btsr = zalloc(sizeof(struct intel_bts_recording));
+ if (!btsr) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ btsr->intel_bts_pmu = intel_bts_pmu;
+ btsr->itr.recording_options = intel_bts_recording_options;
+ btsr->itr.info_priv_size = intel_bts_info_priv_size;
+ btsr->itr.info_fill = intel_bts_info_fill;
+ btsr->itr.free = intel_bts_recording_free;
+ btsr->itr.snapshot_start = intel_bts_snapshot_start;
+ btsr->itr.snapshot_finish = intel_bts_snapshot_finish;
+ btsr->itr.find_snapshot = intel_bts_find_snapshot;
+ btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
+ btsr->itr.reference = intel_bts_reference;
+ btsr->itr.read_finish = intel_bts_read_finish;
+ btsr->itr.alignment = sizeof(struct branch);
+ return &btsr->itr;
+}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
new file mode 100644
index 0000000..2ca10d7
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -0,0 +1,1007 @@
+/*
+ * intel_pt.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <cpuid.h>
+
+#include "../../perf.h"
+#include "../../util/session.h"
+#include "../../util/event.h"
+#include "../../util/evlist.h"
+#include "../../util/evsel.h"
+#include "../../util/cpumap.h"
+#include "../../util/parse-options.h"
+#include "../../util/parse-events.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/auxtrace.h"
+#include "../../util/tsc.h"
+#include "../../util/intel-pt.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+#define KiB_MASK(x) (KiB(x) - 1)
+#define MiB_MASK(x) (MiB(x) - 1)
+
+#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4)
+
+#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60)
+
+#define INTEL_PT_PSB_PERIOD_NEAR 256
+
+struct intel_pt_snapshot_ref {
+ void *ref_buf;
+ size_t ref_offset;
+ bool wrapped;
+};
+
+struct intel_pt_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *intel_pt_pmu;
+ int have_sched_switch;
+ struct perf_evlist *evlist;
+ bool snapshot_mode;
+ bool snapshot_init_done;
+ size_t snapshot_size;
+ size_t snapshot_ref_buf_size;
+ int snapshot_ref_cnt;
+ struct intel_pt_snapshot_ref *snapshot_refs;
+};
+
+static int intel_pt_parse_terms_with_default(struct list_head *formats,
+ const char *str,
+ u64 *config)
+{
+ struct list_head *terms;
+ struct perf_event_attr attr = { .size = 0, };
+ int err;
+
+ terms = malloc(sizeof(struct list_head));
+ if (!terms)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(terms);
+
+ err = parse_events_terms(terms, str);
+ if (err)
+ goto out_free;
+
+ attr.config = *config;
+ err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
+ if (err)
+ goto out_free;
+
+ *config = attr.config;
+out_free:
+ parse_events__free_terms(terms);
+ return err;
+}
+
+static int intel_pt_parse_terms(struct list_head *formats, const char *str,
+ u64 *config)
+{
+ *config = 0;
+ return intel_pt_parse_terms_with_default(formats, str, config);
+}
+
+static u64 intel_pt_masked_bits(u64 mask, u64 bits)
+{
+ const u64 top_bit = 1ULL << 63;
+ u64 res = 0;
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (mask & top_bit) {
+ res <<= 1;
+ if (bits & top_bit)
+ res |= 1;
+ }
+ mask <<= 1;
+ bits <<= 1;
+ }
+
+ return res;
+}
+
+static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
+ struct perf_evlist *evlist, u64 *res)
+{
+ struct perf_evsel *evsel;
+ u64 mask;
+
+ *res = 0;
+
+ mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
+ if (!mask)
+ return -EINVAL;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type == intel_pt_pmu->type) {
+ *res = intel_pt_masked_bits(mask, evsel->attr.config);
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
+ struct perf_evlist *evlist)
+{
+ u64 val;
+ int err, topa_multiple_entries;
+ size_t psb_period;
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
+ "%d", &topa_multiple_entries) != 1)
+ topa_multiple_entries = 0;
+
+ /*
+ * Use caps/topa_multiple_entries to indicate early hardware that had
+ * extra frequent PSBs.
+ */
+ if (!topa_multiple_entries) {
+ psb_period = 256;
+ goto out;
+ }
+
+ err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
+ if (err)
+ val = 0;
+
+ psb_period = 1 << (val + 11);
+out:
+ pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
+ return psb_period;
+}
+
+static int intel_pt_pick_bit(int bits, int target)
+{
+ int pos, pick = -1;
+
+ for (pos = 0; bits; bits >>= 1, pos++) {
+ if (bits & 1) {
+ if (pos <= target || pick < 0)
+ pick = pos;
+ if (pos >= target)
+ break;
+ }
+ }
+
+ return pick;
+}
+
+static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
+{
+ char buf[256];
+ int mtc, mtc_periods = 0, mtc_period;
+ int psb_cyc, psb_periods, psb_period;
+ int pos = 0;
+ u64 config;
+
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
+ &mtc) != 1)
+ mtc = 1;
+
+ if (mtc) {
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
+ &mtc_periods) != 1)
+ mtc_periods = 0;
+ if (mtc_periods) {
+ mtc_period = intel_pt_pick_bit(mtc_periods, 3);
+ pos += scnprintf(buf + pos, sizeof(buf) - pos,
+ ",mtc,mtc_period=%d", mtc_period);
+ }
+ }
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
+ &psb_cyc) != 1)
+ psb_cyc = 1;
+
+ if (psb_cyc && mtc_periods) {
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
+ &psb_periods) != 1)
+ psb_periods = 0;
+ if (psb_periods) {
+ psb_period = intel_pt_pick_bit(psb_periods, 3);
+ pos += scnprintf(buf + pos, sizeof(buf) - pos,
+ ",psb_period=%d", psb_period);
+ }
+ }
+
+ pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
+
+ intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
+
+ return config;
+}
+
+static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ unsigned long long snapshot_size = 0;
+ char *endptr;
+
+ if (str) {
+ snapshot_size = strtoull(str, &endptr, 0);
+ if (*endptr || snapshot_size > SIZE_MAX)
+ return -1;
+ }
+
+ opts->auxtrace_snapshot_mode = true;
+ opts->auxtrace_snapshot_size = snapshot_size;
+
+ ptr->snapshot_size = snapshot_size;
+
+ return 0;
+}
+
+struct perf_event_attr *
+intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
+{
+ struct perf_event_attr *attr;
+
+ attr = zalloc(sizeof(struct perf_event_attr));
+ if (!attr)
+ return NULL;
+
+ attr->config = intel_pt_default_config(intel_pt_pmu);
+
+ intel_pt_pmu->selectable = true;
+
+ return attr;
+}
+
+static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+{
+ return INTEL_PT_AUXTRACE_PRIV_SIZE;
+}
+
+static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
+{
+ unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+ __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+ *n = ebx;
+ *d = eax;
+}
+
+static int intel_pt_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
+ struct perf_event_mmap_page *pc;
+ struct perf_tsc_conversion tc = { .time_mult = 0, };
+ bool cap_user_time_zero = false, per_cpu_mmaps;
+ u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
+ u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
+ int err;
+
+ if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
+ return -EINVAL;
+
+ intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+ intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
+ &noretcomp_bit);
+ intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
+ mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
+ "mtc_period");
+ intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);
+
+ intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
+
+ if (!session->evlist->nr_mmaps)
+ return -EINVAL;
+
+ pc = session->evlist->mmap[0].base;
+ if (pc) {
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err) {
+ if (err != -EOPNOTSUPP)
+ return err;
+ } else {
+ cap_user_time_zero = tc.time_mult != 0;
+ }
+ if (!cap_user_time_zero)
+ ui__warning("Intel Processor Trace: TSC not available\n");
+ }
+
+ per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);
+
+ auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
+ auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
+ auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
+ auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
+ auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
+ auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
+ auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
+ auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
+ auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
+ auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
+ auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
+ auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
+ auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
+ auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
+ auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
+ auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
+
+ return 0;
+}
+
+static int intel_pt_track_switches(struct perf_evlist *evlist)
+{
+ const char *sched_switch = "sched:sched_switch";
+ struct perf_evsel *evsel;
+ int err;
+
+ if (!perf_evlist__can_select_event(evlist, sched_switch))
+ return -EPERM;
+
+ err = parse_events(evlist, sched_switch, NULL);
+ if (err) {
+ pr_debug2("%s: failed to parse %s, error %d\n",
+ __func__, sched_switch, err);
+ return err;
+ }
+
+ evsel = perf_evlist__last(evlist);
+
+ perf_evsel__set_sample_bit(evsel, CPU);
+ perf_evsel__set_sample_bit(evsel, TIME);
+
+ evsel->system_wide = true;
+ evsel->no_aux_samples = true;
+ evsel->immediate = true;
+
+ return 0;
+}
+
+static void intel_pt_valid_str(char *str, size_t len, u64 valid)
+{
+ unsigned int val, last = 0, state = 1;
+ int p = 0;
+
+ str[0] = '\0';
+
+ for (val = 0; val <= 64; val++, valid >>= 1) {
+ if (valid & 1) {
+ last = val;
+ switch (state) {
+ case 0:
+ p += scnprintf(str + p, len - p, ",");
+ /* Fall through */
+ case 1:
+ p += scnprintf(str + p, len - p, "%u", val);
+ state = 2;
+ break;
+ case 2:
+ state = 3;
+ break;
+ case 3:
+ state = 4;
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (state) {
+ case 3:
+ p += scnprintf(str + p, len - p, ",%u", last);
+ state = 0;
+ break;
+ case 4:
+ p += scnprintf(str + p, len - p, "-%u", last);
+ state = 0;
+ break;
+ default:
+ break;
+ }
+ if (state != 1)
+ state = 0;
+ }
+ }
+}
+
+static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
+ const char *caps, const char *name,
+ const char *supported, u64 config)
+{
+ char valid_str[256];
+ unsigned int shift;
+ unsigned long long valid;
+ u64 bits;
+ int ok;
+
+ if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
+ valid = 0;
+
+ if (supported &&
+ perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
+ valid = 0;
+
+ valid |= 1;
+
+ bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);
+
+ config &= bits;
+
+ for (shift = 0; bits && !(bits & 1); shift++)
+ bits >>= 1;
+
+ config >>= shift;
+
+ if (config > 63)
+ goto out_err;
+
+ if (valid & (1 << config))
+ return 0;
+out_err:
+ intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
+ pr_err("Invalid %s for %s. Valid values are: %s\n",
+ name, INTEL_PT_PMU_NAME, valid_str);
+ return -EINVAL;
+}
+
+static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
+ struct perf_evsel *evsel)
+{
+ int err;
+
+ if (!evsel)
+ return 0;
+
+ err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
+ "cyc_thresh", "caps/psb_cyc",
+ evsel->attr.config);
+ if (err)
+ return err;
+
+ err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
+ "mtc_period", "caps/mtc",
+ evsel->attr.config);
+ if (err)
+ return err;
+
+ return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
+ "psb_period", "caps/psb_cyc",
+ evsel->attr.config);
+}
+
+static int intel_pt_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
+ bool have_timing_info;
+ struct perf_evsel *evsel, *intel_pt_evsel = NULL;
+ const struct cpu_map *cpus = evlist->cpus;
+ bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+ u64 tsc_bit;
+ int err;
+
+ ptr->evlist = evlist;
+ ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type == intel_pt_pmu->type) {
+ if (intel_pt_evsel) {
+ pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ intel_pt_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
+ pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
+ return -EINVAL;
+ }
+
+ if (opts->use_clockid) {
+ pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
+ return -EINVAL;
+ }
+
+ if (!opts->full_auxtrace)
+ return 0;
+
+ err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
+ if (err)
+ return err;
+
+ /* Set default sizes for snapshot mode */
+ if (opts->auxtrace_snapshot_mode) {
+ size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
+
+ if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ } else if (!opts->auxtrace_mmap_pages && !privileged &&
+ opts->mmap_pages == UINT_MAX) {
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ if (!opts->auxtrace_snapshot_size)
+ opts->auxtrace_snapshot_size =
+ opts->auxtrace_mmap_pages * (size_t)page_size;
+ if (!opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_snapshot_size;
+
+ sz = round_up(sz, page_size) / page_size;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+ }
+ if (opts->auxtrace_snapshot_size >
+ opts->auxtrace_mmap_pages * (size_t)page_size) {
+ pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+ opts->auxtrace_snapshot_size,
+ opts->auxtrace_mmap_pages * (size_t)page_size);
+ return -EINVAL;
+ }
+ if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+ pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+ return -EINVAL;
+ }
+ pr_debug2("Intel PT snapshot size: %zu\n",
+ opts->auxtrace_snapshot_size);
+ if (psb_period &&
+ opts->auxtrace_snapshot_size <= psb_period +
+ INTEL_PT_PSB_PERIOD_NEAR)
+ ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
+ opts->auxtrace_snapshot_size, psb_period);
+ }
+
+ /* Set default sizes for full trace mode */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ }
+
+ /* Validate auxtrace_mmap_pages */
+ if (opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+ size_t min_sz;
+
+ if (opts->auxtrace_snapshot_mode)
+ min_sz = KiB(4);
+ else
+ min_sz = KiB(8);
+
+ if (sz < min_sz || !is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
+ min_sz / 1024);
+ return -EINVAL;
+ }
+ }
+
+ intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+
+ if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
+ have_timing_info = true;
+ else
+ have_timing_info = false;
+
+ /*
+ * Per-cpu recording needs sched_switch events to distinguish different
+ * threads.
+ */
+ if (have_timing_info && !cpu_map__empty(cpus)) {
+ err = intel_pt_track_switches(evlist);
+ if (err == -EPERM)
+ pr_debug2("Unable to select sched:sched_switch\n");
+ else if (err)
+ return err;
+ else
+ ptr->have_sched_switch = 1;
+ }
+
+ if (intel_pt_evsel) {
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace
+ * event must come first.
+ */
+ perf_evlist__to_front(evlist, intel_pt_evsel);
+ /*
+ * In the case of per-cpu mmaps, we need the CPU on the
+ * AUX event.
+ */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
+ }
+
+ /* Add dummy event to keep tracking */
+ if (opts->full_auxtrace) {
+ struct perf_evsel *tracking_evsel;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ tracking_evsel = perf_evlist__last(evlist);
+
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+
+ /* In per-cpu case, always need the time of mmap events etc */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ }
+
+ /*
+ * Warn the user when we do not have enough information to decode i.e.
+ * per-cpu with no sched_switch (except workload-only).
+ */
+ if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
+ !target__none(&opts->target))
+ ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
+
+ return 0;
+}
+
+static int intel_pt_snapshot_start(struct auxtrace_record *itr)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->intel_pt_pmu->type)
+ return perf_evlist__disable_event(ptr->evlist, evsel);
+ }
+ return -EINVAL;
+}
+
+static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->intel_pt_pmu->type)
+ return perf_evlist__enable_event(ptr->evlist, evsel);
+ }
+ return -EINVAL;
+}
+
+static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
+{
+ const size_t sz = sizeof(struct intel_pt_snapshot_ref);
+ int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
+ struct intel_pt_snapshot_ref *refs;
+
+ if (!new_cnt)
+ new_cnt = 16;
+
+ while (new_cnt <= idx)
+ new_cnt *= 2;
+
+ refs = calloc(new_cnt, sz);
+ if (!refs)
+ return -ENOMEM;
+
+ memcpy(refs, ptr->snapshot_refs, cnt * sz);
+
+ ptr->snapshot_refs = refs;
+ ptr->snapshot_ref_cnt = new_cnt;
+
+ return 0;
+}
+
+static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
+{
+ int i;
+
+ for (i = 0; i < ptr->snapshot_ref_cnt; i++)
+ zfree(&ptr->snapshot_refs[i].ref_buf);
+ zfree(&ptr->snapshot_refs);
+}
+
+static void intel_pt_recording_free(struct auxtrace_record *itr)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+
+ intel_pt_free_snapshot_refs(ptr);
+ free(ptr);
+}
+
+static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
+ size_t snapshot_buf_size)
+{
+ size_t ref_buf_size = ptr->snapshot_ref_buf_size;
+ void *ref_buf;
+
+ ref_buf = zalloc(ref_buf_size);
+ if (!ref_buf)
+ return -ENOMEM;
+
+ ptr->snapshot_refs[idx].ref_buf = ref_buf;
+ ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;
+
+ return 0;
+}
+
+static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
+ size_t snapshot_buf_size)
+{
+ const size_t max_size = 256 * 1024;
+ size_t buf_size = 0, psb_period;
+
+ if (ptr->snapshot_size <= 64 * 1024)
+ return 0;
+
+ psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
+ if (psb_period)
+ buf_size = psb_period * 2;
+
+ if (!buf_size || buf_size > max_size)
+ buf_size = max_size;
+
+ if (buf_size >= snapshot_buf_size)
+ return 0;
+
+ if (buf_size >= ptr->snapshot_size / 2)
+ return 0;
+
+ return buf_size;
+}
+
+static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
+ size_t snapshot_buf_size)
+{
+ if (ptr->snapshot_init_done)
+ return 0;
+
+ ptr->snapshot_init_done = true;
+
+ ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
+ snapshot_buf_size);
+
+ return 0;
+}
+
+/**
+ * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
+ * @buf1: first buffer
+ * @compare_size: number of bytes to compare
+ * @buf2: second buffer (a circular buffer)
+ * @offs2: offset in second buffer
+ * @buf2_size: size of second buffer
+ *
+ * The comparison allows for the possibility that the bytes to compare in the
+ * circular buffer are not contiguous. It is assumed that @compare_size <=
+ * @buf2_size. This function returns %false if the bytes are identical, %true
+ * otherwise.
+ */
+static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
+ void *buf2, size_t offs2, size_t buf2_size)
+{
+ size_t end2 = offs2 + compare_size, part_size;
+
+ if (end2 <= buf2_size)
+ return memcmp(buf1, buf2 + offs2, compare_size);
+
+ part_size = end2 - buf2_size;
+ if (memcmp(buf1, buf2 + offs2, part_size))
+ return true;
+
+ compare_size -= part_size;
+
+ return memcmp(buf1 + part_size, buf2, compare_size);
+}
+
+static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
+ size_t ref_size, size_t buf_size,
+ void *data, size_t head)
+{
+ size_t ref_end = ref_offset + ref_size;
+
+ if (ref_end > buf_size) {
+ if (head > ref_offset || head < ref_end - buf_size)
+ return true;
+ } else if (head > ref_offset && head < ref_end) {
+ return true;
+ }
+
+ return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
+ buf_size);
+}
+
+static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
+ void *data, size_t head)
+{
+ if (head >= ref_size) {
+ memcpy(ref_buf, data + head - ref_size, ref_size);
+ } else {
+ memcpy(ref_buf, data, head);
+ ref_size -= head;
+ memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
+ }
+}
+
+static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 head)
+{
+ struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
+ bool wrapped;
+
+ wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
+ ptr->snapshot_ref_buf_size, mm->len,
+ data, head);
+
+ intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
+ data, head);
+
+ return wrapped;
+}
+
+static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
+{
+ int i, a, b;
+
+ b = buf_size >> 3;
+ a = b - 512;
+ if (a < 0)
+ a = 0;
+
+ for (i = a; i < b; i++) {
+ if (data[i])
+ return true;
+ }
+
+ return false;
+}
+
+static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ bool wrapped;
+ int err;
+
+ pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
+ __func__, idx, (size_t)*old, (size_t)*head);
+
+ err = intel_pt_snapshot_init(ptr, mm->len);
+ if (err)
+ goto out_err;
+
+ if (idx >= ptr->snapshot_ref_cnt) {
+ err = intel_pt_alloc_snapshot_refs(ptr, idx);
+ if (err)
+ goto out_err;
+ }
+
+ if (ptr->snapshot_ref_buf_size) {
+ if (!ptr->snapshot_refs[idx].ref_buf) {
+ err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
+ if (err)
+ goto out_err;
+ }
+ wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
+ } else {
+ wrapped = ptr->snapshot_refs[idx].wrapped;
+ if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
+ ptr->snapshot_refs[idx].wrapped = true;
+ wrapped = true;
+ }
+ }
+
+ /*
+ * In full trace mode 'head' continually increases. However in snapshot
+ * mode 'head' is an offset within the buffer. Here 'old' and 'head'
+ * are adjusted to match the full trace case which expects that 'old' is
+ * always less than 'head'.
+ */
+ if (wrapped) {
+ *old = *head;
+ *head += mm->len;
+ } else {
+ if (mm->mask)
+ *old &= mm->mask;
+ else
+ *old %= mm->len;
+ if (*old > *head)
+ *head += mm->len;
+ }
+
+ pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
+ __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
+
+ return 0;
+
+out_err:
+ pr_err("%s: failed, error %d\n", __func__, err);
+ return err;
+}
+
+static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ return rdtsc();
+}
+
+static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->intel_pt_pmu->type)
+ return perf_evlist__enable_event_idx(ptr->evlist, evsel,
+ idx);
+ }
+ return -EINVAL;
+}
+
+struct auxtrace_record *intel_pt_recording_init(int *err)
+{
+ struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+ struct intel_pt_recording *ptr;
+
+ if (!intel_pt_pmu)
+ return NULL;
+
+ ptr = zalloc(sizeof(struct intel_pt_recording));
+ if (!ptr) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ ptr->intel_pt_pmu = intel_pt_pmu;
+ ptr->itr.recording_options = intel_pt_recording_options;
+ ptr->itr.info_priv_size = intel_pt_info_priv_size;
+ ptr->itr.info_fill = intel_pt_info_fill;
+ ptr->itr.free = intel_pt_recording_free;
+ ptr->itr.snapshot_start = intel_pt_snapshot_start;
+ ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
+ ptr->itr.find_snapshot = intel_pt_find_snapshot;
+ ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
+ ptr->itr.reference = intel_pt_reference;
+ ptr->itr.read_finish = intel_pt_read_finish;
+ return &ptr->itr;
+}
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
new file mode 100644
index 0000000..c5db14f
--- /dev/null
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -0,0 +1,28 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG(AX, PERF_REG_X86_AX),
+ SMPL_REG(BX, PERF_REG_X86_BX),
+ SMPL_REG(CX, PERF_REG_X86_CX),
+ SMPL_REG(DX, PERF_REG_X86_DX),
+ SMPL_REG(SI, PERF_REG_X86_SI),
+ SMPL_REG(DI, PERF_REG_X86_DI),
+ SMPL_REG(BP, PERF_REG_X86_BP),
+ SMPL_REG(SP, PERF_REG_X86_SP),
+ SMPL_REG(IP, PERF_REG_X86_IP),
+ SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
+ SMPL_REG(CS, PERF_REG_X86_CS),
+ SMPL_REG(SS, PERF_REG_X86_SS),
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ SMPL_REG(R8, PERF_REG_X86_R8),
+ SMPL_REG(R9, PERF_REG_X86_R9),
+ SMPL_REG(R10, PERF_REG_X86_R10),
+ SMPL_REG(R11, PERF_REG_X86_R11),
+ SMPL_REG(R12, PERF_REG_X86_R12),
+ SMPL_REG(R13, PERF_REG_X86_R13),
+ SMPL_REG(R14, PERF_REG_X86_R14),
+ SMPL_REG(R15, PERF_REG_X86_R15),
+#endif
+ SMPL_REG_END
+};
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
new file mode 100644
index 0000000..79fe071
--- /dev/null
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -0,0 +1,18 @@
+#include <string.h>
+
+#include <linux/perf_event.h>
+
+#include "../../util/intel-pt.h"
+#include "../../util/intel-bts.h"
+#include "../../util/pmu.h"
+
+struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+#ifdef HAVE_AUXTRACE_SUPPORT
+ if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
+ return intel_pt_pmu_default_config(pmu);
+ if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
+ pmu->selectable = true;
+#endif
+ return NULL;
+}
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/tools/perf/arch/xtensa/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile
new file mode 100644
index 0000000..7fbca17
--- /dev/null
+++ b/tools/perf/arch/xtensa/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build
new file mode 100644
index 0000000..954e287
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c
new file mode 100644
index 0000000..4dba76b
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/dwarf-regs.c
@@ -0,0 +1,25 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (c) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define XTENSA_MAX_REGS 16
+
+const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+ return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
+}
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 5ce9802..573e288 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -3,7 +3,9 @@ perf-y += sched-pipe.o
perf-y += mem-memcpy.o
perf-y += futex-hash.o
perf-y += futex-wake.o
+perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
+perf-y += futex-lock-pi.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 3c4dd44..a50df86 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,7 +33,11 @@ extern int bench_mem_memcpy(int argc, const char **argv,
extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
+extern int bench_futex_wake_parallel(int argc, const char **argv,
+ const char *prefix);
extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
+/* pi futexes */
+extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
new file mode 100644
index 0000000..bc6a16a
--- /dev/null
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+struct worker {
+ int tid;
+ u_int32_t *futex;
+ pthread_t thread;
+ unsigned long ops;
+};
+
+static u_int32_t global_futex = 0;
+static struct worker *worker;
+static unsigned int nsecs = 10;
+static bool silent = false, multi = false;
+static bool done = false, fshared = false;
+static unsigned int ncpus, nthreads = 0;
+static int futex_flag = 0;
+struct timeval start, end, runtime;
+static pthread_mutex_t thread_lock;
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+static pthread_cond_t thread_parent, thread_worker;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
+ OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_lock_pi_usage[] = {
+ "perf bench futex requeue <options>",
+ NULL
+};
+
+static void print_summary(void)
+{
+ unsigned long avg = avg_stats(&throughput_stats);
+ double stddev = stddev_stats(&throughput_stats);
+
+ printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+ !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+ (int) runtime.tv_sec);
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ /* inform all threads that we're done for the day */
+ done = true;
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+}
+
+static void *workerfn(void *arg)
+{
+ struct worker *w = (struct worker *) arg;
+
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ do {
+ int ret;
+ again:
+ ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
+
+ if (ret) { /* handle lock acquisition */
+ if (!silent)
+ warn("thread %d: Could not lock pi-lock for %p (%d)",
+ w->tid, w->futex, ret);
+ if (done)
+ break;
+
+ goto again;
+ }
+
+ usleep(1);
+ ret = futex_unlock_pi(w->futex, futex_flag);
+ if (ret && !silent)
+ warn("thread %d: Could not unlock pi-lock for %p (%d)",
+ w->tid, w->futex, ret);
+ w->ops++; /* account for thread's share of work */
+ } while (!done);
+
+ return NULL;
+}
+
+static void create_threads(struct worker *w, pthread_attr_t thread_attr)
+{
+ cpu_set_t cpu;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ for (i = 0; i < nthreads; i++) {
+ worker[i].tid = i;
+
+ if (multi) {
+ worker[i].futex = calloc(1, sizeof(u_int32_t));
+ if (!worker[i].futex)
+ err(EXIT_FAILURE, "calloc");
+ } else
+ worker[i].futex = &global_futex;
+
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+int bench_futex_lock_pi(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ unsigned int i;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+
+ argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
+ if (argc)
+ goto err;
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
+ getpid(), nthreads, nsecs);
+
+ init_stats(&throughput_stats);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ threads_starting = nthreads;
+ pthread_attr_init(&thread_attr);
+ gettimeofday(&start, NULL);
+
+ create_threads(worker, thread_attr);
+ pthread_attr_destroy(&thread_attr);
+
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ sleep(nsecs);
+ toggle_done(0, NULL, NULL);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i].thread, NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+
+ for (i = 0; i < nthreads; i++) {
+ unsigned long t = worker[i].ops/runtime.tv_sec;
+
+ update_stats(&throughput_stats, t);
+ if (!silent)
+ printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
+ worker[i].tid, worker[i].futex, t);
+
+ if (multi)
+ free(worker[i].futex);
+ }
+
+ print_summary();
+
+ free(worker);
+ return ret;
+err:
+ usage_with_options(bench_futex_lock_pi_usage, options);
+ exit(EXIT_FAILURE);
+}
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index bedff6b..ad0d9b5 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv,
if (!fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
+ if (nrequeue > nthreads)
+ nrequeue = nthreads;
+
printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
"%d at a time.\n\n", getpid(), nthreads,
fshared ? "shared":"private", &futex1, &futex2, nrequeue);
@@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv,
/* Ok, all threads are patiently blocked, start requeueing */
gettimeofday(&start, NULL);
- for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) {
+ while (nrequeued < nthreads) {
/*
* Do not wakeup any tasks blocked on futex1, allowing
* us to really measure futex_wait functionality.
*/
- futex_cmp_requeue(&futex1, 0, &futex2, 0,
- nrequeue, futex_flag);
+ nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
+ nrequeue, futex_flag);
}
+
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);
- if (nrequeued > nthreads)
- nrequeued = nthreads;
-
update_stats(&requeued_stats, nrequeued);
update_stats(&requeuetime_stats, runtime.tv_usec);
@@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv,
}
/* everybody should be blocked on futex2, wake'em up */
- nrequeued = futex_wake(&futex2, nthreads, futex_flag);
+ nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
if (nthreads != nrequeued)
warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
new file mode 100644
index 0000000..6d8c9fa
--- /dev/null
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ *
+ * Block a bunch of threads and let parallel waker threads wakeup an
+ * equal amount of them. The program output reflects the avg latency
+ * for each individual thread to service its share of work. Ultimately
+ * it can be used to measure futex_wake() changes.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+struct thread_data {
+ pthread_t worker;
+ unsigned int nwoken;
+ struct timeval runtime;
+};
+
+static unsigned int nwakes = 1;
+
+/* all threads will block on the same futex -- hash bucket chaos ;) */
+static u_int32_t futex = 0;
+
+static pthread_t *blocked_worker;
+static bool done = false, silent = false, fshared = false;
+static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int ncpus, threads_starting;
+static int futex_flag = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
+ OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_wake_parallel_usage[] = {
+ "perf bench futex wake-parallel <options>",
+ NULL
+};
+
+static void *waking_workerfn(void *arg)
+{
+ struct thread_data *waker = (struct thread_data *) arg;
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+
+ waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
+ if (waker->nwoken != nwakes)
+ warnx("couldn't wakeup all tasks (%d/%d)",
+ waker->nwoken, nwakes);
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &waker->runtime);
+
+ pthread_exit(NULL);
+ return NULL;
+}
+
+static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+{
+ unsigned int i;
+
+ pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+
+ /* create and block all threads */
+ for (i = 0; i < nwaking_threads; i++) {
+ /*
+ * Thread creation order will impact per-thread latency
+ * as it will affect the order to acquire the hb spinlock.
+ * For now let the scheduler decide.
+ */
+ if (pthread_create(&td[i].worker, &thread_attr,
+ waking_workerfn, (void *)&td[i]))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+
+ for (i = 0; i < nwaking_threads; i++)
+ if (pthread_join(td[i].worker, NULL))
+ err(EXIT_FAILURE, "pthread_join");
+}
+
+static void *blocked_workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ while (1) { /* handle spurious wakeups */
+ if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
+ break;
+ }
+
+ pthread_exit(NULL);
+ return NULL;
+}
+
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+{
+ cpu_set_t cpu;
+ unsigned int i;
+
+ threads_starting = nblocked_threads;
+
+ /* create and block all threads */
+ for (i = 0; i < nblocked_threads; i++) {
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void print_run(struct thread_data *waking_worker, unsigned int run_num)
+{
+ unsigned int i, wakeup_avg;
+ double waketime_avg, waketime_stddev;
+ struct stats __waketime_stats, __wakeup_stats;
+
+ init_stats(&__wakeup_stats);
+ init_stats(&__waketime_stats);
+
+ for (i = 0; i < nwaking_threads; i++) {
+ update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
+ update_stats(&__wakeup_stats, waking_worker[i].nwoken);
+ }
+
+ waketime_avg = avg_stats(&__waketime_stats);
+ waketime_stddev = stddev_stats(&__waketime_stats);
+ wakeup_avg = avg_stats(&__wakeup_stats);
+
+ printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
+ "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
+ nblocked_threads, waketime_avg/1e3,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void print_summary(void)
+{
+ unsigned int wakeup_avg;
+ double waketime_avg, waketime_stddev;
+
+ waketime_avg = avg_stats(&waketime_stats);
+ waketime_stddev = stddev_stats(&waketime_stats);
+ wakeup_avg = avg_stats(&wakeup_stats);
+
+ printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
+ wakeup_avg,
+ nblocked_threads,
+ waketime_avg/1e3,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+
+static void do_run_stats(struct thread_data *waking_worker)
+{
+ unsigned int i;
+
+ for (i = 0; i < nwaking_threads; i++) {
+ update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
+ update_stats(&wakeup_stats, waking_worker[i].nwoken);
+ }
+
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_wake_parallel(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+ struct thread_data *waking_worker;
+
+ argc = parse_options(argc, argv, options,
+ bench_futex_wake_parallel_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_wake_parallel_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (!nblocked_threads)
+ nblocked_threads = ncpus;
+
+ /* some sanity checks */
+ if (nwaking_threads > nblocked_threads || !nwaking_threads)
+ nwaking_threads = nblocked_threads;
+
+ if (nblocked_threads % nwaking_threads)
+ errx(EXIT_FAILURE, "Must be perfectly divisible");
+ /*
+ * Each thread will wakeup nwakes tasks in
+ * a single futex_wait call.
+ */
+ nwakes = nblocked_threads/nwaking_threads;
+
+ blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+ if (!blocked_worker)
+ err(EXIT_FAILURE, "calloc");
+
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
+ "futex %p), %d threads waking up %d at a time.\n\n",
+ getpid(), nblocked_threads, fshared ? "shared":"private",
+ &futex, nwaking_threads, nwakes);
+
+ init_stats(&wakeup_stats);
+ init_stats(&waketime_stats);
+
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < bench_repeat && !done; j++) {
+ waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+ if (!waking_worker)
+ err(EXIT_FAILURE, "calloc");
+
+ /* create, launch & block all threads */
+ block_threads(blocked_worker, thread_attr);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start waking folks up */
+ wakeup_threads(waking_worker, thread_attr);
+
+ for (i = 0; i < nblocked_threads; i++) {
+ ret = pthread_join(blocked_worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ do_run_stats(waking_worker);
+ if (!silent)
+ print_run(waking_worker, j);
+
+ free(waking_worker);
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(blocked_worker);
+ return ret;
+}
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 929f762..e5e41d3 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -60,7 +60,12 @@ static void *workerfn(void *arg __maybe_unused)
pthread_cond_wait(&thread_worker, &thread_lock);
pthread_mutex_unlock(&thread_lock);
- futex_wait(&futex1, 0, NULL, futex_flag);
+ while (1) {
+ if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
+ break;
+ }
+
+ pthread_exit(NULL);
return NULL;
}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 7ed22ff..d44de9f 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -56,6 +56,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
}
/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect: whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(u_int32_t *uaddr, int opflags)
+{
+ return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
* @nr_wake: wake up to this many tasks
* @nr_requeue: requeue up to this many tasks
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index ebfa163..870b7e6 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -8,6 +8,7 @@
#include "../builtin.h"
#include "../util/util.h"
#include "../util/parse-options.h"
+#include "../util/cloexec.h"
#include "bench.h"
@@ -23,6 +24,7 @@
#include <pthread.h>
#include <sys/mman.h>
#include <sys/time.h>
+#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <sys/types.h>
@@ -51,6 +53,9 @@ struct thread_data {
unsigned int loops_done;
u64 val;
u64 runtime_ns;
+ u64 system_time_ns;
+ u64 user_time_ns;
+ double speed_gbs;
pthread_mutex_t *process_lock;
};
@@ -180,7 +185,7 @@ static const struct option options[] = {
OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
- OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"),
+ OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"),
OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
/* Special option string parsing callbacks: */
@@ -828,6 +833,9 @@ static int count_process_nodes(int process_nr)
td = g->threads + task_nr;
node = numa_node_of_cpu(td->curr_cpu);
+ if (node < 0) /* curr_cpu was likely still -1 */
+ return 0;
+
node_present[node] = 1;
}
@@ -882,6 +890,11 @@ static void calc_convergence_compression(int *strong)
for (p = 0; p < g->p.nr_proc; p++) {
unsigned int nodes = count_process_nodes(p);
+ if (!nodes) {
+ *strong = 0;
+ return;
+ }
+
nodes_min = min(nodes, nodes_min);
nodes_max = max(nodes, nodes_max);
}
@@ -1034,6 +1047,7 @@ static void *worker_thread(void *__tdata)
u64 bytes_done;
long work_done;
u32 l;
+ struct rusage rusage;
bind_to_cpumask(td->bind_cpumask);
bind_to_memnode(td->bind_node);
@@ -1186,6 +1200,13 @@ static void *worker_thread(void *__tdata)
timersub(&stop, &start0, &diff);
td->runtime_ns = diff.tv_sec * 1000000000ULL;
td->runtime_ns += diff.tv_usec * 1000ULL;
+ td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
+
+ getrusage(RUSAGE_THREAD, &rusage);
+ td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
+ td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
+ td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
+ td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
free_data(thread_data, g->p.bytes_thread);
@@ -1395,7 +1416,7 @@ static void print_res(const char *name, double val,
if (!name)
name = "main,";
- if (g->p.show_quiet)
+ if (!g->p.show_quiet)
printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
else
printf(" %14.3f %s\n", val, txt_long);
@@ -1412,7 +1433,7 @@ static int __bench_numa(const char *name)
double runtime_sec_min;
int wait_stat;
double bytes;
- int i, t;
+ int i, t, p;
if (init())
return -1;
@@ -1548,6 +1569,24 @@ static int __bench_numa(const char *name)
print_res(name, bytes / runtime_sec_max / 1e9,
"GB/sec,", "total-speed", "GB/sec total speed");
+ if (g->p.show_details >= 2) {
+ char tname[32];
+ struct thread_data *td;
+ for (p = 0; p < g->p.nr_proc; p++) {
+ for (t = 0; t < g->p.nr_threads; t++) {
+ memset(tname, 0, 32);
+ td = g->threads + p*g->p.nr_threads + t;
+ snprintf(tname, 32, "process%d:thread%d", p, t);
+ print_res(tname, td->speed_gbs,
+ "GB/sec", "thread-speed", "GB/sec/thread speed");
+ print_res(tname, td->system_time_ns / 1e9,
+ "secs", "thread-system-time", "system CPU time/thread");
+ print_res(tname, td->user_time_ns / 1e9,
+ "secs", "thread-user-time", "user CPU time/thread");
+ }
+ }
+ }
+
free(pids);
deinit();
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 71bf745..8edc205 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -59,10 +59,15 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
(al->sym == NULL ||
strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
/* We're only interested in a symbol named sym_hist_filter */
+ /*
+ * FIXME: why isn't this done in the symbol_filter when loading
+ * the DSO?
+ */
if (al->sym != NULL) {
rb_erase(&al->sym->rb_node,
&al->map->dso->symbols[al->map->type]);
symbol__delete(al->sym);
+ dso__reset_find_symbol_cache(al->map->dso);
}
return 0;
}
@@ -84,6 +89,7 @@ static int process_sample_event(struct perf_tool *tool,
{
struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
struct addr_location al;
+ int ret = 0;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
@@ -92,15 +98,16 @@ static int process_sample_event(struct perf_tool *tool,
}
if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
- return 0;
+ goto out_put;
if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
- return -1;
+ ret = -1;
}
-
- return 0;
+out_put:
+ addr_location__put(&al);
+ return ret;
}
static int hist_entry__tty_annotate(struct hist_entry *he,
@@ -181,6 +188,7 @@ find_next:
* symbol, free he->ms.sym->src to signal we already
* processed this symbol.
*/
+ zfree(&notes->src->cycles_hist);
zfree(&notes->src);
}
}
@@ -232,6 +240,8 @@ static int __cmd_annotate(struct perf_annotate *ann)
if (nr_samples > 0) {
total_nr_samples += nr_samples;
hists__collapse_resort(hists, NULL);
+ /* Don't sort callchain */
+ perf_evsel__reset_sample_bit(pos, CALLCHAIN);
hists__output_resort(hists, NULL);
if (symbol_conf.event_group &&
@@ -283,7 +293,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
},
};
struct perf_data_file file = {
- .path = input_name,
.mode = PERF_DATA_MODE_READ,
};
const struct option options[] = {
@@ -324,6 +333,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
"objdump binary to use for disassembly and annotations"),
OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
"Show event group information together"),
+ OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+ "Show a column with the sum of periods"),
OPT_END()
};
int ret = hists__init();
@@ -340,6 +351,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
else if (annotate.use_gtk)
use_browser = 2;
+ file.path = input_name;
+
setup_browser(true);
annotate.session = perf_session__new(&file, false, &annotate.tool);
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b9a56fa..f67934d 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -58,7 +58,10 @@ static struct bench mem_benchmarks[] = {
static struct bench futex_benchmarks[] = {
{ "hash", "Benchmark for futex hash table", bench_futex_hash },
{ "wake", "Benchmark for futex wake calls", bench_futex_wake },
+ { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel },
{ "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
+ /* pi-futexes */
+ { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi },
{ "all", "Test all futex benchmarks", NULL },
{ NULL, NULL, NULL }
};
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index d47a0cd..7b8450c 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -25,8 +25,6 @@
static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
{
char root_dir[PATH_MAX];
- char notes[PATH_MAX];
- u8 build_id[BUILD_ID_SIZE];
char *p;
strlcpy(root_dir, proc_dir, sizeof(root_dir));
@@ -35,15 +33,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
if (!p)
return -1;
*p = '\0';
-
- scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
-
- if (sysfs__read_build_id(notes, build_id, sizeof(build_id)))
- return -1;
-
- build_id__sprintf(build_id, sizeof(build_id), sbuildid);
-
- return 0;
+ return sysfs__sprintf_build_id(root_dir, sbuildid);
}
static int build_id_cache__kcore_dir(char *dir, size_t sz)
@@ -127,7 +117,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
static int build_id_cache__add_kcore(const char *filename, bool force)
{
- char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1];
+ char dir[32], sbuildid[SBUILD_ID_SIZE];
char from_dir[PATH_MAX], to_dir[PATH_MAX];
char *p;
@@ -138,7 +128,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force)
return -1;
*p = '\0';
- if (build_id_cache__kcore_buildid(from_dir, sbuildid))
+ if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0)
return -1;
scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s",
@@ -184,7 +174,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force)
static int build_id_cache__add_file(const char *filename)
{
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
u8 build_id[BUILD_ID_SIZE];
int err;
@@ -204,7 +194,7 @@ static int build_id_cache__add_file(const char *filename)
static int build_id_cache__remove_file(const char *filename)
{
u8 build_id[BUILD_ID_SIZE];
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
int err;
@@ -276,7 +266,7 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f
static int build_id_cache__update_file(const char *filename)
{
u8 build_id[BUILD_ID_SIZE];
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
int err = 0;
@@ -363,7 +353,7 @@ int cmd_buildid_cache(int argc, const char **argv,
setup_pager();
if (add_name_list_str) {
- list = strlist__new(true, add_name_list_str);
+ list = strlist__new(add_name_list_str, NULL);
if (list) {
strlist__for_each(pos, list)
if (build_id_cache__add_file(pos->s)) {
@@ -381,7 +371,7 @@ int cmd_buildid_cache(int argc, const char **argv,
}
if (remove_name_list_str) {
- list = strlist__new(true, remove_name_list_str);
+ list = strlist__new(remove_name_list_str, NULL);
if (list) {
strlist__for_each(pos, list)
if (build_id_cache__remove_file(pos->s)) {
@@ -399,7 +389,7 @@ int cmd_buildid_cache(int argc, const char **argv,
}
if (purge_name_list_str) {
- list = strlist__new(true, purge_name_list_str);
+ list = strlist__new(purge_name_list_str, NULL);
if (list) {
strlist__for_each(pos, list)
if (build_id_cache__purge_path(pos->s)) {
@@ -420,7 +410,7 @@ int cmd_buildid_cache(int argc, const char **argv,
ret = build_id_cache__fprintf_missing(session, stdout);
if (update_name_list_str) {
- list = strlist__new(true, update_name_list_str);
+ list = strlist__new(update_name_list_str, NULL);
if (list) {
strlist__for_each(pos, list)
if (build_id_cache__update_file(pos->s)) {
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index feb420f..918b4de 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -19,29 +19,25 @@
static int sysfs__fprintf_build_id(FILE *fp)
{
- u8 kallsyms_build_id[BUILD_ID_SIZE];
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
+ int ret;
- if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
- sizeof(kallsyms_build_id)) != 0)
- return -1;
+ ret = sysfs__sprintf_build_id("/", sbuild_id);
+ if (ret != sizeof(sbuild_id))
+ return ret < 0 ? ret : -EINVAL;
- build_id__sprintf(kallsyms_build_id, sizeof(kallsyms_build_id),
- sbuild_id);
- fprintf(fp, "%s\n", sbuild_id);
- return 0;
+ return fprintf(fp, "%s\n", sbuild_id);
}
static int filename__fprintf_build_id(const char *name, FILE *fp)
{
- u8 build_id[BUILD_ID_SIZE];
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
+ int ret;
- if (filename__read_build_id(name, build_id,
- sizeof(build_id)) != sizeof(build_id))
- return 0;
+ ret = filename__sprintf_build_id(name, sbuild_id);
+ if (ret != sizeof(sbuild_id))
+ return ret < 0 ? ret : -EINVAL;
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
return fprintf(fp, "%s\n", sbuild_id);
}
@@ -63,12 +59,21 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
/*
* See if this is an ELF file first:
*/
- if (filename__fprintf_build_id(input_name, stdout))
+ if (filename__fprintf_build_id(input_name, stdout) > 0)
goto out;
session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops);
if (session == NULL)
return -1;
+
+ /*
+ * We take all buildids when the file contains AUX area tracing data
+ * because we do not decode the trace because it would take too long.
+ */
+ if (!perf_data_file__is_pipe(&file) &&
+ perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+ with_hits = false;
+
/*
* in pipe-mode, the only way to get the buildids is to parse
* the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index df6307b..0b180a8 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -328,6 +328,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
{
struct addr_location al;
struct hists *hists = evsel__hists(evsel);
+ int ret = -1;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
@@ -338,7 +339,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
if (hists__add_entry(hists, &al, sample->period,
sample->weight, sample->transaction)) {
pr_warning("problem incrementing symbol period, skipping event\n");
- return -1;
+ goto out_put;
}
/*
@@ -350,8 +351,10 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
hists->stats.total_period += sample->period;
if (!al.filtered)
hists->stats.total_non_filtered_period += sample->period;
-
- return 0;
+ ret = 0;
+out_put:
+ addr_location__put(&al);
+ return ret;
}
static struct perf_tool tool = {
@@ -719,6 +722,9 @@ static void data_process(void)
if (verbose || data__files_cnt > 2)
data__fprintf();
+ /* Don't sort callchain for perf diff */
+ perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+
hists__process(hists_base);
}
}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40a33d7..f62c49b 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,6 +16,7 @@
#include "util/debug.h"
#include "util/build-id.h"
#include "util/data.h"
+#include "util/auxtrace.h"
#include "util/parse-options.h"
@@ -26,10 +27,12 @@ struct perf_inject {
struct perf_session *session;
bool build_ids;
bool sched_stat;
+ bool have_auxtrace;
const char *input_name;
struct perf_data_file output;
u64 bytes_written;
struct list_head samples;
+ struct itrace_synth_opts itrace_synth_opts;
};
struct event_entry {
@@ -38,14 +41,11 @@ struct event_entry {
union perf_event event[0];
};
-static int perf_event__repipe_synth(struct perf_tool *tool,
- union perf_event *event)
+static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
- struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
ssize_t size;
- size = perf_data_file__write(&inject->output, event,
- event->header.size);
+ size = perf_data_file__write(&inject->output, buf, sz);
if (size < 0)
return -errno;
@@ -53,6 +53,15 @@ static int perf_event__repipe_synth(struct perf_tool *tool,
return 0;
}
+static int perf_event__repipe_synth(struct perf_tool *tool,
+ union perf_event *event)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject,
+ tool);
+
+ return output_bytes(inject, event, event->header.size);
+}
+
static int perf_event__repipe_oe_synth(struct perf_tool *tool,
union perf_event *event,
struct ordered_events *oe __maybe_unused)
@@ -86,6 +95,79 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
+{
+ char buf[4096];
+ ssize_t ssz;
+ int ret;
+
+ while (size > 0) {
+ ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
+ if (ssz < 0)
+ return -errno;
+ ret = output_bytes(inject, buf, ssz);
+ if (ret)
+ return ret;
+ size -= ssz;
+ }
+
+ return 0;
+}
+
+static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session
+ __maybe_unused)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject,
+ tool);
+ int ret;
+
+ inject->have_auxtrace = true;
+
+ if (!inject->output.is_pipe) {
+ off_t offset;
+
+ offset = lseek(inject->output.fd, 0, SEEK_CUR);
+ if (offset == -1)
+ return -errno;
+ ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
+ event, offset);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (perf_data_file__is_pipe(session->file) || !session->one_mmap) {
+ ret = output_bytes(inject, event, event->header.size);
+ if (ret < 0)
+ return ret;
+ ret = copy_bytes(inject, perf_data_file__fd(session->file),
+ event->auxtrace.size);
+ } else {
+ ret = output_bytes(inject, event,
+ event->header.size + event->auxtrace.size);
+ }
+ if (ret < 0)
+ return ret;
+
+ return event->auxtrace.size;
+}
+
+#else
+
+static s64
+perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+#endif
+
static int perf_event__repipe(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -155,6 +237,32 @@ static int perf_event__repipe_fork(struct perf_tool *tool,
return err;
}
+static int perf_event__repipe_comm(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_comm(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
+static int perf_event__repipe_exit(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_exit(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
static int perf_event__repipe_tracing_data(struct perf_tool *tool,
union perf_event *event,
struct perf_session *session)
@@ -167,6 +275,18 @@ static int perf_event__repipe_tracing_data(struct perf_tool *tool,
return err;
}
+static int perf_event__repipe_id_index(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ int err;
+
+ perf_event__repipe_synth(tool, event);
+ err = perf_event__process_id_index(tool, event, session);
+
+ return err;
+}
+
static int dso__read_build_id(struct dso *dso)
{
if (dso->has_build_id)
@@ -245,6 +365,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
}
}
+ thread__put(thread);
repipe:
perf_event__repipe(tool, event, sample, machine);
return 0;
@@ -351,16 +472,20 @@ static int __cmd_inject(struct perf_inject *inject)
struct perf_session *session = inject->session;
struct perf_data_file *file_out = &inject->output;
int fd = perf_data_file__fd(file_out);
+ u64 output_data_offset;
signal(SIGINT, sig_handler);
- if (inject->build_ids || inject->sched_stat) {
+ if (inject->build_ids || inject->sched_stat ||
+ inject->itrace_synth_opts.set) {
inject->tool.mmap = perf_event__repipe_mmap;
inject->tool.mmap2 = perf_event__repipe_mmap2;
inject->tool.fork = perf_event__repipe_fork;
inject->tool.tracing_data = perf_event__repipe_tracing_data;
}
+ output_data_offset = session->header.data_offset;
+
if (inject->build_ids) {
inject->tool.sample = perf_event__inject_buildid;
} else if (inject->sched_stat) {
@@ -379,17 +504,43 @@ static int __cmd_inject(struct perf_inject *inject)
else if (!strncmp(name, "sched:sched_stat_", 17))
evsel->handler = perf_inject__sched_stat;
}
+ } else if (inject->itrace_synth_opts.set) {
+ session->itrace_synth_opts = &inject->itrace_synth_opts;
+ inject->itrace_synth_opts.inject = true;
+ inject->tool.comm = perf_event__repipe_comm;
+ inject->tool.exit = perf_event__repipe_exit;
+ inject->tool.id_index = perf_event__repipe_id_index;
+ inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
+ inject->tool.auxtrace = perf_event__process_auxtrace;
+ inject->tool.ordered_events = true;
+ inject->tool.ordering_requires_timestamps = true;
+ /* Allow space in the header for new attributes */
+ output_data_offset = 4096;
}
+ if (!inject->itrace_synth_opts.set)
+ auxtrace_index__free(&session->auxtrace_index);
+
if (!file_out->is_pipe)
- lseek(fd, session->header.data_offset, SEEK_SET);
+ lseek(fd, output_data_offset, SEEK_SET);
ret = perf_session__process_events(session);
if (!file_out->is_pipe) {
- if (inject->build_ids)
+ if (inject->build_ids) {
perf_header__set_feat(&session->header,
HEADER_BUILD_ID);
+ if (inject->have_auxtrace)
+ dsos__hit_all(session);
+ }
+ /*
+ * The AUX areas have been removed and replaced with
+ * synthesized hardware events, so clear the feature flag.
+ */
+ if (inject->itrace_synth_opts.set)
+ perf_header__clear_feat(&session->header,
+ HEADER_AUXTRACE);
+ session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written;
perf_session__write_header(session, session->evlist, fd, true);
}
@@ -408,11 +559,17 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
.fork = perf_event__repipe,
.exit = perf_event__repipe,
.lost = perf_event__repipe,
+ .aux = perf_event__repipe,
+ .itrace_start = perf_event__repipe,
+ .context_switch = perf_event__repipe,
.read = perf_event__repipe_sample,
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
.attr = perf_event__repipe_attr,
.tracing_data = perf_event__repipe_op2_synth,
+ .auxtrace_info = perf_event__repipe_op2_synth,
+ .auxtrace = perf_event__repipe_auxtrace,
+ .auxtrace_error = perf_event__repipe_op2_synth,
.finished_round = perf_event__repipe_oe_synth,
.build_id = perf_event__repipe_op2_synth,
.id_index = perf_event__repipe_op2_synth,
@@ -444,6 +601,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
"kallsyms pathname"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+ OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
+ NULL, "opts", "Instruction Tracing options",
+ itrace_parse_synth_opts),
OPT_END()
};
const char * const inject_usage[] = {
@@ -471,12 +631,13 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
if (inject.session == NULL)
return -1;
- if (symbol__init(&inject.session->header.env) < 0)
- return -1;
+ ret = symbol__init(&inject.session->header.env);
+ if (ret < 0)
+ goto out_delete;
ret = __cmd_inject(&inject);
+out_delete:
perf_session__delete(inject.session);
-
return ret;
}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 63ea013..23b1faa 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -10,6 +10,7 @@
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"
+#include "util/callchain.h"
#include "util/parse-options.h"
#include "util/trace-event.h"
@@ -21,14 +22,19 @@
#include <linux/rbtree.h>
#include <linux/string.h>
#include <locale.h>
+#include <regex.h>
static int kmem_slab;
static int kmem_page;
static long kmem_page_size;
+static enum {
+ KMEM_SLAB,
+ KMEM_PAGE,
+} kmem_default = KMEM_SLAB; /* for backward compatibility */
struct alloc_stat;
-typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
+typedef int (*sort_fn_t)(void *, void *);
static int alloc_flag;
static int caller_flag;
@@ -179,8 +185,8 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
return ret;
}
-static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
-static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+static int ptr_cmp(void *, void *);
+static int slab_callsite_cmp(void *, void *);
static struct alloc_stat *search_alloc_stat(unsigned long ptr,
unsigned long call_site,
@@ -221,7 +227,8 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel,
s_alloc->pingpong++;
s_caller = search_alloc_stat(0, s_alloc->call_site,
- &root_caller_stat, callsite_cmp);
+ &root_caller_stat,
+ slab_callsite_cmp);
if (!s_caller)
return -1;
s_caller->pingpong++;
@@ -241,6 +248,8 @@ static unsigned long nr_page_fails;
static unsigned long nr_page_nomatch;
static bool use_pfn;
+static bool live_page;
+static struct perf_session *kmem_session;
#define MAX_MIGRATE_TYPES 6
#define MAX_PAGE_ORDER 11
@@ -250,6 +259,7 @@ static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
struct page_stat {
struct rb_node node;
u64 page;
+ u64 callsite;
int order;
unsigned gfp_flags;
unsigned migrate_type;
@@ -259,13 +269,158 @@ struct page_stat {
int nr_free;
};
-static struct rb_root page_tree;
+static struct rb_root page_live_tree;
static struct rb_root page_alloc_tree;
static struct rb_root page_alloc_sorted;
+static struct rb_root page_caller_tree;
+static struct rb_root page_caller_sorted;
-static struct page_stat *search_page(unsigned long page, bool create)
+struct alloc_func {
+ u64 start;
+ u64 end;
+ char *name;
+};
+
+static int nr_alloc_funcs;
+static struct alloc_func *alloc_func_list;
+
+static int funcmp(const void *a, const void *b)
+{
+ const struct alloc_func *fa = a;
+ const struct alloc_func *fb = b;
+
+ if (fa->start > fb->start)
+ return 1;
+ else
+ return -1;
+}
+
+static int callcmp(const void *a, const void *b)
+{
+ const struct alloc_func *fa = a;
+ const struct alloc_func *fb = b;
+
+ if (fb->start <= fa->start && fa->end < fb->end)
+ return 0;
+
+ if (fa->start > fb->start)
+ return 1;
+ else
+ return -1;
+}
+
+static int build_alloc_func_list(void)
+{
+ int ret;
+ struct map *kernel_map;
+ struct symbol *sym;
+ struct rb_node *node;
+ struct alloc_func *func;
+ struct machine *machine = &kmem_session->machines.host;
+ regex_t alloc_func_regex;
+ const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
+
+ ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
+ if (ret) {
+ char err[BUFSIZ];
+
+ regerror(ret, &alloc_func_regex, err, sizeof(err));
+ pr_err("Invalid regex: %s\n%s", pattern, err);
+ return -EINVAL;
+ }
+
+ kernel_map = machine->vmlinux_maps[MAP__FUNCTION];
+ if (map__load(kernel_map, NULL) < 0) {
+ pr_err("cannot load kernel map\n");
+ return -ENOENT;
+ }
+
+ map__for_each_symbol(kernel_map, sym, node) {
+ if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
+ continue;
+
+ func = realloc(alloc_func_list,
+ (nr_alloc_funcs + 1) * sizeof(*func));
+ if (func == NULL)
+ return -ENOMEM;
+
+ pr_debug("alloc func: %s\n", sym->name);
+ func[nr_alloc_funcs].start = sym->start;
+ func[nr_alloc_funcs].end = sym->end;
+ func[nr_alloc_funcs].name = sym->name;
+
+ alloc_func_list = func;
+ nr_alloc_funcs++;
+ }
+
+ qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
+
+ regfree(&alloc_func_regex);
+ return 0;
+}
+
+/*
+ * Find first non-memory allocation function from callchain.
+ * The allocation functions are in the 'alloc_func_list'.
+ */
+static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
{
- struct rb_node **node = &page_tree.rb_node;
+ struct addr_location al;
+ struct machine *machine = &kmem_session->machines.host;
+ struct callchain_cursor_node *node;
+
+ if (alloc_func_list == NULL) {
+ if (build_alloc_func_list() < 0)
+ goto out;
+ }
+
+ al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ sample__resolve_callchain(sample, NULL, evsel, &al, 16);
+
+ callchain_cursor_commit(&callchain_cursor);
+ while (true) {
+ struct alloc_func key, *caller;
+ u64 addr;
+
+ node = callchain_cursor_current(&callchain_cursor);
+ if (node == NULL)
+ break;
+
+ key.start = key.end = node->ip;
+ caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
+ sizeof(key), callcmp);
+ if (!caller) {
+ /* found */
+ if (node->map)
+ addr = map__unmap_ip(node->map, node->ip);
+ else
+ addr = node->ip;
+
+ return addr;
+ } else
+ pr_debug3("skipping alloc function: %s\n", caller->name);
+
+ callchain_cursor_advance(&callchain_cursor);
+ }
+
+out:
+ pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
+ return sample->ip;
+}
+
+struct sort_dimension {
+ const char name[20];
+ sort_fn_t cmp;
+ struct list_head list;
+};
+
+static LIST_HEAD(page_alloc_sort_input);
+static LIST_HEAD(page_caller_sort_input);
+
+static struct page_stat *
+__page_stat__findnew_page(struct page_stat *pstat, bool create)
+{
+ struct rb_node **node = &page_live_tree.rb_node;
struct rb_node *parent = NULL;
struct page_stat *data;
@@ -275,7 +430,7 @@ static struct page_stat *search_page(unsigned long page, bool create)
parent = *node;
data = rb_entry(*node, struct page_stat, node);
- cmp = data->page - page;
+ cmp = data->page - pstat->page;
if (cmp < 0)
node = &parent->rb_left;
else if (cmp > 0)
@@ -289,49 +444,48 @@ static struct page_stat *search_page(unsigned long page, bool create)
data = zalloc(sizeof(*data));
if (data != NULL) {
- data->page = page;
+ data->page = pstat->page;
+ data->order = pstat->order;
+ data->gfp_flags = pstat->gfp_flags;
+ data->migrate_type = pstat->migrate_type;
rb_link_node(&data->node, parent, node);
- rb_insert_color(&data->node, &page_tree);
+ rb_insert_color(&data->node, &page_live_tree);
}
return data;
}
-static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
+static struct page_stat *page_stat__find_page(struct page_stat *pstat)
{
- if (a->page > b->page)
- return -1;
- if (a->page < b->page)
- return 1;
- if (a->order > b->order)
- return -1;
- if (a->order < b->order)
- return 1;
- if (a->migrate_type > b->migrate_type)
- return -1;
- if (a->migrate_type < b->migrate_type)
- return 1;
- if (a->gfp_flags > b->gfp_flags)
- return -1;
- if (a->gfp_flags < b->gfp_flags)
- return 1;
- return 0;
+ return __page_stat__findnew_page(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
+{
+ return __page_stat__findnew_page(pstat, true);
}
-static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
+static struct page_stat *
+__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
{
struct rb_node **node = &page_alloc_tree.rb_node;
struct rb_node *parent = NULL;
struct page_stat *data;
+ struct sort_dimension *sort;
while (*node) {
- s64 cmp;
+ int cmp = 0;
parent = *node;
data = rb_entry(*node, struct page_stat, node);
- cmp = page_stat_cmp(data, stat);
+ list_for_each_entry(sort, &page_alloc_sort_input, list) {
+ cmp = sort->cmp(pstat, data);
+ if (cmp)
+ break;
+ }
+
if (cmp < 0)
node = &parent->rb_left;
else if (cmp > 0)
@@ -345,10 +499,10 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
data = zalloc(sizeof(*data));
if (data != NULL) {
- data->page = stat->page;
- data->order = stat->order;
- data->gfp_flags = stat->gfp_flags;
- data->migrate_type = stat->migrate_type;
+ data->page = pstat->page;
+ data->order = pstat->order;
+ data->gfp_flags = pstat->gfp_flags;
+ data->migrate_type = pstat->migrate_type;
rb_link_node(&data->node, parent, node);
rb_insert_color(&data->node, &page_alloc_tree);
@@ -357,6 +511,71 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
return data;
}
+static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
+{
+ return __page_stat__findnew_alloc(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
+{
+ return __page_stat__findnew_alloc(pstat, true);
+}
+
+static struct page_stat *
+__page_stat__findnew_caller(struct page_stat *pstat, bool create)
+{
+ struct rb_node **node = &page_caller_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct page_stat *data;
+ struct sort_dimension *sort;
+
+ while (*node) {
+ int cmp = 0;
+
+ parent = *node;
+ data = rb_entry(*node, struct page_stat, node);
+
+ list_for_each_entry(sort, &page_caller_sort_input, list) {
+ cmp = sort->cmp(pstat, data);
+ if (cmp)
+ break;
+ }
+
+ if (cmp < 0)
+ node = &parent->rb_left;
+ else if (cmp > 0)
+ node = &parent->rb_right;
+ else
+ return data;
+ }
+
+ if (!create)
+ return NULL;
+
+ data = zalloc(sizeof(*data));
+ if (data != NULL) {
+ data->callsite = pstat->callsite;
+ data->order = pstat->order;
+ data->gfp_flags = pstat->gfp_flags;
+ data->migrate_type = pstat->migrate_type;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &page_caller_tree);
+ }
+
+ return data;
+}
+
+static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
+{
+ return __page_stat__findnew_caller(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
+{
+ return __page_stat__findnew_caller(pstat, true);
+}
+
static bool valid_page(u64 pfn_or_page)
{
if (use_pfn && pfn_or_page == -1UL)
@@ -366,6 +585,176 @@ static bool valid_page(u64 pfn_or_page)
return true;
}
+struct gfp_flag {
+ unsigned int flags;
+ char *compact_str;
+ char *human_readable;
+};
+
+static struct gfp_flag *gfps;
+static int nr_gfps;
+
+static int gfpcmp(const void *a, const void *b)
+{
+ const struct gfp_flag *fa = a;
+ const struct gfp_flag *fb = b;
+
+ return fa->flags - fb->flags;
+}
+
+/* see include/trace/events/gfpflags.h */
+static const struct {
+ const char *original;
+ const char *compact;
+} gfp_compact_table[] = {
+ { "GFP_TRANSHUGE", "THP" },
+ { "GFP_HIGHUSER_MOVABLE", "HUM" },
+ { "GFP_HIGHUSER", "HU" },
+ { "GFP_USER", "U" },
+ { "GFP_TEMPORARY", "TMP" },
+ { "GFP_KERNEL", "K" },
+ { "GFP_NOFS", "NF" },
+ { "GFP_ATOMIC", "A" },
+ { "GFP_NOIO", "NI" },
+ { "GFP_HIGH", "H" },
+ { "GFP_WAIT", "W" },
+ { "GFP_IO", "I" },
+ { "GFP_COLD", "CO" },
+ { "GFP_NOWARN", "NWR" },
+ { "GFP_REPEAT", "R" },
+ { "GFP_NOFAIL", "NF" },
+ { "GFP_NORETRY", "NR" },
+ { "GFP_COMP", "C" },
+ { "GFP_ZERO", "Z" },
+ { "GFP_NOMEMALLOC", "NMA" },
+ { "GFP_MEMALLOC", "MA" },
+ { "GFP_HARDWALL", "HW" },
+ { "GFP_THISNODE", "TN" },
+ { "GFP_RECLAIMABLE", "RC" },
+ { "GFP_MOVABLE", "M" },
+ { "GFP_NOTRACK", "NT" },
+ { "GFP_NO_KSWAPD", "NK" },
+ { "GFP_OTHER_NODE", "ON" },
+ { "GFP_NOWAIT", "NW" },
+};
+
+static size_t max_gfp_len;
+
+static char *compact_gfp_flags(char *gfp_flags)
+{
+ char *orig_flags = strdup(gfp_flags);
+ char *new_flags = NULL;
+ char *str, *pos = NULL;
+ size_t len = 0;
+
+ if (orig_flags == NULL)
+ return NULL;
+
+ str = strtok_r(orig_flags, "|", &pos);
+ while (str) {
+ size_t i;
+ char *new;
+ const char *cpt;
+
+ for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
+ if (strcmp(gfp_compact_table[i].original, str))
+ continue;
+
+ cpt = gfp_compact_table[i].compact;
+ new = realloc(new_flags, len + strlen(cpt) + 2);
+ if (new == NULL) {
+ free(new_flags);
+ return NULL;
+ }
+
+ new_flags = new;
+
+ if (!len) {
+ strcpy(new_flags, cpt);
+ } else {
+ strcat(new_flags, "|");
+ strcat(new_flags, cpt);
+ len++;
+ }
+
+ len += strlen(cpt);
+ }
+
+ str = strtok_r(NULL, "|", &pos);
+ }
+
+ if (max_gfp_len < len)
+ max_gfp_len = len;
+
+ free(orig_flags);
+ return new_flags;
+}
+
+static char *compact_gfp_string(unsigned long gfp_flags)
+{
+ struct gfp_flag key = {
+ .flags = gfp_flags,
+ };
+ struct gfp_flag *gfp;
+
+ gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+ if (gfp)
+ return gfp->compact_str;
+
+ return NULL;
+}
+
+static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
+ unsigned int gfp_flags)
+{
+ struct pevent_record record = {
+ .cpu = sample->cpu,
+ .data = sample->raw_data,
+ .size = sample->raw_size,
+ };
+ struct trace_seq seq;
+ char *str, *pos = NULL;
+
+ if (nr_gfps) {
+ struct gfp_flag key = {
+ .flags = gfp_flags,
+ };
+
+ if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
+ return 0;
+ }
+
+ trace_seq_init(&seq);
+ pevent_event_info(&seq, evsel->tp_format, &record);
+
+ str = strtok_r(seq.buffer, " ", &pos);
+ while (str) {
+ if (!strncmp(str, "gfp_flags=", 10)) {
+ struct gfp_flag *new;
+
+ new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
+ if (new == NULL)
+ return -ENOMEM;
+
+ gfps = new;
+ new += nr_gfps++;
+
+ new->flags = gfp_flags;
+ new->human_readable = strdup(str + 10);
+ new->compact_str = compact_gfp_flags(str + 10);
+ if (!new->human_readable || !new->compact_str)
+ return -ENOMEM;
+
+ qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+ }
+
+ str = strtok_r(NULL, " ", &pos);
+ }
+
+ trace_seq_destroy(&seq);
+ return 0;
+}
+
static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
struct perf_sample *sample)
{
@@ -375,7 +764,8 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
unsigned int migrate_type = perf_evsel__intval(evsel, sample,
"migratetype");
u64 bytes = kmem_page_size << order;
- struct page_stat *stat;
+ u64 callsite;
+ struct page_stat *pstat;
struct page_stat this = {
.order = order,
.gfp_flags = gfp_flags,
@@ -397,25 +787,41 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
return 0;
}
+ if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
+ return -1;
+
+ callsite = find_callsite(evsel, sample);
+
/*
* This is to find the current page (with correct gfp flags and
* migrate type) at free event.
*/
- stat = search_page(page, true);
- if (stat == NULL)
+ this.page = page;
+ pstat = page_stat__findnew_page(&this);
+ if (pstat == NULL)
return -ENOMEM;
- stat->order = order;
- stat->gfp_flags = gfp_flags;
- stat->migrate_type = migrate_type;
+ pstat->nr_alloc++;
+ pstat->alloc_bytes += bytes;
+ pstat->callsite = callsite;
- this.page = page;
- stat = search_page_alloc_stat(&this, true);
- if (stat == NULL)
+ if (!live_page) {
+ pstat = page_stat__findnew_alloc(&this);
+ if (pstat == NULL)
+ return -ENOMEM;
+
+ pstat->nr_alloc++;
+ pstat->alloc_bytes += bytes;
+ pstat->callsite = callsite;
+ }
+
+ this.callsite = callsite;
+ pstat = page_stat__findnew_caller(&this);
+ if (pstat == NULL)
return -ENOMEM;
- stat->nr_alloc++;
- stat->alloc_bytes += bytes;
+ pstat->nr_alloc++;
+ pstat->alloc_bytes += bytes;
order_stats[order][migrate_type]++;
@@ -428,7 +834,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
u64 page;
unsigned int order = perf_evsel__intval(evsel, sample, "order");
u64 bytes = kmem_page_size << order;
- struct page_stat *stat;
+ struct page_stat *pstat;
struct page_stat this = {
.order = order,
};
@@ -441,8 +847,9 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
nr_page_frees++;
total_page_free_bytes += bytes;
- stat = search_page(page, false);
- if (stat == NULL) {
+ this.page = page;
+ pstat = page_stat__find_page(&this);
+ if (pstat == NULL) {
pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
page, order);
@@ -452,19 +859,40 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
return 0;
}
- this.page = page;
- this.gfp_flags = stat->gfp_flags;
- this.migrate_type = stat->migrate_type;
+ this.gfp_flags = pstat->gfp_flags;
+ this.migrate_type = pstat->migrate_type;
+ this.callsite = pstat->callsite;
+
+ rb_erase(&pstat->node, &page_live_tree);
+ free(pstat);
+
+ if (live_page) {
+ order_stats[this.order][this.migrate_type]--;
+ } else {
+ pstat = page_stat__find_alloc(&this);
+ if (pstat == NULL)
+ return -ENOMEM;
- rb_erase(&stat->node, &page_tree);
- free(stat);
+ pstat->nr_free++;
+ pstat->free_bytes += bytes;
+ }
- stat = search_page_alloc_stat(&this, false);
- if (stat == NULL)
+ pstat = page_stat__find_caller(&this);
+ if (pstat == NULL)
return -ENOENT;
- stat->nr_free++;
- stat->free_bytes += bytes;
+ pstat->nr_free++;
+ pstat->free_bytes += bytes;
+
+ if (live_page) {
+ pstat->nr_alloc--;
+ pstat->alloc_bytes -= bytes;
+
+ if (pstat->nr_alloc == 0) {
+ rb_erase(&pstat->node, &page_caller_tree);
+ free(pstat);
+ }
+ }
return 0;
}
@@ -478,6 +906,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct perf_evsel *evsel,
struct machine *machine)
{
+ int err = 0;
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->tid);
@@ -491,10 +920,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (evsel->handler != NULL) {
tracepoint_handler f = evsel->handler;
- return f(evsel, sample);
+ err = f(evsel, sample);
}
- return 0;
+ thread__put(thread);
+
+ return err;
}
static struct perf_tool perf_kmem = {
@@ -576,41 +1007,111 @@ static const char * const migrate_type_str[] = {
"UNKNOWN",
};
-static void __print_page_result(struct rb_root *root,
- struct perf_session *session __maybe_unused,
- int n_lines)
+static void __print_page_alloc_result(struct perf_session *session, int n_lines)
{
- struct rb_node *next = rb_first(root);
+ struct rb_node *next = rb_first(&page_alloc_sorted);
+ struct machine *machine = &session->machines.host;
const char *format;
+ int gfp_len = max(strlen("GFP flags"), max_gfp_len);
- printf("\n%.80s\n", graph_dotted_line);
- printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags\n",
- use_pfn ? "PFN" : "Page");
- printf("%.80s\n", graph_dotted_line);
+ printf("\n%.105s\n", graph_dotted_line);
+ printf(" %-16s | %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n",
+ use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
+ gfp_len, "GFP flags");
+ printf("%.105s\n", graph_dotted_line);
if (use_pfn)
- format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx\n";
+ format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
else
- format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx\n";
+ format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
while (next && n_lines--) {
struct page_stat *data;
+ struct symbol *sym;
+ struct map *map;
+ char buf[32];
+ char *caller = buf;
data = rb_entry(next, struct page_stat, node);
+ sym = machine__find_kernel_function(machine, data->callsite,
+ &map, NULL);
+ if (sym && sym->name)
+ caller = sym->name;
+ else
+ scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
printf(format, (unsigned long long)data->page,
(unsigned long long)data->alloc_bytes / 1024,
data->nr_alloc, data->order,
migrate_type_str[data->migrate_type],
- (unsigned long)data->gfp_flags);
+ gfp_len, compact_gfp_string(data->gfp_flags), caller);
next = rb_next(next);
}
- if (n_lines == -1)
- printf(" ... | ... | ... | ... | ... | ... \n");
+ if (n_lines == -1) {
+ printf(" ... | ... | ... | ... | ... | %-*s | ...\n",
+ gfp_len, "...");
+ }
+
+ printf("%.105s\n", graph_dotted_line);
+}
+
+static void __print_page_caller_result(struct perf_session *session, int n_lines)
+{
+ struct rb_node *next = rb_first(&page_caller_sorted);
+ struct machine *machine = &session->machines.host;
+ int gfp_len = max(strlen("GFP flags"), max_gfp_len);
+
+ printf("\n%.105s\n", graph_dotted_line);
+ printf(" %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n",
+ live_page ? "Live" : "Total", gfp_len, "GFP flags");
+ printf("%.105s\n", graph_dotted_line);
+
+ while (next && n_lines--) {
+ struct page_stat *data;
+ struct symbol *sym;
+ struct map *map;
+ char buf[32];
+ char *caller = buf;
+
+ data = rb_entry(next, struct page_stat, node);
+ sym = machine__find_kernel_function(machine, data->callsite,
+ &map, NULL);
+ if (sym && sym->name)
+ caller = sym->name;
+ else
+ scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+
+ printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
+ (unsigned long long)data->alloc_bytes / 1024,
+ data->nr_alloc, data->order,
+ migrate_type_str[data->migrate_type],
+ gfp_len, compact_gfp_string(data->gfp_flags), caller);
- printf("%.80s\n", graph_dotted_line);
+ next = rb_next(next);
+ }
+
+ if (n_lines == -1) {
+ printf(" ... | ... | ... | ... | %-*s | ...\n",
+ gfp_len, "...");
+ }
+
+ printf("%.105s\n", graph_dotted_line);
+}
+
+static void print_gfp_flags(void)
+{
+ int i;
+
+ printf("#\n");
+ printf("# GFP flags\n");
+ printf("# ---------\n");
+ for (i = 0; i < nr_gfps; i++) {
+ printf("# %08x: %*s: %s\n", gfps[i].flags,
+ (int) max_gfp_len, gfps[i].compact_str,
+ gfps[i].human_readable);
+ }
}
static void print_slab_summary(void)
@@ -640,9 +1141,9 @@ static void print_page_summary(void)
nr_page_frees, total_page_free_bytes / 1024);
printf("\n");
- printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
+ printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
- printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
+ printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
nr_page_allocs - nr_alloc_freed,
(total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests",
@@ -682,8 +1183,12 @@ static void print_slab_result(struct perf_session *session)
static void print_page_result(struct perf_session *session)
{
+ if (caller_flag || alloc_flag)
+ print_gfp_flags();
+ if (caller_flag)
+ __print_page_caller_result(session, caller_lines);
if (alloc_flag)
- __print_page_result(&page_alloc_sorted, session, alloc_lines);
+ __print_page_alloc_result(session, alloc_lines);
print_page_summary();
}
@@ -695,14 +1200,10 @@ static void print_result(struct perf_session *session)
print_page_result(session);
}
-struct sort_dimension {
- const char name[20];
- sort_fn_t cmp;
- struct list_head list;
-};
-
-static LIST_HEAD(caller_sort);
-static LIST_HEAD(alloc_sort);
+static LIST_HEAD(slab_caller_sort);
+static LIST_HEAD(slab_alloc_sort);
+static LIST_HEAD(page_caller_sort);
+static LIST_HEAD(page_alloc_sort);
static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
struct list_head *sort_list)
@@ -751,10 +1252,12 @@ static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted
}
}
-static void sort_page_insert(struct rb_root *root, struct page_stat *data)
+static void sort_page_insert(struct rb_root *root, struct page_stat *data,
+ struct list_head *sort_list)
{
struct rb_node **new = &root->rb_node;
struct rb_node *parent = NULL;
+ struct sort_dimension *sort;
while (*new) {
struct page_stat *this;
@@ -763,8 +1266,11 @@ static void sort_page_insert(struct rb_root *root, struct page_stat *data)
this = rb_entry(*new, struct page_stat, node);
parent = *new;
- /* TODO: support more sort key */
- cmp = data->alloc_bytes - this->alloc_bytes;
+ list_for_each_entry(sort, sort_list, list) {
+ cmp = sort->cmp(data, this);
+ if (cmp)
+ break;
+ }
if (cmp > 0)
new = &parent->rb_left;
@@ -776,7 +1282,8 @@ static void sort_page_insert(struct rb_root *root, struct page_stat *data)
rb_insert_color(&data->node, root);
}
-static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted)
+static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
+ struct list_head *sort_list)
{
struct rb_node *node;
struct page_stat *data;
@@ -788,7 +1295,7 @@ static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted
rb_erase(node, root);
data = rb_entry(node, struct page_stat, node);
- sort_page_insert(root_sorted, data);
+ sort_page_insert(root_sorted, data, sort_list);
}
}
@@ -796,12 +1303,20 @@ static void sort_result(void)
{
if (kmem_slab) {
__sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
- &alloc_sort);
+ &slab_alloc_sort);
__sort_slab_result(&root_caller_stat, &root_caller_sorted,
- &caller_sort);
+ &slab_caller_sort);
}
if (kmem_page) {
- __sort_page_result(&page_alloc_tree, &page_alloc_sorted);
+ if (live_page)
+ __sort_page_result(&page_live_tree, &page_alloc_sorted,
+ &page_alloc_sort);
+ else
+ __sort_page_result(&page_alloc_tree, &page_alloc_sorted,
+ &page_alloc_sort);
+
+ __sort_page_result(&page_caller_tree, &page_caller_sorted,
+ &page_caller_sort);
}
}
@@ -850,8 +1365,12 @@ out:
return err;
}
-static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
+/* slab sort keys */
+static int ptr_cmp(void *a, void *b)
{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
if (l->ptr < r->ptr)
return -1;
else if (l->ptr > r->ptr)
@@ -864,8 +1383,11 @@ static struct sort_dimension ptr_sort_dimension = {
.cmp = ptr_cmp,
};
-static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int slab_callsite_cmp(void *a, void *b)
{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
if (l->call_site < r->call_site)
return -1;
else if (l->call_site > r->call_site)
@@ -875,11 +1397,14 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
static struct sort_dimension callsite_sort_dimension = {
.name = "callsite",
- .cmp = callsite_cmp,
+ .cmp = slab_callsite_cmp,
};
-static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int hit_cmp(void *a, void *b)
{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
if (l->hit < r->hit)
return -1;
else if (l->hit > r->hit)
@@ -892,8 +1417,11 @@ static struct sort_dimension hit_sort_dimension = {
.cmp = hit_cmp,
};
-static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int bytes_cmp(void *a, void *b)
{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
if (l->bytes_alloc < r->bytes_alloc)
return -1;
else if (l->bytes_alloc > r->bytes_alloc)
@@ -906,9 +1434,11 @@ static struct sort_dimension bytes_sort_dimension = {
.cmp = bytes_cmp,
};
-static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int frag_cmp(void *a, void *b)
{
double x, y;
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
x = fragmentation(l->bytes_req, l->bytes_alloc);
y = fragmentation(r->bytes_req, r->bytes_alloc);
@@ -925,8 +1455,11 @@ static struct sort_dimension frag_sort_dimension = {
.cmp = frag_cmp,
};
-static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int pingpong_cmp(void *a, void *b)
{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
if (l->pingpong < r->pingpong)
return -1;
else if (l->pingpong > r->pingpong)
@@ -939,7 +1472,135 @@ static struct sort_dimension pingpong_sort_dimension = {
.cmp = pingpong_cmp,
};
-static struct sort_dimension *avail_sorts[] = {
+/* page sort keys */
+static int page_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->page < r->page)
+ return -1;
+ else if (l->page > r->page)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_sort_dimension = {
+ .name = "page",
+ .cmp = page_cmp,
+};
+
+static int page_callsite_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->callsite < r->callsite)
+ return -1;
+ else if (l->callsite > r->callsite)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_callsite_sort_dimension = {
+ .name = "callsite",
+ .cmp = page_callsite_cmp,
+};
+
+static int page_hit_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->nr_alloc < r->nr_alloc)
+ return -1;
+ else if (l->nr_alloc > r->nr_alloc)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_hit_sort_dimension = {
+ .name = "hit",
+ .cmp = page_hit_cmp,
+};
+
+static int page_bytes_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->alloc_bytes < r->alloc_bytes)
+ return -1;
+ else if (l->alloc_bytes > r->alloc_bytes)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_bytes_sort_dimension = {
+ .name = "bytes",
+ .cmp = page_bytes_cmp,
+};
+
+static int page_order_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->order < r->order)
+ return -1;
+ else if (l->order > r->order)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_order_sort_dimension = {
+ .name = "order",
+ .cmp = page_order_cmp,
+};
+
+static int migrate_type_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ /* for internal use to find free'd page */
+ if (l->migrate_type == -1U)
+ return 0;
+
+ if (l->migrate_type < r->migrate_type)
+ return -1;
+ else if (l->migrate_type > r->migrate_type)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension migrate_type_sort_dimension = {
+ .name = "migtype",
+ .cmp = migrate_type_cmp,
+};
+
+static int gfp_flags_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ /* for internal use to find free'd page */
+ if (l->gfp_flags == -1U)
+ return 0;
+
+ if (l->gfp_flags < r->gfp_flags)
+ return -1;
+ else if (l->gfp_flags > r->gfp_flags)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension gfp_flags_sort_dimension = {
+ .name = "gfp",
+ .cmp = gfp_flags_cmp,
+};
+
+static struct sort_dimension *slab_sorts[] = {
&ptr_sort_dimension,
&callsite_sort_dimension,
&hit_sort_dimension,
@@ -948,16 +1609,44 @@ static struct sort_dimension *avail_sorts[] = {
&pingpong_sort_dimension,
};
-#define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts))
+static struct sort_dimension *page_sorts[] = {
+ &page_sort_dimension,
+ &page_callsite_sort_dimension,
+ &page_hit_sort_dimension,
+ &page_bytes_sort_dimension,
+ &page_order_sort_dimension,
+ &migrate_type_sort_dimension,
+ &gfp_flags_sort_dimension,
+};
+
+static int slab_sort_dimension__add(const char *tok, struct list_head *list)
+{
+ struct sort_dimension *sort;
+ int i;
+
+ for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
+ if (!strcmp(slab_sorts[i]->name, tok)) {
+ sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
+ if (!sort) {
+ pr_err("%s: memdup failed\n", __func__);
+ return -1;
+ }
+ list_add_tail(&sort->list, list);
+ return 0;
+ }
+ }
+
+ return -1;
+}
-static int sort_dimension__add(const char *tok, struct list_head *list)
+static int page_sort_dimension__add(const char *tok, struct list_head *list)
{
struct sort_dimension *sort;
int i;
- for (i = 0; i < NUM_AVAIL_SORTS; i++) {
- if (!strcmp(avail_sorts[i]->name, tok)) {
- sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
+ for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
+ if (!strcmp(page_sorts[i]->name, tok)) {
+ sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
if (!sort) {
pr_err("%s: memdup failed\n", __func__);
return -1;
@@ -970,7 +1659,7 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
return -1;
}
-static int setup_sorting(struct list_head *sort_list, const char *arg)
+static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
{
char *tok;
char *str = strdup(arg);
@@ -985,8 +1674,34 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
tok = strsep(&pos, ",");
if (!tok)
break;
- if (sort_dimension__add(tok, sort_list) < 0) {
- error("Unknown --sort key: '%s'", tok);
+ if (slab_sort_dimension__add(tok, sort_list) < 0) {
+ error("Unknown slab --sort key: '%s'", tok);
+ free(str);
+ return -1;
+ }
+ }
+
+ free(str);
+ return 0;
+}
+
+static int setup_page_sorting(struct list_head *sort_list, const char *arg)
+{
+ char *tok;
+ char *str = strdup(arg);
+ char *pos = str;
+
+ if (!str) {
+ pr_err("%s: strdup failed\n", __func__);
+ return -1;
+ }
+
+ while (true) {
+ tok = strsep(&pos, ",");
+ if (!tok)
+ break;
+ if (page_sort_dimension__add(tok, sort_list) < 0) {
+ error("Unknown page --sort key: '%s'", tok);
free(str);
return -1;
}
@@ -1002,10 +1717,18 @@ static int parse_sort_opt(const struct option *opt __maybe_unused,
if (!arg)
return -1;
- if (caller_flag > alloc_flag)
- return setup_sorting(&caller_sort, arg);
- else
- return setup_sorting(&alloc_sort, arg);
+ if (kmem_page > kmem_slab ||
+ (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
+ if (caller_flag > alloc_flag)
+ return setup_page_sorting(&page_caller_sort, arg);
+ else
+ return setup_page_sorting(&page_alloc_sort, arg);
+ } else {
+ if (caller_flag > alloc_flag)
+ return setup_slab_sorting(&slab_caller_sort, arg);
+ else
+ return setup_slab_sorting(&slab_alloc_sort, arg);
+ }
return 0;
}
@@ -1084,7 +1807,7 @@ static int __cmd_record(int argc, const char **argv)
if (kmem_slab)
rec_argc += ARRAY_SIZE(slab_events);
if (kmem_page)
- rec_argc += ARRAY_SIZE(page_events);
+ rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
rec_argv = calloc(rec_argc + 1, sizeof(char *));
@@ -1099,6 +1822,8 @@ static int __cmd_record(int argc, const char **argv)
rec_argv[i] = strdup(slab_events[j]);
}
if (kmem_page) {
+ rec_argv[i++] = strdup("-g");
+
for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
rec_argv[i] = strdup(page_events[j]);
}
@@ -1109,9 +1834,26 @@ static int __cmd_record(int argc, const char **argv)
return cmd_record(i, rec_argv, NULL);
}
+static int kmem_config(const char *var, const char *value, void *cb)
+{
+ if (!strcmp(var, "kmem.default")) {
+ if (!strcmp(value, "slab"))
+ kmem_default = KMEM_SLAB;
+ else if (!strcmp(value, "page"))
+ kmem_default = KMEM_PAGE;
+ else
+ pr_err("invalid default value ('slab' or 'page' required): %s\n",
+ value);
+ return 0;
+ }
+
+ return perf_default_config(var, value, cb);
+}
+
int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
{
- const char * const default_sort_order = "frag,hit,bytes";
+ const char * const default_slab_sort = "frag,hit,bytes";
+ const char * const default_page_sort = "bytes,hit";
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
@@ -1124,8 +1866,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
"show per-allocation statistics", parse_alloc_opt),
OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
- "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
- parse_sort_opt),
+ "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
+ "page, order, migtype, gfp", parse_sort_opt),
OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
@@ -1133,6 +1875,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
parse_slab_opt),
OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
parse_page_opt),
+ OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
OPT_END()
};
const char *const kmem_subcommands[] = { "record", "stat", NULL };
@@ -1142,15 +1885,21 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
};
struct perf_session *session;
int ret = -1;
+ const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n";
+ perf_config(kmem_config, NULL);
argc = parse_options_subcommand(argc, argv, kmem_options,
kmem_subcommands, kmem_usage, 0);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
- if (kmem_slab == 0 && kmem_page == 0)
- kmem_slab = 1; /* for backward compatibility */
+ if (kmem_slab == 0 && kmem_page == 0) {
+ if (kmem_default == KMEM_SLAB)
+ kmem_slab = 1;
+ else
+ kmem_page = 1;
+ }
if (!strncmp(argv[0], "rec", 3)) {
symbol__init(NULL);
@@ -1159,19 +1908,30 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
file.path = input_name;
- session = perf_session__new(&file, false, &perf_kmem);
+ kmem_session = session = perf_session__new(&file, false, &perf_kmem);
if (session == NULL)
return -1;
+ if (kmem_slab) {
+ if (!perf_evlist__find_tracepoint_by_name(session->evlist,
+ "kmem:kmalloc")) {
+ pr_err(errmsg, "slab", "slab");
+ goto out_delete;
+ }
+ }
+
if (kmem_page) {
- struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+ struct perf_evsel *evsel;
- if (evsel == NULL || evsel->tp_format == NULL) {
- pr_err("invalid event found.. aborting\n");
- return -1;
+ evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+ "kmem:mm_page_alloc");
+ if (evsel == NULL) {
+ pr_err(errmsg, "page", "page");
+ goto out_delete;
}
kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
+ symbol_conf.use_callchain = true;
}
symbol__init(&session->header.env);
@@ -1182,11 +1942,21 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
if (cpu__setup_cpunode_map())
goto out_delete;
- if (list_empty(&caller_sort))
- setup_sorting(&caller_sort, default_sort_order);
- if (list_empty(&alloc_sort))
- setup_sorting(&alloc_sort, default_sort_order);
-
+ if (list_empty(&slab_caller_sort))
+ setup_slab_sorting(&slab_caller_sort, default_slab_sort);
+ if (list_empty(&slab_alloc_sort))
+ setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
+ if (list_empty(&page_caller_sort))
+ setup_page_sorting(&page_caller_sort, default_page_sort);
+ if (list_empty(&page_alloc_sort))
+ setup_page_sorting(&page_alloc_sort, default_page_sort);
+
+ if (kmem_page) {
+ setup_page_sorting(&page_alloc_sort_input,
+ "page,order,migtype,gfp");
+ setup_page_sorting(&page_caller_sort_input,
+ "callsite,order,migtype,gfp");
+ }
ret = __cmd_kmem(session);
} else
usage_with_options(kmem_usage, kmem_options);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 1f9338f..fc1cffb 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -651,6 +651,7 @@ static int process_sample_event(struct perf_tool *tool,
struct perf_evsel *evsel,
struct machine *machine)
{
+ int err = 0;
struct thread *thread;
struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
tool);
@@ -666,9 +667,10 @@ static int process_sample_event(struct perf_tool *tool,
}
if (!handle_kvm_event(kvm, thread, evsel, sample))
- return -1;
+ err = -1;
- return 0;
+ thread__put(thread);
+ return err;
}
static int cpu_isa_config(struct perf_kvm_stat *kvm)
@@ -1059,8 +1061,10 @@ static int read_events(struct perf_kvm_stat *kvm)
symbol__init(&kvm->session->header.env);
- if (!perf_session__has_traces(kvm->session, "kvm record"))
- return -EINVAL;
+ if (!perf_session__has_traces(kvm->session, "kvm record")) {
+ ret = -EINVAL;
+ goto out_delete;
+ }
/*
* Do not use 'isa' recorded in kvm_exit tracepoint since it is not
@@ -1068,9 +1072,13 @@ static int read_events(struct perf_kvm_stat *kvm)
*/
ret = cpu_isa_config(kvm);
if (ret < 0)
- return ret;
+ goto out_delete;
- return perf_session__process_events(kvm->session);
+ ret = perf_session__process_events(kvm->session);
+
+out_delete:
+ perf_session__delete(kvm->session);
+ return ret;
}
static int parse_target_str(struct perf_kvm_stat *kvm)
@@ -1309,6 +1317,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
"show events other than"
" HLT (x86 only) or Wait state (s390 only)"
" that take longer than duration usecs"),
+ OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
+ "per thread proc mmap processing timeout in ms"),
OPT_END()
};
const char * const live_usage[] = {
@@ -1336,6 +1346,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
kvm->opts.target.uses_mmap = false;
kvm->opts.target.uid_str = NULL;
kvm->opts.target.uid = UINT_MAX;
+ kvm->opts.proc_map_timeout = 500;
symbol__init(NULL);
disable_buildid_cache();
@@ -1391,7 +1402,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
perf_session__set_id_hdr_size(kvm->session);
ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
- kvm->evlist->threads, false);
+ kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
err = kvm_live_open_events(kvm);
if (err)
goto out;
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index d49c2ab..de16aae 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -769,6 +769,7 @@ static void dump_threads(void)
t = perf_session__findnew(session, st->tid);
pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
node = rb_next(node);
+ thread__put(t);
};
}
@@ -810,6 +811,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct perf_evsel *evsel,
struct machine *machine)
{
+ int err = 0;
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->tid);
@@ -821,10 +823,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (evsel->handler != NULL) {
tracepoint_handler f = evsel->handler;
- return f(evsel, sample);
+ err = f(evsel, sample);
}
- return 0;
+ thread__put(thread);
+
+ return err;
}
static void sort_result(void)
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 675216e..80170aa 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -74,7 +74,7 @@ dump_raw_samples(struct perf_tool *tool,
}
if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
- return 0;
+ goto out_put;
if (al.map != NULL)
al.map->dso->hit = 1;
@@ -103,7 +103,8 @@ dump_raw_samples(struct perf_tool *tool,
symbol_conf.field_sep,
al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
al.sym ? al.sym->name : "???");
-
+out_put:
+ addr_location__put(&al);
return 0;
}
@@ -123,7 +124,6 @@ static int report_raw_events(struct perf_mem *mem)
.mode = PERF_DATA_MODE_READ,
.force = mem->force,
};
- int err = -EINVAL;
int ret;
struct perf_session *session = perf_session__new(&file, false,
&mem->tool);
@@ -134,24 +134,21 @@ static int report_raw_events(struct perf_mem *mem)
if (mem->cpu_list) {
ret = perf_session__cpu_bitmap(session, mem->cpu_list,
mem->cpu_bitmap);
- if (ret)
+ if (ret < 0)
goto out_delete;
}
- if (symbol__init(&session->header.env) < 0)
- return -1;
+ ret = symbol__init(&session->header.env);
+ if (ret < 0)
+ goto out_delete;
printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
- err = perf_session__process_events(session);
- if (err)
- return err;
-
- return 0;
+ ret = perf_session__process_events(session);
out_delete:
perf_session__delete(session);
- return err;
+ return ret;
}
static int report_events(int argc, const char **argv, struct perf_mem *mem)
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index f7b1af6..b81cec3 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -44,25 +44,19 @@
#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
#define DEFAULT_FUNC_FILTER "!_*"
+#define DEFAULT_LIST_FILTER "*:*"
/* Session management structure */
static struct {
+ int command; /* Command short_name */
bool list_events;
- bool force_add;
- bool show_lines;
- bool show_vars;
- bool show_ext_vars;
- bool show_funcs;
- bool mod_events;
bool uprobes;
bool quiet;
bool target_used;
int nevents;
struct perf_probe_event events[MAX_PROBES];
- struct strlist *dellist;
struct line_range line_range;
char *target;
- int max_probe_points;
struct strfilter *filter;
} params;
@@ -93,6 +87,28 @@ static int parse_probe_event(const char *str)
return ret;
}
+static int params_add_filter(const char *str)
+{
+ const char *err = NULL;
+ int ret = 0;
+
+ pr_debug2("Add filter: %s\n", str);
+ if (!params.filter) {
+ params.filter = strfilter__new(str, &err);
+ if (!params.filter)
+ ret = err ? -EINVAL : -ENOMEM;
+ } else
+ ret = strfilter__or(params.filter, str, &err);
+
+ if (ret == -EINVAL) {
+ pr_err("Filter parse error at %td.\n", err - str + 1);
+ pr_err("Source: \"%s\"\n", str);
+ pr_err(" %*c\n", (int)(err - str + 1), '^');
+ }
+
+ return ret;
+}
+
static int set_target(const char *ptr)
{
int found = 0;
@@ -152,34 +168,11 @@ static int parse_probe_event_argv(int argc, const char **argv)
len += sprintf(&buf[len], "%s ", argv[i]);
}
- params.mod_events = true;
ret = parse_probe_event(buf);
free(buf);
return ret;
}
-static int opt_add_probe_event(const struct option *opt __maybe_unused,
- const char *str, int unset __maybe_unused)
-{
- if (str) {
- params.mod_events = true;
- return parse_probe_event(str);
- } else
- return 0;
-}
-
-static int opt_del_probe_event(const struct option *opt __maybe_unused,
- const char *str, int unset __maybe_unused)
-{
- if (str) {
- params.mod_events = true;
- if (!params.dellist)
- params.dellist = strlist__new(true, NULL);
- strlist__add(params.dellist, str);
- }
- return 0;
-}
-
static int opt_set_target(const struct option *opt, const char *str,
int unset __maybe_unused)
{
@@ -217,8 +210,10 @@ static int opt_set_target(const struct option *opt, const char *str,
return ret;
}
+/* Command option callbacks */
+
#ifdef HAVE_DWARF_SUPPORT
-static int opt_show_lines(const struct option *opt __maybe_unused,
+static int opt_show_lines(const struct option *opt,
const char *str, int unset __maybe_unused)
{
int ret = 0;
@@ -226,19 +221,19 @@ static int opt_show_lines(const struct option *opt __maybe_unused,
if (!str)
return 0;
- if (params.show_lines) {
+ if (params.command == 'L') {
pr_warning("Warning: more than one --line options are"
" detected. Only the first one is valid.\n");
return 0;
}
- params.show_lines = true;
+ params.command = opt->short_name;
ret = parse_line_range_desc(str, &params.line_range);
return ret;
}
-static int opt_show_vars(const struct option *opt __maybe_unused,
+static int opt_show_vars(const struct option *opt,
const char *str, int unset __maybe_unused)
{
struct perf_probe_event *pev = &params.events[params.nevents];
@@ -252,29 +247,39 @@ static int opt_show_vars(const struct option *opt __maybe_unused,
pr_err(" Error: '--vars' doesn't accept arguments.\n");
return -EINVAL;
}
- params.show_vars = true;
+ params.command = opt->short_name;
return ret;
}
#endif
+static int opt_add_probe_event(const struct option *opt,
+ const char *str, int unset __maybe_unused)
+{
+ if (str) {
+ params.command = opt->short_name;
+ return parse_probe_event(str);
+ }
+
+ return 0;
+}
+
+static int opt_set_filter_with_command(const struct option *opt,
+ const char *str, int unset)
+{
+ if (!unset)
+ params.command = opt->short_name;
+
+ if (str)
+ return params_add_filter(str);
+
+ return 0;
+}
static int opt_set_filter(const struct option *opt __maybe_unused,
const char *str, int unset __maybe_unused)
{
- const char *err;
-
- if (str) {
- pr_debug2("Set filter: %s\n", str);
- if (params.filter)
- strfilter__delete(params.filter);
- params.filter = strfilter__new(str, &err);
- if (!params.filter) {
- pr_err("Filter parse error at %td.\n", err - str + 1);
- pr_err("Source: \"%s\"\n", str);
- pr_err(" %*c\n", (int)(err - str + 1), '^');
- return -EINVAL;
- }
- }
+ if (str)
+ return params_add_filter(str);
return 0;
}
@@ -290,12 +295,9 @@ static void cleanup_params(void)
for (i = 0; i < params.nevents; i++)
clear_perf_probe_event(params.events + i);
- if (params.dellist)
- strlist__delete(params.dellist);
line_range__clear(&params.line_range);
free(params.target);
- if (params.filter)
- strfilter__delete(params.filter);
+ strfilter__delete(params.filter);
memset(&params, 0, sizeof(params));
}
@@ -316,22 +318,24 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
"perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
- "perf probe --list",
+ "perf probe --list [GROUP:]EVENT ...",
#ifdef HAVE_DWARF_SUPPORT
"perf probe [<options>] --line 'LINEDESC'",
"perf probe [<options>] --vars 'PROBEPOINT'",
#endif
+ "perf probe [<options>] --funcs",
NULL
-};
+ };
struct option options[] = {
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show parsed arguments, etc)"),
OPT_BOOLEAN('q', "quiet", &params.quiet,
"be quiet (do not show any mesages)"),
- OPT_BOOLEAN('l', "list", &params.list_events,
- "list up current probe events"),
+ OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
+ "list up probe events",
+ opt_set_filter_with_command, DEFAULT_LIST_FILTER),
OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
- opt_del_probe_event),
+ opt_set_filter_with_command),
OPT_CALLBACK('a', "add", NULL,
#ifdef HAVE_DWARF_SUPPORT
"[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
@@ -356,7 +360,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
"\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
#endif
opt_add_probe_event),
- OPT_BOOLEAN('f', "force", &params.force_add, "forcibly add events"
+ OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
" with existing name"),
#ifdef HAVE_DWARF_SUPPORT
OPT_CALLBACK('L', "line", NULL,
@@ -365,8 +369,10 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK('V', "vars", NULL,
"FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
"Show accessible variables on PROBEDEF", opt_show_vars),
- OPT_BOOLEAN('\0', "externs", &params.show_ext_vars,
+ OPT_BOOLEAN('\0', "externs", &probe_conf.show_ext_vars,
"Show external variables too (with --vars only)"),
+ OPT_BOOLEAN('\0', "range", &probe_conf.show_location_range,
+ "Show variables location range in scope (with --vars only)"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_STRING('s', "source", &symbol_conf.source_prefix,
@@ -374,12 +380,15 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK('m', "module", NULL, "modname|path",
"target module name (for online) or path (for offline)",
opt_set_target),
+ OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
+ "Don't search inlined functions"),
#endif
OPT__DRY_RUN(&probe_event_dry_run),
- OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
+ OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
"Set how many probe points can be found for a probe."),
- OPT_BOOLEAN('F', "funcs", &params.show_funcs,
- "Show potential probe-able functions."),
+ OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+ "Show potential probe-able functions.",
+ opt_set_filter_with_command, DEFAULT_FUNC_FILTER),
OPT_CALLBACK('\0', "filter", NULL,
"[!]FILTER", "Set a filter (with --vars/funcs only)\n"
"\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
@@ -402,6 +411,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
#endif
+ set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
argc = parse_options(argc, argv, options, probe_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
@@ -410,11 +420,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
pr_warning(" Error: '-' is not supported.\n");
usage_with_options(probe_usage, options);
}
+ if (params.command && params.command != 'a') {
+ pr_warning(" Error: another command except --add is set.\n");
+ usage_with_options(probe_usage, options);
+ }
ret = parse_probe_event_argv(argc, argv);
if (ret < 0) {
pr_err_with_code(" Error: Command Parse Error.", ret);
return ret;
}
+ params.command = 'a';
}
if (params.quiet) {
@@ -425,89 +440,70 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
verbose = -1;
}
- if (params.max_probe_points == 0)
- params.max_probe_points = MAX_PROBES;
-
- if ((!params.nevents && !params.dellist && !params.list_events &&
- !params.show_lines && !params.show_funcs))
- usage_with_options(probe_usage, options);
+ if (probe_conf.max_probes == 0)
+ probe_conf.max_probes = MAX_PROBES;
/*
* Only consider the user's kernel image path if given.
*/
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
- if (params.list_events) {
+ switch (params.command) {
+ case 'l':
if (params.uprobes) {
pr_warning(" Error: Don't use --list with --exec.\n");
usage_with_options(probe_usage, options);
}
- ret = show_perf_probe_events();
+ ret = show_perf_probe_events(params.filter);
if (ret < 0)
pr_err_with_code(" Error: Failed to show event list.", ret);
return ret;
- }
- if (params.show_funcs) {
- if (!params.filter)
- params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
- NULL);
+ case 'F':
ret = show_available_funcs(params.target, params.filter,
params.uprobes);
- strfilter__delete(params.filter);
- params.filter = NULL;
if (ret < 0)
pr_err_with_code(" Error: Failed to show functions.", ret);
return ret;
- }
-
#ifdef HAVE_DWARF_SUPPORT
- if (params.show_lines) {
+ case 'L':
ret = show_line_range(&params.line_range, params.target,
params.uprobes);
if (ret < 0)
pr_err_with_code(" Error: Failed to show lines.", ret);
return ret;
- }
- if (params.show_vars) {
+ case 'V':
if (!params.filter)
params.filter = strfilter__new(DEFAULT_VAR_FILTER,
NULL);
ret = show_available_vars(params.events, params.nevents,
- params.max_probe_points,
- params.target,
- params.filter,
- params.show_ext_vars);
- strfilter__delete(params.filter);
- params.filter = NULL;
+ params.filter);
if (ret < 0)
pr_err_with_code(" Error: Failed to show vars.", ret);
return ret;
- }
#endif
-
- if (params.dellist) {
- ret = del_perf_probe_events(params.dellist);
+ case 'd':
+ ret = del_perf_probe_events(params.filter);
if (ret < 0) {
pr_err_with_code(" Error: Failed to delete events.", ret);
return ret;
}
- }
-
- if (params.nevents) {
+ break;
+ case 'a':
/* Ensure the last given target is used */
if (params.target && !params.target_used) {
pr_warning(" Error: -x/-m must follow the probe definitions.\n");
usage_with_options(probe_usage, options);
}
- ret = add_perf_probe_events(params.events, params.nevents,
- params.max_probe_points,
- params.force_add);
+ ret = add_perf_probe_events(params.events, params.nevents);
if (ret < 0) {
pr_err_with_code(" Error: Failed to add events.", ret);
return ret;
}
+ break;
+ default:
+ usage_with_options(probe_usage, options);
}
return 0;
}
@@ -522,5 +518,5 @@ int cmd_probe(int argc, const char **argv, const char *prefix)
cleanup_params();
}
- return ret;
+ return ret < 0 ? ret : 0;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c3efdfb..142eeb3 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -27,6 +27,10 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
+#include "util/perf_regs.h"
+#include "util/auxtrace.h"
+#include "util/parse-branch-options.h"
+#include "util/parse-regs-options.h"
#include <unistd.h>
#include <sched.h>
@@ -38,6 +42,7 @@ struct record {
struct record_opts opts;
u64 bytes_written;
struct perf_data_file file;
+ struct auxtrace_record *itr;
struct perf_evlist *evlist;
struct perf_session *session;
const char *progname;
@@ -110,9 +115,12 @@ out:
return rc;
}
-static volatile int done = 0;
+static volatile int done;
static volatile int signr = -1;
-static volatile int child_finished = 0;
+static volatile int child_finished;
+static volatile int auxtrace_snapshot_enabled;
+static volatile int auxtrace_snapshot_err;
+static volatile int auxtrace_record__snapshot_started;
static void sig_handler(int sig)
{
@@ -133,6 +141,133 @@ static void record__sig_exit(void)
raise(signr);
}
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+static int record__process_auxtrace(struct perf_tool *tool,
+ union perf_event *event, void *data1,
+ size_t len1, void *data2, size_t len2)
+{
+ struct record *rec = container_of(tool, struct record, tool);
+ struct perf_data_file *file = &rec->file;
+ size_t padding;
+ u8 pad[8] = {0};
+
+ if (!perf_data_file__is_pipe(file)) {
+ off_t file_offset;
+ int fd = perf_data_file__fd(file);
+ int err;
+
+ file_offset = lseek(fd, 0, SEEK_CUR);
+ if (file_offset == -1)
+ return -1;
+ err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
+ event, file_offset);
+ if (err)
+ return err;
+ }
+
+ /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
+ padding = (len1 + len2) & 7;
+ if (padding)
+ padding = 8 - padding;
+
+ record__write(rec, event, event->header.size);
+ record__write(rec, data1, len1);
+ if (len2)
+ record__write(rec, data2, len2);
+ record__write(rec, &pad, padding);
+
+ return 0;
+}
+
+static int record__auxtrace_mmap_read(struct record *rec,
+ struct auxtrace_mmap *mm)
+{
+ int ret;
+
+ ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
+ record__process_auxtrace);
+ if (ret < 0)
+ return ret;
+
+ if (ret)
+ rec->samples++;
+
+ return 0;
+}
+
+static int record__auxtrace_mmap_read_snapshot(struct record *rec,
+ struct auxtrace_mmap *mm)
+{
+ int ret;
+
+ ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
+ record__process_auxtrace,
+ rec->opts.auxtrace_snapshot_size);
+ if (ret < 0)
+ return ret;
+
+ if (ret)
+ rec->samples++;
+
+ return 0;
+}
+
+static int record__auxtrace_read_snapshot_all(struct record *rec)
+{
+ int i;
+ int rc = 0;
+
+ for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+ struct auxtrace_mmap *mm =
+ &rec->evlist->mmap[i].auxtrace_mmap;
+
+ if (!mm->base)
+ continue;
+
+ if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
+ rc = -1;
+ goto out;
+ }
+ }
+out:
+ return rc;
+}
+
+static void record__read_auxtrace_snapshot(struct record *rec)
+{
+ pr_debug("Recording AUX area tracing snapshot\n");
+ if (record__auxtrace_read_snapshot_all(rec) < 0) {
+ auxtrace_snapshot_err = -1;
+ } else {
+ auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
+ if (!auxtrace_snapshot_err)
+ auxtrace_snapshot_enabled = 1;
+ }
+}
+
+#else
+
+static inline
+int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
+ struct auxtrace_mmap *mm __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
+{
+ return 0;
+}
+
+#endif
+
static int record__open(struct record *rec)
{
char msg[512];
@@ -146,7 +281,7 @@ static int record__open(struct record *rec)
evlist__for_each(evlist, pos) {
try_again:
- if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
+ if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
if (verbose)
ui__warning("%s\n", msg);
@@ -169,13 +304,16 @@ try_again:
goto out;
}
- if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+ if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
+ opts->auxtrace_mmap_pages,
+ opts->auxtrace_snapshot_mode) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
"/proc/sys/kernel/perf_event_mlock_kb,\n"
"or try again with a smaller value of -m/--mmap_pages.\n"
- "(current value: %u)\n", opts->mmap_pages);
+ "(current value: %u,%u)\n",
+ opts->mmap_pages, opts->auxtrace_mmap_pages);
rc = -errno;
} else {
pr_err("failed to mmap with %d (%s)\n", errno,
@@ -209,12 +347,9 @@ static int process_buildids(struct record *rec)
struct perf_data_file *file = &rec->file;
struct perf_session *session = rec->session;
- u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
- if (size == 0)
+ if (file->size == 0)
return 0;
- file->size = size;
-
/*
* During this process, it'll load kernel map and replace the
* dso->long_name to a real pathname it found. In this case
@@ -270,12 +405,20 @@ static int record__mmap_read_all(struct record *rec)
int rc = 0;
for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+ struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
+
if (rec->evlist->mmap[i].base) {
if (record__mmap_read(rec, i) != 0) {
rc = -1;
goto out;
}
}
+
+ if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
+ record__auxtrace_mmap_read(rec, mm) != 0) {
+ rc = -1;
+ goto out;
+ }
}
/*
@@ -305,6 +448,9 @@ static void record__init_features(struct record *rec)
if (!rec->opts.branch_stack)
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+
+ if (!rec->opts.full_auxtrace)
+ perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}
static volatile int workload_exec_errno;
@@ -323,6 +469,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
child_finished = 1;
}
+static void snapshot_sig_handler(int sig);
+
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
@@ -343,6 +491,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
+ if (rec->opts.auxtrace_snapshot_mode)
+ signal(SIGUSR2, snapshot_sig_handler);
+ else
+ signal(SIGUSR2, SIG_IGN);
session = perf_session__new(file, false, tool);
if (session == NULL) {
@@ -371,6 +523,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
goto out_child;
}
+ /*
+ * Normally perf_session__new would do this, but it doesn't have the
+ * evlist.
+ */
+ if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
+ pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
+ rec->tool.ordered_events = false;
+ }
+
if (!rec->evlist->nr_groups)
perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
@@ -421,6 +582,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
}
+ if (rec->opts.full_auxtrace) {
+ err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+ session, process_synthesized_event);
+ if (err)
+ goto out_delete_session;
+ }
+
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
machine);
if (err < 0)
@@ -441,7 +609,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
- process_synthesized_event, opts->sample_address);
+ process_synthesized_event, opts->sample_address,
+ opts->proc_map_timeout);
if (err != 0)
goto out_child;
@@ -475,14 +644,27 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
perf_evlist__enable(rec->evlist);
}
+ auxtrace_snapshot_enabled = 1;
for (;;) {
int hits = rec->samples;
if (record__mmap_read_all(rec) < 0) {
+ auxtrace_snapshot_enabled = 0;
err = -1;
goto out_child;
}
+ if (auxtrace_record__snapshot_started) {
+ auxtrace_record__snapshot_started = 0;
+ if (!auxtrace_snapshot_err)
+ record__read_auxtrace_snapshot(rec);
+ if (auxtrace_snapshot_err) {
+ pr_err("AUX area tracing snapshot failed\n");
+ err = -1;
+ goto out_child;
+ }
+ }
+
if (hits == rec->samples) {
if (done || draining)
break;
@@ -505,10 +687,12 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* disable events in this case.
*/
if (done && !disabled && !target__none(&opts->target)) {
+ auxtrace_snapshot_enabled = 0;
perf_evlist__disable(rec->evlist);
disabled = true;
}
}
+ auxtrace_snapshot_enabled = 0;
if (forks && workload_exec_errno) {
char msg[STRERR_BUFSIZE];
@@ -544,16 +728,25 @@ out_child:
if (!err && !file->is_pipe) {
rec->session->header.data_size += rec->bytes_written;
+ file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
- if (!rec->no_buildid)
+ if (!rec->no_buildid) {
process_buildids(rec);
+ /*
+ * We take all buildids when the file contains
+ * AUX area tracing data because we do not decode the
+ * trace because it would take too long.
+ */
+ if (rec->opts.full_auxtrace)
+ dsos__hit_all(rec->session);
+ }
perf_session__write_header(rec->session, rec->evlist, fd, true);
}
if (!err && !quiet) {
char samples[128];
- if (rec->samples)
+ if (rec->samples && !rec->opts.full_auxtrace)
scnprintf(samples, sizeof(samples),
" (%" PRIu64 " samples)", rec->samples);
else
@@ -569,94 +762,6 @@ out_delete_session:
return status;
}
-#define BRANCH_OPT(n, m) \
- { .name = n, .mode = (m) }
-
-#define BRANCH_END { .name = NULL }
-
-struct branch_mode {
- const char *name;
- int mode;
-};
-
-static const struct branch_mode branch_modes[] = {
- BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
- BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
- BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
- BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
- BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
- BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
- BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
- BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
- BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
- BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
- BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
- BRANCH_END
-};
-
-static int
-parse_branch_stack(const struct option *opt, const char *str, int unset)
-{
-#define ONLY_PLM \
- (PERF_SAMPLE_BRANCH_USER |\
- PERF_SAMPLE_BRANCH_KERNEL |\
- PERF_SAMPLE_BRANCH_HV)
-
- uint64_t *mode = (uint64_t *)opt->value;
- const struct branch_mode *br;
- char *s, *os = NULL, *p;
- int ret = -1;
-
- if (unset)
- return 0;
-
- /*
- * cannot set it twice, -b + --branch-filter for instance
- */
- if (*mode)
- return -1;
-
- /* str may be NULL in case no arg is passed to -b */
- if (str) {
- /* because str is read-only */
- s = os = strdup(str);
- if (!s)
- return -1;
-
- for (;;) {
- p = strchr(s, ',');
- if (p)
- *p = '\0';
-
- for (br = branch_modes; br->name; br++) {
- if (!strcasecmp(s, br->name))
- break;
- }
- if (!br->name) {
- ui__warning("unknown branch filter %s,"
- " check man page\n", s);
- goto error;
- }
-
- *mode |= br->mode;
-
- if (!p)
- break;
-
- s = p + 1;
- }
- }
- ret = 0;
-
- /* default to any branch */
- if ((*mode & ~ONLY_PLM) == 0) {
- *mode = PERF_SAMPLE_BRANCH_ANY;
- }
-error:
- free(os);
- return ret;
-}
-
static void callchain_debug(void)
{
static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
@@ -668,12 +773,14 @@ static void callchain_debug(void)
callchain_param.dump_size);
}
-int record_parse_callchain_opt(const struct option *opt __maybe_unused,
+int record_parse_callchain_opt(const struct option *opt,
const char *arg,
int unset)
{
int ret;
+ struct record_opts *record = (struct record_opts *)opt->value;
+ record->callgraph_set = true;
callchain_param.enabled = !unset;
/* --no-call-graph */
@@ -683,17 +790,20 @@ int record_parse_callchain_opt(const struct option *opt __maybe_unused,
return 0;
}
- ret = parse_callchain_record_opt(arg);
+ ret = parse_callchain_record_opt(arg, &callchain_param);
if (!ret)
callchain_debug();
return ret;
}
-int record_callchain_opt(const struct option *opt __maybe_unused,
+int record_callchain_opt(const struct option *opt,
const char *arg __maybe_unused,
int unset __maybe_unused)
{
+ struct record_opts *record = (struct record_opts *)opt->value;
+
+ record->callgraph_set = true;
callchain_param.enabled = true;
if (callchain_param.record_mode == CALLCHAIN_NONE)
@@ -795,6 +905,49 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
return -1;
}
+static int record__parse_mmap_pages(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct record_opts *opts = opt->value;
+ char *s, *p;
+ unsigned int mmap_pages;
+ int ret;
+
+ if (!str)
+ return -EINVAL;
+
+ s = strdup(str);
+ if (!s)
+ return -ENOMEM;
+
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ if (*s) {
+ ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
+ if (ret)
+ goto out_free;
+ opts->mmap_pages = mmap_pages;
+ }
+
+ if (!p) {
+ ret = 0;
+ goto out_free;
+ }
+
+ ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
+ if (ret)
+ goto out_free;
+
+ opts->auxtrace_mmap_pages = mmap_pages;
+
+out_free:
+ free(s);
+ return ret;
+}
+
static const char * const __record_usage[] = {
"perf record [<options>] [<command>]",
"perf record [<options>] -- <command> [<options>]",
@@ -823,13 +976,16 @@ static struct record record = {
.uses_mmap = true,
.default_per_cpu = true,
},
+ .proc_map_timeout = 500,
},
.tool = {
.sample = process_sample_event,
.fork = perf_event__process_fork,
+ .exit = perf_event__process_exit,
.comm = perf_event__process_comm,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
+ .ordered_events = true,
},
};
@@ -854,6 +1010,9 @@ struct option __record_options[] = {
parse_events_option),
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
"event filter", parse_filter),
+ OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
+ NULL, "don't record events from perf itself",
+ exclude_perf),
OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
"record events on existing process id"),
OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
@@ -875,9 +1034,9 @@ struct option __record_options[] = {
&record.opts.no_inherit_set,
"child tasks do not inherit counters"),
OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
- OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
- "number of mmap data pages",
- perf_evlist__parse_mmap_pages),
+ OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
+ "number of mmap data pages and AUX area tracing mmap pages",
+ record__parse_mmap_pages),
OPT_BOOLEAN(0, "group", &record.opts.group,
"put the counters into a counter group"),
OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
@@ -891,10 +1050,11 @@ struct option __record_options[] = {
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
"per thread counts"),
- OPT_BOOLEAN('d', "data", &record.opts.sample_address,
- "Sample addresses"),
- OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
- OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
+ OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+ OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
+ &record.opts.sample_time_set,
+ "Record the sample timestamps"),
+ OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
"don't sample"),
OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
@@ -922,13 +1082,20 @@ struct option __record_options[] = {
"sample transaction flags (special events only)"),
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
"use per-thread mmaps"),
- OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
- "Sample machine registers on interrupt"),
+ OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
+ "sample selected machine registers on interrupt,"
+ " use -I ? to list register names", parse_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
"Record running/enabled time of read (:S) events"),
OPT_CALLBACK('k', "clockid", &record.opts,
"clockid", "clockid to use for events, see clock_gettime()",
parse_clockid),
+ OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
+ "opts", "AUX area tracing Snapshot Mode", ""),
+ OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
+ "per thread proc mmap processing timeout in ms"),
+ OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
+ "Record context switch events"),
OPT_END()
};
@@ -936,7 +1103,7 @@ struct option *record_options = __record_options;
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
- int err = -ENOMEM;
+ int err;
struct record *rec = &record;
char errbuf[BUFSIZ];
@@ -956,6 +1123,24 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
" system-wide mode\n");
usage_with_options(record_usage, record_options);
}
+ if (rec->opts.record_switch_events &&
+ !perf_can_record_switch_events()) {
+ ui__error("kernel does not support recording context switch events (--switch-events option)\n");
+ usage_with_options(record_usage, record_options);
+ }
+
+ if (!rec->itr) {
+ rec->itr = auxtrace_record__init(rec->evlist, &err);
+ if (err)
+ return err;
+ }
+
+ err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
+ rec->opts.auxtrace_snapshot_opts);
+ if (err)
+ return err;
+
+ err = -ENOMEM;
symbol__init(NULL);
@@ -1002,6 +1187,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
+ err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
+ if (err)
+ goto out_symbol_exit;
+
if (record_opts__config(&rec->opts)) {
err = -EINVAL;
goto out_symbol_exit;
@@ -1011,5 +1200,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
out_symbol_exit:
perf_evlist__delete(rec->evlist);
symbol__exit();
+ auxtrace_record__free(rec->itr);
return err;
}
+
+static void snapshot_sig_handler(int sig __maybe_unused)
+{
+ if (!auxtrace_snapshot_enabled)
+ return;
+ auxtrace_snapshot_enabled = 0;
+ auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
+ auxtrace_record__snapshot_started = 1;
+}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 476cdf7..62b285e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -36,6 +36,8 @@
#include "util/data.h"
#include "arch/common.h"
+#include "util/auxtrace.h"
+
#include <dlfcn.h>
#include <linux/bitmap.h>
@@ -51,6 +53,7 @@ struct report {
bool mem_mode;
bool header;
bool header_only;
+ bool nonany_branch_mode;
int max_stack;
struct perf_read_values show_threads_values;
const char *pretty_printing_style;
@@ -100,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
if (!ui__has_annotation())
return 0;
+ hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
+ rep->nonany_branch_mode);
+
if (sort__mode == SORT_MODE__BRANCH) {
bi = he->branch_info;
err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
@@ -137,10 +143,12 @@ static int process_sample_event(struct perf_tool *tool,
struct report *rep = container_of(tool, struct report, tool);
struct addr_location al;
struct hist_entry_iter iter = {
- .hide_unresolved = rep->hide_unresolved,
- .add_entry_cb = hist_iter__report_callback,
+ .evsel = evsel,
+ .sample = sample,
+ .hide_unresolved = rep->hide_unresolved,
+ .add_entry_cb = hist_iter__report_callback,
};
- int ret;
+ int ret = 0;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
pr_debug("problem processing %d event, skipping it.\n",
@@ -149,10 +157,10 @@ static int process_sample_event(struct perf_tool *tool,
}
if (rep->hide_unresolved && al.sym == NULL)
- return 0;
+ goto out_put;
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
- return 0;
+ goto out_put;
if (sort__mode == SORT_MODE__BRANCH)
iter.ops = &hist_iter_branch;
@@ -166,11 +174,11 @@ static int process_sample_event(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
- ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
- rep);
+ ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
if (ret < 0)
pr_debug("problem adding hist entry, skipping event\n");
-
+out_put:
+ addr_location__put(&al);
return ret;
}
@@ -254,6 +262,12 @@ static int report__setup_sample_type(struct report *rep)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ /* ??? handle more cases than just ANY? */
+ if (!(perf_evlist__combined_branch_type(session->evlist) &
+ PERF_SAMPLE_BRANCH_ANY))
+ rep->nonany_branch_mode = true;
+
return 0;
}
@@ -302,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
if (evname != NULL)
ret += fprintf(fp, " of event '%s'", evname);
+ if (symbol_conf.show_ref_callgraph &&
+ strstr(evname, "call-graph=no")) {
+ ret += fprintf(fp, ", show reference callgraph");
+ }
+
if (rep->mem_mode) {
ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order);
@@ -316,6 +335,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
{
struct perf_evsel *pos;
+ fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples);
evlist__for_each(evlist, pos) {
struct hists *hists = evsel__hists(pos);
const char *evname = perf_evsel__name(pos);
@@ -329,16 +349,15 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
fprintf(stdout, "\n\n");
}
- if (sort_order == default_sort_order &&
- parent_pattern == default_parent_pattern) {
+ if (sort_order == NULL &&
+ parent_pattern == default_parent_pattern)
fprintf(stdout, "#\n# (%s)\n#\n", help);
- if (rep->show_threads) {
- bool style = !strcmp(rep->pretty_printing_style, "raw");
- perf_read_values_display(stdout, &rep->show_threads_values,
- style);
- perf_read_values_destroy(&rep->show_threads_values);
- }
+ if (rep->show_threads) {
+ bool style = !strcmp(rep->pretty_printing_style, "raw");
+ perf_read_values_display(stdout, &rep->show_threads_values,
+ style);
+ perf_read_values_destroy(&rep->show_threads_values);
}
return 0;
@@ -585,6 +604,7 @@ parse_percent_limit(const struct option *opt, const char *str,
int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
{
struct perf_session *session;
+ struct itrace_synth_opts itrace_synth_opts = { .set = 0, };
struct stat st;
bool has_br_stack = false;
int branch_mode = -1;
@@ -607,6 +627,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.attr = perf_event__process_attr,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_event__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
@@ -717,6 +740,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
"how to display percentage of filtered entries", parse_filter_percentage),
+ OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+ "Instruction Tracing options",
+ itrace_parse_synth_opts),
+ OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
+ "Show full source file name path for source lines"),
+ OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
+ "Show callgraph from reference event"),
OPT_END()
};
struct perf_data_file file = {
@@ -731,6 +761,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options(argc, argv, options, report_usage, 0);
+ if (symbol_conf.vmlinux_name &&
+ access(symbol_conf.vmlinux_name, R_OK)) {
+ pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
+ return -EINVAL;
+ }
+ if (symbol_conf.kallsyms_name &&
+ access(symbol_conf.kallsyms_name, R_OK)) {
+ pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name);
+ return -EINVAL;
+ }
+
if (report.use_stdio)
use_browser = 0;
else if (report.use_tui)
@@ -761,6 +802,8 @@ repeat:
report.queue_size);
}
+ session->itrace_synth_opts = &itrace_synth_opts;
+
report.session = session;
has_br_stack = perf_header__has_feat(&session->header,
@@ -803,8 +846,8 @@ repeat:
goto error;
}
- /* Force tty output for header output. */
- if (report.header || report.header_only)
+ /* Force tty output for header output and per-thread stat. */
+ if (report.header || report.header_only || report.show_threads)
use_browser = 0;
if (strcmp(input_name, "-") != 0)
@@ -815,8 +858,10 @@ repeat:
if (report.header || report.header_only) {
perf_session__fprintf_info(session, stdout,
report.show_full_info);
- if (report.header_only)
- return 0;
+ if (report.header_only) {
+ ret = 0;
+ goto error;
+ }
} else if (use_browser == 0) {
fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n",
stdout);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5275bab..3396261 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -95,6 +95,7 @@ struct work_atoms {
u64 total_lat;
u64 nb_atoms;
u64 total_runtime;
+ int num_merged;
};
typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
@@ -168,9 +169,10 @@ struct perf_sched {
u64 all_runtime;
u64 all_count;
u64 cpu_last_switched[MAX_CPUS];
- struct rb_root atom_root, sorted_atom_root;
+ struct rb_root atom_root, sorted_atom_root, merged_atom_root;
struct list_head sort_list, cmp_pid;
bool force;
+ bool skip_merge;
};
static u64 get_nsecs(void)
@@ -770,7 +772,7 @@ static int replay_fork_event(struct perf_sched *sched,
if (child == NULL || parent == NULL) {
pr_debug("thread does not exist on fork event: child %p, parent %p\n",
child, parent);
- return 0;
+ goto out_put;
}
if (verbose) {
@@ -781,6 +783,9 @@ static int replay_fork_event(struct perf_sched *sched,
register_pid(sched, parent->tid, thread__comm_str(parent));
register_pid(sched, child->tid, thread__comm_str(child));
+out_put:
+ thread__put(child);
+ thread__put(parent);
return 0;
}
@@ -957,7 +962,7 @@ static int latency_switch_event(struct perf_sched *sched,
struct work_atoms *out_events, *in_events;
struct thread *sched_out, *sched_in;
u64 timestamp0, timestamp = sample->time;
- int cpu = sample->cpu;
+ int cpu = sample->cpu, err = -1;
s64 delta;
BUG_ON(cpu >= MAX_CPUS || cpu < 0);
@@ -976,15 +981,17 @@ static int latency_switch_event(struct perf_sched *sched,
sched_out = machine__findnew_thread(machine, -1, prev_pid);
sched_in = machine__findnew_thread(machine, -1, next_pid);
+ if (sched_out == NULL || sched_in == NULL)
+ goto out_put;
out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
if (!out_events) {
if (thread_atoms_insert(sched, sched_out))
- return -1;
+ goto out_put;
out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
if (!out_events) {
pr_err("out-event: Internal tree error");
- return -1;
+ goto out_put;
}
}
if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
@@ -993,22 +1000,25 @@ static int latency_switch_event(struct perf_sched *sched,
in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
if (!in_events) {
if (thread_atoms_insert(sched, sched_in))
- return -1;
+ goto out_put;
in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
if (!in_events) {
pr_err("in-event: Internal tree error");
- return -1;
+ goto out_put;
}
/*
* Take came in we have not heard about yet,
* add in an initial atom in runnable state:
*/
if (add_sched_out_event(in_events, 'R', timestamp))
- return -1;
+ goto out_put;
}
add_sched_in_event(in_events, timestamp);
-
- return 0;
+ err = 0;
+out_put:
+ thread__put(sched_out);
+ thread__put(sched_in);
+ return err;
}
static int latency_runtime_event(struct perf_sched *sched,
@@ -1021,23 +1031,29 @@ static int latency_runtime_event(struct perf_sched *sched,
struct thread *thread = machine__findnew_thread(machine, -1, pid);
struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
u64 timestamp = sample->time;
- int cpu = sample->cpu;
+ int cpu = sample->cpu, err = -1;
+
+ if (thread == NULL)
+ return -1;
BUG_ON(cpu >= MAX_CPUS || cpu < 0);
if (!atoms) {
if (thread_atoms_insert(sched, thread))
- return -1;
+ goto out_put;
atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
if (!atoms) {
pr_err("in-event: Internal tree error");
- return -1;
+ goto out_put;
}
if (add_sched_out_event(atoms, 'R', timestamp))
- return -1;
+ goto out_put;
}
add_runtime_event(atoms, runtime, timestamp);
- return 0;
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
}
static int latency_wakeup_event(struct perf_sched *sched,
@@ -1050,19 +1066,22 @@ static int latency_wakeup_event(struct perf_sched *sched,
struct work_atom *atom;
struct thread *wakee;
u64 timestamp = sample->time;
+ int err = -1;
wakee = machine__findnew_thread(machine, -1, pid);
+ if (wakee == NULL)
+ return -1;
atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
if (!atoms) {
if (thread_atoms_insert(sched, wakee))
- return -1;
+ goto out_put;
atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
if (!atoms) {
pr_err("wakeup-event: Internal tree error");
- return -1;
+ goto out_put;
}
if (add_sched_out_event(atoms, 'S', timestamp))
- return -1;
+ goto out_put;
}
BUG_ON(list_empty(&atoms->work_list));
@@ -1081,17 +1100,21 @@ static int latency_wakeup_event(struct perf_sched *sched,
* skip in this case.
*/
if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
- return 0;
+ goto out_ok;
sched->nr_timestamps++;
if (atom->sched_out_time > timestamp) {
sched->nr_unordered_timestamps++;
- return 0;
+ goto out_ok;
}
atom->state = THREAD_WAIT_CPU;
atom->wake_up_time = timestamp;
- return 0;
+out_ok:
+ err = 0;
+out_put:
+ thread__put(wakee);
+ return err;
}
static int latency_migrate_task_event(struct perf_sched *sched,
@@ -1104,6 +1127,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
struct work_atoms *atoms;
struct work_atom *atom;
struct thread *migrant;
+ int err = -1;
/*
* Only need to worry about migration when profiling one CPU.
@@ -1112,18 +1136,20 @@ static int latency_migrate_task_event(struct perf_sched *sched,
return 0;
migrant = machine__findnew_thread(machine, -1, pid);
+ if (migrant == NULL)
+ return -1;
atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
if (!atoms) {
if (thread_atoms_insert(sched, migrant))
- return -1;
+ goto out_put;
register_pid(sched, migrant->tid, thread__comm_str(migrant));
atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
if (!atoms) {
pr_err("migration-event: Internal tree error");
- return -1;
+ goto out_put;
}
if (add_sched_out_event(atoms, 'R', timestamp))
- return -1;
+ goto out_put;
}
BUG_ON(list_empty(&atoms->work_list));
@@ -1135,8 +1161,10 @@ static int latency_migrate_task_event(struct perf_sched *sched,
if (atom->sched_out_time > timestamp)
sched->nr_unordered_timestamps++;
-
- return 0;
+ err = 0;
+out_put:
+ thread__put(migrant);
+ return err;
}
static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
@@ -1156,7 +1184,10 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
sched->all_runtime += work_list->total_runtime;
sched->all_count += work_list->nb_atoms;
- ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
+ if (work_list->num_merged > 1)
+ ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
+ else
+ ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
for (i = 0; i < 24 - ret; i++)
printf(" ");
@@ -1276,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
static void perf_sched__sort_lat(struct perf_sched *sched)
{
struct rb_node *node;
-
+ struct rb_root *root = &sched->atom_root;
+again:
for (;;) {
struct work_atoms *data;
- node = rb_first(&sched->atom_root);
+ node = rb_first(root);
if (!node)
break;
- rb_erase(node, &sched->atom_root);
+ rb_erase(node, root);
data = rb_entry(node, struct work_atoms, node);
__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
}
+ if (root == &sched->atom_root) {
+ root = &sched->merged_atom_root;
+ goto again;
+ }
}
static int process_sched_wakeup_event(struct perf_tool *tool,
@@ -1330,8 +1366,10 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
}
sched_in = machine__findnew_thread(machine, -1, next_pid);
+ if (sched_in == NULL)
+ return -1;
- sched->curr_thread[this_cpu] = sched_in;
+ sched->curr_thread[this_cpu] = thread__get(sched_in);
printf(" ");
@@ -1381,6 +1419,8 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
printf("\n");
}
+ thread__put(sched_in);
+
return 0;
}
@@ -1542,6 +1582,59 @@ static void print_bad_events(struct perf_sched *sched)
}
}
+static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+ struct work_atoms *this;
+ const char *comm = thread__comm_str(data->thread), *this_comm;
+
+ while (*new) {
+ int cmp;
+
+ this = container_of(*new, struct work_atoms, node);
+ parent = *new;
+
+ this_comm = thread__comm_str(this->thread);
+ cmp = strcmp(comm, this_comm);
+ if (cmp > 0) {
+ new = &((*new)->rb_left);
+ } else if (cmp < 0) {
+ new = &((*new)->rb_right);
+ } else {
+ this->num_merged++;
+ this->total_runtime += data->total_runtime;
+ this->nb_atoms += data->nb_atoms;
+ this->total_lat += data->total_lat;
+ list_splice(&data->work_list, &this->work_list);
+ if (this->max_lat < data->max_lat) {
+ this->max_lat = data->max_lat;
+ this->max_lat_at = data->max_lat_at;
+ }
+ zfree(&data);
+ return;
+ }
+ }
+
+ data->num_merged++;
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void perf_sched__merge_lat(struct perf_sched *sched)
+{
+ struct work_atoms *data;
+ struct rb_node *node;
+
+ if (sched->skip_merge)
+ return;
+
+ while ((node = rb_first(&sched->atom_root))) {
+ rb_erase(node, &sched->atom_root);
+ data = rb_entry(node, struct work_atoms, node);
+ __merge_work_atoms(&sched->merged_atom_root, data);
+ }
+}
+
static int perf_sched__lat(struct perf_sched *sched)
{
struct rb_node *next;
@@ -1551,6 +1644,7 @@ static int perf_sched__lat(struct perf_sched *sched)
if (perf_sched__read_events(sched))
return -1;
+ perf_sched__merge_lat(sched);
perf_sched__sort_lat(sched);
printf("\n -----------------------------------------------------------------------------------------------------------------\n");
@@ -1702,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
.profile_cpu = -1,
.next_shortname1 = 'A',
.next_shortname2 = '0',
+ .skip_merge = 0,
};
const struct option latency_options[] = {
OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
@@ -1712,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"CPU to profile on"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
+ OPT_BOOLEAN('p', "pids", &sched.skip_merge,
+ "latency stats per pid instead of per comm"),
OPT_END()
};
const struct option replay_options[] = {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 58f10b8..eb51325 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -6,6 +6,7 @@
#include "util/exec_cmd.h"
#include "util/header.h"
#include "util/parse-options.h"
+#include "util/perf_regs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
@@ -16,6 +17,7 @@
#include "util/evsel.h"
#include "util/sort.h"
#include "util/data.h"
+#include "util/auxtrace.h"
#include <linux/bitmap.h>
static char const *script_name;
@@ -26,6 +28,7 @@ static u64 nr_unordered;
static bool no_callchain;
static bool latency_format;
static bool system_wide;
+static bool print_flags;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -44,6 +47,7 @@ enum perf_output_field {
PERF_OUTPUT_SYMOFFSET = 1U << 11,
PERF_OUTPUT_SRCLINE = 1U << 12,
PERF_OUTPUT_PERIOD = 1U << 13,
+ PERF_OUTPUT_IREGS = 1U << 14,
};
struct output_option {
@@ -64,6 +68,7 @@ struct output_option {
{.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
{.str = "period", .field = PERF_OUTPUT_PERIOD},
+ {.str = "iregs", .field = PERF_OUTPUT_IREGS},
};
/* default set to maintain compatibility with current format */
@@ -146,9 +151,10 @@ static const char *output_field2str(enum perf_output_field field)
#define PRINT_FIELD(x) (output[attr->type].fields & PERF_OUTPUT_##x)
-static int perf_evsel__check_stype(struct perf_evsel *evsel,
- u64 sample_type, const char *sample_msg,
- enum perf_output_field field)
+static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
+ u64 sample_type, const char *sample_msg,
+ enum perf_output_field field,
+ bool allow_user_set)
{
struct perf_event_attr *attr = &evsel->attr;
int type = attr->type;
@@ -158,6 +164,8 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
return 0;
if (output[type].user_set) {
+ if (allow_user_set)
+ return 0;
evname = perf_evsel__name(evsel);
pr_err("Samples for '%s' event do not have %s attribute set. "
"Cannot print '%s' field.\n",
@@ -175,10 +183,22 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
return 0;
}
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+ u64 sample_type, const char *sample_msg,
+ enum perf_output_field field)
+{
+ return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
+ false);
+}
+
static int perf_evsel__check_attr(struct perf_evsel *evsel,
struct perf_session *session)
{
struct perf_event_attr *attr = &evsel->attr;
+ bool allow_user_set;
+
+ allow_user_set = perf_header__has_feat(&session->header,
+ HEADER_AUXTRACE);
if (PRINT_FIELD(TRACE) &&
!perf_session__has_traces(session, "record -R"))
@@ -191,8 +211,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
}
if (PRINT_FIELD(ADDR) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
- PERF_OUTPUT_ADDR))
+ perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
+ PERF_OUTPUT_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
@@ -229,8 +249,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
return -EINVAL;
if (PRINT_FIELD(CPU) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
- PERF_OUTPUT_CPU))
+ perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
+ PERF_OUTPUT_CPU, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(PERIOD) &&
@@ -238,6 +258,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_PERIOD))
return -EINVAL;
+ if (PRINT_FIELD(IREGS) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
+ PERF_OUTPUT_IREGS))
+ return -EINVAL;
+
return 0;
}
@@ -335,6 +360,24 @@ out:
return 0;
}
+static void print_sample_iregs(union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct thread *thread __maybe_unused,
+ struct perf_event_attr *attr)
+{
+ struct regs_dump *regs = &sample->intr_regs;
+ uint64_t mask = attr->sample_regs_intr;
+ unsigned i = 0, r;
+
+ if (!regs)
+ return;
+
+ for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs->regs[i++];
+ printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+ }
+}
+
static void print_sample_start(struct perf_sample *sample,
struct thread *thread,
struct perf_evsel *evsel)
@@ -445,6 +488,25 @@ static void print_sample_bts(union perf_event *event,
printf("\n");
}
+static void print_sample_flags(u32 flags)
+{
+ const char *chars = PERF_IP_FLAG_CHARS;
+ const int n = strlen(PERF_IP_FLAG_CHARS);
+ char str[33];
+ int i, pos = 0;
+
+ for (i = 0; i < n; i++, flags >>= 1) {
+ if (flags & 1)
+ str[pos++] = chars[i];
+ }
+ for (; i < 32; i++, flags >>= 1) {
+ if (flags & 1)
+ str[pos++] = '?';
+ }
+ str[pos] = 0;
+ printf(" %-4s ", str);
+}
+
static void process_event(union perf_event *event, struct perf_sample *sample,
struct perf_evsel *evsel, struct addr_location *al)
{
@@ -464,6 +526,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
printf("%s: ", evname ? evname : "[unknown]");
}
+ if (print_flags)
+ print_sample_flags(sample->flags);
+
if (is_bts_event(attr)) {
print_sample_bts(event, sample, evsel, thread, al);
return;
@@ -486,6 +551,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
PERF_MAX_STACK_DEPTH);
}
+ if (PRINT_FIELD(IREGS))
+ print_sample_iregs(event, sample, thread, attr);
+
printf("\n");
}
@@ -568,13 +636,14 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
}
if (al.filtered)
- return 0;
+ goto out_put;
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
- return 0;
+ goto out_put;
scripting_ops->process_event(event, sample, evsel, &al);
-
+out_put:
+ addr_location__put(&al);
return 0;
}
@@ -583,6 +652,7 @@ struct perf_script {
struct perf_session *session;
bool show_task_events;
bool show_mmap_events;
+ bool show_switch_events;
};
static int process_attr(struct perf_tool *tool, union perf_event *event,
@@ -621,7 +691,7 @@ static int process_comm_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
- struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
int ret = -1;
thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
@@ -642,8 +712,8 @@ static int process_comm_event(struct perf_tool *tool,
print_sample_start(sample, thread, evsel);
perf_event__fprintf(event, stdout);
ret = 0;
-
out:
+ thread__put(thread);
return ret;
}
@@ -655,7 +725,7 @@ static int process_fork_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
- struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_fork(tool, event, sample, machine) < 0)
return -1;
@@ -674,6 +744,7 @@ static int process_fork_event(struct perf_tool *tool,
}
print_sample_start(sample, thread, evsel);
perf_event__fprintf(event, stdout);
+ thread__put(thread);
return 0;
}
@@ -682,10 +753,11 @@ static int process_exit_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
+ int err = 0;
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
- struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
if (thread == NULL) {
@@ -703,9 +775,10 @@ static int process_exit_event(struct perf_tool *tool,
perf_event__fprintf(event, stdout);
if (perf_event__process_exit(tool, event, sample, machine) < 0)
- return -1;
+ err = -1;
- return 0;
+ thread__put(thread);
+ return err;
}
static int process_mmap_event(struct perf_tool *tool,
@@ -716,7 +789,7 @@ static int process_mmap_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
- struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_mmap(tool, event, sample, machine) < 0)
return -1;
@@ -735,7 +808,7 @@ static int process_mmap_event(struct perf_tool *tool,
}
print_sample_start(sample, thread, evsel);
perf_event__fprintf(event, stdout);
-
+ thread__put(thread);
return 0;
}
@@ -747,7 +820,7 @@ static int process_mmap2_event(struct perf_tool *tool,
struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
- struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
return -1;
@@ -766,7 +839,33 @@ static int process_mmap2_event(struct perf_tool *tool,
}
print_sample_start(sample, thread, evsel);
perf_event__fprintf(event, stdout);
+ thread__put(thread);
+ return 0;
+}
+
+static int process_switch_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+
+ if (perf_event__process_switch(tool, event, sample, machine) < 0)
+ return -1;
+
+ thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
+ if (thread == NULL) {
+ pr_debug("problem processing SWITCH event, skipping it.\n");
+ return -1;
+ }
+ print_sample_start(sample, thread, evsel);
+ perf_event__fprintf(event, stdout);
+ thread__put(thread);
return 0;
}
@@ -791,6 +890,8 @@ static int __cmd_script(struct perf_script *script)
script->tool.mmap = process_mmap_event;
script->tool.mmap2 = process_mmap2_event;
}
+ if (script->show_switch_events)
+ script->tool.context_switch = process_switch_event;
ret = perf_session__process_events(script->session);
@@ -999,12 +1100,15 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
}
}
- tok = strtok(tok, ",");
- while (tok) {
+ for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) {
for (i = 0; i < imax; ++i) {
if (strcmp(tok, all_output_options[i].str) == 0)
break;
}
+ if (i == imax && strcmp(tok, "flags") == 0) {
+ print_flags = true;
+ continue;
+ }
if (i == imax) {
fprintf(stderr, "Invalid field requested.\n");
rc = -EINVAL;
@@ -1032,8 +1136,6 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
}
output[type].fields |= all_output_options[i].field;
}
-
- tok = strtok(NULL, ",");
}
if (type >= 0) {
@@ -1488,6 +1590,22 @@ static int have_cmd(int argc, const char **argv)
return 0;
}
+static void script__setup_sample_type(struct perf_script *script)
+{
+ struct perf_session *session = script->session;
+ u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+
+ if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER))
+ callchain_param.record_mode = CALLCHAIN_DWARF;
+ else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ callchain_param.record_mode = CALLCHAIN_LBR;
+ else
+ callchain_param.record_mode = CALLCHAIN_FP;
+ }
+}
+
int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
{
bool show_full_info = false;
@@ -1497,6 +1615,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
+ struct itrace_synth_opts itrace_synth_opts = { .set = false, };
char *script_path = NULL;
const char **__argv;
int i, j, err = 0;
@@ -1511,6 +1630,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.attr = process_attr,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_event__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
+ .auxtrace_error = perf_event__process_auxtrace_error,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
@@ -1549,7 +1672,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period", parse_output_fields),
+ "addr,symoff,period,iregs,flags", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
@@ -1569,7 +1692,19 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"Show the fork/comm/exit events"),
OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
"Show the mmap events"),
+ OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
+ "Show context switch events (if recorded)"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+ OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+ "Instruction Tracing options",
+ itrace_parse_synth_opts),
+ OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
+ "Show full source file name path for source lines"),
+ OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
+ "Enable symbol demangling"),
+ OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+ "Enable kernel symbol demangling"),
+
OPT_END()
};
const char * const script_subcommands[] = { "record", "report", NULL };
@@ -1764,6 +1899,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
goto out_delete;
script.session = session;
+ script__setup_sample_type(&script);
+
+ session->itrace_synth_opts = &itrace_synth_opts;
if (cpu_list) {
err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
@@ -1776,6 +1914,14 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
else
symbol_conf.use_callchain = false;
+ if (session->tevent.pevent &&
+ pevent_set_function_resolver(session->tevent.pevent,
+ machine__resolve_kernel_addr,
+ &session->machines.host) < 0) {
+ pr_err("%s: failed to set libtraceevent function resolver\n", __func__);
+ return -1;
+ }
+
if (generate_script_lang) {
struct stat perf_stat;
int input;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f7b8218..d46dbb1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -58,6 +58,7 @@
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
+#include "util/counts.h"
#include <stdlib.h>
#include <sys/prctl.h>
@@ -67,14 +68,11 @@
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
-static void print_stat(int argc, const char **argv);
-static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
-static void print_counter(struct perf_evsel *counter, char *prefix);
-static void print_aggr(char *prefix);
+static void print_counters(struct timespec *ts, int argc, const char **argv);
/* Default events used for perf stat -T */
-static const char * const transaction_attrs[] = {
- "task-clock",
+static const char *transaction_attrs = {
+ "task-clock,"
"{"
"instructions,"
"cycles,"
@@ -86,8 +84,8 @@ static const char * const transaction_attrs[] = {
};
/* More limited version when the CPU does not have all events. */
-static const char * const transaction_limited_attrs[] = {
- "task-clock",
+static const char * transaction_limited_attrs = {
+ "task-clock,"
"{"
"instructions,"
"cycles,"
@@ -96,34 +94,14 @@ static const char * const transaction_limited_attrs[] = {
"}"
};
-/* must match transaction_attrs and the beginning limited_attrs */
-enum {
- T_TASK_CLOCK,
- T_INSTRUCTIONS,
- T_CYCLES,
- T_CYCLES_IN_TX,
- T_TRANSACTION_START,
- T_ELISION_START,
- T_CYCLES_IN_TX_CP,
-};
-
static struct perf_evlist *evsel_list;
static struct target target = {
.uid = UINT_MAX,
};
-enum aggr_mode {
- AGGR_NONE,
- AGGR_GLOBAL,
- AGGR_SOCKET,
- AGGR_CORE,
-};
-
static int run_count = 1;
static bool no_inherit = false;
-static bool scale = true;
-static enum aggr_mode aggr_mode = AGGR_GLOBAL;
static volatile pid_t child_pid = -1;
static bool null_run = false;
static int detailed_run = 0;
@@ -133,11 +111,9 @@ static int big_num_opt = -1;
static const char *csv_sep = NULL;
static bool csv_output = false;
static bool group = false;
-static FILE *output = NULL;
static const char *pre_cmd = NULL;
static const char *post_cmd = NULL;
static bool sync_run = false;
-static unsigned int interval = 0;
static unsigned int initial_delay = 0;
static unsigned int unit_width = 4; /* strlen("unit") */
static bool forever = false;
@@ -147,8 +123,9 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu);
static volatile int done = 0;
-struct perf_stat {
- struct stats res_stats[3];
+static struct perf_stat_config stat_config = {
+ .aggr_mode = AGGR_GLOBAL,
+ .scale = true,
};
static inline void diff_timespec(struct timespec *r, struct timespec *a,
@@ -163,139 +140,17 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a,
}
}
-static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
-{
- return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
-}
-
-static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
-{
- return perf_evsel__cpus(evsel)->nr;
-}
-
-static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
-{
- int i;
- struct perf_stat *ps = evsel->priv;
-
- for (i = 0; i < 3; i++)
- init_stats(&ps->res_stats[i]);
-}
-
-static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
-{
- evsel->priv = zalloc(sizeof(struct perf_stat));
- if (evsel->priv == NULL)
- return -ENOMEM;
- perf_evsel__reset_stat_priv(evsel);
- return 0;
-}
-
-static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
-{
- zfree(&evsel->priv);
-}
-
-static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
-{
- void *addr;
- size_t sz;
-
- sz = sizeof(*evsel->counts) +
- (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
-
- addr = zalloc(sz);
- if (!addr)
- return -ENOMEM;
-
- evsel->prev_raw_counts = addr;
-
- return 0;
-}
-
-static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
-{
- zfree(&evsel->prev_raw_counts);
-}
-
-static void perf_evlist__free_stats(struct perf_evlist *evlist)
-{
- struct perf_evsel *evsel;
-
- evlist__for_each(evlist, evsel) {
- perf_evsel__free_stat_priv(evsel);
- perf_evsel__free_counts(evsel);
- perf_evsel__free_prev_raw_counts(evsel);
- }
-}
-
-static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
-{
- struct perf_evsel *evsel;
-
- evlist__for_each(evlist, evsel) {
- if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
- perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
- (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
- goto out_free;
- }
-
- return 0;
-
-out_free:
- perf_evlist__free_stats(evlist);
- return -1;
-}
-
-static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_stats[MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
-static struct stats runtime_branches_stats[MAX_NR_CPUS];
-static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
-static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
-static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
-static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
-static struct stats walltime_nsecs_stats;
-static struct stats runtime_transaction_stats[MAX_NR_CPUS];
-static struct stats runtime_elision_stats[MAX_NR_CPUS];
-
-static void perf_stat__reset_stats(struct perf_evlist *evlist)
+static void perf_stat__reset_stats(void)
{
- struct perf_evsel *evsel;
-
- evlist__for_each(evlist, evsel) {
- perf_evsel__reset_stat_priv(evsel);
- perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
- }
-
- memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
- memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
- memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
- memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
- memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
- memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
- memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
- memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
- memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
- memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
- memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
- memset(runtime_cycles_in_tx_stats, 0,
- sizeof(runtime_cycles_in_tx_stats));
- memset(runtime_transaction_stats, 0,
- sizeof(runtime_transaction_stats));
- memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
- memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+ perf_evlist__reset_stats(evsel_list);
+ perf_stat__reset_shadow_stats();
}
static int create_perf_stat_counter(struct perf_evsel *evsel)
{
struct perf_event_attr *attr = &evsel->attr;
- if (scale)
+ if (stat_config.scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
@@ -325,183 +180,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
return 0;
}
-static struct perf_evsel *nth_evsel(int n)
-{
- static struct perf_evsel **array;
- static int array_len;
- struct perf_evsel *ev;
- int j;
-
- /* Assumes this only called when evsel_list does not change anymore. */
- if (!array) {
- evlist__for_each(evsel_list, ev)
- array_len++;
- array = malloc(array_len * sizeof(void *));
- if (!array)
- exit(ENOMEM);
- j = 0;
- evlist__for_each(evsel_list, ev)
- array[j++] = ev;
- }
- if (n < array_len)
- return array[n];
- return NULL;
-}
-
-/*
- * Update various tracking values we maintain to print
- * more semantic information such as miss/hit ratios,
- * instruction rates, etc:
- */
-static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
- int cpu)
-{
- if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
- update_stats(&runtime_nsecs_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
- update_stats(&runtime_cycles_stats[cpu], count[0]);
- else if (transaction_run &&
- perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
- update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]);
- else if (transaction_run &&
- perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
- update_stats(&runtime_transaction_stats[cpu], count[0]);
- else if (transaction_run &&
- perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
- update_stats(&runtime_elision_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
- update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
- update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
- update_stats(&runtime_branches_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
- update_stats(&runtime_cacherefs_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
- update_stats(&runtime_l1_dcache_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
- update_stats(&runtime_l1_icache_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
- update_stats(&runtime_ll_cache_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
- update_stats(&runtime_dtlb_cache_stats[cpu], count[0]);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
- update_stats(&runtime_itlb_cache_stats[cpu], count[0]);
-}
-
-static void zero_per_pkg(struct perf_evsel *counter)
-{
- if (counter->per_pkg_mask)
- memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
-}
-
-static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
-{
- unsigned long *mask = counter->per_pkg_mask;
- struct cpu_map *cpus = perf_evsel__cpus(counter);
- int s;
-
- *skip = false;
-
- if (!counter->per_pkg)
- return 0;
-
- if (cpu_map__empty(cpus))
- return 0;
-
- if (!mask) {
- mask = zalloc(MAX_NR_CPUS);
- if (!mask)
- return -ENOMEM;
-
- counter->per_pkg_mask = mask;
- }
-
- s = cpu_map__get_socket(cpus, cpu);
- if (s < 0)
- return -1;
-
- *skip = test_and_set_bit(s, mask) == 1;
- return 0;
-}
-
-static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
- struct perf_counts_values *count)
-{
- struct perf_counts_values *aggr = &evsel->counts->aggr;
- static struct perf_counts_values zero;
- bool skip = false;
-
- if (check_per_pkg(evsel, cpu, &skip)) {
- pr_err("failed to read per-pkg counter\n");
- return -1;
- }
-
- if (skip)
- count = &zero;
-
- switch (aggr_mode) {
- case AGGR_CORE:
- case AGGR_SOCKET:
- case AGGR_NONE:
- if (!evsel->snapshot)
- perf_evsel__compute_deltas(evsel, cpu, count);
- perf_counts_values__scale(count, scale, NULL);
- evsel->counts->cpu[cpu] = *count;
- if (aggr_mode == AGGR_NONE)
- update_shadow_stats(evsel, count->values, cpu);
- break;
- case AGGR_GLOBAL:
- aggr->val += count->val;
- if (scale) {
- aggr->ena += count->ena;
- aggr->run += count->run;
- }
- default:
- break;
- }
-
- return 0;
-}
-
-static int read_counter(struct perf_evsel *counter);
-
-/*
- * Read out the results of a single counter:
- * aggregate counts across CPUs in system-wide mode
- */
-static int read_counter_aggr(struct perf_evsel *counter)
-{
- struct perf_counts_values *aggr = &counter->counts->aggr;
- struct perf_stat *ps = counter->priv;
- u64 *count = counter->counts->aggr.values;
- int i;
-
- aggr->val = aggr->ena = aggr->run = 0;
-
- if (read_counter(counter))
- return -1;
-
- if (!counter->snapshot)
- perf_evsel__compute_deltas(counter, -1, aggr);
- perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
-
- for (i = 0; i < 3; i++)
- update_stats(&ps->res_stats[i], count[i]);
-
- if (verbose) {
- fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter), count[0], count[1], count[2]);
- }
-
- /*
- * Save the full runtime - to allow normalization during printout:
- */
- update_shadow_stats(counter, count, 0);
-
- return 0;
-}
-
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
@@ -518,12 +196,12 @@ static int read_counter(struct perf_evsel *counter)
if (counter->system_wide)
nthreads = 1;
- if (counter->per_pkg)
- zero_per_pkg(counter);
-
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
- if (perf_evsel__read_cb(counter, cpu, thread, read_cb))
+ struct perf_counts_values *count;
+
+ count = perf_counts(counter->counts, cpu, thread);
+ if (perf_evsel__read(counter, cpu, thread, count))
return -1;
}
}
@@ -531,68 +209,34 @@ static int read_counter(struct perf_evsel *counter)
return 0;
}
-static void print_interval(void)
+static void read_counters(bool close_counters)
{
- static int num_print_interval;
struct perf_evsel *counter;
- struct perf_stat *ps;
- struct timespec ts, rs;
- char prefix[64];
- if (aggr_mode == AGGR_GLOBAL) {
- evlist__for_each(evsel_list, counter) {
- ps = counter->priv;
- memset(ps->res_stats, 0, sizeof(ps->res_stats));
- read_counter_aggr(counter);
- }
- } else {
- evlist__for_each(evsel_list, counter) {
- ps = counter->priv;
- memset(ps->res_stats, 0, sizeof(ps->res_stats));
- read_counter(counter);
- }
- }
+ evlist__for_each(evsel_list, counter) {
+ if (read_counter(counter))
+ pr_warning("failed to read counter %s\n", counter->name);
- clock_gettime(CLOCK_MONOTONIC, &ts);
- diff_timespec(&rs, &ts, &ref_time);
- sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
+ if (perf_stat_process_counter(&stat_config, counter))
+ pr_warning("failed to process counter %s\n", counter->name);
- if (num_print_interval == 0 && !csv_output) {
- switch (aggr_mode) {
- case AGGR_SOCKET:
- fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_CORE:
- fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_NONE:
- fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_GLOBAL:
- default:
- fprintf(output, "# time counts %*s events\n", unit_width, "unit");
+ if (close_counters) {
+ perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
+ thread_map__nr(evsel_list->threads));
}
}
+}
- if (++num_print_interval == 25)
- num_print_interval = 0;
+static void process_interval(void)
+{
+ struct timespec ts, rs;
- switch (aggr_mode) {
- case AGGR_CORE:
- case AGGR_SOCKET:
- print_aggr(prefix);
- break;
- case AGGR_NONE:
- evlist__for_each(evsel_list, counter)
- print_counter(counter, prefix);
- break;
- case AGGR_GLOBAL:
- default:
- evlist__for_each(evsel_list, counter)
- print_counter_aggr(counter, prefix);
- }
+ read_counters(false);
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ diff_timespec(&rs, &ts, &ref_time);
- fflush(output);
+ print_counters(&rs, 0, NULL);
}
static void handle_initial_delay(void)
@@ -624,6 +268,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
static int __run_perf_stat(int argc, const char **argv)
{
+ int interval = stat_config.interval;
char msg[512];
unsigned long long t0, t1;
struct perf_evsel *counter;
@@ -665,7 +310,10 @@ static int __run_perf_stat(int argc, const char **argv)
ui__warning("%s event is not supported by the kernel.\n",
perf_evsel__name(counter));
counter->supported = false;
- continue;
+
+ if ((counter->leader != counter) ||
+ !(counter->leader->nr_members > 1))
+ continue;
}
perf_evsel__open_strerror(counter, &target,
@@ -704,7 +352,7 @@ static int __run_perf_stat(int argc, const char **argv)
if (interval) {
while (!waitpid(child_pid, &status, WNOHANG)) {
nanosleep(&ts, NULL);
- print_interval();
+ process_interval();
}
}
wait(&status);
@@ -722,7 +370,7 @@ static int __run_perf_stat(int argc, const char **argv)
while (!done) {
nanosleep(&ts, NULL);
if (interval)
- print_interval();
+ process_interval();
}
}
@@ -730,18 +378,7 @@ static int __run_perf_stat(int argc, const char **argv)
update_stats(&walltime_nsecs_stats, t1 - t0);
- if (aggr_mode == AGGR_GLOBAL) {
- evlist__for_each(evsel_list, counter) {
- read_counter_aggr(counter);
- perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
- thread_map__nr(evsel_list->threads));
- }
- } else {
- evlist__for_each(evsel_list, counter) {
- read_counter(counter);
- perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
- }
- }
+ read_counters(true);
return WEXITSTATUS(status);
}
@@ -775,13 +412,13 @@ static int run_perf_stat(int argc, const char **argv)
static void print_running(u64 run, u64 ena)
{
if (csv_output) {
- fprintf(output, "%s%" PRIu64 "%s%.2f",
+ fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
csv_sep,
run,
csv_sep,
ena ? 100.0 * run / ena : 100.0);
} else if (run != ena) {
- fprintf(output, " (%.2f%%)", 100.0 * run / ena);
+ fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
}
}
@@ -790,9 +427,9 @@ static void print_noise_pct(double total, double avg)
double pct = rel_stddev_stats(total, avg);
if (csv_output)
- fprintf(output, "%s%.2f%%", csv_sep, pct);
+ fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
else if (pct)
- fprintf(output, " ( +-%6.2f%% )", pct);
+ fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
}
static void print_noise(struct perf_evsel *evsel, double avg)
@@ -808,9 +445,9 @@ static void print_noise(struct perf_evsel *evsel, double avg)
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
- switch (aggr_mode) {
+ switch (stat_config.aggr_mode) {
case AGGR_CORE:
- fprintf(output, "S%d-C%*d%s%*d%s",
+ fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
cpu_map__id_to_socket(id),
csv_output ? 0 : -8,
cpu_map__id_to_cpu(id),
@@ -820,7 +457,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
csv_sep);
break;
case AGGR_SOCKET:
- fprintf(output, "S%*d%s%*d%s",
+ fprintf(stat_config.output, "S%*d%s%*d%s",
csv_output ? 0 : -5,
id,
csv_sep,
@@ -829,10 +466,18 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
csv_sep);
break;
case AGGR_NONE:
- fprintf(output, "CPU%*d%s",
+ fprintf(stat_config.output, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[id], csv_sep);
break;
+ case AGGR_THREAD:
+ fprintf(stat_config.output, "%*s-%*d%s",
+ csv_output ? 0 : 16,
+ thread_map__comm(evsel->threads, id),
+ csv_output ? 0 : -8,
+ thread_map__pid(evsel->threads, id),
+ csv_sep);
+ break;
case AGGR_GLOBAL:
default:
break;
@@ -841,6 +486,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
+ FILE *output = stat_config.output;
double msecs = avg / 1e6;
const char *fmt_v, *fmt_n;
char name[25];
@@ -865,7 +511,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
- if (csv_output || interval)
+ if (csv_output || stat_config.interval)
return;
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -875,188 +521,9 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
fprintf(output, " ");
}
-/* used for get_ratio_color() */
-enum grc_type {
- GRC_STALLED_CYCLES_FE,
- GRC_STALLED_CYCLES_BE,
- GRC_CACHE_MISSES,
- GRC_MAX_NR
-};
-
-static const char *get_ratio_color(enum grc_type type, double ratio)
-{
- static const double grc_table[GRC_MAX_NR][3] = {
- [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
- [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
- [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
- };
- const char *color = PERF_COLOR_NORMAL;
-
- if (ratio > grc_table[type][0])
- color = PERF_COLOR_RED;
- else if (ratio > grc_table[type][1])
- color = PERF_COLOR_MAGENTA;
- else if (ratio > grc_table[type][2])
- color = PERF_COLOR_YELLOW;
-
- return color;
-}
-
-static void print_stalled_cycles_frontend(int cpu,
- struct perf_evsel *evsel
- __maybe_unused, double avg)
-{
- double total, ratio = 0.0;
- const char *color;
-
- total = avg_stats(&runtime_cycles_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
-
- fprintf(output, " # ");
- color_fprintf(output, color, "%6.2f%%", ratio);
- fprintf(output, " frontend cycles idle ");
-}
-
-static void print_stalled_cycles_backend(int cpu,
- struct perf_evsel *evsel
- __maybe_unused, double avg)
-{
- double total, ratio = 0.0;
- const char *color;
-
- total = avg_stats(&runtime_cycles_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
-
- fprintf(output, " # ");
- color_fprintf(output, color, "%6.2f%%", ratio);
- fprintf(output, " backend cycles idle ");
-}
-
-static void print_branch_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
- double avg)
-{
- double total, ratio = 0.0;
- const char *color;
-
- total = avg_stats(&runtime_branches_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
- fprintf(output, " # ");
- color_fprintf(output, color, "%6.2f%%", ratio);
- fprintf(output, " of all branches ");
-}
-
-static void print_l1_dcache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
- double avg)
-{
- double total, ratio = 0.0;
- const char *color;
-
- total = avg_stats(&runtime_l1_dcache_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
- fprintf(output, " # ");
- color_fprintf(output, color, "%6.2f%%", ratio);
- fprintf(output, " of all L1-dcache hits ");
-}
-
-static void print_l1_icache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
- double avg)
-{
- double total, ratio = 0.0;
- const char *color;
-
- total = avg_stats(&runtime_l1_icache_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
- fprintf(output, " # ");
- color_fprintf(output, color, "%6.2f%%", ratio);
- fprintf(output, " of all L1-icache hits ");
-}
-
-static void print_dtlb_cache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
- double avg)
-{
- double total, ratio = 0.0;
- const char *color;
-
- total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
- fprintf(output, " # ");
- color_fprintf(output, color, "%6.2f%%", ratio);
- fprintf(output, " of all dTLB cache hits ");
-}
-
-static void print_itlb_cache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
- double avg)
-{
- double total, ratio = 0.0;
- const char *color;
-
- total = avg_stats(&runtime_itlb_cache_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
- fprintf(output, " # ");
- color_fprintf(output, color, "%6.2f%%", ratio);
- fprintf(output, " of all iTLB cache hits ");
-}
-
-static void print_ll_cache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
- double avg)
-{
- double total, ratio = 0.0;
- const char *color;
-
- total = avg_stats(&runtime_ll_cache_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
- fprintf(output, " # ");
- color_fprintf(output, color, "%6.2f%%", ratio);
- fprintf(output, " of all LL-cache hits ");
-}
-
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
- double total, ratio = 0.0, total2;
+ FILE *output = stat_config.output;
double sc = evsel->scale;
const char *fmt;
int cpu = cpu_map__id_to_cpu(id);
@@ -1072,7 +539,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
aggr_printout(evsel, id, nr);
- if (aggr_mode == AGGR_GLOBAL)
+ if (stat_config.aggr_mode == AGGR_GLOBAL)
cpu = 0;
fprintf(output, fmt, avg, csv_sep);
@@ -1087,147 +554,18 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
- if (csv_output || interval)
+ if (csv_output || stat_config.interval)
return;
- if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
- total = avg_stats(&runtime_cycles_stats[cpu]);
- if (total) {
- ratio = avg / total;
- fprintf(output, " # %5.2f insns per cycle ", ratio);
- } else {
- fprintf(output, " ");
- }
- total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
- total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
-
- if (total && avg) {
- ratio = total / avg;
- fprintf(output, "\n");
- if (aggr_mode == AGGR_NONE)
- fprintf(output, " ");
- fprintf(output, " # %5.2f stalled cycles per insn", ratio);
- }
-
- } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
- runtime_branches_stats[cpu].n != 0) {
- print_branch_misses(cpu, evsel, avg);
- } else if (
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
- runtime_l1_dcache_stats[cpu].n != 0) {
- print_l1_dcache_misses(cpu, evsel, avg);
- } else if (
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
- runtime_l1_icache_stats[cpu].n != 0) {
- print_l1_icache_misses(cpu, evsel, avg);
- } else if (
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
- runtime_dtlb_cache_stats[cpu].n != 0) {
- print_dtlb_cache_misses(cpu, evsel, avg);
- } else if (
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
- runtime_itlb_cache_stats[cpu].n != 0) {
- print_itlb_cache_misses(cpu, evsel, avg);
- } else if (
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
- runtime_ll_cache_stats[cpu].n != 0) {
- print_ll_cache_misses(cpu, evsel, avg);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
- runtime_cacherefs_stats[cpu].n != 0) {
- total = avg_stats(&runtime_cacherefs_stats[cpu]);
-
- if (total)
- ratio = avg * 100 / total;
-
- fprintf(output, " # %8.3f %% of all cache refs ", ratio);
-
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
- print_stalled_cycles_frontend(cpu, evsel, avg);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
- print_stalled_cycles_backend(cpu, evsel, avg);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
- total = avg_stats(&runtime_nsecs_stats[cpu]);
-
- if (total) {
- ratio = avg / total;
- fprintf(output, " # %8.3f GHz ", ratio);
- } else {
- fprintf(output, " ");
- }
- } else if (transaction_run &&
- perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
- total = avg_stats(&runtime_cycles_stats[cpu]);
- if (total)
- fprintf(output,
- " # %5.2f%% transactional cycles ",
- 100.0 * (avg / total));
- } else if (transaction_run &&
- perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
- total = avg_stats(&runtime_cycles_stats[cpu]);
- total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
- if (total2 < avg)
- total2 = avg;
- if (total)
- fprintf(output,
- " # %5.2f%% aborted cycles ",
- 100.0 * ((total2-avg) / total));
- } else if (transaction_run &&
- perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
- avg > 0 &&
- runtime_cycles_in_tx_stats[cpu].n != 0) {
- total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
-
- if (total)
- ratio = total / avg;
-
- fprintf(output, " # %8.0f cycles / transaction ", ratio);
- } else if (transaction_run &&
- perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
- avg > 0 &&
- runtime_cycles_in_tx_stats[cpu].n != 0) {
- total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
-
- if (total)
- ratio = total / avg;
-
- fprintf(output, " # %8.0f cycles / elision ", ratio);
- } else if (runtime_nsecs_stats[cpu].n != 0) {
- char unit = 'M';
-
- total = avg_stats(&runtime_nsecs_stats[cpu]);
-
- if (total)
- ratio = 1000.0 * avg / total;
- if (ratio < 0.001) {
- ratio *= 1000;
- unit = 'K';
- }
-
- fprintf(output, " # %8.3f %c/sec ", ratio, unit);
- } else {
- fprintf(output, " ");
- }
+ perf_stat__print_shadow_stats(output, evsel, avg, cpu,
+ stat_config.aggr_mode);
}
static void print_aggr(char *prefix)
{
+ FILE *output = stat_config.output;
struct perf_evsel *counter;
- int cpu, cpu2, s, s2, id, nr;
+ int cpu, s, s2, id, nr;
double uval;
u64 ena, run, val;
@@ -1240,13 +578,12 @@ static void print_aggr(char *prefix)
val = ena = run = 0;
nr = 0;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
- cpu2 = perf_evsel__cpus(counter)->map[cpu];
- s2 = aggr_get_id(evsel_list->cpus, cpu2);
+ s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
if (s2 != id)
continue;
- val += counter->counts->cpu[cpu].val;
- ena += counter->counts->cpu[cpu].ena;
- run += counter->counts->cpu[cpu].run;
+ val += perf_counts(counter->counts, cpu, 0)->val;
+ ena += perf_counts(counter->counts, cpu, 0)->ena;
+ run += perf_counts(counter->counts, cpu, 0)->run;
nr++;
}
if (prefix)
@@ -1292,12 +629,48 @@ static void print_aggr(char *prefix)
}
}
+static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
+{
+ FILE *output = stat_config.output;
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = cpu_map__nr(counter->cpus);
+ int cpu, thread;
+ double uval;
+
+ for (thread = 0; thread < nthreads; thread++) {
+ u64 ena = 0, run = 0, val = 0;
+
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ val += perf_counts(counter->counts, cpu, thread)->val;
+ ena += perf_counts(counter->counts, cpu, thread)->ena;
+ run += perf_counts(counter->counts, cpu, thread)->run;
+ }
+
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
+ uval = val * counter->scale;
+
+ if (nsec_counter(counter))
+ nsec_printout(thread, 0, counter, uval);
+ else
+ abs_printout(thread, 0, counter, uval);
+
+ if (!csv_output)
+ print_noise(counter, 1.0);
+
+ print_running(run, ena);
+ fputc('\n', output);
+ }
+}
+
/*
* Print out the results of a single counter:
* aggregated counts in system-wide mode
*/
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
+ FILE *output = stat_config.output;
struct perf_stat *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
int scaled = counter->counts->scaled;
@@ -1349,14 +722,15 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
*/
static void print_counter(struct perf_evsel *counter, char *prefix)
{
+ FILE *output = stat_config.output;
u64 ena, run, val;
double uval;
int cpu;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
- val = counter->counts->cpu[cpu].val;
- ena = counter->counts->cpu[cpu].ena;
- run = counter->counts->cpu[cpu].run;
+ val = perf_counts(counter->counts, cpu, 0)->val;
+ ena = perf_counts(counter->counts, cpu, 0)->ena;
+ run = perf_counts(counter->counts, cpu, 0)->run;
if (prefix)
fprintf(output, "%s", prefix);
@@ -1401,9 +775,40 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
}
}
-static void print_stat(int argc, const char **argv)
+static void print_interval(char *prefix, struct timespec *ts)
{
- struct perf_evsel *counter;
+ FILE *output = stat_config.output;
+ static int num_print_interval;
+
+ sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
+
+ if (num_print_interval == 0 && !csv_output) {
+ switch (stat_config.aggr_mode) {
+ case AGGR_SOCKET:
+ fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
+ break;
+ case AGGR_CORE:
+ fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
+ break;
+ case AGGR_NONE:
+ fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
+ break;
+ case AGGR_THREAD:
+ fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit");
+ break;
+ case AGGR_GLOBAL:
+ default:
+ fprintf(output, "# time counts %*s events\n", unit_width, "unit");
+ }
+ }
+
+ if (++num_print_interval == 25)
+ num_print_interval = 0;
+}
+
+static void print_header(int argc, const char **argv)
+{
+ FILE *output = stat_config.output;
int i;
fflush(stdout);
@@ -1429,43 +834,67 @@ static void print_stat(int argc, const char **argv)
fprintf(output, " (%d runs)", run_count);
fprintf(output, ":\n\n");
}
+}
+
+static void print_footer(void)
+{
+ FILE *output = stat_config.output;
+
+ if (!null_run)
+ fprintf(output, "\n");
+ fprintf(output, " %17.9f seconds time elapsed",
+ avg_stats(&walltime_nsecs_stats)/1e9);
+ if (run_count > 1) {
+ fprintf(output, " ");
+ print_noise_pct(stddev_stats(&walltime_nsecs_stats),
+ avg_stats(&walltime_nsecs_stats));
+ }
+ fprintf(output, "\n\n");
+}
- switch (aggr_mode) {
+static void print_counters(struct timespec *ts, int argc, const char **argv)
+{
+ int interval = stat_config.interval;
+ struct perf_evsel *counter;
+ char buf[64], *prefix = NULL;
+
+ if (interval)
+ print_interval(prefix = buf, ts);
+ else
+ print_header(argc, argv);
+
+ switch (stat_config.aggr_mode) {
case AGGR_CORE:
case AGGR_SOCKET:
- print_aggr(NULL);
+ print_aggr(prefix);
+ break;
+ case AGGR_THREAD:
+ evlist__for_each(evsel_list, counter)
+ print_aggr_thread(counter, prefix);
break;
case AGGR_GLOBAL:
evlist__for_each(evsel_list, counter)
- print_counter_aggr(counter, NULL);
+ print_counter_aggr(counter, prefix);
break;
case AGGR_NONE:
evlist__for_each(evsel_list, counter)
- print_counter(counter, NULL);
+ print_counter(counter, prefix);
break;
default:
break;
}
- if (!csv_output) {
- if (!null_run)
- fprintf(output, "\n");
- fprintf(output, " %17.9f seconds time elapsed",
- avg_stats(&walltime_nsecs_stats)/1e9);
- if (run_count > 1) {
- fprintf(output, " ");
- print_noise_pct(stddev_stats(&walltime_nsecs_stats),
- avg_stats(&walltime_nsecs_stats));
- }
- fprintf(output, "\n\n");
- }
+ if (!interval && !csv_output)
+ print_footer();
+
+ fflush(stat_config.output);
}
static volatile int signr = -1;
static void skip_signal(int signo)
{
- if ((child_pid == -1) || interval)
+ if ((child_pid == -1) || stat_config.interval)
done = 1;
signr = signo;
@@ -1513,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
static int perf_stat_init_aggr_mode(void)
{
- switch (aggr_mode) {
+ switch (stat_config.aggr_mode) {
case AGGR_SOCKET:
if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
perror("cannot build socket map");
@@ -1530,23 +959,13 @@ static int perf_stat_init_aggr_mode(void)
break;
case AGGR_NONE:
case AGGR_GLOBAL:
+ case AGGR_THREAD:
default:
break;
}
return 0;
}
-static int setup_events(const char * const *attrs, unsigned len)
-{
- unsigned i;
-
- for (i = 0; i < len; i++) {
- if (parse_events(evsel_list, attrs[i]))
- return -1;
- }
- return 0;
-}
-
/*
* Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
@@ -1668,12 +1087,10 @@ static int add_default_attributes(void)
int err;
if (pmu_have_event("cpu", "cycles-ct") &&
pmu_have_event("cpu", "el-start"))
- err = setup_events(transaction_attrs,
- ARRAY_SIZE(transaction_attrs));
+ err = parse_events(evsel_list, transaction_attrs, NULL);
else
- err = setup_events(transaction_limited_attrs,
- ARRAY_SIZE(transaction_limited_attrs));
- if (err < 0) {
+ err = parse_events(evsel_list, transaction_limited_attrs, NULL);
+ if (err) {
fprintf(stderr, "Cannot set up transaction events\n");
return -1;
}
@@ -1731,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"system-wide collection from all CPUs"),
OPT_BOOLEAN('g', "group", &group,
"put the counters into a counter group"),
- OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
+ OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_INTEGER('r', "repeat", &run_count,
@@ -1747,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
stat__set_big_num),
OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
"list of cpus to monitor in system-wide"),
- OPT_SET_UINT('A', "no-aggr", &aggr_mode,
+ OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
"disable CPU count aggregation", AGGR_NONE),
OPT_STRING('x', "field-separator", &csv_sep, "separator",
"print counts with custom separator"),
@@ -1761,12 +1178,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"command to run prior to the measured command"),
OPT_STRING(0, "post", &post_cmd, "command",
"command to run after to the measured command"),
- OPT_UINTEGER('I', "interval-print", &interval,
+ OPT_UINTEGER('I', "interval-print", &stat_config.interval,
"print counts at regular interval in ms (>= 100)"),
- OPT_SET_UINT(0, "per-socket", &aggr_mode,
+ OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
- OPT_SET_UINT(0, "per-core", &aggr_mode,
+ OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
+ OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
+ "aggregate counts per thread", AGGR_THREAD),
OPT_UINTEGER('D', "delay", &initial_delay,
"ms to wait before starting measurement after program start"),
OPT_END()
@@ -1777,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
};
int status = -EINVAL, run_idx;
const char *mode;
+ FILE *output = stderr;
+ unsigned int interval;
setlocale(LC_ALL, "");
@@ -1787,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- output = stderr;
+ interval = stat_config.interval;
+
if (output_name && strcmp(output_name, "-"))
output = NULL;
@@ -1824,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
}
}
+ stat_config.output = output;
+
if (csv_sep) {
csv_output = true;
if (!strcmp(csv_sep, "\\t"))
@@ -1858,8 +1282,20 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
run_count = 1;
}
- /* no_aggr, cgroup are for system-wide only */
- if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
+ if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
+ fprintf(stderr, "The --per-thread option is only available "
+ "when monitoring via -p -t options.\n");
+ parse_options_usage(NULL, options, "p", 1);
+ parse_options_usage(NULL, options, "t", 1);
+ goto out;
+ }
+
+ /*
+ * no_aggr, cgroup are for system-wide only
+ * --per-thread is aggregated per thread, we dont mix it with cpu mode
+ */
+ if (((stat_config.aggr_mode != AGGR_GLOBAL &&
+ stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
!target__has_cpu(&target)) {
fprintf(stderr, "both cgroup and no-aggregation "
"modes only available in system-wide mode\n");
@@ -1887,6 +1323,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
}
goto out;
}
+
+ /*
+ * Initialize thread_map with comm names,
+ * so we could print it out on output.
+ */
+ if (stat_config.aggr_mode == AGGR_THREAD)
+ thread_map__read_comms(evsel_list->threads);
+
if (interval && interval < 100) {
pr_err("print interval must be >= 100ms\n");
parse_options_usage(stat_usage, options, "I", 1);
@@ -1920,13 +1364,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
status = run_perf_stat(argc, argv);
if (forever && status != -1) {
- print_stat(argc, argv);
- perf_stat__reset_stats(evsel_list);
+ print_counters(NULL, argc, argv);
+ perf_stat__reset_stats();
}
}
if (!forever && status != -1 && !interval)
- print_stat(argc, argv);
+ print_counters(NULL, argc, argv);
perf_evlist__free_stats(evsel_list);
out:
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index e50fe11..30e5962 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -61,13 +61,13 @@ struct timechart {
tasks_only,
with_backtrace,
topology;
+ bool force;
/* IO related settings */
- u64 io_events;
bool io_only,
skip_eagain;
+ u64 io_events;
u64 min_time,
merge_dist;
- bool force;
};
struct per_pidcomm;
@@ -523,7 +523,7 @@ static const char *cat_backtrace(union perf_event *event,
* Discard all.
*/
zfree(&p);
- goto exit;
+ goto exit_put;
}
continue;
}
@@ -538,7 +538,8 @@ static const char *cat_backtrace(union perf_event *event,
else
fprintf(f, "..... %016" PRIx64 "\n", ip);
}
-
+exit_put:
+ addr_location__put(&al);
exit:
fclose(f);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1cb3436..8c465c8 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -40,6 +40,7 @@
#include "util/xyarray.h"
#include "util/sort.h"
#include "util/intlist.h"
+#include "util/parse-branch-options.h"
#include "arch/common.h"
#include "util/debug.h"
@@ -235,10 +236,13 @@ static void perf_top__show_details(struct perf_top *top)
more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel,
0, top->sym_pcnt_filter, top->print_entries, 4);
- if (top->zero)
- symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
- else
- symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+
+ if (top->evlist->enabled) {
+ if (top->zero)
+ symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
+ else
+ symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+ }
if (more != 0)
printf("%d lines not displayed, maybe increase display entries [e]\n", more);
out_unlock:
@@ -276,11 +280,13 @@ static void perf_top__print_sym_table(struct perf_top *top)
return;
}
- if (top->zero) {
- hists__delete_entries(hists);
- } else {
- hists__decay_entries(hists, top->hide_user_symbols,
- top->hide_kernel_symbols);
+ if (top->evlist->enabled) {
+ if (top->zero) {
+ hists__delete_entries(hists);
+ } else {
+ hists__decay_entries(hists, top->hide_user_symbols,
+ top->hide_kernel_symbols);
+ }
}
hists__collapse_resort(hists, NULL);
@@ -545,11 +551,13 @@ static void perf_top__sort_new_samples(void *arg)
hists = evsel__hists(t->sym_evsel);
- if (t->zero) {
- hists__delete_entries(hists);
- } else {
- hists__decay_entries(hists, t->hide_user_symbols,
- t->hide_kernel_symbols);
+ if (t->evlist->enabled) {
+ if (t->zero) {
+ hists__delete_entries(hists);
+ } else {
+ hists__decay_entries(hists, t->hide_user_symbols,
+ t->hide_kernel_symbols);
+ }
}
hists__collapse_resort(hists, NULL);
@@ -579,7 +587,8 @@ static void *display_thread_tui(void *arg)
hists->uid_filter_str = top->record_opts.target.uid_str;
}
- perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
+ perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+ top->min_percent,
&top->session->header.env);
done = 1;
@@ -593,8 +602,8 @@ static void display_sig(int sig __maybe_unused)
static void display_setup_sig(void)
{
- signal(SIGSEGV, display_sig);
- signal(SIGFPE, display_sig);
+ signal(SIGSEGV, sighandler_dump_stack);
+ signal(SIGFPE, sighandler_dump_stack);
signal(SIGINT, display_sig);
signal(SIGQUIT, display_sig);
signal(SIGTERM, display_sig);
@@ -687,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
perf_top__record_precise_ip(top, he, evsel->idx, ip);
}
+ hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
+ !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
return 0;
}
@@ -733,7 +744,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
- !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+ al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
" modules" : "");
if (use_browser <= 0)
sleep(5);
@@ -775,7 +786,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (al.sym == NULL || !al.sym->ignore) {
struct hists *hists = evsel__hists(evsel);
struct hist_entry_iter iter = {
- .add_entry_cb = hist_iter__top_callback,
+ .evsel = evsel,
+ .sample = sample,
+ .add_entry_cb = hist_iter__top_callback,
};
if (symbol_conf.cumulate_callchain)
@@ -785,15 +798,14 @@ static void perf_event__process_sample(struct perf_tool *tool,
pthread_mutex_lock(&hists->lock);
- err = hist_entry_iter__add(&iter, &al, evsel, sample,
- top->max_stack, top);
+ err = hist_entry_iter__add(&iter, &al, top->max_stack, top);
if (err < 0)
pr_err("Problem incrementing symbol period, skipping event\n");
pthread_mutex_unlock(&hists->lock);
}
- return;
+ addr_location__put(&al);
}
static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
@@ -950,7 +962,7 @@ static int __cmd_top(struct perf_top *top)
goto out_delete;
machine__synthesize_threads(&top->session->machines.host, &opts->target,
- top->evlist->threads, false);
+ top->evlist->threads, false, opts->proc_map_timeout);
ret = perf_top__start_counters(top);
if (ret)
goto out_delete;
@@ -1060,6 +1072,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
.target = {
.uses_mmap = true,
},
+ .proc_map_timeout = 500,
},
.max_stack = PERF_MAX_STACK_DEPTH,
.sym_pcnt_filter = 5,
@@ -1159,6 +1172,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
"width[,width...]",
"don't try to adjust column width, use these fixed values"),
+ OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
+ "per thread proc mmap processing timeout in ms"),
+ OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
+ "branch any", "sample any taken branches",
+ parse_branch_stack),
+ OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
+ "branch filter mask", "branch stack filter modes",
+ parse_branch_stack),
OPT_END()
};
const char * const top_usage[] = {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e124741..4e3abba 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1,8 +1,27 @@
+/*
+ * builtin-trace.c
+ *
+ * Builtin 'trace' command:
+ *
+ * Display a continuously updated trace of any workload, CPU, specific PID,
+ * system wide, etc. Default format is loosely strace like, but any other
+ * event may be specified using --event.
+ *
+ * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Initially based on the 'trace' prototype by Thomas Gleixner:
+ *
+ * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
+#include "util/exec_cmd.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
@@ -16,7 +35,6 @@
#include <libaudit.h>
#include <stdlib.h>
-#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
@@ -27,6 +45,7 @@
#ifndef MADV_HWPOISON
# define MADV_HWPOISON 100
+
#endif
#ifndef MADV_MERGEABLE
@@ -41,6 +60,51 @@
# define EFD_SEMAPHORE 1
#endif
+#ifndef EFD_NONBLOCK
+# define EFD_NONBLOCK 00004000
+#endif
+
+#ifndef EFD_CLOEXEC
+# define EFD_CLOEXEC 02000000
+#endif
+
+#ifndef O_CLOEXEC
+# define O_CLOEXEC 02000000
+#endif
+
+#ifndef SOCK_DCCP
+# define SOCK_DCCP 6
+#endif
+
+#ifndef SOCK_CLOEXEC
+# define SOCK_CLOEXEC 02000000
+#endif
+
+#ifndef SOCK_NONBLOCK
+# define SOCK_NONBLOCK 00004000
+#endif
+
+#ifndef MSG_CMSG_CLOEXEC
+# define MSG_CMSG_CLOEXEC 0x40000000
+#endif
+
+#ifndef PERF_FLAG_FD_NO_GROUP
+# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#endif
+
+#ifndef PERF_FLAG_FD_OUTPUT
+# define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#endif
+
+#ifndef PERF_FLAG_PID_CGROUP
+# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#endif
+
+#ifndef PERF_FLAG_FD_CLOEXEC
+# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#endif
+
+
struct tp_field {
int offset;
union {
@@ -203,42 +267,6 @@ out_delete:
({ struct syscall_tp *fields = evsel->priv; \
fields->name.pointer(&fields->name, sample); })
-static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
- void *sys_enter_handler,
- void *sys_exit_handler)
-{
- int ret = -1;
- struct perf_evsel *sys_enter, *sys_exit;
-
- sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
- if (sys_enter == NULL)
- goto out;
-
- if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
- goto out_delete_sys_enter;
-
- sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
- if (sys_exit == NULL)
- goto out_delete_sys_enter;
-
- if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
- goto out_delete_sys_exit;
-
- perf_evlist__add(evlist, sys_enter);
- perf_evlist__add(evlist, sys_exit);
-
- ret = 0;
-out:
- return ret;
-
-out_delete_sys_exit:
- perf_evsel__delete_priv(sys_exit);
-out_delete_sys_enter:
- perf_evsel__delete_priv(sys_enter);
- goto out;
-}
-
-
struct syscall_arg {
unsigned long val;
struct thread *thread;
@@ -331,6 +359,14 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
#define SCA_HEX syscall_arg__scnprintf_hex
+static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ return scnprintf(bf, size, "%d", arg->val);
+}
+
+#define SCA_INT syscall_arg__scnprintf_int
+
static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -552,6 +588,15 @@ static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);
+static const char *keyctl_options[] = {
+ "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
+ "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
+ "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
+ "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
+ "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
+};
+static DEFINE_STRARRAY(keyctl_options);
+
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
@@ -582,7 +627,8 @@ static DEFINE_STRARRAY(sighow);
static const char *clockid[] = {
"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
- "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
+ "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
+ "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);
@@ -727,6 +773,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+ struct syscall_arg *arg);
+
+#define SCA_FILENAME syscall_arg__scnprintf_filename
+
static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -783,6 +834,34 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
+static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags == 0)
+ return 0;
+
+#define P_FLAG(n) \
+ if (flags & PERF_FLAG_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~PERF_FLAG_##n; \
+ }
+
+ P_FLAG(FD_NO_GROUP);
+ P_FLAG(FD_OUTPUT);
+ P_FLAG(PID_CGROUP);
+ P_FLAG(FD_CLOEXEC);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
+
static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -926,14 +1005,23 @@ static struct syscall_fmt {
bool hexret;
} syscall_fmts[] = {
{ .name = "access", .errmsg = true,
- .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
+ [1] = SCA_ACCMODE, /* mode */ }, },
{ .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
{ .name = "brk", .hexret = true,
.arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
+ { .name = "chdir", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
+ { .name = "chmod", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
+ { .name = "chroot", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
{ .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
{ .name = "close", .errmsg = true,
.arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
{ .name = "connect", .errmsg = true, },
+ { .name = "creat", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
{ .name = "dup", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "dup2", .errmsg = true,
@@ -944,7 +1032,8 @@ static struct syscall_fmt {
{ .name = "eventfd2", .errmsg = true,
.arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
{ .name = "faccessat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
+ [1] = SCA_FILENAME, /* filename */ }, },
{ .name = "fadvise64", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "fallocate", .errmsg = true,
@@ -954,11 +1043,13 @@ static struct syscall_fmt {
{ .name = "fchmod", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "fchmodat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
+ [1] = SCA_FILENAME, /* filename */ }, },
{ .name = "fchown", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "fchownat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
+ [1] = SCA_FILENAME, /* filename */ }, },
{ .name = "fcntl", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */
[1] = SCA_STRARRAY, /* cmd */ },
@@ -973,7 +1064,8 @@ static struct syscall_fmt {
{ .name = "fstat", .errmsg = true, .alias = "newfstat",
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "fstatat", .errmsg = true, .alias = "newfstatat",
- .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
+ [1] = SCA_FILENAME, /* filename */ }, },
{ .name = "fstatfs", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "fsync", .errmsg = true,
@@ -983,13 +1075,18 @@ static struct syscall_fmt {
{ .name = "futex", .errmsg = true,
.arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
{ .name = "futimesat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
+ [1] = SCA_FILENAME, /* filename */ }, },
{ .name = "getdents", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "getdents64", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
{ .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
+ { .name = "getxattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+ { .name = "inotify_add_watch", .errmsg = true,
+ .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
{ .name = "ioctl", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
@@ -1002,22 +1099,44 @@ static struct syscall_fmt {
#else
[2] = SCA_HEX, /* arg */ }, },
#endif
+ { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
{ .name = "kill", .errmsg = true,
.arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
+ { .name = "lchown", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
+ { .name = "lgetxattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
{ .name = "linkat", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+ { .name = "listxattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+ { .name = "llistxattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+ { .name = "lremovexattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
{ .name = "lseek", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */
[2] = SCA_STRARRAY, /* whence */ },
.arg_parm = { [2] = &strarray__whences, /* whence */ }, },
- { .name = "lstat", .errmsg = true, .alias = "newlstat", },
+ { .name = "lsetxattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+ { .name = "lstat", .errmsg = true, .alias = "newlstat",
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
+ { .name = "lsxattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
{ .name = "madvise", .errmsg = true,
.arg_scnprintf = { [0] = SCA_HEX, /* start */
[2] = SCA_MADV_BHV, /* behavior */ }, },
+ { .name = "mkdir", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
{ .name = "mkdirat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
+ [1] = SCA_FILENAME, /* pathname */ }, },
+ { .name = "mknod", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
{ .name = "mknodat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
+ [1] = SCA_FILENAME, /* filename */ }, },
{ .name = "mlock", .errmsg = true,
.arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
{ .name = "mlockall", .errmsg = true,
@@ -1030,6 +1149,8 @@ static struct syscall_fmt {
{ .name = "mprotect", .errmsg = true,
.arg_scnprintf = { [0] = SCA_HEX, /* start */
[2] = SCA_MMAP_PROT, /* prot */ }, },
+ { .name = "mq_unlink", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
{ .name = "mremap", .hexret = true,
.arg_scnprintf = { [0] = SCA_HEX, /* addr */
[3] = SCA_MREMAP_FLAGS, /* flags */
@@ -1041,15 +1162,23 @@ static struct syscall_fmt {
{ .name = "name_to_handle_at", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
{ .name = "newfstatat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
+ [1] = SCA_FILENAME, /* filename */ }, },
{ .name = "open", .errmsg = true,
- .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
+ [1] = SCA_OPEN_FLAGS, /* flags */ }, },
{ .name = "open_by_handle_at", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
[2] = SCA_OPEN_FLAGS, /* flags */ }, },
{ .name = "openat", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
+ [1] = SCA_FILENAME, /* filename */
[2] = SCA_OPEN_FLAGS, /* flags */ }, },
+ { .name = "perf_event_open", .errmsg = true,
+ .arg_scnprintf = { [1] = SCA_INT, /* pid */
+ [2] = SCA_INT, /* cpu */
+ [3] = SCA_FD, /* group_fd */
+ [4] = SCA_PERF_FLAGS, /* flags */ }, },
{ .name = "pipe2", .errmsg = true,
.arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
{ .name = "poll", .errmsg = true, .timeout = true, },
@@ -1065,18 +1194,28 @@ static struct syscall_fmt {
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "read", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+ { .name = "readlink", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
{ .name = "readlinkat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
+ [1] = SCA_FILENAME, /* pathname */ }, },
{ .name = "readv", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "recvfrom", .errmsg = true,
- .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+ .arg_scnprintf = { [0] = SCA_FD, /* fd */
+ [3] = SCA_MSG_FLAGS, /* flags */ }, },
{ .name = "recvmmsg", .errmsg = true,
- .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+ .arg_scnprintf = { [0] = SCA_FD, /* fd */
+ [3] = SCA_MSG_FLAGS, /* flags */ }, },
{ .name = "recvmsg", .errmsg = true,
- .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
+ .arg_scnprintf = { [0] = SCA_FD, /* fd */
+ [2] = SCA_MSG_FLAGS, /* flags */ }, },
+ { .name = "removexattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
{ .name = "renameat", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+ { .name = "rmdir", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
{ .name = "rt_sigaction", .errmsg = true,
.arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
{ .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
@@ -1086,13 +1225,18 @@ static struct syscall_fmt {
.arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
{ .name = "select", .errmsg = true, .timeout = true, },
{ .name = "sendmmsg", .errmsg = true,
- .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+ .arg_scnprintf = { [0] = SCA_FD, /* fd */
+ [3] = SCA_MSG_FLAGS, /* flags */ }, },
{ .name = "sendmsg", .errmsg = true,
- .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
+ .arg_scnprintf = { [0] = SCA_FD, /* fd */
+ [2] = SCA_MSG_FLAGS, /* flags */ }, },
{ .name = "sendto", .errmsg = true,
- .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+ .arg_scnprintf = { [0] = SCA_FD, /* fd */
+ [3] = SCA_MSG_FLAGS, /* flags */ }, },
{ .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
{ .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
+ { .name = "setxattr", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
{ .name = "shutdown", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "socket", .errmsg = true,
@@ -1103,18 +1247,35 @@ static struct syscall_fmt {
.arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
[1] = SCA_SK_TYPE, /* type */ },
.arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
- { .name = "stat", .errmsg = true, .alias = "newstat", },
+ { .name = "stat", .errmsg = true, .alias = "newstat",
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+ { .name = "statfs", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+ { .name = "swapoff", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
+ { .name = "swapon", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
{ .name = "symlinkat", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
{ .name = "tgkill", .errmsg = true,
.arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
{ .name = "tkill", .errmsg = true,
.arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
+ { .name = "truncate", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
{ .name = "uname", .errmsg = true, .alias = "newuname", },
{ .name = "unlinkat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
+ [1] = SCA_FILENAME, /* pathname */ }, },
+ { .name = "utime", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
{ .name = "utimensat", .errmsg = true,
- .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
+ .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
+ [1] = SCA_FILENAME, /* filename */ }, },
+ { .name = "utimes", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
+ { .name = "vmsplice", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "write", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "writev", .errmsg = true,
@@ -1138,7 +1299,6 @@ struct syscall {
int nr_args;
struct format_field *args;
const char *name;
- bool filtered;
bool is_exit;
struct syscall_fmt *fmt;
size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
@@ -1159,6 +1319,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp)
return printed + fprintf(fp, "): ");
}
+/**
+ * filename.ptr: The filename char pointer that will be vfs_getname'd
+ * filename.entry_str_pos: Where to insert the string translated from
+ * filename.ptr by the vfs_getname tracepoint/kprobe.
+ */
struct thread_trace {
u64 entry_time;
u64 exit_time;
@@ -1167,6 +1332,13 @@ struct thread_trace {
unsigned long pfmaj, pfmin;
char *entry_str;
double runtime_ms;
+ struct {
+ unsigned long ptr;
+ short int entry_str_pos;
+ bool pending_open;
+ unsigned int namelen;
+ char *name;
+ } filename;
struct {
int max;
char **table;
@@ -1213,6 +1385,8 @@ fail:
#define TRACE_PFMAJ (1 << 0)
#define TRACE_PFMIN (1 << 1)
+static const size_t trace__entry_str_size = 2048;
+
struct trace {
struct perf_tool tool;
struct {
@@ -1222,6 +1396,10 @@ struct trace {
struct {
int max;
struct syscall *table;
+ struct {
+ struct perf_evsel *sys_enter,
+ *sys_exit;
+ } events;
} syscalls;
struct record_opts opts;
struct perf_evlist *evlist;
@@ -1231,7 +1409,10 @@ struct trace {
FILE *output;
unsigned long nr_events;
struct strlist *ev_qualifier;
- const char *last_vfs_getname;
+ struct {
+ size_t nr;
+ int *entries;
+ } ev_qualifier_ids;
struct intlist *tid_list;
struct intlist *pid_list;
struct {
@@ -1255,6 +1436,7 @@ struct trace {
bool show_tool_stats;
bool trace_syscalls;
bool force;
+ bool vfs_getname;
int trace_pgfaults;
};
@@ -1358,6 +1540,27 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
return printed;
}
+static void thread__set_filename_pos(struct thread *thread, const char *bf,
+ unsigned long ptr)
+{
+ struct thread_trace *ttrace = thread__priv(thread);
+
+ ttrace->filename.ptr = ptr;
+ ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
+}
+
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ unsigned long ptr = arg->val;
+
+ if (!arg->trace->vfs_getname)
+ return scnprintf(bf, size, "%#x", ptr);
+
+ thread__set_filename_pos(arg->thread, bf, ptr);
+ return 0;
+}
+
static bool trace__filter_duration(struct trace *trace, double t)
{
return t < (trace->duration_filter * NSEC_PER_MSEC);
@@ -1432,8 +1635,12 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
if (trace->host == NULL)
return -ENOMEM;
+ if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
+ return -errno;
+
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
- evlist->threads, trace__tool_process, false);
+ evlist->threads, trace__tool_process, false,
+ trace->opts.proc_map_timeout);
if (err)
symbol__exit();
@@ -1492,19 +1699,6 @@ static int trace__read_syscall_info(struct trace *trace, int id)
sc = trace->syscalls.table + id;
sc->name = name;
- if (trace->ev_qualifier) {
- bool in = strlist__find(trace->ev_qualifier, name) != NULL;
-
- if (!(in ^ trace->not_ev_qualifier)) {
- sc->filtered = true;
- /*
- * No need to do read tracepoint information since this will be
- * filtered out.
- */
- return 0;
- }
- }
-
sc->fmt = syscall_fmt__find(sc->name);
snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
@@ -1531,6 +1725,52 @@ static int trace__read_syscall_info(struct trace *trace, int id)
return syscall__set_arg_fmts(sc);
}
+static int trace__validate_ev_qualifier(struct trace *trace)
+{
+ int err = 0, i;
+ struct str_node *pos;
+
+ trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
+ trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
+ sizeof(trace->ev_qualifier_ids.entries[0]));
+
+ if (trace->ev_qualifier_ids.entries == NULL) {
+ fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
+ trace->output);
+ err = -EINVAL;
+ goto out;
+ }
+
+ i = 0;
+
+ strlist__for_each(pos, trace->ev_qualifier) {
+ const char *sc = pos->s;
+ int id = audit_name_to_syscall(sc, trace->audit.machine);
+
+ if (id < 0) {
+ if (err == 0) {
+ fputs("Error:\tInvalid syscall ", trace->output);
+ err = -EINVAL;
+ } else {
+ fputs(", ", trace->output);
+ }
+
+ fputs(sc, trace->output);
+ }
+
+ trace->ev_qualifier_ids.entries[i++] = id;
+ }
+
+ if (err < 0) {
+ fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
+ "\nHint:\tand: 'man syscalls'\n", trace->output);
+ zfree(&trace->ev_qualifier_ids.entries);
+ trace->ev_qualifier_ids.nr = 0;
+ }
+out:
+ return err;
+}
+
/*
* args is to be interpreted as a series of longs but we need to handle
* 8-byte unaligned accesses. args points to raw_data within the event
@@ -1712,27 +1952,24 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
void *args;
size_t printed = 0;
struct thread *thread;
- int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
struct syscall *sc = trace__syscall_info(trace, evsel, id);
struct thread_trace *ttrace;
if (sc == NULL)
return -1;
- if (sc->filtered)
- return 0;
-
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
ttrace = thread__trace(thread, trace->output);
if (ttrace == NULL)
- return -1;
+ goto out_put;
args = perf_evsel__sc_tp_ptr(evsel, args, sample);
if (ttrace->entry_str == NULL) {
- ttrace->entry_str = malloc(1024);
+ ttrace->entry_str = malloc(trace__entry_str_size);
if (!ttrace->entry_str)
- return -1;
+ goto out_put;
}
if (!trace->summary_only)
@@ -1740,9 +1977,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
ttrace->entry_time = sample->time;
msg = ttrace->entry_str;
- printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
+ printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
- printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
+ printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
args, trace, thread);
if (sc->is_exit) {
@@ -1750,15 +1987,20 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
fprintf(trace->output, "%-70s\n", ttrace->entry_str);
}
- } else
+ } else {
ttrace->entry_pending = true;
+ /* See trace__vfs_getname & trace__sys_exit */
+ ttrace->filename.pending_open = false;
+ }
if (trace->current != thread) {
thread__put(trace->current);
trace->current = thread__get(thread);
}
-
- return 0;
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
}
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
@@ -1768,29 +2010,26 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
long ret;
u64 duration = 0;
struct thread *thread;
- int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
struct syscall *sc = trace__syscall_info(trace, evsel, id);
struct thread_trace *ttrace;
if (sc == NULL)
return -1;
- if (sc->filtered)
- return 0;
-
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
ttrace = thread__trace(thread, trace->output);
if (ttrace == NULL)
- return -1;
+ goto out_put;
if (trace->summary)
thread__update_stats(ttrace, id, sample);
ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
- if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
- trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
- trace->last_vfs_getname = NULL;
+ if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
+ trace__set_fd_pathname(thread, ret, ttrace->filename.name);
+ ttrace->filename.pending_open = false;
++trace->stats.vfs_getname;
}
@@ -1835,15 +2074,66 @@ signed_print:
fputc('\n', trace->output);
out:
ttrace->entry_pending = false;
-
- return 0;
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
}
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
- trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
+ struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ struct thread_trace *ttrace;
+ size_t filename_len, entry_str_len, to_move;
+ ssize_t remaining_space;
+ char *pos;
+ const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
+
+ if (!thread)
+ goto out;
+
+ ttrace = thread__priv(thread);
+ if (!ttrace)
+ goto out;
+
+ filename_len = strlen(filename);
+
+ if (ttrace->filename.namelen < filename_len) {
+ char *f = realloc(ttrace->filename.name, filename_len + 1);
+
+ if (f == NULL)
+ goto out;
+
+ ttrace->filename.namelen = filename_len;
+ ttrace->filename.name = f;
+ }
+
+ strcpy(ttrace->filename.name, filename);
+ ttrace->filename.pending_open = true;
+
+ if (!ttrace->filename.ptr)
+ goto out;
+
+ entry_str_len = strlen(ttrace->entry_str);
+ remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
+ if (remaining_space <= 0)
+ goto out;
+
+ if (filename_len > (size_t)remaining_space) {
+ filename += filename_len - remaining_space;
+ filename_len = remaining_space;
+ }
+
+ to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
+ pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
+ memmove(pos + filename_len, pos, to_move);
+ memcpy(pos, filename, filename_len);
+
+ ttrace->filename.ptr = 0;
+ ttrace->filename.entry_str_pos = 0;
+out:
return 0;
}
@@ -1863,6 +2153,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
ttrace->runtime_ms += runtime_ms;
trace->runtime_ms += runtime_ms;
+ thread__put(thread);
return 0;
out_dump:
@@ -1872,6 +2163,7 @@ out_dump:
(pid_t)perf_evsel__intval(evsel, sample, "pid"),
runtime,
perf_evsel__intval(evsel, sample, "vruntime"));
+ thread__put(thread);
return 0;
}
@@ -1924,11 +2216,12 @@ static int trace__pgfault(struct trace *trace,
struct addr_location al;
char map_type = 'd';
struct thread_trace *ttrace;
+ int err = -1;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
ttrace = thread__trace(thread, trace->output);
if (ttrace == NULL)
- return -1;
+ goto out_put;
if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
ttrace->pfmaj++;
@@ -1936,7 +2229,7 @@ static int trace__pgfault(struct trace *trace,
ttrace->pfmin++;
if (trace->summary_only)
- return 0;
+ goto out;
thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
sample->ip, &al);
@@ -1967,8 +2260,11 @@ static int trace__pgfault(struct trace *trace,
print_location(trace->output, sample, &al, true, false);
fprintf(trace->output, " (%c%c)\n", map_type, al.level);
-
- return 0;
+out:
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
}
static bool skip_sample(struct trace *trace, struct perf_sample *sample)
@@ -2090,19 +2386,20 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
-static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
+static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
{
struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
if (evsel == NULL)
- return;
+ return false;
if (perf_evsel__field(evsel, "pathname") == NULL) {
perf_evsel__delete(evsel);
- return;
+ return false;
}
evsel->handler = trace__vfs_getname;
perf_evlist__add(evlist, evsel);
+ return true;
}
static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
@@ -2159,9 +2456,68 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
}
}
+static int trace__add_syscall_newtp(struct trace *trace)
+{
+ int ret = -1;
+ struct perf_evlist *evlist = trace->evlist;
+ struct perf_evsel *sys_enter, *sys_exit;
+
+ sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
+ if (sys_enter == NULL)
+ goto out;
+
+ if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
+ goto out_delete_sys_enter;
+
+ sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
+ if (sys_exit == NULL)
+ goto out_delete_sys_enter;
+
+ if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
+ goto out_delete_sys_exit;
+
+ perf_evlist__add(evlist, sys_enter);
+ perf_evlist__add(evlist, sys_exit);
+
+ trace->syscalls.events.sys_enter = sys_enter;
+ trace->syscalls.events.sys_exit = sys_exit;
+
+ ret = 0;
+out:
+ return ret;
+
+out_delete_sys_exit:
+ perf_evsel__delete_priv(sys_exit);
+out_delete_sys_enter:
+ perf_evsel__delete_priv(sys_enter);
+ goto out;
+}
+
+static int trace__set_ev_qualifier_filter(struct trace *trace)
+{
+ int err = -1;
+ char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
+ trace->ev_qualifier_ids.nr,
+ trace->ev_qualifier_ids.entries);
+
+ if (filter == NULL)
+ goto out_enomem;
+
+ if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
+ err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
+
+ free(filter);
+out:
+ return err;
+out_enomem:
+ errno = ENOMEM;
+ goto out;
+}
+
static int trace__run(struct trace *trace, int argc, const char **argv)
{
struct perf_evlist *evlist = trace->evlist;
+ struct perf_evsel *evsel;
int err = -1, i;
unsigned long before;
const bool forks = argc > 0;
@@ -2169,13 +2525,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
trace->live = true;
- if (trace->trace_syscalls &&
- perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
- trace__sys_exit))
+ if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
goto out_error_raw_syscalls;
if (trace->trace_syscalls)
- perf_evlist__add_vfs_getname(evlist);
+ trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
@@ -2229,24 +2583,36 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
*/
if (trace->filter_pids.nr > 0)
err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
- else if (evlist->threads->map[0] == -1)
+ else if (thread_map__pid(evlist->threads, 0) == -1)
err = perf_evlist__set_filter_pid(evlist, getpid());
- if (err < 0) {
- printf("err=%d,%s\n", -err, strerror(-err));
- exit(1);
+ if (err < 0)
+ goto out_error_mem;
+
+ if (trace->ev_qualifier_ids.nr > 0) {
+ err = trace__set_ev_qualifier_filter(trace);
+ if (err < 0)
+ goto out_errno;
+
+ pr_debug("event qualifier tracepoint filter: %s\n",
+ trace->syscalls.events.sys_exit->filter);
}
+ err = perf_evlist__apply_filters(evlist, &evsel);
+ if (err < 0)
+ goto out_error_apply_filters;
+
err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
if (err < 0)
goto out_error_mmap;
+ if (!target__none(&trace->opts.target))
+ perf_evlist__enable(evlist);
+
if (forks)
perf_evlist__start_workload(evlist);
- else
- perf_evlist__enable(evlist);
- trace->multiple_threads = evlist->threads->map[0] == -1 ||
+ trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
evlist->threads->nr > 1 ||
perf_evlist__first(evlist)->attr.inherit;
again:
@@ -2272,6 +2638,11 @@ next_event:
if (interrupted)
goto out_disable;
+
+ if (done && !draining) {
+ perf_evlist__disable(evlist);
+ draining = true;
+ }
}
}
@@ -2332,10 +2703,21 @@ out_error_open:
out_error:
fprintf(trace->output, "%s\n", errbuf);
goto out_delete_evlist;
+
+out_error_apply_filters:
+ fprintf(trace->output,
+ "Failed to set filter \"%s\" on event %s with %d (%s)\n",
+ evsel->filter, perf_evsel__name(evsel), errno,
+ strerror_r(errno, errbuf, sizeof(errbuf)));
+ goto out_delete_evlist;
}
out_error_mem:
fprintf(trace->output, "Not enough memory to run!\n");
goto out_delete_evlist;
+
+out_errno:
+ fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
+ goto out_delete_evlist;
}
static int trace__replay(struct trace *trace)
@@ -2456,9 +2838,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
printed += fprintf(fp, "\n");
- printed += fprintf(fp, " syscall calls min avg max stddev\n");
- printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
- printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
+ printed += fprintf(fp, " syscall calls total min avg max stddev\n");
+ printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
+ printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
/* each int_node is a syscall */
while (inode) {
@@ -2475,8 +2857,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
sc = &trace->syscalls.table[inode->i];
printed += fprintf(fp, " %-15s", sc->name);
- printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
- n, min, avg);
+ printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
+ n, avg * n, min, avg);
printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
}
@@ -2646,8 +3028,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
.user_interval = ULLONG_MAX,
.no_buffering = true,
.mmap_pages = UINT_MAX,
+ .proc_map_timeout = 500,
},
- .output = stdout,
+ .output = stderr,
.show_comm = true,
.trace_syscalls = true,
};
@@ -2660,16 +3043,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "comm", &trace.show_comm,
"show the thread COMM next to its id"),
OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
- OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
- "list of events to trace"),
+ OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
OPT_STRING('o', "output", &output_name, "file", "output file name"),
OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
"trace events on existing process id"),
OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
"trace events on existing thread id"),
- OPT_CALLBACK(0, "filter-pids", &trace, "float",
- "show only events with duration > N.M ms", trace__set_filter_pids),
+ OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
+ "pids to filter (by the kernel)", trace__set_filter_pids),
OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
@@ -2696,6 +3078,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
"Trace pagefaults", parse_pagefaults, "maj"),
OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+ OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
+ "per thread proc mmap processing timeout in ms"),
OPT_END()
};
const char * const trace_subcommands[] = { "record", NULL };
@@ -2706,11 +3090,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
signal(SIGFPE, sighandler_dump_stack);
trace.evlist = perf_evlist__new();
- if (trace.evlist == NULL)
- return -ENOMEM;
if (trace.evlist == NULL) {
pr_err("Not enough memory to run!\n");
+ err = -ENOMEM;
goto out;
}
@@ -2748,17 +3131,24 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
if (ev_qualifier_str != NULL) {
const char *s = ev_qualifier_str;
+ struct strlist_config slist_config = {
+ .dirname = system_path(STRACE_GROUPS_DIR),
+ };
trace.not_ev_qualifier = *s == '!';
if (trace.not_ev_qualifier)
++s;
- trace.ev_qualifier = strlist__new(true, s);
+ trace.ev_qualifier = strlist__new(s, &slist_config);
if (trace.ev_qualifier == NULL) {
fputs("Not enough memory to parse event qualifier",
trace.output);
err = -ENOMEM;
goto out_close;
}
+
+ err = trace__validate_ev_qualifier(&trace);
+ if (err)
+ goto out_close;
}
err = target__validate(&trace.opts.target);
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 59a98c6..827557f 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -11,9 +11,9 @@ ifneq ($(obj-perf),)
obj-perf := $(abspath $(obj-perf))/
endif
-$(shell echo -n > .config-detected)
-detected = $(shell echo "$(1)=y" >> .config-detected)
-detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected)
+$(shell printf "" > $(OUTPUT).config-detected)
+detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
+detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
@@ -32,7 +32,7 @@ ifeq ($(ARCH),x86)
LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
$(call detected,CONFIG_X86_64)
else
- LIBUNWIND_LIBS = -lunwind -lunwind-x86
+ LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
endif
NO_PERF_REGS := 0
endif
@@ -130,6 +130,8 @@ endif
ifeq ($(DEBUG),0)
CFLAGS += -O6
+else
+ CFLAGS += $(call cc-option,-Og,-O0)
endif
ifdef PARSER_DEBUG
@@ -268,6 +270,10 @@ else
endif # libelf support
endif # NO_LIBELF
+ifdef NO_DWARF
+ NO_LIBDW_DWARF_UNWIND := 1
+endif
+
ifndef NO_LIBELF
CFLAGS += -DHAVE_LIBELF_SUPPORT
EXTLIBS += -lelf
@@ -291,7 +297,11 @@ ifndef NO_LIBELF
else
CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
LDFLAGS += $(LIBDW_LDFLAGS)
- EXTLIBS += -ldw
+ DWARFLIBS := -ldw
+ ifeq ($(findstring -static,${LDFLAGS}),-static)
+ DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
+ endif
+ EXTLIBS += ${DWARFLIBS}
$(call detected,CONFIG_DWARF)
endif # PERF_HAVE_DWARF_REGS
endif # NO_DWARF
@@ -610,6 +620,11 @@ ifdef LIBBABELTRACE
endif
endif
+ifndef NO_AUXTRACE
+ $(call detected,CONFIG_AUXTRACE)
+ CFLAGS += -DHAVE_AUXTRACE_SUPPORT
+endif
+
# Among the variables below, these:
# perfexecdir
# template_dir
@@ -627,12 +642,13 @@ ifndef DESTDIR
prefix ?= $(HOME)
endif
bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
+bindir = $(abspath $(prefix)/$(bindir_relative))
mandir = share/man
infodir = share/info
perfexecdir = libexec/perf-core
sharedir = $(prefix)/share
template_dir = share/perf-core/templates
+STRACE_GROUPS_DIR = share/perf-core/strace/groups
htmldir = share/doc/perf-doc
ifeq ($(prefix),/usr)
sysconfdir = /etc
@@ -652,6 +668,7 @@ libdir = $(prefix)/$(lib)
# Shell quote (do not use $(call) to accommodate ancient setups);
ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
+STRACE_GROUPS_DIR_SQ = $(subst ','\'',$(STRACE_GROUPS_DIR))
DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
bindir_SQ = $(subst ','\'',$(bindir))
mandir_SQ = $(subst ','\'',$(mandir))
@@ -665,10 +682,13 @@ libdir_SQ = $(subst ','\'',$(libdir))
ifneq ($(filter /%,$(firstword $(perfexecdir))),)
perfexec_instdir = $(perfexecdir)
+STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
else
perfexec_instdir = $(prefix)/$(perfexecdir)
+STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
endif
perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
# If we install to $(HOME) we keep the traceevent default:
# $(HOME)/.traceevent/plugins
@@ -702,6 +722,7 @@ $(call detected_var,htmldir_SQ)
$(call detected_var,infodir_SQ)
$(call detected_var,mandir_SQ)
$(call detected_var,ETC_PERFCONFIG_SQ)
+$(call detected_var,STRACE_GROUPS_DIR_SQ)
$(call detected_var,prefix_SQ)
$(call detected_var,perfexecdir_SQ)
$(call detected_var,LIBDIR)
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index c16ce83..0ebef09 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -177,3 +177,22 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
endef
_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
_gea_err = $(if $(1),$(error Please set '$(1)' appropriately))
+
+# try-run
+# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
+# Exit code chooses option. "$$TMP" is can be used as temporary file and
+# is automatically cleaned up.
+try-run = $(shell set -e; \
+ TMP="$(TMPOUT).$$$$.tmp"; \
+ TMPO="$(TMPOUT).$$$$.o"; \
+ if ($(1)) >/dev/null 2>&1; \
+ then echo "$(2)"; \
+ else echo "$(3)"; \
+ fi; \
+ rm -f "$$TMP" "$$TMPO")
+
+# cc-option
+# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
+
+cc-option = $(call try-run,\
+ $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 6ef6816..83a25ce 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -6,11 +6,9 @@
#include <sys/syscall.h>
#include <linux/types.h>
#include <linux/perf_event.h>
+#include <asm/barrier.h>
#if defined(__i386__)
-#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC {"model name"}
#ifndef __NR_perf_event_open
@@ -25,9 +23,6 @@
#endif
#if defined(__x86_64__)
-#define mb() asm volatile("mfence" ::: "memory")
-#define wmb() asm volatile("sfence" ::: "memory")
-#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC {"model name"}
#ifndef __NR_perf_event_open
@@ -43,129 +38,63 @@
#ifdef __powerpc__
#include "../../arch/powerpc/include/uapi/asm/unistd.h"
-#define mb() asm volatile ("sync" ::: "memory")
-#define wmb() asm volatile ("sync" ::: "memory")
-#define rmb() asm volatile ("sync" ::: "memory")
#define CPUINFO_PROC {"cpu"}
#endif
#ifdef __s390__
-#define mb() asm volatile("bcr 15,0" ::: "memory")
-#define wmb() asm volatile("bcr 15,0" ::: "memory")
-#define rmb() asm volatile("bcr 15,0" ::: "memory")
#define CPUINFO_PROC {"vendor_id"}
#endif
#ifdef __sh__
-#if defined(__SH4A__) || defined(__SH5__)
-# define mb() asm volatile("synco" ::: "memory")
-# define wmb() asm volatile("synco" ::: "memory")
-# define rmb() asm volatile("synco" ::: "memory")
-#else
-# define mb() asm volatile("" ::: "memory")
-# define wmb() asm volatile("" ::: "memory")
-# define rmb() asm volatile("" ::: "memory")
-#endif
#define CPUINFO_PROC {"cpu type"}
#endif
#ifdef __hppa__
-#define mb() asm volatile("" ::: "memory")
-#define wmb() asm volatile("" ::: "memory")
-#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC {"cpu"}
#endif
#ifdef __sparc__
-#ifdef __LP64__
-#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
- "membar #StoreLoad\n" \
- "1:\n":::"memory")
-#else
-#define mb() asm volatile("":::"memory")
-#endif
-#define wmb() asm volatile("":::"memory")
-#define rmb() asm volatile("":::"memory")
#define CPUINFO_PROC {"cpu"}
#endif
#ifdef __alpha__
-#define mb() asm volatile("mb" ::: "memory")
-#define wmb() asm volatile("wmb" ::: "memory")
-#define rmb() asm volatile("mb" ::: "memory")
#define CPUINFO_PROC {"cpu model"}
#endif
#ifdef __ia64__
-#define mb() asm volatile ("mf" ::: "memory")
-#define wmb() asm volatile ("mf" ::: "memory")
-#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#define CPUINFO_PROC {"model name"}
#endif
#ifdef __arm__
-/*
- * Use the __kuser_memory_barrier helper in the CPU helper page. See
- * arch/arm/kernel/entry-armv.S in the kernel source for details.
- */
-#define mb() ((void(*)(void))0xffff0fa0)()
-#define wmb() ((void(*)(void))0xffff0fa0)()
-#define rmb() ((void(*)(void))0xffff0fa0)()
#define CPUINFO_PROC {"model name", "Processor"}
#endif
#ifdef __aarch64__
-#define mb() asm volatile("dmb ish" ::: "memory")
-#define wmb() asm volatile("dmb ishst" ::: "memory")
-#define rmb() asm volatile("dmb ishld" ::: "memory")
#define cpu_relax() asm volatile("yield" ::: "memory")
#endif
#ifdef __mips__
-#define mb() asm volatile( \
- ".set mips2\n\t" \
- "sync\n\t" \
- ".set mips0" \
- : /* no output */ \
- : /* no input */ \
- : "memory")
-#define wmb() mb()
-#define rmb() mb()
#define CPUINFO_PROC {"cpu model"}
#endif
#ifdef __arc__
-#define mb() asm volatile("" ::: "memory")
-#define wmb() asm volatile("" ::: "memory")
-#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC {"Processor"}
#endif
#ifdef __metag__
-#define mb() asm volatile("" ::: "memory")
-#define wmb() asm volatile("" ::: "memory")
-#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC {"CPU"}
#endif
#ifdef __xtensa__
-#define mb() asm volatile("memw" ::: "memory")
-#define wmb() asm volatile("memw" ::: "memory")
-#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC {"core ID"}
#endif
#ifdef __tile__
-#define mb() asm volatile ("mf" ::: "memory")
-#define wmb() asm volatile ("mf" ::: "memory")
-#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory")
#define CPUINFO_PROC {"model name"}
#endif
-#define barrier() asm volatile ("" ::: "memory")
-
#ifndef cpu_relax
#define cpu_relax() barrier()
#endif
diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh
index c7ff90a..7e47a7c 100644
--- a/tools/perf/perf-with-kcore.sh
+++ b/tools/perf/perf-with-kcore.sh
@@ -50,7 +50,7 @@ copy_kcore()
fi
rm -f perf.data.junk
- ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null &
+ ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
PERF_PID=$!
# Need to make sure that perf has started
@@ -160,18 +160,18 @@ record()
echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
fi
- if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
+ if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
fi
- if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
+ if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
fi
- if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
+ if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
true
- elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
+ elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
true
elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
@@ -193,8 +193,8 @@ record()
mkdir "$PERF_DATA_DIR"
- echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*"
- "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true
+ echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
+ "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
exit 1
@@ -209,8 +209,8 @@ subcommand()
{
find_perf
check_buildid_cache_permissions
- echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*"
- "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $*
+ echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
+ "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@"
}
if [ "$1" = "fix_buildid_cache_permissions" ] ; then
@@ -234,7 +234,7 @@ fi
case "$PERF_SUB_COMMAND" in
"record")
while [ "$1" != "--" ] ; do
- PERF_OPTIONS+="$1 "
+ PERF_OPTIONS+=("$1")
shift || break
done
if [ "$1" != "--" ] ; then
@@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in
usage
fi
shift
- record $*
+ record "$@"
;;
"script")
- subcommand $*
+ subcommand "$@"
;;
"report")
- subcommand $*
+ subcommand "$@"
;;
"inject")
- subcommand $*
+ subcommand "$@"
;;
*)
usage
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index b857fcb..07dbff5 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -231,7 +231,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
(*argc)--;
} else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR));
- fprintf(stderr, "dir: %s\n", debugfs_mountpoint);
+ fprintf(stderr, "dir: %s\n", tracing_path);
if (envchanged)
*envchanged = 1;
} else if (!strcmp(cmd, "--list-cmds")) {
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index e14bb63..90129ac 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -51,19 +51,28 @@ struct record_opts {
bool sample_address;
bool sample_weight;
bool sample_time;
+ bool sample_time_set;
+ bool callgraph_set;
bool period;
- bool sample_intr_regs;
bool running_time;
+ bool full_auxtrace;
+ bool auxtrace_snapshot_mode;
+ bool record_switch_events;
unsigned int freq;
unsigned int mmap_pages;
+ unsigned int auxtrace_mmap_pages;
unsigned int user_freq;
u64 branch_stack;
+ u64 sample_intr_regs;
u64 default_interval;
u64 user_interval;
+ size_t auxtrace_snapshot_size;
+ const char *auxtrace_snapshot_opts;
bool sample_transaction;
unsigned initial_delay;
bool use_clockid;
clockid_t clockid;
+ unsigned int proc_map_timeout;
};
struct option;
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
index 2225162..b9d5083 100755
--- a/tools/perf/python/twatch.py
+++ b/tools/perf/python/twatch.py
@@ -18,10 +18,20 @@ import perf
def main():
cpus = perf.cpu_map()
threads = perf.thread_map()
- evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
+ evsel = perf.evsel(type = perf.TYPE_SOFTWARE,
+ config = perf.COUNT_SW_DUMMY,
+ task = 1, comm = 1, mmap = 0, freq = 0,
wakeup_events = 1, watermark = 1,
sample_id_all = 1,
sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)
+
+ """What we want are just the PERF_RECORD_ lifetime events for threads,
+ using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1
+ (the default), makes perf reenable irq_vectors:local_timer_entry, when
+ disabling nohz, not good for some use cases where all we want is to get
+ threads comes and goes... So use (perf.TYPE_SOFTWARE, perf_COUNT_SW_DUMMY,
+ freq=0) instead."""
+
evsel.open(cpus = cpus, threads = threads);
evlist = perf.evlist(cpus, threads)
evlist.add(evsel)
diff --git a/tools/perf/scripts/python/bin/compaction-times-record b/tools/perf/scripts/python/bin/compaction-times-record
new file mode 100644
index 0000000..6edcd40
--- /dev/null
+++ b/tools/perf/scripts/python/bin/compaction-times-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e compaction:mm_compaction_begin -e compaction:mm_compaction_end -e compaction:mm_compaction_migratepages -e compaction:mm_compaction_isolate_migratepages -e compaction:mm_compaction_isolate_freepages $@
diff --git a/tools/perf/scripts/python/bin/compaction-times-report b/tools/perf/scripts/python/bin/compaction-times-report
new file mode 100644
index 0000000..3dc1389
--- /dev/null
+++ b/tools/perf/scripts/python/bin/compaction-times-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+#description: display time taken by mm compaction
+#args: [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]
+perf script -s "$PERF_EXEC_PATH"/scripts/python/compaction-times.py $@
diff --git a/tools/perf/scripts/python/call-graph-from-postgresql.py b/tools/perf/scripts/python/call-graph-from-postgresql.py
new file mode 100644
index 0000000..e78fdc2
--- /dev/null
+++ b/tools/perf/scripts/python/call-graph-from-postgresql.py
@@ -0,0 +1,327 @@
+#!/usr/bin/python2
+# call-graph-from-postgresql.py: create call-graph from postgresql database
+# Copyright (c) 2014, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+
+# To use this script you will need to have exported data using the
+# export-to-postgresql.py script. Refer to that script for details.
+#
+# Following on from the example in the export-to-postgresql.py script, a
+# call-graph can be displayed for the pt_example database like this:
+#
+# python tools/perf/scripts/python/call-graph-from-postgresql.py pt_example
+#
+# Note this script supports connecting to remote databases by setting hostname,
+# port, username, password, and dbname e.g.
+#
+# python tools/perf/scripts/python/call-graph-from-postgresql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
+#
+# The result is a GUI window with a tree representing a context-sensitive
+# call-graph. Expanding a couple of levels of the tree and adjusting column
+# widths to suit will display something like:
+#
+# Call Graph: pt_example
+# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%)
+# v- ls
+# v- 2638:2638
+# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0
+# |- unknown unknown 1 13198 0.1 1 0.0
+# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3
+# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3
+# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4
+# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1
+# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0
+# >- __libc_csu_init ls 1 10354 0.1 10 0.0
+# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0
+# v- main ls 1 8182043 99.6 180254 99.9
+#
+# Points to note:
+# The top level is a command name (comm)
+# The next level is a thread (pid:tid)
+# Subsequent levels are functions
+# 'Count' is the number of calls
+# 'Time' is the elapsed time until the function returns
+# Percentages are relative to the level above
+# 'Branch Count' is the total number of branches for that function and all
+# functions that it calls
+
+import sys
+from PySide.QtCore import *
+from PySide.QtGui import *
+from PySide.QtSql import *
+from decimal import *
+
+class TreeItem():
+
+ def __init__(self, db, row, parent_item):
+ self.db = db
+ self.row = row
+ self.parent_item = parent_item
+ self.query_done = False;
+ self.child_count = 0
+ self.child_items = []
+ self.data = ["", "", "", "", "", "", ""]
+ self.comm_id = 0
+ self.thread_id = 0
+ self.call_path_id = 1
+ self.branch_count = 0
+ self.time = 0
+ if not parent_item:
+ self.setUpRoot()
+
+ def setUpRoot(self):
+ self.query_done = True
+ query = QSqlQuery(self.db)
+ ret = query.exec_('SELECT id, comm FROM comms')
+ if not ret:
+ raise Exception("Query failed: " + query.lastError().text())
+ while query.next():
+ if not query.value(0):
+ continue
+ child_item = TreeItem(self.db, self.child_count, self)
+ self.child_items.append(child_item)
+ self.child_count += 1
+ child_item.setUpLevel1(query.value(0), query.value(1))
+
+ def setUpLevel1(self, comm_id, comm):
+ self.query_done = True;
+ self.comm_id = comm_id
+ self.data[0] = comm
+ self.child_items = []
+ self.child_count = 0
+ query = QSqlQuery(self.db)
+ ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
+ if not ret:
+ raise Exception("Query failed: " + query.lastError().text())
+ while query.next():
+ child_item = TreeItem(self.db, self.child_count, self)
+ self.child_items.append(child_item)
+ self.child_count += 1
+ child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
+
+ def setUpLevel2(self, comm_id, thread_id, pid, tid):
+ self.comm_id = comm_id
+ self.thread_id = thread_id
+ self.data[0] = str(pid) + ":" + str(tid)
+
+ def getChildItem(self, row):
+ return self.child_items[row]
+
+ def getParentItem(self):
+ return self.parent_item
+
+ def getRow(self):
+ return self.row
+
+ def timePercent(self, b):
+ if not self.time:
+ return "0.0"
+ x = (b * Decimal(100)) / self.time
+ return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
+
+ def branchPercent(self, b):
+ if not self.branch_count:
+ return "0.0"
+ x = (b * Decimal(100)) / self.branch_count
+ return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
+
+ def addChild(self, call_path_id, name, dso, count, time, branch_count):
+ child_item = TreeItem(self.db, self.child_count, self)
+ child_item.comm_id = self.comm_id
+ child_item.thread_id = self.thread_id
+ child_item.call_path_id = call_path_id
+ child_item.branch_count = branch_count
+ child_item.time = time
+ child_item.data[0] = name
+ if dso == "[kernel.kallsyms]":
+ dso = "[kernel]"
+ child_item.data[1] = dso
+ child_item.data[2] = str(count)
+ child_item.data[3] = str(time)
+ child_item.data[4] = self.timePercent(time)
+ child_item.data[5] = str(branch_count)
+ child_item.data[6] = self.branchPercent(branch_count)
+ self.child_items.append(child_item)
+ self.child_count += 1
+
+ def selectCalls(self):
+ self.query_done = True;
+ query = QSqlQuery(self.db)
+ ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
+ '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
+ '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
+ '( SELECT ip FROM call_paths where id = call_path_id ) '
+ 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
+ 'ORDER BY call_path_id')
+ if not ret:
+ raise Exception("Query failed: " + query.lastError().text())
+ last_call_path_id = 0
+ name = ""
+ dso = ""
+ count = 0
+ branch_count = 0
+ total_branch_count = 0
+ time = 0
+ total_time = 0
+ while query.next():
+ if query.value(1) == last_call_path_id:
+ count += 1
+ branch_count += query.value(2)
+ time += query.value(4) - query.value(3)
+ else:
+ if count:
+ self.addChild(last_call_path_id, name, dso, count, time, branch_count)
+ last_call_path_id = query.value(1)
+ name = query.value(5)
+ dso = query.value(6)
+ count = 1
+ total_branch_count += branch_count
+ total_time += time
+ branch_count = query.value(2)
+ time = query.value(4) - query.value(3)
+ if count:
+ self.addChild(last_call_path_id, name, dso, count, time, branch_count)
+ total_branch_count += branch_count
+ total_time += time
+ # Top level does not have time or branch count, so fix that here
+ if total_branch_count > self.branch_count:
+ self.branch_count = total_branch_count
+ if self.branch_count:
+ for child_item in self.child_items:
+ child_item.data[6] = self.branchPercent(child_item.branch_count)
+ if total_time > self.time:
+ self.time = total_time
+ if self.time:
+ for child_item in self.child_items:
+ child_item.data[4] = self.timePercent(child_item.time)
+
+ def childCount(self):
+ if not self.query_done:
+ self.selectCalls()
+ return self.child_count
+
+ def columnCount(self):
+ return 7
+
+ def columnHeader(self, column):
+ headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+ return headers[column]
+
+ def getData(self, column):
+ return self.data[column]
+
+class TreeModel(QAbstractItemModel):
+
+ def __init__(self, db, parent=None):
+ super(TreeModel, self).__init__(parent)
+ self.db = db
+ self.root = TreeItem(db, 0, None)
+
+ def columnCount(self, parent):
+ return self.root.columnCount()
+
+ def rowCount(self, parent):
+ if parent.isValid():
+ parent_item = parent.internalPointer()
+ else:
+ parent_item = self.root
+ return parent_item.childCount()
+
+ def headerData(self, section, orientation, role):
+ if role == Qt.TextAlignmentRole:
+ if section > 1:
+ return Qt.AlignRight
+ if role != Qt.DisplayRole:
+ return None
+ if orientation != Qt.Horizontal:
+ return None
+ return self.root.columnHeader(section)
+
+ def parent(self, child):
+ child_item = child.internalPointer()
+ if child_item is self.root:
+ return QModelIndex()
+ parent_item = child_item.getParentItem()
+ return self.createIndex(parent_item.getRow(), 0, parent_item)
+
+ def index(self, row, column, parent):
+ if parent.isValid():
+ parent_item = parent.internalPointer()
+ else:
+ parent_item = self.root
+ child_item = parent_item.getChildItem(row)
+ return self.createIndex(row, column, child_item)
+
+ def data(self, index, role):
+ if role == Qt.TextAlignmentRole:
+ if index.column() > 1:
+ return Qt.AlignRight
+ if role != Qt.DisplayRole:
+ return None
+ index_item = index.internalPointer()
+ return index_item.getData(index.column())
+
+class MainWindow(QMainWindow):
+
+ def __init__(self, db, dbname, parent=None):
+ super(MainWindow, self).__init__(parent)
+
+ self.setObjectName("MainWindow")
+ self.setWindowTitle("Call Graph: " + dbname)
+ self.move(100, 100)
+ self.resize(800, 600)
+ style = self.style()
+ icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
+ self.setWindowIcon(icon);
+
+ self.model = TreeModel(db)
+
+ self.view = QTreeView()
+ self.view.setModel(self.model)
+
+ self.setCentralWidget(self.view)
+
+if __name__ == '__main__':
+ if (len(sys.argv) < 2):
+ print >> sys.stderr, "Usage is: call-graph-from-postgresql.py <database name>"
+ raise Exception("Too few arguments")
+
+ dbname = sys.argv[1]
+
+ db = QSqlDatabase.addDatabase('QPSQL')
+
+ opts = dbname.split()
+ for opt in opts:
+ if '=' in opt:
+ opt = opt.split('=')
+ if opt[0] == 'hostname':
+ db.setHostName(opt[1])
+ elif opt[0] == 'port':
+ db.setPort(int(opt[1]))
+ elif opt[0] == 'username':
+ db.setUserName(opt[1])
+ elif opt[0] == 'password':
+ db.setPassword(opt[1])
+ elif opt[0] == 'dbname':
+ dbname = opt[1]
+ else:
+ dbname = opt
+
+ db.setDatabaseName(dbname)
+ if not db.open():
+ raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+
+ app = QApplication(sys.argv)
+ window = MainWindow(db, dbname)
+ window.show()
+ err = app.exec_()
+ db.close()
+ sys.exit(err)
diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py
new file mode 100644
index 0000000..239cb05
--- /dev/null
+++ b/tools/perf/scripts/python/compaction-times.py
@@ -0,0 +1,311 @@
+# report time spent in compaction
+# Licensed under the terms of the GNU GPL License version 2
+
+# testing:
+# 'echo 1 > /proc/sys/vm/compact_memory' to force compaction of all zones
+
+import os
+import sys
+import re
+
+import signal
+signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+usage = "usage: perf script report compaction-times.py -- [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]\n"
+
+class popt:
+ DISP_DFL = 0
+ DISP_PROC = 1
+ DISP_PROC_VERBOSE=2
+
+class topt:
+ DISP_TIME = 0
+ DISP_MIG = 1
+ DISP_ISOLFREE = 2
+ DISP_ISOLMIG = 4
+ DISP_ALL = 7
+
+class comm_filter:
+ def __init__(self, re):
+ self.re = re
+
+ def filter(self, pid, comm):
+ m = self.re.search(comm)
+ return m == None or m.group() == ""
+
+class pid_filter:
+ def __init__(self, low, high):
+ self.low = (0 if low == "" else int(low))
+ self.high = (0 if high == "" else int(high))
+
+ def filter(self, pid, comm):
+ return not (pid >= self.low and (self.high == 0 or pid <= self.high))
+
+def set_type(t):
+ global opt_disp
+ opt_disp = (t if opt_disp == topt.DISP_ALL else opt_disp|t)
+
+def ns(sec, nsec):
+ return (sec * 1000000000) + nsec
+
+def time(ns):
+ return "%dns" % ns if opt_ns else "%dus" % (round(ns, -3) / 1000)
+
+class pair:
+ def __init__(self, aval, bval, alabel = None, blabel = None):
+ self.alabel = alabel
+ self.blabel = blabel
+ self.aval = aval
+ self.bval = bval
+
+ def __add__(self, rhs):
+ self.aval += rhs.aval
+ self.bval += rhs.bval
+ return self
+
+ def __str__(self):
+ return "%s=%d %s=%d" % (self.alabel, self.aval, self.blabel, self.bval)
+
+class cnode:
+ def __init__(self, ns):
+ self.ns = ns
+ self.migrated = pair(0, 0, "moved", "failed")
+ self.fscan = pair(0,0, "scanned", "isolated")
+ self.mscan = pair(0,0, "scanned", "isolated")
+
+ def __add__(self, rhs):
+ self.ns += rhs.ns
+ self.migrated += rhs.migrated
+ self.fscan += rhs.fscan
+ self.mscan += rhs.mscan
+ return self
+
+ def __str__(self):
+ prev = 0
+ s = "%s " % time(self.ns)
+ if (opt_disp & topt.DISP_MIG):
+ s += "migration: %s" % self.migrated
+ prev = 1
+ if (opt_disp & topt.DISP_ISOLFREE):
+ s += "%sfree_scanner: %s" % (" " if prev else "", self.fscan)
+ prev = 1
+ if (opt_disp & topt.DISP_ISOLMIG):
+ s += "%smigration_scanner: %s" % (" " if prev else "", self.mscan)
+ return s
+
+ def complete(self, secs, nsecs):
+ self.ns = ns(secs, nsecs) - self.ns
+
+ def increment(self, migrated, fscan, mscan):
+ if (migrated != None):
+ self.migrated += migrated
+ if (fscan != None):
+ self.fscan += fscan
+ if (mscan != None):
+ self.mscan += mscan
+
+
+class chead:
+ heads = {}
+ val = cnode(0);
+ fobj = None
+
+ @classmethod
+ def add_filter(cls, filter):
+ cls.fobj = filter
+
+ @classmethod
+ def create_pending(cls, pid, comm, start_secs, start_nsecs):
+ filtered = 0
+ try:
+ head = cls.heads[pid]
+ filtered = head.is_filtered()
+ except KeyError:
+ if cls.fobj != None:
+ filtered = cls.fobj.filter(pid, comm)
+ head = cls.heads[pid] = chead(comm, pid, filtered)
+
+ if not filtered:
+ head.mark_pending(start_secs, start_nsecs)
+
+ @classmethod
+ def increment_pending(cls, pid, migrated, fscan, mscan):
+ head = cls.heads[pid]
+ if not head.is_filtered():
+ if head.is_pending():
+ head.do_increment(migrated, fscan, mscan)
+ else:
+ sys.stderr.write("missing start compaction event for pid %d\n" % pid)
+
+ @classmethod
+ def complete_pending(cls, pid, secs, nsecs):
+ head = cls.heads[pid]
+ if not head.is_filtered():
+ if head.is_pending():
+ head.make_complete(secs, nsecs)
+ else:
+ sys.stderr.write("missing start compaction event for pid %d\n" % pid)
+
+ @classmethod
+ def gen(cls):
+ if opt_proc != popt.DISP_DFL:
+ for i in cls.heads:
+ yield cls.heads[i]
+
+ @classmethod
+ def str(cls):
+ return cls.val
+
+ def __init__(self, comm, pid, filtered):
+ self.comm = comm
+ self.pid = pid
+ self.val = cnode(0)
+ self.pending = None
+ self.filtered = filtered
+ self.list = []
+
+ def __add__(self, rhs):
+ self.ns += rhs.ns
+ self.val += rhs.val
+ return self
+
+ def mark_pending(self, secs, nsecs):
+ self.pending = cnode(ns(secs, nsecs))
+
+ def do_increment(self, migrated, fscan, mscan):
+ self.pending.increment(migrated, fscan, mscan)
+
+ def make_complete(self, secs, nsecs):
+ self.pending.complete(secs, nsecs)
+ chead.val += self.pending
+
+ if opt_proc != popt.DISP_DFL:
+ self.val += self.pending
+
+ if opt_proc == popt.DISP_PROC_VERBOSE:
+ self.list.append(self.pending)
+ self.pending = None
+
+ def enumerate(self):
+ if opt_proc == popt.DISP_PROC_VERBOSE and not self.is_filtered():
+ for i, pelem in enumerate(self.list):
+ sys.stdout.write("%d[%s].%d: %s\n" % (self.pid, self.comm, i+1, pelem))
+
+ def is_pending(self):
+ return self.pending != None
+
+ def is_filtered(self):
+ return self.filtered
+
+ def display(self):
+ if not self.is_filtered():
+ sys.stdout.write("%d[%s]: %s\n" % (self.pid, self.comm, self.val))
+
+
+def trace_end():
+ sys.stdout.write("total: %s\n" % chead.str())
+ for i in chead.gen():
+ i.display(),
+ i.enumerate()
+
+def compaction__mm_compaction_migratepages(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, nr_migrated, nr_failed):
+
+ chead.increment_pending(common_pid,
+ pair(nr_migrated, nr_failed), None, None)
+
+def compaction__mm_compaction_isolate_freepages(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
+
+ chead.increment_pending(common_pid,
+ None, pair(nr_scanned, nr_taken), None)
+
+def compaction__mm_compaction_isolate_migratepages(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
+
+ chead.increment_pending(common_pid,
+ None, None, pair(nr_scanned, nr_taken))
+
+def compaction__mm_compaction_end(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, zone_start, migrate_start, free_start, zone_end,
+ sync, status):
+
+ chead.complete_pending(common_pid, common_secs, common_nsecs)
+
+def compaction__mm_compaction_begin(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, zone_start, migrate_start, free_start, zone_end,
+ sync):
+
+ chead.create_pending(common_pid, common_comm, common_secs, common_nsecs)
+
+def pr_help():
+ global usage
+
+ sys.stdout.write(usage)
+ sys.stdout.write("\n")
+ sys.stdout.write("-h display this help\n")
+ sys.stdout.write("-p display by process\n")
+ sys.stdout.write("-pv display by process (verbose)\n")
+ sys.stdout.write("-t display stall times only\n")
+ sys.stdout.write("-m display stats for migration\n")
+ sys.stdout.write("-fs display stats for free scanner\n")
+ sys.stdout.write("-ms display stats for migration scanner\n")
+ sys.stdout.write("-u display results in microseconds (default nanoseconds)\n")
+
+
+comm_re = None
+pid_re = None
+pid_regex = "^(\d*)-(\d*)$|^(\d*)$"
+
+opt_proc = popt.DISP_DFL
+opt_disp = topt.DISP_ALL
+
+opt_ns = True
+
+argc = len(sys.argv) - 1
+if argc >= 1:
+ pid_re = re.compile(pid_regex)
+
+ for i, opt in enumerate(sys.argv[1:]):
+ if opt[0] == "-":
+ if opt == "-h":
+ pr_help()
+ exit(0);
+ elif opt == "-p":
+ opt_proc = popt.DISP_PROC
+ elif opt == "-pv":
+ opt_proc = popt.DISP_PROC_VERBOSE
+ elif opt == '-u':
+ opt_ns = False
+ elif opt == "-t":
+ set_type(topt.DISP_TIME)
+ elif opt == "-m":
+ set_type(topt.DISP_MIG)
+ elif opt == "-fs":
+ set_type(topt.DISP_ISOLFREE)
+ elif opt == "-ms":
+ set_type(topt.DISP_ISOLMIG)
+ else:
+ sys.exit(usage)
+
+ elif i == argc - 1:
+ m = pid_re.search(opt)
+ if m != None and m.group() != "":
+ if m.group(3) != None:
+ f = pid_filter(m.group(3), m.group(3))
+ else:
+ f = pid_filter(m.group(1), m.group(2))
+ else:
+ try:
+ comm_re=re.compile(opt)
+ except:
+ sys.stderr.write("invalid regex '%s'" % opt)
+ sys.exit(usage)
+ f = comm_filter(comm_re)
+
+ chead.add_filter(f)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 4cdafd8..84a3203 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -15,6 +15,53 @@ import sys
import struct
import datetime
+# To use this script you will need to have installed package python-pyside which
+# provides LGPL-licensed Python bindings for Qt. You will also need the package
+# libqt4-sql-psql for Qt postgresql support.
+#
+# The script assumes postgresql is running on the local machine and that the
+# user has postgresql permissions to create databases. Examples of installing
+# postgresql and adding such a user are:
+#
+# fedora:
+#
+# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql
+# $ sudo su - postgres -c initdb
+# $ sudo service postgresql start
+# $ sudo su - postgres
+# $ createuser <your user id here>
+# Shall the new role be a superuser? (y/n) y
+#
+# ubuntu:
+#
+# $ sudo apt-get install postgresql
+# $ sudo su - postgres
+# $ createuser <your user id here>
+# Shall the new role be a superuser? (y/n) y
+#
+# An example of using this script with Intel PT:
+#
+# $ perf record -e intel_pt//u ls
+# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls
+# 2015-05-29 12:49:23.464364 Creating database...
+# 2015-05-29 12:49:26.281717 Writing to intermediate files...
+# 2015-05-29 12:49:27.190383 Copying to database...
+# 2015-05-29 12:49:28.140451 Removing intermediate files...
+# 2015-05-29 12:49:28.147451 Adding primary keys
+# 2015-05-29 12:49:28.655683 Adding foreign keys
+# 2015-05-29 12:49:29.365350 Done
+#
+# To browse the database, psql can be used e.g.
+#
+# $ psql pt_example
+# pt_example=# select * from samples_view where id < 100;
+# pt_example=# \d+
+# pt_example=# \d+ samples_view
+# pt_example=# \q
+#
+# An example of using the database is provided by the script
+# call-graph-from-postgresql.py. Refer to that script for details.
+
from PySide.QtSql import *
# Need to access PostgreSQL C library directly to use COPY FROM STDIN
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 6a8801b..c1518bd 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -3,9 +3,9 @@ perf-y += parse-events.o
perf-y += dso-data.o
perf-y += attr.o
perf-y += vmlinux-kallsyms.o
-perf-y += open-syscall.o
-perf-y += open-syscall-all-cpus.o
-perf-y += open-syscall-tp-fields.o
+perf-y += openat-syscall.o
+perf-y += openat-syscall-all-cpus.o
+perf-y += openat-syscall-tp-fields.o
perf-y += mmap-basic.o
perf-y += perf-record.o
perf-y += rdpmc.o
@@ -31,10 +31,12 @@ perf-y += code-reading.o
perf-y += sample-parsing.o
perf-y += parse-no-sample-id-all.o
perf-y += kmod-path.o
+perf-y += thread-map.o
+perf-y += llvm.o
perf-$(CONFIG_X86) += perf-time-to-tsc.o
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 4f40981..136cd93 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -23,12 +23,12 @@ static struct test {
.func = test__vmlinux_matches_kallsyms,
},
{
- .desc = "detect open syscall event",
- .func = test__open_syscall_event,
+ .desc = "detect openat syscall event",
+ .func = test__openat_syscall_event,
},
{
- .desc = "detect open syscall event on all cpus",
- .func = test__open_syscall_event_on_all_cpus,
+ .desc = "detect openat syscall event on all cpus",
+ .func = test__openat_syscall_event_on_all_cpus,
},
{
.desc = "read samples using the mmap interface",
@@ -73,8 +73,8 @@ static struct test {
.func = test__perf_evsel__tp_sched_test,
},
{
- .desc = "Generate and check syscalls:sys_enter_open event fields",
- .func = test__syscall_open_tp_fields,
+ .desc = "Generate and check syscalls:sys_enter_openat event fields",
+ .func = test__syscall_openat_tp_fields,
},
{
.desc = "struct perf_event_attr setup",
@@ -126,7 +126,7 @@ static struct test {
.desc = "Test parsing with no sample_id_all bit set",
.func = test__parse_no_sample_id_all,
},
-#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "Test dwarf unwind",
@@ -171,6 +171,14 @@ static struct test {
.func = test__kmod_path__parse,
},
{
+ .desc = "Test thread map",
+ .func = test__thread_map,
+ },
+ {
+ .desc = "Test LLVM searching and compiling",
+ .func = test__llvm,
+ },
+ {
.func = NULL,
},
};
@@ -219,7 +227,7 @@ static int run_test(struct test *test)
wait(&status);
if (WIFEXITED(status)) {
- err = WEXITSTATUS(status);
+ err = (signed char)WEXITSTATUS(status);
pr_debug("test child finished with %d\n", err);
} else if (WIFSIGNALED(status)) {
err = -1;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index f671ec3..39c784a 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -248,6 +248,7 @@ static int process_sample_event(struct machine *machine,
struct perf_sample sample;
struct thread *thread;
u8 cpumode;
+ int ret;
if (perf_evlist__parse_sample(evlist, event, &sample)) {
pr_debug("perf_evlist__parse_sample failed\n");
@@ -262,7 +263,9 @@ static int process_sample_event(struct machine *machine,
cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- return read_object_code(sample.ip, READLEN, cpumode, thread, state);
+ ret = read_object_code(sample.ip, READLEN, cpumode, thread, state);
+ thread__put(thread);
+ return ret;
}
static int process_event(struct machine *machine, struct perf_evlist *evlist,
@@ -448,7 +451,7 @@ static int do_test_code_reading(bool try_kcore)
}
ret = perf_event__synthesize_thread_map(NULL, threads,
- perf_event__process, machine, false);
+ perf_event__process, machine, false, 500);
if (ret < 0) {
pr_debug("perf_event__synthesize_thread_map failed\n");
goto out_err;
@@ -457,13 +460,13 @@ static int do_test_code_reading(bool try_kcore)
thread = machine__findnew_thread(machine, pid, pid);
if (!thread) {
pr_debug("machine__findnew_thread failed\n");
- goto out_err;
+ goto out_put;
}
cpus = cpu_map__new(NULL);
if (!cpus) {
pr_debug("cpu_map__new failed\n");
- goto out_err;
+ goto out_put;
}
while (1) {
@@ -472,7 +475,7 @@ static int do_test_code_reading(bool try_kcore)
evlist = perf_evlist__new();
if (!evlist) {
pr_debug("perf_evlist__new failed\n");
- goto out_err;
+ goto out_put;
}
perf_evlist__set_maps(evlist, cpus, threads);
@@ -482,10 +485,10 @@ static int do_test_code_reading(bool try_kcore)
else
str = "cycles";
pr_debug("Parsing event '%s'\n", str);
- ret = parse_events(evlist, str);
+ ret = parse_events(evlist, str, NULL);
if (ret < 0) {
pr_debug("parse_events failed\n");
- goto out_err;
+ goto out_put;
}
perf_evlist__config(evlist, &opts);
@@ -506,7 +509,7 @@ static int do_test_code_reading(bool try_kcore)
continue;
}
pr_debug("perf_evlist__open failed\n");
- goto out_err;
+ goto out_put;
}
break;
}
@@ -514,7 +517,7 @@ static int do_test_code_reading(bool try_kcore)
ret = perf_evlist__mmap(evlist, UINT_MAX, false);
if (ret < 0) {
pr_debug("perf_evlist__mmap failed\n");
- goto out_err;
+ goto out_put;
}
perf_evlist__enable(evlist);
@@ -525,7 +528,7 @@ static int do_test_code_reading(bool try_kcore)
ret = process_events(machine, evlist, &state);
if (ret < 0)
- goto out_err;
+ goto out_put;
if (!have_vmlinux && !have_kcore && !try_kcore)
err = TEST_CODE_READING_NO_KERNEL_OBJ;
@@ -535,12 +538,15 @@ static int do_test_code_reading(bool try_kcore)
err = TEST_CODE_READING_NO_ACCESS;
else
err = TEST_CODE_READING_OK;
+out_put:
+ thread__put(thread);
out_err:
+
if (evlist) {
perf_evlist__delete(evlist);
} else {
- cpu_map__delete(cpus);
- thread_map__delete(threads);
+ cpu_map__put(cpus);
+ thread_map__put(threads);
}
machines__destroy_kernel_maps(&machines);
machine__delete_threads(machine);
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 513e5fe..a218aea 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -99,6 +99,17 @@ struct test_data_offset offsets[] = {
},
};
+/* move it from util/dso.c for compatibility */
+static int dso__data_fd(struct dso *dso, struct machine *machine)
+{
+ int fd = dso__data_get_fd(dso, machine);
+
+ if (fd >= 0)
+ dso__data_put_fd(dso);
+
+ return fd;
+}
+
int test__dso_data(void)
{
struct machine machine;
@@ -155,7 +166,7 @@ int test__dso_data(void)
free(buf);
}
- dso__delete(dso);
+ dso__put(dso);
unlink(file);
return 0;
}
@@ -215,7 +226,7 @@ static void dsos__delete(int cnt)
struct dso *dso = dsos[i];
unlink(dso->name);
- dso__delete(dso);
+ dso__put(dso);
}
free(dsos);
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 0bf06be..40b36c4 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -28,7 +28,7 @@ static int init_live_machine(struct machine *machine)
pid_t pid = getpid();
return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
- mmap_handler, machine, true);
+ mmap_handler, machine, true, 500);
}
#define MAX_STACK 8
@@ -170,6 +170,7 @@ int test__dwarf_unwind(void)
}
err = krava_1(thread);
+ thread__put(thread);
out:
machine__delete_threads(machine);
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index b8d8341..3fa7159 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -23,7 +23,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
__perf_evsel__hw_cache_type_op_res_name(type, op, i,
name, sizeof(name));
- err = parse_events(evlist, name);
+ err = parse_events(evlist, name, NULL);
if (err)
ret = err;
}
@@ -71,7 +71,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
return -ENOMEM;
for (i = 0; i < nr_names; ++i) {
- err = parse_events(evlist, names[i]);
+ err = parse_events(evlist, names[i], NULL);
if (err) {
pr_debug("failed to parse event '%s', err %d\n",
names[i], err);
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index a62c091..ce80b27 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -96,6 +96,7 @@ struct machine *setup_fake_machine(struct machines *machines)
goto out;
thread__set_comm(thread, fake_threads[i].comm, 0);
+ thread__put(thread);
}
for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
@@ -120,8 +121,7 @@ struct machine *setup_fake_machine(struct machines *machines)
size_t k;
struct dso *dso;
- dso = __dsos__findnew(&machine->user_dsos,
- fake_symbols[i].dso_name);
+ dso = machine__findnew_dso(machine, fake_symbols[i].dso_name);
if (dso == NULL)
goto out;
@@ -134,11 +134,15 @@ struct machine *setup_fake_machine(struct machines *machines)
sym = symbol__new(fsym->start, fsym->length,
STB_GLOBAL, fsym->name);
- if (sym == NULL)
+ if (sym == NULL) {
+ dso__put(dso);
goto out;
+ }
symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
}
+
+ dso__put(dso);
}
return machine;
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 1861996..7ed73701 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -87,6 +87,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
},
};
struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = &sample,
.hide_unresolved = false,
};
@@ -104,9 +106,11 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
&sample) < 0)
goto out;
- if (hist_entry_iter__add(&iter, &al, evsel, &sample,
- PERF_MAX_STACK_DEPTH, NULL) < 0)
+ if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+ NULL) < 0) {
+ addr_location__put(&al);
goto out;
+ }
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
@@ -275,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = false;
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
setup_sorting();
callchain_register_param(&callchain_param);
@@ -421,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = false;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);
setup_sorting();
callchain_register_param(&callchain_param);
@@ -478,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = true;
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
setup_sorting();
callchain_register_param(&callchain_param);
@@ -661,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = true;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);
setup_sorting();
callchain_register_param(&callchain_param);
@@ -695,7 +703,7 @@ int test__hists_cumulate(void)
TEST_ASSERT_VAL("No memory", evlist);
- err = parse_events(evlist, "cpu-clock");
+ err = parse_events(evlist, "cpu-clock", NULL);
if (err)
goto out;
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 59e53db..ce48775 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -63,6 +63,8 @@ static int add_hist_entries(struct perf_evlist *evlist,
},
};
struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = &sample,
.ops = &hist_iter_normal,
.hide_unresolved = false,
};
@@ -81,9 +83,11 @@ static int add_hist_entries(struct perf_evlist *evlist,
&sample) < 0)
goto out;
- if (hist_entry_iter__add(&iter, &al, evsel, &sample,
- PERF_MAX_STACK_DEPTH, NULL) < 0)
+ if (hist_entry_iter__add(&iter, &al,
+ PERF_MAX_STACK_DEPTH, NULL) < 0) {
+ addr_location__put(&al);
goto out;
+ }
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
@@ -108,10 +112,10 @@ int test__hists_filter(void)
TEST_ASSERT_VAL("No memory", evlist);
- err = parse_events(evlist, "cpu-clock");
+ err = parse_events(evlist, "cpu-clock", NULL);
if (err)
goto out;
- err = parse_events(evlist, "task-clock");
+ err = parse_events(evlist, "task-clock", NULL);
if (err)
goto out;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 278ba834..8c102b0 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -91,8 +91,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
he = __hists__add_entry(hists, &al, NULL,
NULL, NULL, 1, 1, 0, true);
- if (he == NULL)
+ if (he == NULL) {
+ addr_location__put(&al);
goto out;
+ }
fake_common_samples[k].thread = al.thread;
fake_common_samples[k].map = al.map;
@@ -115,8 +117,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
he = __hists__add_entry(hists, &al, NULL,
NULL, NULL, 1, 1, 0, true);
- if (he == NULL)
+ if (he == NULL) {
+ addr_location__put(&al);
goto out;
+ }
fake_samples[i][k].thread = al.thread;
fake_samples[i][k].map = al.map;
@@ -282,10 +286,10 @@ int test__hists_link(void)
if (evlist == NULL)
return -ENOMEM;
- err = parse_events(evlist, "cpu-clock");
+ err = parse_events(evlist, "cpu-clock", NULL);
if (err)
goto out;
- err = parse_events(evlist, "task-clock");
+ err = parse_events(evlist, "task-clock", NULL);
if (err)
goto out;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index b52c9fa..adbebc8 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -57,6 +57,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
},
};
struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = &sample,
.ops = &hist_iter_normal,
.hide_unresolved = false,
};
@@ -70,9 +72,11 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
&sample) < 0)
goto out;
- if (hist_entry_iter__add(&iter, &al, evsel, &sample,
- PERF_MAX_STACK_DEPTH, NULL) < 0)
+ if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+ NULL) < 0) {
+ addr_location__put(&al);
goto out;
+ }
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
@@ -590,7 +594,7 @@ int test__hists_output(void)
TEST_ASSERT_VAL("No memory", evlist);
- err = parse_events(evlist, "cpu-clock");
+ err = parse_events(evlist, "cpu-clock", NULL);
if (err)
goto out;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 7a5ab7b..4d4b983 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -78,8 +78,8 @@ int test__keep_tracking(void)
perf_evlist__set_maps(evlist, cpus, threads);
- CHECK__(parse_events(evlist, "dummy:u"));
- CHECK__(parse_events(evlist, "cycles:u"));
+ CHECK__(parse_events(evlist, "dummy:u", NULL));
+ CHECK__(parse_events(evlist, "cycles:u", NULL));
perf_evlist__config(evlist, &opts);
@@ -144,8 +144,8 @@ out_err:
perf_evlist__disable(evlist);
perf_evlist__delete(evlist);
} else {
- cpu_map__delete(cpus);
- thread_map__delete(threads);
+ cpu_map__put(cpus);
+ thread_map__put(threads);
}
return err;
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index e8d7cbb..08c433b 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -34,9 +34,21 @@ static int test(const char *path, bool alloc_name, bool alloc_ext,
return 0;
}
+static int test_is_kernel_module(const char *path, int cpumode, bool expect)
+{
+ TEST_ASSERT_VAL("is_kernel_module",
+ (!!is_kernel_module(path, cpumode)) == (!!expect));
+ pr_debug("%s (cpumode: %d) - is_kernel_module: %s\n",
+ path, cpumode, expect ? "true" : "false");
+ return 0;
+}
+
#define T(path, an, ae, k, c, n, e) \
TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e))
+#define M(path, c, e) \
+ TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
+
int test__kmod_path__parse(void)
{
/* path alloc_name alloc_ext kmod comp name ext */
@@ -44,30 +56,90 @@ int test__kmod_path__parse(void)
T("/xxxx/xxxx/x-x.ko", false , true , true, false, NULL , NULL);
T("/xxxx/xxxx/x-x.ko", true , false , true, false, "[x_x]", NULL);
T("/xxxx/xxxx/x-x.ko", false , false , true, false, NULL , NULL);
+ M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
+ M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
/* path alloc_name alloc_ext kmod comp name ext */
T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz");
T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz");
T("/xxxx/xxxx/x.ko.gz", true , false , true, true, "[x]", NULL);
T("/xxxx/xxxx/x.ko.gz", false , false , true, true, NULL , NULL);
+ M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+ M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false);
/* path alloc_name alloc_ext kmod comp name ext */
T("/xxxx/xxxx/x.gz", true , true , false, true, "x.gz" ,"gz");
T("/xxxx/xxxx/x.gz", false , true , false, true, NULL ,"gz");
T("/xxxx/xxxx/x.gz", true , false , false, true, "x.gz" , NULL);
T("/xxxx/xxxx/x.gz", false , false , false, true, NULL , NULL);
+ M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false);
+ M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false);
/* path alloc_name alloc_ext kmod comp name ext */
T("x.gz", true , true , false, true, "x.gz", "gz");
T("x.gz", false , true , false, true, NULL , "gz");
T("x.gz", true , false , false, true, "x.gz", NULL);
T("x.gz", false , false , false, true, NULL , NULL);
+ M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("x.gz", PERF_RECORD_MISC_KERNEL, false);
+ M("x.gz", PERF_RECORD_MISC_USER, false);
/* path alloc_name alloc_ext kmod comp name ext */
T("x.ko.gz", true , true , true, true, "[x]", "gz");
T("x.ko.gz", false , true , true, true, NULL , "gz");
T("x.ko.gz", true , false , true, true, "[x]", NULL);
T("x.ko.gz", false , false , true, true, NULL , NULL);
+ M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+ M("x.ko.gz", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name alloc_ext kmod comp name ext */
+ T("[test_module]", true , true , true, false, "[test_module]", NULL);
+ T("[test_module]", false , true , true, false, NULL , NULL);
+ T("[test_module]", true , false , true, false, "[test_module]", NULL);
+ T("[test_module]", false , false , true, false, NULL , NULL);
+ M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("[test_module]", PERF_RECORD_MISC_KERNEL, true);
+ M("[test_module]", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name alloc_ext kmod comp name ext */
+ T("[test.module]", true , true , true, false, "[test.module]", NULL);
+ T("[test.module]", false , true , true, false, NULL , NULL);
+ T("[test.module]", true , false , true, false, "[test.module]", NULL);
+ T("[test.module]", false , false , true, false, NULL , NULL);
+ M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("[test.module]", PERF_RECORD_MISC_KERNEL, true);
+ M("[test.module]", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name alloc_ext kmod comp name ext */
+ T("[vdso]", true , true , false, false, "[vdso]", NULL);
+ T("[vdso]", false , true , false, false, NULL , NULL);
+ T("[vdso]", true , false , false, false, "[vdso]", NULL);
+ T("[vdso]", false , false , false, false, NULL , NULL);
+ M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("[vdso]", PERF_RECORD_MISC_KERNEL, false);
+ M("[vdso]", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name alloc_ext kmod comp name ext */
+ T("[vsyscall]", true , true , false, false, "[vsyscall]", NULL);
+ T("[vsyscall]", false , true , false, false, NULL , NULL);
+ T("[vsyscall]", true , false , false, false, "[vsyscall]", NULL);
+ T("[vsyscall]", false , false , false, false, NULL , NULL);
+ M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false);
+ M("[vsyscall]", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name alloc_ext kmod comp name ext */
+ T("[kernel.kallsyms]", true , true , false, false, "[kernel.kallsyms]", NULL);
+ T("[kernel.kallsyms]", false , true , false, false, NULL , NULL);
+ T("[kernel.kallsyms]", true , false , false, false, "[kernel.kallsyms]", NULL);
+ T("[kernel.kallsyms]", false , false , false, false, NULL , NULL);
+ M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false);
+ M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false);
return 0;
}
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
new file mode 100644
index 0000000..52d5597
--- /dev/null
+++ b/tools/perf/tests/llvm.c
@@ -0,0 +1,98 @@
+#include <stdio.h>
+#include <bpf/libbpf.h>
+#include <util/llvm-utils.h>
+#include <util/cache.h>
+#include "tests.h"
+#include "debug.h"
+
+static int perf_config_cb(const char *var, const char *val,
+ void *arg __maybe_unused)
+{
+ return perf_default_config(var, val, arg);
+}
+
+/*
+ * Randomly give it a "version" section since we don't really load it
+ * into kernel
+ */
+static const char test_bpf_prog[] =
+ "__attribute__((section(\"do_fork\"), used)) "
+ "int fork(void *ctx) {return 0;} "
+ "char _license[] __attribute__((section(\"license\"), used)) = \"GPL\";"
+ "int _version __attribute__((section(\"version\"), used)) = 0x40100;";
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
+{
+ struct bpf_object *obj;
+
+ obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL);
+ if (!obj)
+ return -1;
+ bpf_object__close(obj);
+ return 0;
+}
+#else
+static int test__bpf_parsing(void *obj_buf __maybe_unused,
+ size_t obj_buf_sz __maybe_unused)
+{
+ fprintf(stderr, " (skip bpf parsing)");
+ return 0;
+}
+#endif
+
+int test__llvm(void)
+{
+ char *tmpl_new, *clang_opt_new;
+ void *obj_buf;
+ size_t obj_buf_sz;
+ int err, old_verbose;
+
+ perf_config(perf_config_cb, NULL);
+
+ /*
+ * Skip this test if user's .perfconfig doesn't set [llvm] section
+ * and clang is not found in $PATH, and this is not perf test -v
+ */
+ if (verbose == 0 && !llvm_param.user_set_param && llvm__search_clang()) {
+ fprintf(stderr, " (no clang, try 'perf test -v LLVM')");
+ return TEST_SKIP;
+ }
+
+ old_verbose = verbose;
+ /*
+ * llvm is verbosity when error. Suppress all error output if
+ * not 'perf test -v'.
+ */
+ if (verbose == 0)
+ verbose = -1;
+
+ if (!llvm_param.clang_bpf_cmd_template)
+ return -1;
+
+ if (!llvm_param.clang_opt)
+ llvm_param.clang_opt = strdup("");
+
+ err = asprintf(&tmpl_new, "echo '%s' | %s", test_bpf_prog,
+ llvm_param.clang_bpf_cmd_template);
+ if (err < 0)
+ return -1;
+ err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt);
+ if (err < 0)
+ return -1;
+
+ llvm_param.clang_bpf_cmd_template = tmpl_new;
+ llvm_param.clang_opt = clang_opt_new;
+ err = llvm__compile_bpf("-", &obj_buf, &obj_buf_sz);
+
+ verbose = old_verbose;
+ if (err) {
+ if (!verbose)
+ fprintf(stderr, " (use -v to see error message)");
+ return -1;
+ }
+
+ err = test__bpf_parsing(obj_buf, obj_buf_sz);
+ free(obj_buf);
+ return err;
+}
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index bff8532..ba31c4b 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -1,5 +1,16 @@
+ifndef MK
+ifeq ($(MAKECMDGOALS),)
+# no target specified, trigger the whole suite
+all:
+ @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile
+ @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf
+else
+# run only specific test over 'Makefile'
+%:
+ @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile $@
+endif
+else
PERF := .
-MK := Makefile
include config/Makefile.arch
@@ -32,6 +43,7 @@ make_no_backtrace := NO_BACKTRACE=1
make_no_libnuma := NO_LIBNUMA=1
make_no_libaudit := NO_LIBAUDIT=1
make_no_libbionic := NO_LIBBIONIC=1
+make_no_auxtrace := NO_AUXTRACE=1
make_tags := tags
make_cscope := cscope
make_help := help
@@ -46,17 +58,24 @@ make_install_man := install-man
make_install_html := install-html
make_install_info := install-info
make_install_pdf := install-pdf
+make_install_prefix := install prefix=/tmp/krava
+make_install_prefix_slash := install prefix=/tmp/krava/
make_static := LDFLAGS=-static
# all the NO_* variable combined
make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
-make_minimal += NO_LIBDW_DWARF_UNWIND=1
+make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1
# $(run) contains all available tests
run := make_pure
+# Targets 'clean all' can be run together only through top level
+# Makefile because we detect clean target in Makefile.perf and
+# disable features detection
+ifeq ($(MK),Makefile)
run += make_clean_all
+endif
run += make_python_perf_so
run += make_debug
run += make_no_libperl
@@ -74,6 +93,7 @@ run += make_no_backtrace
run += make_no_libnuma
run += make_no_libaudit
run += make_no_libbionic
+run += make_no_auxtrace
run += make_help
run += make_doc
run += make_perf_o
@@ -81,6 +101,8 @@ run += make_util_map_o
run += make_util_pmu_bison_o
run += make_install
run += make_install_bin
+run += make_install_prefix
+run += make_install_prefix_slash
# FIXME 'install-*' commented out till they're fixed
# run += make_install_doc
# run += make_install_man
@@ -155,6 +177,15 @@ test_make_install_O := $(call test_dest_files,$(installed_files_all))
test_make_install_bin := $(call test_dest_files,$(installed_files_bin))
test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin))
+# We prefix all installed files for make_install_prefix(_slash)
+# with '/tmp/krava' to match installed/prefix-ed files.
+installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all))
+test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix))
+test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix))
+
+test_make_install_prefix_slash := $(test_make_install_prefix)
+test_make_install_prefix_slash_O := $(test_make_install_prefix_O)
+
# FIXME nothing gets installed
test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1
test_make_install_man_O := $(test_make_install_man)
@@ -223,10 +254,23 @@ tarpkg:
echo "- $@: $$cmd" && echo $$cmd > $@ && \
( eval $$cmd ) >> $@ 2>&1
-all: $(run) $(run_O) tarpkg
+make_kernelsrc:
+ @echo "- make -C <kernelsrc> tools/perf"
+ $(call clean); \
+ (make -C ../.. tools/perf) > $@ 2>&1 && \
+ test -x perf && rm -f $@ || (cat $@ ; false)
+
+make_kernelsrc_tools:
+ @echo "- make -C <kernelsrc>/tools perf"
+ $(call clean); \
+ (make -C ../../tools perf) > $@ 2>&1 && \
+ test -x perf && rm -f $@ || (cat $@ ; false)
+
+all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
@echo OK
out: $(run_O)
@echo OK
.PHONY: all $(run) $(run_O) tarpkg clean
+endif # ifndef MK
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 9b9622a3..666b67a 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -23,10 +23,8 @@ int test__basic_mmap(void)
struct cpu_map *cpus;
struct perf_evlist *evlist;
cpu_set_t cpu_set;
- const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
- "getpgid", };
- pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
- (void*)getpgid };
+ const char *syscall_names[] = { "getsid", "getppid", "getpgid", };
+ pid_t (*syscalls[])(void) = { (void *)getsid, getppid, (void*)getpgid };
#define nsyscalls ARRAY_SIZE(syscall_names)
unsigned int nr_events[nsyscalls],
expected_nr_events[nsyscalls], i, j;
@@ -142,8 +140,8 @@ out_delete_evlist:
cpus = NULL;
threads = NULL;
out_free_cpus:
- cpu_map__delete(cpus);
+ cpu_map__put(cpus);
out_free_threads:
- thread_map__delete(threads);
+ thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 2113f1c..145050e 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -129,7 +129,7 @@ static int synth_all(struct machine *machine)
{
return perf_event__synthesize_threads(NULL,
perf_event__process,
- machine, 0);
+ machine, 0, 500);
}
static int synth_process(struct machine *machine)
@@ -141,9 +141,9 @@ static int synth_process(struct machine *machine)
err = perf_event__synthesize_thread_map(NULL, map,
perf_event__process,
- machine, 0);
+ machine, 0, 500);
- thread_map__delete(map);
+ thread_map__put(map);
return err;
}
@@ -191,6 +191,8 @@ static int mmap_events(synth_cb synth)
PERF_RECORD_MISC_USER, MAP__FUNCTION,
(unsigned long) (td->map + 1), &al);
+ thread__put(thread);
+
if (!al.map) {
pr_debug("failed, couldn't find map\n");
err = -1;
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 3ec885c..a572f87 100644
--- a/tools/perf/tests/open-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -3,13 +3,14 @@
#include "thread_map.h"
#include "cpumap.h"
#include "debug.h"
+#include "stat.h"
-int test__open_syscall_event_on_all_cpus(void)
+int test__openat_syscall_event_on_all_cpus(void)
{
int err = -1, fd, cpu;
struct cpu_map *cpus;
struct perf_evsel *evsel;
- unsigned int nr_open_calls = 111, i;
+ unsigned int nr_openat_calls = 111, i;
cpu_set_t cpu_set;
struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
char sbuf[STRERR_BUFSIZE];
@@ -27,7 +28,7 @@ int test__open_syscall_event_on_all_cpus(void)
CPU_ZERO(&cpu_set);
- evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+ evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
if (evsel == NULL) {
if (tracefs_configured())
pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
@@ -46,7 +47,7 @@ int test__open_syscall_event_on_all_cpus(void)
}
for (cpu = 0; cpu < cpus->nr; ++cpu) {
- unsigned int ncalls = nr_open_calls + cpu;
+ unsigned int ncalls = nr_openat_calls + cpu;
/*
* XXX eventually lift this restriction in a way that
* keeps perf building on older glibc installations
@@ -66,7 +67,7 @@ int test__open_syscall_event_on_all_cpus(void)
goto out_close_fd;
}
for (i = 0; i < ncalls; ++i) {
- fd = open("/etc/passwd", O_RDONLY);
+ fd = openat(0, "/etc/passwd", O_RDONLY);
close(fd);
}
CPU_CLR(cpus->map[cpu], &cpu_set);
@@ -77,7 +78,7 @@ int test__open_syscall_event_on_all_cpus(void)
* we use the auto allocation it will allocate just for 1 cpu,
* as we start by cpu 0.
*/
- if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
+ if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
goto out_close_fd;
}
@@ -96,10 +97,10 @@ int test__open_syscall_event_on_all_cpus(void)
break;
}
- expected = nr_open_calls + cpu;
- if (evsel->counts->cpu[cpu].val != expected) {
+ expected = nr_openat_calls + cpu;
+ if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
- expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
+ expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
err = -1;
}
}
@@ -110,6 +111,6 @@ out_close_fd:
out_evsel_delete:
perf_evsel__delete(evsel);
out_thread_map_delete:
- thread_map__delete(threads);
+ thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 127dcae..01a1962 100644
--- a/tools/perf/tests/open-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -5,7 +5,7 @@
#include "tests.h"
#include "debug.h"
-int test__syscall_open_tp_fields(void)
+int test__syscall_openat_tp_fields(void)
{
struct record_opts opts = {
.target = {
@@ -29,7 +29,7 @@ int test__syscall_open_tp_fields(void)
goto out;
}
- evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+ evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
if (evsel == NULL) {
pr_debug("%s: perf_evsel__newtp\n", __func__);
goto out_delete_evlist;
@@ -45,7 +45,7 @@ int test__syscall_open_tp_fields(void)
perf_evsel__config(evsel, &opts);
- evlist->threads->map[0] = getpid();
+ thread_map__set_pid(evlist->threads, 0, getpid());
err = perf_evlist__open(evlist);
if (err < 0) {
@@ -66,7 +66,7 @@ int test__syscall_open_tp_fields(void)
/*
* Generate the event:
*/
- open(filename, flags);
+ openat(AT_FDCWD, filename, flags);
while (1) {
int before = nr_events;
diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/openat-syscall.c
index 07aa319..c9a37bc 100644
--- a/tools/perf/tests/open-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -3,11 +3,11 @@
#include "debug.h"
#include "tests.h"
-int test__open_syscall_event(void)
+int test__openat_syscall_event(void)
{
int err = -1, fd;
struct perf_evsel *evsel;
- unsigned int nr_open_calls = 111, i;
+ unsigned int nr_openat_calls = 111, i;
struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
char sbuf[STRERR_BUFSIZE];
@@ -16,7 +16,7 @@ int test__open_syscall_event(void)
return -1;
}
- evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+ evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
if (evsel == NULL) {
if (tracefs_configured())
pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
@@ -34,8 +34,8 @@ int test__open_syscall_event(void)
goto out_evsel_delete;
}
- for (i = 0; i < nr_open_calls; ++i) {
- fd = open("/etc/passwd", O_RDONLY);
+ for (i = 0; i < nr_openat_calls; ++i) {
+ fd = openat(0, "/etc/passwd", O_RDONLY);
close(fd);
}
@@ -44,9 +44,9 @@ int test__open_syscall_event(void)
goto out_close_fd;
}
- if (evsel->counts->cpu[0].val != nr_open_calls) {
+ if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) {
pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
- nr_open_calls, evsel->counts->cpu[0].val);
+ nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val);
goto out_close_fd;
}
@@ -56,6 +56,6 @@ out_close_fd:
out_evsel_delete:
perf_evsel__delete(evsel);
out_thread_map_delete:
- thread_map__delete(threads);
+ thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 3de7449..9b6b2b63 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -82,8 +82,12 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ /*
+ * The period value gets configured within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
TEST_ASSERT_VAL("wrong period",
- 100000 == evsel->attr.sample_period);
+ 0 == evsel->attr.sample_period);
TEST_ASSERT_VAL("wrong config1",
0 == evsel->attr.config1);
TEST_ASSERT_VAL("wrong config2",
@@ -406,7 +410,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config);
TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1);
TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2);
- TEST_ASSERT_VAL("wrong period", 1000 == evsel->attr.sample_period);
+ /*
+ * The period value gets configured within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
return 0;
}
@@ -427,7 +435,7 @@ static int test__checkevent_list(struct perf_evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
- /* syscalls:sys_enter_open:k */
+ /* syscalls:sys_enter_openat:k */
evsel = perf_evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
TEST_ASSERT_VAL("wrong sample_type",
@@ -471,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
return 0;
}
+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ /* cpu/config=1,call-graph=fp,time,period=100000/ */
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ /* cpu/config=2,call-graph=no,time=0,period=2000/ */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config);
+ /*
+ * The period, time and callgraph value gets configured
+ * within perf_evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ return 0;
+}
+
static int test__checkevent_pmu_events(struct perf_evlist *evlist)
{
struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -665,7 +706,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
- /* group1 syscalls:sys_enter_open:H */
+ /* group1 syscalls:sys_enter_openat:H */
evsel = leader = perf_evlist__first(evlist);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
TEST_ASSERT_VAL("wrong sample_type",
@@ -1293,7 +1334,7 @@ struct evlist_test {
static struct evlist_test test__events[] = {
{
- .name = "syscalls:sys_enter_open",
+ .name = "syscalls:sys_enter_openat",
.check = test__checkevent_tracepoint,
.id = 0,
},
@@ -1353,7 +1394,7 @@ static struct evlist_test test__events[] = {
.id = 11,
},
{
- .name = "syscalls:sys_enter_open:k",
+ .name = "syscalls:sys_enter_openat:k",
.check = test__checkevent_tracepoint_modifier,
.id = 12,
},
@@ -1408,7 +1449,7 @@ static struct evlist_test test__events[] = {
.id = 22,
},
{
- .name = "r1,syscalls:sys_enter_open:k,1:1:hp",
+ .name = "r1,syscalls:sys_enter_openat:k,1:1:hp",
.check = test__checkevent_list,
.id = 23,
},
@@ -1443,7 +1484,7 @@ static struct evlist_test test__events[] = {
.id = 29,
},
{
- .name = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
+ .name = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
.check = test__group3,
.id = 30,
},
@@ -1547,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = {
.check = test__checkevent_pmu_name,
.id = 1,
},
+ {
+ .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+ .check = test__checkevent_pmu_partial_time_callgraph,
+ .id = 2,
+ },
};
struct terms_test {
@@ -1571,7 +1617,7 @@ static int test_event(struct evlist_test *e)
if (evlist == NULL)
return -ENOMEM;
- ret = parse_events(evlist, e->name);
+ ret = parse_events(evlist, e->name, NULL);
if (ret) {
pr_debug("failed to parse event '%s', err %d\n",
e->name, ret);
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index f238442..5f49484 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -68,7 +68,7 @@ int test__perf_time_to_tsc(void)
perf_evlist__set_maps(evlist, cpus, threads);
- CHECK__(parse_events(evlist, "cycles:u"));
+ CHECK__(parse_events(evlist, "cycles:u", NULL));
perf_evlist__config(evlist, &opts);
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index eeb68bb1..faa04e9 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -152,7 +152,8 @@ int test__pmu(void)
if (ret)
break;
- ret = perf_pmu__config_terms(&formats, &attr, terms, false);
+ ret = perf_pmu__config_terms(&formats, &attr, terms,
+ false, NULL);
if (ret)
break;
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index cc68648..e698742 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -347,7 +347,7 @@ int test__switch_tracking(void)
perf_evlist__set_maps(evlist, cpus, threads);
/* First event */
- err = parse_events(evlist, "cpu-clock:u");
+ err = parse_events(evlist, "cpu-clock:u", NULL);
if (err) {
pr_debug("Failed to parse event dummy:u\n");
goto out_err;
@@ -356,7 +356,7 @@ int test__switch_tracking(void)
cpu_clocks_evsel = perf_evlist__last(evlist);
/* Second event */
- err = parse_events(evlist, "cycles:u");
+ err = parse_events(evlist, "cycles:u", NULL);
if (err) {
pr_debug("Failed to parse event cycles:u\n");
goto out_err;
@@ -371,7 +371,7 @@ int test__switch_tracking(void)
goto out;
}
- err = parse_events(evlist, sched_switch);
+ err = parse_events(evlist, sched_switch, NULL);
if (err) {
pr_debug("Failed to parse event %s\n", sched_switch);
goto out_err;
@@ -401,7 +401,7 @@ int test__switch_tracking(void)
perf_evsel__set_sample_bit(cycles_evsel, TIME);
/* Fourth event */
- err = parse_events(evlist, "dummy:u");
+ err = parse_events(evlist, "dummy:u", NULL);
if (err) {
pr_debug("Failed to parse event dummy:u\n");
goto out_err;
@@ -560,8 +560,8 @@ out:
perf_evlist__disable(evlist);
perf_evlist__delete(evlist);
} else {
- cpu_map__delete(cpus);
- thread_map__delete(threads);
+ cpu_map__put(cpus);
+ thread_map__put(threads);
}
return err;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 52758a3..bf113a2 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -9,6 +9,15 @@ do { \
} \
} while (0)
+#define TEST_ASSERT_EQUAL(text, val, expected) \
+do { \
+ if (val != expected) { \
+ pr_debug("FAILED %s:%d %s (%d != %d)\n", \
+ __FILE__, __LINE__, text, val, expected); \
+ return -1; \
+ } \
+} while (0)
+
enum {
TEST_OK = 0,
TEST_FAIL = -1,
@@ -17,14 +26,14 @@ enum {
/* Tests */
int test__vmlinux_matches_kallsyms(void);
-int test__open_syscall_event(void);
-int test__open_syscall_event_on_all_cpus(void);
+int test__openat_syscall_event(void);
+int test__openat_syscall_event_on_all_cpus(void);
int test__basic_mmap(void);
int test__PERF_RECORD(void);
int test__rdpmc(void);
int test__perf_evsel__roundtrip_name_test(void);
int test__perf_evsel__tp_sched_test(void);
-int test__syscall_open_tp_fields(void);
+int test__syscall_openat_tp_fields(void);
int test__pmu(void);
int test__attr(void);
int test__dso_data(void);
@@ -52,8 +61,10 @@ int test__switch_tracking(void);
int test__fdarray__filter(void);
int test__fdarray__add(void);
int test__kmod_path__parse(void);
+int test__thread_map(void);
+int test__llvm(void);
-#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
struct perf_sample;
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
new file mode 100644
index 0000000..138a0e3
--- /dev/null
+++ b/tools/perf/tests/thread-map.c
@@ -0,0 +1,42 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include "tests.h"
+#include "thread_map.h"
+#include "debug.h"
+
+int test__thread_map(void)
+{
+ struct thread_map *map;
+
+ /* test map on current pid */
+ map = thread_map__new_by_pid(getpid());
+ TEST_ASSERT_VAL("failed to alloc map", map);
+
+ thread_map__read_comms(map);
+
+ TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+ TEST_ASSERT_VAL("wrong pid",
+ thread_map__pid(map, 0) == getpid());
+ TEST_ASSERT_VAL("wrong comm",
+ thread_map__comm(map, 0) &&
+ !strcmp(thread_map__comm(map, 0), "perf"));
+ TEST_ASSERT_VAL("wrong refcnt",
+ atomic_read(&map->refcnt) == 1);
+ thread_map__put(map);
+
+ /* test dummy pid */
+ map = thread_map__new_dummy();
+ TEST_ASSERT_VAL("failed to alloc map", map);
+
+ thread_map__read_comms(map);
+
+ TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+ TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == -1);
+ TEST_ASSERT_VAL("wrong comm",
+ thread_map__comm(map, 0) &&
+ !strcmp(thread_map__comm(map, 0), "dummy"));
+ TEST_ASSERT_VAL("wrong refcnt",
+ atomic_read(&map->refcnt) == 1);
+ thread_map__put(map);
+ return 0;
+}
diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index b028499..01fabb1 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -43,7 +43,7 @@ int test__thread_mg_share(void)
leader && t1 && t2 && t3 && other);
mg = leader->mg;
- TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 4);
+ TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 4);
/* test the map groups pointer is shared */
TEST_ASSERT_VAL("map groups don't match", mg == t1->mg);
@@ -58,33 +58,40 @@ int test__thread_mg_share(void)
other_leader = machine__find_thread(machine, 4, 4);
TEST_ASSERT_VAL("failed to find other leader", other_leader);
+ /*
+ * Ok, now that all the rbtree related operations were done,
+ * lets remove all of them from there so that we can do the
+ * refcounting tests.
+ */
+ machine__remove_thread(machine, leader);
+ machine__remove_thread(machine, t1);
+ machine__remove_thread(machine, t2);
+ machine__remove_thread(machine, t3);
+ machine__remove_thread(machine, other);
+ machine__remove_thread(machine, other_leader);
+
other_mg = other->mg;
- TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 2);
+ TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 2);
TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
/* release thread group */
- thread__delete(leader);
- TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3);
+ thread__put(leader);
+ TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 3);
- thread__delete(t1);
- TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2);
+ thread__put(t1);
+ TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 2);
- thread__delete(t2);
- TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1);
+ thread__put(t2);
+ TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 1);
- thread__delete(t3);
+ thread__put(t3);
/* release other group */
- thread__delete(other_leader);
- TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1);
+ thread__put(other_leader);
+ TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 1);
- thread__delete(other);
-
- /*
- * Cannot call machine__delete_threads(machine) now,
- * because we've already released all the threads.
- */
+ thread__put(other);
machines__exit(&machines);
return 0;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 3d90880..b34c5fc 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -23,9 +23,10 @@ int test__vmlinux_matches_kallsyms(void)
int err = -1;
struct rb_node *nd;
struct symbol *sym;
- struct map *kallsyms_map, *vmlinux_map;
+ struct map *kallsyms_map, *vmlinux_map, *map;
struct machine kallsyms, vmlinux;
enum map_type type = MAP__FUNCTION;
+ struct maps *maps = &vmlinux.kmaps.maps[type];
u64 mem_start, mem_end;
/*
@@ -184,8 +185,8 @@ detour:
pr_info("Maps only in vmlinux:\n");
- for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
- struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ struct map *
/*
* If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
* the kernel will have the path for the vmlinux file being used,
@@ -193,22 +194,22 @@ detour:
* both cases.
*/
pair = map_groups__find_by_name(&kallsyms.kmaps, type,
- (pos->dso->kernel ?
- pos->dso->short_name :
- pos->dso->name));
+ (map->dso->kernel ?
+ map->dso->short_name :
+ map->dso->name));
if (pair)
pair->priv = 1;
else
- map__fprintf(pos, stderr);
+ map__fprintf(map, stderr);
}
pr_info("Maps in vmlinux with a different name in kallsyms:\n");
- for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
- struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ struct map *pair;
- mem_start = vmlinux_map->unmap_ip(vmlinux_map, pos->start);
- mem_end = vmlinux_map->unmap_ip(vmlinux_map, pos->end);
+ mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
+ mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
if (pair == NULL || pair->priv)
@@ -217,7 +218,7 @@ detour:
if (pair->start == mem_start) {
pair->priv = 1;
pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
- pos->start, pos->end, pos->pgoff, pos->dso->name);
+ map->start, map->end, map->pgoff, map->dso->name);
if (mem_end != pair->end)
pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64,
pair->start, pair->end, pair->pgoff);
@@ -228,12 +229,11 @@ detour:
pr_info("Maps only in kallsyms:\n");
- for (nd = rb_first(&kallsyms.kmaps.maps[type]);
- nd; nd = rb_next(nd)) {
- struct map *pos = rb_entry(nd, struct map, rb_node);
+ maps = &kallsyms.kmaps.maps[type];
- if (!pos->priv)
- map__fprintf(pos, stderr);
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ if (!map->priv)
+ map__fprintf(map, stderr);
}
out:
machine__exit(&kallsyms);
diff --git a/tools/perf/trace/strace/groups/file b/tools/perf/trace/strace/groups/file
new file mode 100644
index 0000000..62378a8
--- /dev/null
+++ b/tools/perf/trace/strace/groups/file
@@ -0,0 +1,18 @@
+access
+chmod
+creat
+execve
+faccessat
+getcwd
+lstat
+mkdir
+open
+openat
+quotactl
+readlink
+rename
+rmdir
+stat
+statfs
+symlink
+unlink
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 6680fa5..c6c7e51 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -46,6 +46,21 @@ void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
SLsmg_gotorc(browser->y + y, browser->x + x);
}
+void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg,
+ unsigned int width)
+{
+ slsmg_write_nstring(msg, width);
+}
+
+void ui_browser__printf(struct ui_browser *browser __maybe_unused, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ slsmg_vprintf(fmt, args);
+ va_end(args);
+}
+
static struct list_head *
ui_browser__list_head_filter_entries(struct ui_browser *browser,
struct list_head *pos)
@@ -234,7 +249,7 @@ void __ui_browser__show_title(struct ui_browser *browser, const char *title)
{
SLsmg_gotorc(0, 0);
ui_browser__set_color(browser, HE_COLORSET_ROOT);
- slsmg_write_nstring(title, browser->width + 1);
+ ui_browser__write_nstring(browser, title, browser->width + 1);
}
void ui_browser__show_title(struct ui_browser *browser, const char *title)
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 92ae721..f3cef56 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -37,6 +37,9 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser);
void ui_browser__reset_index(struct ui_browser *browser);
void ui_browser__gotorc(struct ui_browser *browser, int y, int x);
+void ui_browser__write_nstring(struct ui_browser *browser, const char *msg,
+ unsigned int width);
+void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...);
void ui_browser__write_graph(struct ui_browser *browser, int graph);
void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
u64 start, u64 end);
@@ -58,8 +61,8 @@ int ui_browser__help_window(struct ui_browser *browser, const char *text);
bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text);
int ui_browser__input_window(const char *title, const char *text, char *input,
const char *exit_msg, int delay_sec);
-struct perf_session_env;
-int tui__header_window(struct perf_session_env *env);
+struct perf_env;
+int tui__header_window(struct perf_env *env);
void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence);
unsigned int ui_browser__argv_refresh(struct ui_browser *browser);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..29739b3 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1,7 +1,6 @@
#include "../../util/util.h"
#include "../browser.h"
#include "../helpline.h"
-#include "../libslang.h"
#include "../ui.h"
#include "../util.h"
#include "../../util/annotate.h"
@@ -11,16 +10,24 @@
#include "../../util/evsel.h"
#include <pthread.h>
+struct disasm_line_samples {
+ double percent;
+ u64 nr;
+};
+
+#define IPC_WIDTH 6
+#define CYCLES_WIDTH 6
+
struct browser_disasm_line {
- struct rb_node rb_node;
- u32 idx;
- int idx_asm;
- int jump_sources;
+ struct rb_node rb_node;
+ u32 idx;
+ int idx_asm;
+ int jump_sources;
/*
* actual length of this array is saved on the nr_events field
* of the struct annotate_browser
*/
- double percent[1];
+ struct disasm_line_samples samples[1];
};
static struct annotate_browser_opt {
@@ -28,7 +35,8 @@ static struct annotate_browser_opt {
use_offset,
jump_arrows,
show_linenr,
- show_nr_jumps;
+ show_nr_jumps,
+ show_total_period;
} annotate_browser__opts = {
.use_offset = true,
.jump_arrows = true,
@@ -47,6 +55,7 @@ struct annotate_browser {
int max_jump_sources;
int nr_jumps;
bool searching_backwards;
+ bool have_cycles;
u8 addr_width;
u8 jumps_width;
u8 target_width;
@@ -90,6 +99,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br
return ui_browser__set_color(&browser->b, color);
}
+static int annotate_browser__pcnt_width(struct annotate_browser *ab)
+{
+ int w = 7 * ab->nr_events;
+
+ if (ab->have_cycles)
+ w += IPC_WIDTH + CYCLES_WIDTH;
+ return w;
+}
+
static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -100,24 +118,46 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
(!current_entry || (browser->use_navkeypressed &&
!browser->navkeypressed)));
int width = browser->width, printed;
- int i, pcnt_width = 7 * ab->nr_events;
+ int i, pcnt_width = annotate_browser__pcnt_width(ab);
double percent_max = 0.0;
char bf[256];
for (i = 0; i < ab->nr_events; i++) {
- if (bdl->percent[i] > percent_max)
- percent_max = bdl->percent[i];
+ if (bdl->samples[i].percent > percent_max)
+ percent_max = bdl->samples[i].percent;
}
if (dl->offset != -1 && percent_max != 0.0) {
- for (i = 0; i < ab->nr_events; i++) {
- ui_browser__set_percent_color(browser, bdl->percent[i],
- current_entry);
- slsmg_printf("%6.2f ", bdl->percent[i]);
+ if (percent_max != 0.0) {
+ for (i = 0; i < ab->nr_events; i++) {
+ ui_browser__set_percent_color(browser,
+ bdl->samples[i].percent,
+ current_entry);
+ if (annotate_browser__opts.show_total_period) {
+ ui_browser__printf(browser, "%6" PRIu64 " ",
+ bdl->samples[i].nr);
+ } else {
+ ui_browser__printf(browser, "%6.2f ",
+ bdl->samples[i].percent);
+ }
+ }
+ } else {
+ ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
}
} else {
ui_browser__set_percent_color(browser, 0, current_entry);
- slsmg_write_nstring(" ", pcnt_width);
+ ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
+ }
+ if (ab->have_cycles) {
+ if (dl->ipc)
+ ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc);
+ else
+ ui_browser__write_nstring(browser, " ", IPC_WIDTH);
+ if (dl->cycles)
+ ui_browser__printf(browser, "%*" PRIu64 " ",
+ CYCLES_WIDTH - 1, dl->cycles);
+ else
+ ui_browser__write_nstring(browser, " ", CYCLES_WIDTH);
}
SLsmg_write_char(' ');
@@ -127,7 +167,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
width += 1;
if (!*dl->line)
- slsmg_write_nstring(" ", width - pcnt_width);
+ ui_browser__write_nstring(browser, " ", width - pcnt_width);
else if (dl->offset == -1) {
if (dl->line_nr && annotate_browser__opts.show_linenr)
printed = scnprintf(bf, sizeof(bf), "%-*d ",
@@ -135,8 +175,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
else
printed = scnprintf(bf, sizeof(bf), "%*s ",
ab->addr_width, " ");
- slsmg_write_nstring(bf, printed);
- slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1);
+ ui_browser__write_nstring(browser, bf, printed);
+ ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width + 1);
} else {
u64 addr = dl->offset;
int color = -1;
@@ -155,7 +195,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
bdl->jump_sources);
prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources,
current_entry);
- slsmg_write_nstring(bf, printed);
+ ui_browser__write_nstring(browser, bf, printed);
ui_browser__set_color(browser, prev);
}
@@ -169,7 +209,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
if (change_color)
color = ui_browser__set_color(browser, HE_COLORSET_ADDR);
- slsmg_write_nstring(bf, printed);
+ ui_browser__write_nstring(browser, bf, printed);
if (change_color)
ui_browser__set_color(browser, color);
if (dl->ins && dl->ins->ops->scnprintf) {
@@ -183,11 +223,11 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
ui_browser__write_graph(browser, SLSMG_RARROW_CHAR);
SLsmg_write_char(' ');
} else {
- slsmg_write_nstring(" ", 2);
+ ui_browser__write_nstring(browser, " ", 2);
}
} else {
if (strcmp(dl->name, "retq")) {
- slsmg_write_nstring(" ", 2);
+ ui_browser__write_nstring(browser, " ", 2);
} else {
ui_browser__write_graph(browser, SLSMG_LARROW_CHAR);
SLsmg_write_char(' ');
@@ -195,7 +235,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
}
disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset);
- slsmg_write_nstring(bf, width - pcnt_width - 3 - printed);
+ ui_browser__write_nstring(browser, bf, width - pcnt_width - 3 - printed);
}
if (current_entry)
@@ -220,7 +260,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
unsigned int from, to;
struct map_symbol *ms = ab->b.priv;
struct symbol *sym = ms->sym;
- u8 pcnt_width = 7;
+ u8 pcnt_width = annotate_browser__pcnt_width(ab);
/* PLT symbols contain external offsets */
if (strstr(sym->name, "@plt"))
@@ -244,8 +284,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
to = (u64)btarget->idx;
}
- pcnt_width *= ab->nr_events;
-
ui_browser__set_color(browser, HE_COLORSET_CODE);
__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
from, to);
@@ -255,9 +293,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
int ret = ui_browser__list_head_refresh(browser);
- int pcnt_width;
-
- pcnt_width = 7 * ab->nr_events;
+ int pcnt_width = annotate_browser__pcnt_width(ab);
if (annotate_browser__opts.jump_arrows)
annotate_browser__draw_current_jump(browser);
@@ -273,9 +309,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
int i;
for (i = 0; i < nr_pcnt; i++) {
- if (a->percent[i] == b->percent[i])
+ if (a->samples[i].percent == b->samples[i].percent)
continue;
- return a->percent[i] < b->percent[i];
+ return a->samples[i].percent < b->samples[i].percent;
}
return 0;
}
@@ -366,17 +402,20 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
next = disasm__get_next_ip_line(&notes->src->source, pos);
for (i = 0; i < browser->nr_events; i++) {
- bpos->percent[i] = disasm__calc_percent(notes,
+ u64 nr_samples;
+
+ bpos->samples[i].percent = disasm__calc_percent(notes,
evsel->idx + i,
pos->offset,
next ? next->offset : len,
- &path);
+ &path, &nr_samples);
+ bpos->samples[i].nr = nr_samples;
- if (max_percent < bpos->percent[i])
- max_percent = bpos->percent[i];
+ if (max_percent < bpos->samples[i].percent)
+ max_percent = bpos->samples[i].percent;
}
- if (max_percent < 0.01) {
+ if (max_percent < 0.01 && pos->ipc == 0) {
RB_CLEAR_NODE(&bpos->rb_node);
continue;
}
@@ -737,6 +776,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
"n Search next string\n"
"o Toggle disassembler output/simplified view\n"
"s Toggle source code view\n"
+ "t Toggle total period view\n"
"/ Search string\n"
"k Toggle line numbers\n"
"r Run available scripts\n"
@@ -812,6 +852,11 @@ show_sup_ins:
ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions.");
}
continue;
+ case 't':
+ annotate_browser__opts.show_total_period =
+ !annotate_browser__opts.show_total_period;
+ annotate_browser__update_addr_width(browser);
+ continue;
case K_LEFT:
case K_ESC:
case 'q':
@@ -832,15 +877,92 @@ out:
int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
struct hist_browser_timer *hbt)
{
+ /* Set default value for show_total_period. */
+ annotate_browser__opts.show_total_period =
+ symbol_conf.show_total_period;
+
return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt);
}
int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
struct hist_browser_timer *hbt)
{
+ /* reset abort key so that it can get Ctrl-C as a key */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
return map_symbol__tui_annotate(&he->ms, evsel, hbt);
}
+
+static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end)
+{
+ unsigned n_insn = 0;
+ u64 offset;
+
+ for (offset = start; offset <= end; offset++) {
+ if (browser->offsets[offset])
+ n_insn++;
+ }
+ return n_insn;
+}
+
+static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end,
+ struct cyc_hist *ch)
+{
+ unsigned n_insn;
+ u64 offset;
+
+ n_insn = count_insn(browser, start, end);
+ if (n_insn && ch->num && ch->cycles) {
+ float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
+
+ /* Hide data when there are too many overlaps. */
+ if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
+ return;
+
+ for (offset = start; offset <= end; offset++) {
+ struct disasm_line *dl = browser->offsets[offset];
+
+ if (dl)
+ dl->ipc = ipc;
+ }
+ }
+}
+
+/*
+ * This should probably be in util/annotate.c to share with the tty
+ * annotate, but right now we need the per byte offsets arrays,
+ * which are only here.
+ */
+static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
+ struct symbol *sym)
+{
+ u64 offset;
+ struct annotation *notes = symbol__annotation(sym);
+
+ if (!notes->src || !notes->src->cycles_hist)
+ return;
+
+ pthread_mutex_lock(&notes->lock);
+ for (offset = 0; offset < size; ++offset) {
+ struct cyc_hist *ch;
+
+ ch = &notes->src->cycles_hist[offset];
+ if (ch && ch->cycles) {
+ struct disasm_line *dl;
+
+ if (ch->have_start)
+ count_and_fill(browser, ch->start, offset, ch);
+ dl = browser->offsets[offset];
+ if (dl && ch->num_aggr)
+ dl->cycles = ch->cycles_aggr / ch->num_aggr;
+ browser->have_cycles = true;
+ }
+ }
+ pthread_mutex_unlock(&notes->lock);
+}
+
static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
size_t size)
{
@@ -925,7 +1047,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
if (perf_evsel__is_group_event(evsel)) {
nr_pcnt = evsel->nr_members;
- sizeof_bdl += sizeof(double) * (nr_pcnt - 1);
+ sizeof_bdl += sizeof(struct disasm_line_samples) *
+ (nr_pcnt - 1);
}
if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
@@ -962,6 +1085,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
}
annotate_browser__mark_jump_targets(&browser, size);
+ annotate__compute_ipc(&browser, size, sym);
browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
browser.max_addr_width = hex_width(sym->end);
@@ -1002,6 +1126,7 @@ static struct annotate_config {
ANNOTATE_CFG(show_linenr),
ANNOTATE_CFG(show_nr_jumps),
ANNOTATE_CFG(use_offset),
+ ANNOTATE_CFG(show_total_period),
};
#undef ANNOTATE_CFG
diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c
index e8278c5..edbeaaf 100644
--- a/tools/perf/ui/browsers/header.c
+++ b/tools/perf/ui/browsers/header.c
@@ -25,7 +25,7 @@ static void ui_browser__argv_write(struct ui_browser *browser,
ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
HE_COLORSET_NORMAL);
- slsmg_write_nstring(str, browser->width);
+ ui_browser__write_nstring(browser, str, browser->width);
}
static int list_menu__run(struct ui_browser *menu)
@@ -91,7 +91,7 @@ static int ui__list_menu(int argc, char * const argv[])
return list_menu__run(&menu);
}
-int tui__header_window(struct perf_session_env *env)
+int tui__header_window(struct perf_env *env)
{
int i, argc = 0;
char **argv;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 995b7a8..cf86f2d 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1,5 +1,4 @@
#include <stdio.h>
-#include "../libslang.h"
#include <stdlib.h>
#include <string.h>
#include <linux/rbtree.h>
@@ -25,6 +24,9 @@ struct hist_browser {
struct hists *hists;
struct hist_entry *he_selection;
struct map_symbol *selection;
+ struct hist_browser_timer *hbt;
+ struct pstack *pstack;
+ struct perf_env *env;
int print_seq;
bool show_dso;
bool show_headers;
@@ -45,7 +47,7 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
static bool hist_browser__has_filter(struct hist_browser *hb)
{
- return hists__has_filter(hb->hists) || hb->min_pcnt;
+ return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter;
}
static int hist_browser__get_folding(struct hist_browser *browser)
@@ -60,7 +62,7 @@ static int hist_browser__get_folding(struct hist_browser *browser)
struct hist_entry *he =
rb_entry(nd, struct hist_entry, rb_node);
- if (he->ms.unfolded)
+ if (he->unfolded)
unfolded_rows += he->nr_rows;
}
return unfolded_rows;
@@ -136,24 +138,19 @@ static char tree__folded_sign(bool unfolded)
return unfolded ? '-' : '+';
}
-static char map_symbol__folded(const struct map_symbol *ms)
-{
- return ms->has_children ? tree__folded_sign(ms->unfolded) : ' ';
-}
-
static char hist_entry__folded(const struct hist_entry *he)
{
- return map_symbol__folded(&he->ms);
+ return he->has_children ? tree__folded_sign(he->unfolded) : ' ';
}
static char callchain_list__folded(const struct callchain_list *cl)
{
- return map_symbol__folded(&cl->ms);
+ return cl->has_children ? tree__folded_sign(cl->unfolded) : ' ';
}
-static void map_symbol__set_folding(struct map_symbol *ms, bool unfold)
+static void callchain_list__set_folding(struct callchain_list *cl, bool unfold)
{
- ms->unfolded = unfold ? ms->has_children : false;
+ cl->unfolded = unfold ? cl->has_children : false;
}
static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
@@ -189,7 +186,7 @@ static int callchain_node__count_rows(struct callchain_node *node)
list_for_each_entry(chain, &node->val, list) {
++n;
- unfolded = chain->ms.unfolded;
+ unfolded = chain->unfolded;
}
if (unfolded)
@@ -211,15 +208,27 @@ static int callchain__count_rows(struct rb_root *chain)
return n;
}
-static bool map_symbol__toggle_fold(struct map_symbol *ms)
+static bool hist_entry__toggle_fold(struct hist_entry *he)
+{
+ if (!he)
+ return false;
+
+ if (!he->has_children)
+ return false;
+
+ he->unfolded = !he->unfolded;
+ return true;
+}
+
+static bool callchain_list__toggle_fold(struct callchain_list *cl)
{
- if (!ms)
+ if (!cl)
return false;
- if (!ms->has_children)
+ if (!cl->has_children)
return false;
- ms->unfolded = !ms->unfolded;
+ cl->unfolded = !cl->unfolded;
return true;
}
@@ -235,10 +244,10 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *no
list_for_each_entry(chain, &child->val, list) {
if (first) {
first = false;
- chain->ms.has_children = chain->list.next != &child->val ||
+ chain->has_children = chain->list.next != &child->val ||
!RB_EMPTY_ROOT(&child->rb_root);
} else
- chain->ms.has_children = chain->list.next == &child->val &&
+ chain->has_children = chain->list.next == &child->val &&
!RB_EMPTY_ROOT(&child->rb_root);
}
@@ -252,11 +261,11 @@ static void callchain_node__init_have_children(struct callchain_node *node,
struct callchain_list *chain;
chain = list_entry(node->val.next, struct callchain_list, list);
- chain->ms.has_children = has_sibling;
+ chain->has_children = has_sibling;
if (!list_empty(&node->val)) {
chain = list_entry(node->val.prev, struct callchain_list, list);
- chain->ms.has_children = !RB_EMPTY_ROOT(&node->rb_root);
+ chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
}
callchain_node__init_have_children_rb_tree(node);
@@ -276,7 +285,7 @@ static void callchain__init_have_children(struct rb_root *root)
static void hist_entry__init_have_children(struct hist_entry *he)
{
if (!he->init_have_children) {
- he->ms.has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
+ he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
callchain__init_have_children(&he->sorted_chain);
he->init_have_children = true;
}
@@ -284,14 +293,22 @@ static void hist_entry__init_have_children(struct hist_entry *he)
static bool hist_browser__toggle_fold(struct hist_browser *browser)
{
- if (map_symbol__toggle_fold(browser->selection)) {
- struct hist_entry *he = browser->he_selection;
+ struct hist_entry *he = browser->he_selection;
+ struct map_symbol *ms = browser->selection;
+ struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
+ bool has_children;
+ if (ms == &he->ms)
+ has_children = hist_entry__toggle_fold(he);
+ else
+ has_children = callchain_list__toggle_fold(cl);
+
+ if (has_children) {
hist_entry__init_have_children(he);
browser->b.nr_entries -= he->nr_rows;
browser->nr_callchain_rows -= he->nr_rows;
- if (he->ms.unfolded)
+ if (he->unfolded)
he->nr_rows = callchain__count_rows(&he->sorted_chain);
else
he->nr_rows = 0;
@@ -318,8 +335,8 @@ static int callchain_node__set_folding_rb_tree(struct callchain_node *node, bool
list_for_each_entry(chain, &child->val, list) {
++n;
- map_symbol__set_folding(&chain->ms, unfold);
- has_children = chain->ms.has_children;
+ callchain_list__set_folding(chain, unfold);
+ has_children = chain->has_children;
}
if (has_children)
@@ -337,8 +354,8 @@ static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
list_for_each_entry(chain, &node->val, list) {
++n;
- map_symbol__set_folding(&chain->ms, unfold);
- has_children = chain->ms.has_children;
+ callchain_list__set_folding(chain, unfold);
+ has_children = chain->has_children;
}
if (has_children)
@@ -363,9 +380,9 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold)
static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
{
hist_entry__init_have_children(he);
- map_symbol__set_folding(&he->ms, unfold);
+ he->unfolded = unfold ? he->has_children : false;
- if (he->ms.has_children) {
+ if (he->has_children) {
int n = callchain__set_folding(&he->sorted_chain, unfold);
he->nr_rows = unfold ? n : 0;
} else
@@ -406,11 +423,11 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser)
"Or reduce the sampling frequency.");
}
-static int hist_browser__run(struct hist_browser *browser,
- struct hist_browser_timer *hbt)
+static int hist_browser__run(struct hist_browser *browser, const char *help)
{
int key;
char title[160];
+ struct hist_browser_timer *hbt = browser->hbt;
int delay_secs = hbt ? hbt->refresh : 0;
browser->b.entries = &browser->hists->entries;
@@ -418,8 +435,7 @@ static int hist_browser__run(struct hist_browser *browser,
hists__browser_title(browser->hists, hbt, title, sizeof(title));
- if (ui_browser__show(&browser->b, title,
- "Press '?' for help on key bindings") < 0)
+ if (ui_browser__show(&browser->b, title, help) < 0)
return -1;
while (1) {
@@ -523,10 +539,10 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser,
ui_browser__set_color(&browser->b, color);
hist_browser__gotorc(browser, row, 0);
- slsmg_write_nstring(" ", offset);
- slsmg_printf("%c", folded_sign);
+ ui_browser__write_nstring(&browser->b, " ", offset);
+ ui_browser__printf(&browser->b, "%c", folded_sign);
ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' ');
- slsmg_write_nstring(str, width);
+ ui_browser__write_nstring(&browser->b, str, width);
}
static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused,
@@ -663,7 +679,7 @@ static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
ui_browser__set_percent_color(arg->b, percent, arg->current_entry);
ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent);
- slsmg_printf("%s", hpp->buf);
+ ui_browser__printf(arg->b, "%s", hpp->buf);
advance_hpp(hpp, ret);
return ret;
@@ -696,10 +712,11 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt, \
struct hist_entry *he) \
{ \
if (!symbol_conf.cumulate_callchain) { \
+ struct hpp_arg *arg = hpp->ptr; \
int len = fmt->user_len ?: fmt->len; \
int ret = scnprintf(hpp->buf, hpp->size, \
"%*s", len, "N/A"); \
- slsmg_printf("%s", hpp->buf); \
+ ui_browser__printf(arg->b, "%s", hpp->buf); \
\
return ret; \
} \
@@ -784,12 +801,12 @@ static int hist_browser__show_entry(struct hist_browser *browser,
if (first) {
if (symbol_conf.use_callchain) {
- slsmg_printf("%c ", folded_sign);
+ ui_browser__printf(&browser->b, "%c ", folded_sign);
width -= 2;
}
first = false;
} else {
- slsmg_printf(" ");
+ ui_browser__printf(&browser->b, " ");
width -= 2;
}
@@ -797,7 +814,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
width -= fmt->color(fmt, &hpp, entry);
} else {
width -= fmt->entry(fmt, &hpp, entry);
- slsmg_printf("%s", s);
+ ui_browser__printf(&browser->b, "%s", s);
}
}
@@ -805,7 +822,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
if (!browser->b.navkeypressed)
width += 1;
- slsmg_write_nstring("", width);
+ ui_browser__write_nstring(&browser->b, "", width);
++row;
++printed;
@@ -882,7 +899,7 @@ static void hist_browser__show_headers(struct hist_browser *browser)
hists__scnprintf_headers(headers, sizeof(headers), browser->hists);
ui_browser__gotorc(&browser->b, 0, 0);
ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
- slsmg_write_nstring(headers, browser->b.width + 1);
+ ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
}
static void ui_browser__hists_init_top(struct ui_browser *browser)
@@ -1016,7 +1033,7 @@ do_offset:
if (offset > 0) {
do {
h = rb_entry(nd, struct hist_entry, rb_node);
- if (h->ms.unfolded) {
+ if (h->unfolded) {
u16 remaining = h->nr_rows - h->row_offset;
if (offset > remaining) {
offset -= remaining;
@@ -1037,7 +1054,7 @@ do_offset:
} else if (offset < 0) {
while (1) {
h = rb_entry(nd, struct hist_entry, rb_node);
- if (h->ms.unfolded) {
+ if (h->unfolded) {
if (first) {
if (-offset > h->row_offset) {
offset += h->row_offset;
@@ -1074,7 +1091,7 @@ do_offset:
* row_offset at its last entry.
*/
h = rb_entry(nd, struct hist_entry, rb_node);
- if (h->ms.unfolded)
+ if (h->unfolded)
h->row_offset = h->nr_rows;
break;
}
@@ -1195,7 +1212,9 @@ static int hist_browser__dump(struct hist_browser *browser)
return 0;
}
-static struct hist_browser *hist_browser__new(struct hists *hists)
+static struct hist_browser *hist_browser__new(struct hists *hists,
+ struct hist_browser_timer *hbt,
+ struct perf_env *env)
{
struct hist_browser *browser = zalloc(sizeof(*browser));
@@ -1206,6 +1225,8 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
browser->b.seek = ui_browser__hists_seek;
browser->b.use_navkeypressed = true;
browser->show_headers = symbol_conf.show_hist_headers;
+ browser->hbt = hbt;
+ browser->env = env;
}
return browser;
@@ -1246,6 +1267,8 @@ static int hists__browser_title(struct hists *hists,
const char *ev_name = perf_evsel__name(evsel);
char buf[512];
size_t buflen = sizeof(buf);
+ char ref[30] = " show reference callgraph, ";
+ bool enable_ref = false;
if (symbol_conf.filter_relative) {
nr_samples = hists->stats.nr_non_filtered_samples;
@@ -1271,10 +1294,13 @@ static int hists__browser_title(struct hists *hists,
}
}
+ if (symbol_conf.show_ref_callgraph &&
+ strstr(ev_name, "call-graph=no"))
+ enable_ref = true;
nr_samples = convert_unit(nr_samples, &unit);
printed = scnprintf(bf, size,
- "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
- nr_samples, unit, ev_name, nr_events);
+ "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64,
+ nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events);
if (hists->uid_filter_str)
@@ -1395,6 +1421,257 @@ close_file_and_continue:
return ret;
}
+struct popup_action {
+ struct thread *thread;
+ struct dso *dso;
+ struct map_symbol ms;
+
+ int (*fn)(struct hist_browser *browser, struct popup_action *act);
+};
+
+static int
+do_annotate(struct hist_browser *browser, struct popup_action *act)
+{
+ struct perf_evsel *evsel;
+ struct annotation *notes;
+ struct hist_entry *he;
+ int err;
+
+ if (!objdump_path && perf_session_env__lookup_objdump(browser->env))
+ return 0;
+
+ notes = symbol__annotation(act->ms.sym);
+ if (!notes->src)
+ return 0;
+
+ evsel = hists_to_evsel(browser->hists);
+ err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
+ he = hist_browser__selected_entry(browser);
+ /*
+ * offer option to annotate the other branch source or target
+ * (if they exists) when returning from annotate
+ */
+ if ((err == 'q' || err == CTRL('c')) && he->branch_info)
+ return 1;
+
+ ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
+ if (err)
+ ui_browser__handle_resize(&browser->b);
+ return 0;
+}
+
+static int
+add_annotate_opt(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act, char **optstr,
+ struct map *map, struct symbol *sym)
+{
+ if (sym == NULL || map->dso->annotate_warned)
+ return 0;
+
+ if (asprintf(optstr, "Annotate %s", sym->name) < 0)
+ return 0;
+
+ act->ms.map = map;
+ act->ms.sym = sym;
+ act->fn = do_annotate;
+ return 1;
+}
+
+static int
+do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
+{
+ struct thread *thread = act->thread;
+
+ if (browser->hists->thread_filter) {
+ pstack__remove(browser->pstack, &browser->hists->thread_filter);
+ perf_hpp__set_elide(HISTC_THREAD, false);
+ thread__zput(browser->hists->thread_filter);
+ ui_helpline__pop();
+ } else {
+ ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
+ thread->comm_set ? thread__comm_str(thread) : "",
+ thread->tid);
+ browser->hists->thread_filter = thread__get(thread);
+ perf_hpp__set_elide(HISTC_THREAD, false);
+ pstack__push(browser->pstack, &browser->hists->thread_filter);
+ }
+
+ hists__filter_by_thread(browser->hists);
+ hist_browser__reset(browser);
+ return 0;
+}
+
+static int
+add_thread_opt(struct hist_browser *browser, struct popup_action *act,
+ char **optstr, struct thread *thread)
+{
+ if (thread == NULL)
+ return 0;
+
+ if (asprintf(optstr, "Zoom %s %s(%d) thread",
+ browser->hists->thread_filter ? "out of" : "into",
+ thread->comm_set ? thread__comm_str(thread) : "",
+ thread->tid) < 0)
+ return 0;
+
+ act->thread = thread;
+ act->fn = do_zoom_thread;
+ return 1;
+}
+
+static int
+do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
+{
+ struct dso *dso = act->dso;
+
+ if (browser->hists->dso_filter) {
+ pstack__remove(browser->pstack, &browser->hists->dso_filter);
+ perf_hpp__set_elide(HISTC_DSO, false);
+ browser->hists->dso_filter = NULL;
+ ui_helpline__pop();
+ } else {
+ if (dso == NULL)
+ return 0;
+ ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
+ dso->kernel ? "the Kernel" : dso->short_name);
+ browser->hists->dso_filter = dso;
+ perf_hpp__set_elide(HISTC_DSO, true);
+ pstack__push(browser->pstack, &browser->hists->dso_filter);
+ }
+
+ hists__filter_by_dso(browser->hists);
+ hist_browser__reset(browser);
+ return 0;
+}
+
+static int
+add_dso_opt(struct hist_browser *browser, struct popup_action *act,
+ char **optstr, struct dso *dso)
+{
+ if (dso == NULL)
+ return 0;
+
+ if (asprintf(optstr, "Zoom %s %s DSO",
+ browser->hists->dso_filter ? "out of" : "into",
+ dso->kernel ? "the Kernel" : dso->short_name) < 0)
+ return 0;
+
+ act->dso = dso;
+ act->fn = do_zoom_dso;
+ return 1;
+}
+
+static int
+do_browse_map(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act)
+{
+ map__browse(act->ms.map);
+ return 0;
+}
+
+static int
+add_map_opt(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act, char **optstr, struct map *map)
+{
+ if (map == NULL)
+ return 0;
+
+ if (asprintf(optstr, "Browse map details") < 0)
+ return 0;
+
+ act->ms.map = map;
+ act->fn = do_browse_map;
+ return 1;
+}
+
+static int
+do_run_script(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act)
+{
+ char script_opt[64];
+ memset(script_opt, 0, sizeof(script_opt));
+
+ if (act->thread) {
+ scnprintf(script_opt, sizeof(script_opt), " -c %s ",
+ thread__comm_str(act->thread));
+ } else if (act->ms.sym) {
+ scnprintf(script_opt, sizeof(script_opt), " -S %s ",
+ act->ms.sym->name);
+ }
+
+ script_browse(script_opt);
+ return 0;
+}
+
+static int
+add_script_opt(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act, char **optstr,
+ struct thread *thread, struct symbol *sym)
+{
+ if (thread) {
+ if (asprintf(optstr, "Run scripts for samples of thread [%s]",
+ thread__comm_str(thread)) < 0)
+ return 0;
+ } else if (sym) {
+ if (asprintf(optstr, "Run scripts for samples of symbol [%s]",
+ sym->name) < 0)
+ return 0;
+ } else {
+ if (asprintf(optstr, "Run scripts for all samples") < 0)
+ return 0;
+ }
+
+ act->thread = thread;
+ act->ms.sym = sym;
+ act->fn = do_run_script;
+ return 1;
+}
+
+static int
+do_switch_data(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act __maybe_unused)
+{
+ if (switch_data_file()) {
+ ui__warning("Won't switch the data files due to\n"
+ "no valid data file get selected!\n");
+ return 0;
+ }
+
+ return K_SWITCH_INPUT_DATA;
+}
+
+static int
+add_switch_opt(struct hist_browser *browser,
+ struct popup_action *act, char **optstr)
+{
+ if (!is_report_browser(browser->hbt))
+ return 0;
+
+ if (asprintf(optstr, "Switch to another data file in PWD") < 0)
+ return 0;
+
+ act->fn = do_switch_data;
+ return 1;
+}
+
+static int
+do_exit_browser(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act __maybe_unused)
+{
+ return 0;
+}
+
+static int
+add_exit_opt(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act, char **optstr)
+{
+ if (asprintf(optstr, "Exit") < 0)
+ return 0;
+
+ act->fn = do_exit_browser;
+ return 1;
+}
+
static void hist_browser__update_nr_entries(struct hist_browser *hb)
{
u64 nr_entries = 0;
@@ -1418,17 +1695,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
bool left_exits,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_session_env *env)
+ struct perf_env *env)
{
struct hists *hists = evsel__hists(evsel);
- struct hist_browser *browser = hist_browser__new(hists);
+ struct hist_browser *browser = hist_browser__new(hists, hbt, env);
struct branch_info *bi;
- struct pstack *fstack;
- char *options[16];
+#define MAX_OPTIONS 16
+ char *options[MAX_OPTIONS];
+ struct popup_action actions[MAX_OPTIONS];
int nr_options = 0;
int key = -1;
char buf[64];
- char script_opt[64];
int delay_secs = hbt ? hbt->refresh : 0;
struct perf_hpp_fmt *fmt;
@@ -1463,23 +1740,29 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
"t Zoom into current Thread\n"
"V Verbose (DSO names in callchains, etc)\n"
"z Toggle zeroing of samples\n"
+ "f Enable/Disable events\n"
"/ Filter symbol by name";
if (browser == NULL)
return -1;
+ /* reset abort key so that it can get Ctrl-C as a key */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
if (min_pcnt) {
browser->min_pcnt = min_pcnt;
hist_browser__update_nr_entries(browser);
}
- fstack = pstack__new(2);
- if (fstack == NULL)
+ browser->pstack = pstack__new(2);
+ if (browser->pstack == NULL)
goto out;
ui_helpline__push(helpline);
memset(options, 0, sizeof(options));
+ memset(actions, 0, sizeof(actions));
perf_hpp__for_each_format(fmt)
perf_hpp__reset_width(fmt, hists);
@@ -1489,16 +1772,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
while (1) {
struct thread *thread = NULL;
- const struct dso *dso = NULL;
- int choice = 0,
- annotate = -2, zoom_dso = -2, zoom_thread = -2,
- annotate_f = -2, annotate_t = -2, browse_map = -2;
- int scripts_comm = -2, scripts_symbol = -2,
- scripts_all = -2, switch_data = -2;
+ struct dso *dso = NULL;
+ int choice = 0;
nr_options = 0;
- key = hist_browser__run(browser, hbt);
+ key = hist_browser__run(browser, helpline);
if (browser->he_selection != NULL) {
thread = hist_browser__selected_thread(browser);
@@ -1526,17 +1805,25 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
browser->selection->sym == NULL ||
browser->selection->map->dso->annotate_warned)
continue;
- goto do_annotate;
+
+ actions->ms.map = browser->selection->map;
+ actions->ms.sym = browser->selection->sym;
+ do_annotate(browser, actions);
+ continue;
case 'P':
hist_browser__dump(browser);
continue;
case 'd':
- goto zoom_dso;
+ actions->dso = dso;
+ do_zoom_dso(browser, actions);
+ continue;
case 'V':
browser->show_dso = !browser->show_dso;
continue;
case 't':
- goto zoom_thread;
+ actions->thread = thread;
+ do_zoom_thread(browser, actions);
+ continue;
case '/':
if (ui_browser__input_window("Symbol to show",
"Please enter the name of symbol you want to see",
@@ -1548,12 +1835,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
}
continue;
case 'r':
- if (is_report_browser(hbt))
- goto do_scripts;
+ if (is_report_browser(hbt)) {
+ actions->thread = NULL;
+ actions->ms.sym = NULL;
+ do_run_script(browser, actions);
+ }
continue;
case 's':
- if (is_report_browser(hbt))
- goto do_data_switch;
+ if (is_report_browser(hbt)) {
+ key = do_switch_data(browser, actions);
+ if (key == K_SWITCH_INPUT_DATA)
+ goto out_free_stack;
+ }
continue;
case 'i':
/* env->arch is NULL for live-mode (i.e. perf top) */
@@ -1580,34 +1873,61 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
case K_RIGHT:
/* menu */
break;
+ case K_ESC:
case K_LEFT: {
const void *top;
- if (pstack__empty(fstack)) {
+ if (pstack__empty(browser->pstack)) {
/*
* Go back to the perf_evsel_menu__run or other user
*/
if (left_exits)
goto out_free_stack;
+
+ if (key == K_ESC &&
+ ui_browser__dialog_yesno(&browser->b,
+ "Do you really want to exit?"))
+ goto out_free_stack;
+
continue;
}
- top = pstack__pop(fstack);
- if (top == &browser->hists->dso_filter)
- goto zoom_out_dso;
+ top = pstack__peek(browser->pstack);
+ if (top == &browser->hists->dso_filter) {
+ /*
+ * No need to set actions->dso here since
+ * it's just to remove the current filter.
+ * Ditto for thread below.
+ */
+ do_zoom_dso(browser, actions);
+ }
if (top == &browser->hists->thread_filter)
- goto zoom_out_thread;
+ do_zoom_thread(browser, actions);
continue;
}
- case K_ESC:
- if (!left_exits &&
- !ui_browser__dialog_yesno(&browser->b,
- "Do you really want to exit?"))
- continue;
- /* Fall thru */
case 'q':
case CTRL('c'):
goto out_free_stack;
+ case 'f':
+ if (!is_report_browser(hbt)) {
+ struct perf_top *top = hbt->arg;
+
+ perf_evlist__toggle_enable(top->evlist);
+ /*
+ * No need to refresh, resort/decay histogram
+ * entries if we are not collecting samples:
+ */
+ if (top->evlist->enabled) {
+ helpline = "Press 'f' to disable the events or 'h' to see other hotkeys";
+ hbt->refresh = delay_secs;
+ } else {
+ helpline = "Press 'f' again to re-enable the events";
+ hbt->refresh = 0;
+ }
+ continue;
+ }
+ /* Fall thru */
default:
+ helpline = "Press '?' for help on key bindings";
continue;
}
@@ -1623,196 +1943,71 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (bi == NULL)
goto skip_annotation;
- if (bi->from.sym != NULL &&
- !bi->from.map->dso->annotate_warned &&
- asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) {
- annotate_f = nr_options++;
- }
-
- if (bi->to.sym != NULL &&
- !bi->to.map->dso->annotate_warned &&
- (bi->to.sym != bi->from.sym ||
- bi->to.map->dso != bi->from.map->dso) &&
- asprintf(&options[nr_options], "Annotate %s", bi->to.sym->name) > 0) {
- annotate_t = nr_options++;
- }
+ nr_options += add_annotate_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ bi->from.map,
+ bi->from.sym);
+ if (bi->to.sym != bi->from.sym)
+ nr_options += add_annotate_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ bi->to.map,
+ bi->to.sym);
} else {
- if (browser->selection->sym != NULL &&
- !browser->selection->map->dso->annotate_warned) {
- struct annotation *notes;
-
- notes = symbol__annotation(browser->selection->sym);
-
- if (notes->src &&
- asprintf(&options[nr_options], "Annotate %s",
- browser->selection->sym->name) > 0) {
- annotate = nr_options++;
- }
- }
+ nr_options += add_annotate_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ browser->selection->map,
+ browser->selection->sym);
}
skip_annotation:
- if (thread != NULL &&
- asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
- (browser->hists->thread_filter ? "out of" : "into"),
- (thread->comm_set ? thread__comm_str(thread) : ""),
- thread->tid) > 0)
- zoom_thread = nr_options++;
-
- if (dso != NULL &&
- asprintf(&options[nr_options], "Zoom %s %s DSO",
- (browser->hists->dso_filter ? "out of" : "into"),
- (dso->kernel ? "the Kernel" : dso->short_name)) > 0)
- zoom_dso = nr_options++;
-
- if (browser->selection != NULL &&
- browser->selection->map != NULL &&
- asprintf(&options[nr_options], "Browse map details") > 0)
- browse_map = nr_options++;
+ nr_options += add_thread_opt(browser, &actions[nr_options],
+ &options[nr_options], thread);
+ nr_options += add_dso_opt(browser, &actions[nr_options],
+ &options[nr_options], dso);
+ nr_options += add_map_opt(browser, &actions[nr_options],
+ &options[nr_options],
+ browser->selection->map);
/* perf script support */
if (browser->he_selection) {
- struct symbol *sym;
-
- if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]",
- thread__comm_str(browser->he_selection->thread)) > 0)
- scripts_comm = nr_options++;
-
- sym = browser->he_selection->ms.sym;
- if (sym && sym->namelen &&
- asprintf(&options[nr_options], "Run scripts for samples of symbol [%s]",
- sym->name) > 0)
- scripts_symbol = nr_options++;
+ nr_options += add_script_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ thread, NULL);
+ nr_options += add_script_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ NULL, browser->selection->sym);
}
-
- if (asprintf(&options[nr_options], "Run scripts for all samples") > 0)
- scripts_all = nr_options++;
-
- if (is_report_browser(hbt) && asprintf(&options[nr_options],
- "Switch to another data file in PWD") > 0)
- switch_data = nr_options++;
+ nr_options += add_script_opt(browser, &actions[nr_options],
+ &options[nr_options], NULL, NULL);
+ nr_options += add_switch_opt(browser, &actions[nr_options],
+ &options[nr_options]);
add_exit_option:
- options[nr_options++] = (char *)"Exit";
-retry_popup_menu:
- choice = ui__popup_menu(nr_options, options);
-
- if (choice == nr_options - 1)
- break;
-
- if (choice == -1) {
- free_popup_options(options, nr_options - 1);
- continue;
- }
-
- if (choice == annotate || choice == annotate_t || choice == annotate_f) {
- struct hist_entry *he;
- struct annotation *notes;
- struct map_symbol ms;
- int err;
-do_annotate:
- if (!objdump_path && perf_session_env__lookup_objdump(env))
- continue;
-
- he = hist_browser__selected_entry(browser);
- if (he == NULL)
- continue;
+ nr_options += add_exit_opt(browser, &actions[nr_options],
+ &options[nr_options]);
- if (choice == annotate_f) {
- ms.map = he->branch_info->from.map;
- ms.sym = he->branch_info->from.sym;
- } else if (choice == annotate_t) {
- ms.map = he->branch_info->to.map;
- ms.sym = he->branch_info->to.sym;
- } else {
- ms = *browser->selection;
- }
-
- notes = symbol__annotation(ms.sym);
- if (!notes->src)
- continue;
-
- err = map_symbol__tui_annotate(&ms, evsel, hbt);
- /*
- * offer option to annotate the other branch source or target
- * (if they exists) when returning from annotate
- */
- if ((err == 'q' || err == CTRL('c'))
- && annotate_t != -2 && annotate_f != -2)
- goto retry_popup_menu;
-
- ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
- if (err)
- ui_browser__handle_resize(&browser->b);
-
- } else if (choice == browse_map)
- map__browse(browser->selection->map);
- else if (choice == zoom_dso) {
-zoom_dso:
- if (browser->hists->dso_filter) {
- pstack__remove(fstack, &browser->hists->dso_filter);
-zoom_out_dso:
- ui_helpline__pop();
- browser->hists->dso_filter = NULL;
- perf_hpp__set_elide(HISTC_DSO, false);
- } else {
- if (dso == NULL)
- continue;
- ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
- dso->kernel ? "the Kernel" : dso->short_name);
- browser->hists->dso_filter = dso;
- perf_hpp__set_elide(HISTC_DSO, true);
- pstack__push(fstack, &browser->hists->dso_filter);
- }
- hists__filter_by_dso(hists);
- hist_browser__reset(browser);
- } else if (choice == zoom_thread) {
-zoom_thread:
- if (browser->hists->thread_filter) {
- pstack__remove(fstack, &browser->hists->thread_filter);
-zoom_out_thread:
- ui_helpline__pop();
- thread__zput(browser->hists->thread_filter);
- perf_hpp__set_elide(HISTC_THREAD, false);
- } else {
- ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
- thread->comm_set ? thread__comm_str(thread) : "",
- thread->tid);
- browser->hists->thread_filter = thread__get(thread);
- perf_hpp__set_elide(HISTC_THREAD, false);
- pstack__push(fstack, &browser->hists->thread_filter);
- }
- hists__filter_by_thread(hists);
- hist_browser__reset(browser);
- }
- /* perf scripts support */
- else if (choice == scripts_all || choice == scripts_comm ||
- choice == scripts_symbol) {
-do_scripts:
- memset(script_opt, 0, 64);
+ do {
+ struct popup_action *act;
- if (choice == scripts_comm)
- sprintf(script_opt, " -c %s ", thread__comm_str(browser->he_selection->thread));
+ choice = ui__popup_menu(nr_options, options);
+ if (choice == -1 || choice >= nr_options)
+ break;
- if (choice == scripts_symbol)
- sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name);
+ act = &actions[choice];
+ key = act->fn(browser, act);
+ } while (key == 1);
- script_browse(script_opt);
- }
- /* Switch to another data file */
- else if (choice == switch_data) {
-do_data_switch:
- if (!switch_data_file()) {
- key = K_SWITCH_INPUT_DATA;
- break;
- } else
- ui__warning("Won't switch the data files due to\n"
- "no valid data file get selected!\n");
- }
+ if (key == K_SWITCH_INPUT_DATA)
+ break;
}
out_free_stack:
- pstack__delete(fstack);
+ pstack__delete(browser->pstack);
out:
hist_browser__delete(browser);
- free_popup_options(options, nr_options - 1);
+ free_popup_options(options, MAX_OPTIONS);
return key;
}
@@ -1821,7 +2016,7 @@ struct perf_evsel_menu {
struct perf_evsel *selection;
bool lost_events, lost_events_warned;
float min_pcnt;
- struct perf_session_env *env;
+ struct perf_env *env;
};
static void perf_evsel_menu__write(struct ui_browser *browser,
@@ -1855,7 +2050,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
nr_events = convert_unit(nr_events, &unit);
printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
unit, unit == ' ' ? "" : " ", ev_name);
- slsmg_printf("%s", bf);
+ ui_browser__printf(browser, "%s", bf);
nr_events = hists->stats.nr_events[PERF_RECORD_LOST];
if (nr_events != 0) {
@@ -1868,7 +2063,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
warn = bf;
}
- slsmg_write_nstring(warn, browser->width - printed);
+ ui_browser__write_nstring(browser, warn, browser->width - printed);
if (current_entry)
menu->selection = evsel;
@@ -1931,15 +2126,11 @@ browse_hists:
else
pos = perf_evsel__prev(pos);
goto browse_hists;
- case K_ESC:
- if (!ui_browser__dialog_yesno(&menu->b,
- "Do you really want to exit?"))
- continue;
- /* Fall thru */
case K_SWITCH_INPUT_DATA:
case 'q':
case CTRL('c'):
goto out;
+ case K_ESC:
default:
continue;
}
@@ -1978,7 +2169,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
int nr_entries, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_session_env *env)
+ struct perf_env *env)
{
struct perf_evsel *pos;
struct perf_evsel_menu menu = {
@@ -2011,7 +2202,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_session_env *env)
+ struct perf_env *env)
{
int nr_entries = evlist->nr_entries;
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index b11639f..8c154c7 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -1,4 +1,3 @@
-#include "../libslang.h"
#include <elf.h>
#include <inttypes.h>
#include <sys/ttydefaults.h>
@@ -26,13 +25,13 @@ static void map_browser__write(struct ui_browser *browser, void *nd, int row)
int width;
ui_browser__set_percent_color(browser, 0, current_entry);
- slsmg_printf("%*" PRIx64 " %*" PRIx64 " %c ",
- mb->addrlen, sym->start, mb->addrlen, sym->end,
- sym->binding == STB_GLOBAL ? 'g' :
- sym->binding == STB_LOCAL ? 'l' : 'w');
+ ui_browser__printf(browser, "%*" PRIx64 " %*" PRIx64 " %c ",
+ mb->addrlen, sym->start, mb->addrlen, sym->end,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w');
width = browser->width - ((mb->addrlen * 2) + 4);
if (width > 0)
- slsmg_write_nstring(sym->name, width);
+ ui_browser__write_nstring(browser, sym->name, width);
}
/* FIXME uber-kludgy, see comment on cmd_report... */
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
index 402d2bd..e13b48d 100644
--- a/tools/perf/ui/browsers/scripts.c
+++ b/tools/perf/ui/browsers/scripts.c
@@ -81,7 +81,7 @@ static void script_browser__write(struct ui_browser *browser,
ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
HE_COLORSET_NORMAL);
- slsmg_write_nstring(sline->line, browser->width);
+ ui_browser__write_nstring(browser, sline->line, browser->width);
}
static int script_browser__run(struct perf_script_browser *browser)
diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h
index 4d54b64..db81669 100644
--- a/tools/perf/ui/libslang.h
+++ b/tools/perf/ui/libslang.h
@@ -14,12 +14,15 @@
#if SLANG_VERSION < 20104
#define slsmg_printf(msg, args...) \
SLsmg_printf((char *)(msg), ##args)
+#define slsmg_vprintf(msg, vargs) \
+ SLsmg_vprintf((char *)(msg), vargs)
#define slsmg_write_nstring(msg, len) \
SLsmg_write_nstring((char *)(msg), len)
#define sltt_set_color(obj, name, fg, bg) \
SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
#else
#define slsmg_printf SLsmg_printf
+#define slsmg_vprintf SLsmg_vprintf
#define slsmg_write_nstring SLsmg_write_nstring
#define sltt_set_color SLtt_set_color
#endif
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c
index c61d14b..c4b9900 100644
--- a/tools/perf/ui/tui/progress.c
+++ b/tools/perf/ui/tui/progress.c
@@ -33,9 +33,26 @@ static void tui_progress__update(struct ui_progress *p)
pthread_mutex_unlock(&ui__lock);
}
+static void tui_progress__finish(void)
+{
+ int y;
+
+ if (use_browser <= 0)
+ return;
+
+ ui__refresh_dimensions(false);
+ pthread_mutex_lock(&ui__lock);
+ y = SLtt_Screen_Rows / 2 - 2;
+ SLsmg_set_color(0);
+ SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' ');
+ SLsmg_refresh();
+ pthread_mutex_unlock(&ui__lock);
+}
+
static struct ui_progress_ops tui_progress__ops =
{
- .update = tui_progress__update,
+ .update = tui_progress__update,
+ .finish = tui_progress__finish,
};
void tui_progress__init(void)
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index b77e1d7..60d1f29 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -129,7 +129,7 @@ int ui__init(void)
err = SLsmg_init_smg();
if (err < 0)
goto out;
- err = SLang_init_tty(0, 0, 0);
+ err = SLang_init_tty(-1, 0, 0);
if (err < 0)
goto out;
diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c
index bf890f7..d96ad7c 100644
--- a/tools/perf/ui/tui/util.c
+++ b/tools/perf/ui/tui/util.c
@@ -21,7 +21,7 @@ static void ui_browser__argv_write(struct ui_browser *browser,
ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
HE_COLORSET_NORMAL);
- slsmg_write_nstring(*arg, browser->width);
+ ui_browser__write_nstring(browser, *arg, browser->width);
}
static int popup_menu__run(struct ui_browser *menu)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 797490a..349bc96 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -14,6 +14,7 @@ libperf-y += find_next_bit.o
libperf-y += help.o
libperf-y += kallsyms.o
libperf-y += levenshtein.o
+libperf-y += llvm-utils.o
libperf-y += parse-options.o
libperf-y += parse-events.o
libperf-y += path.o
@@ -67,15 +68,25 @@ libperf-y += target.o
libperf-y += rblist.o
libperf-y += intlist.o
libperf-y += vdso.o
+libperf-y += counts.o
libperf-y += stat.o
+libperf-y += stat-shadow.o
libperf-y += record.o
libperf-y += srcline.o
libperf-y += data.o
libperf-$(CONFIG_X86) += tsc.o
+libperf-$(CONFIG_AUXTRACE) += tsc.o
libperf-y += cloexec.o
libperf-y += thread-stack.o
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
+libperf-y += parse-branch-options.o
+libperf-y += parse-regs-options.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o
+libperf-$(CONFIG_LIBELF) += probe-file.o
libperf-$(CONFIG_LIBELF) += probe-event.o
ifndef CONFIG_LIBELF
@@ -101,23 +112,23 @@ CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="B
$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
$(call rule_mkdir)
- @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
$(call rule_mkdir)
- @$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
- @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
$(OUTPUT)util/pmu-bison.c: util/pmu.y
$(call rule_mkdir)
- @$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
CFLAGS_parse-events-flex.o += -w
CFLAGS_pmu-flex.o += -w
-CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
@@ -136,10 +147,10 @@ $(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
-$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE
+$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
-$(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE
+$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7f5bdfc..d1eece7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym)
return 0;
}
+/* The cycles histogram is lazily allocated. */
+static int symbol__alloc_hist_cycles(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ const size_t size = symbol__size(sym);
+
+ notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
+ if (notes->src->cycles_hist == NULL)
+ return -1;
+ return 0;
+}
+
void symbol__annotate_zero_histograms(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
pthread_mutex_lock(&notes->lock);
- if (notes->src != NULL)
+ if (notes->src != NULL) {
memset(notes->src->histograms, 0,
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+ if (notes->src->cycles_hist)
+ memset(notes->src->cycles_hist, 0,
+ symbol__size(sym) * sizeof(struct cyc_hist));
+ }
pthread_mutex_unlock(&notes->lock);
}
+static int __symbol__account_cycles(struct annotation *notes,
+ u64 start,
+ unsigned offset, unsigned cycles,
+ unsigned have_start)
+{
+ struct cyc_hist *ch;
+
+ ch = notes->src->cycles_hist;
+ /*
+ * For now we can only account one basic block per
+ * final jump. But multiple could be overlapping.
+ * Always account the longest one. So when
+ * a shorter one has been already seen throw it away.
+ *
+ * We separately always account the full cycles.
+ */
+ ch[offset].num_aggr++;
+ ch[offset].cycles_aggr += cycles;
+
+ if (!have_start && ch[offset].have_start)
+ return 0;
+ if (ch[offset].num) {
+ if (have_start && (!ch[offset].have_start ||
+ ch[offset].start > start)) {
+ ch[offset].have_start = 0;
+ ch[offset].cycles = 0;
+ ch[offset].num = 0;
+ if (ch[offset].reset < 0xffff)
+ ch[offset].reset++;
+ } else if (have_start &&
+ ch[offset].start < start)
+ return 0;
+ }
+ ch[offset].have_start = have_start;
+ ch[offset].start = start;
+ ch[offset].cycles += cycles;
+ ch[offset].num++;
+ return 0;
+}
+
static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
struct annotation *notes, int evidx, u64 addr)
{
@@ -506,6 +562,21 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
return 0;
}
+static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ if (notes->src == NULL) {
+ if (symbol__alloc_hist(sym) < 0)
+ return NULL;
+ }
+ if (!notes->src->cycles_hist && cycles) {
+ if (symbol__alloc_hist_cycles(sym) < 0)
+ return NULL;
+ }
+ return notes;
+}
+
static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
int evidx, u64 addr)
{
@@ -513,14 +584,71 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
if (sym == NULL)
return 0;
+ notes = symbol__get_annotation(sym, false);
+ if (notes == NULL)
+ return -ENOMEM;
+ return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
+}
+
+static int symbol__account_cycles(u64 addr, u64 start,
+ struct symbol *sym, unsigned cycles)
+{
+ struct annotation *notes;
+ unsigned offset;
+
+ if (sym == NULL)
+ return 0;
+ notes = symbol__get_annotation(sym, true);
+ if (notes == NULL)
+ return -ENOMEM;
+ if (addr < sym->start || addr >= sym->end)
+ return -ERANGE;
- notes = symbol__annotation(sym);
- if (notes->src == NULL) {
- if (symbol__alloc_hist(sym) < 0)
- return -ENOMEM;
+ if (start) {
+ if (start < sym->start || start >= sym->end)
+ return -ERANGE;
+ if (start >= addr)
+ start = 0;
}
+ offset = addr - sym->start;
+ return __symbol__account_cycles(notes,
+ start ? start - sym->start : 0,
+ offset, cycles,
+ !!start);
+}
- return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+ struct addr_map_symbol *start,
+ unsigned cycles)
+{
+ u64 saddr = 0;
+ int err;
+
+ if (!cycles)
+ return 0;
+
+ /*
+ * Only set start when IPC can be computed. We can only
+ * compute it when the basic block is completely in a single
+ * function.
+ * Special case the case when the jump is elsewhere, but
+ * it starts on the function start.
+ */
+ if (start &&
+ (start->sym == ams->sym ||
+ (ams->sym &&
+ start->addr == ams->sym->start + ams->map->start)))
+ saddr = start->al_addr;
+ if (saddr == 0)
+ pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
+ ams->addr,
+ start ? start->addr : 0,
+ ams->sym ? ams->sym->start + ams->map->start : 0,
+ saddr);
+ err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
+ if (err)
+ pr_debug2("account_cycles failed %d\n", err);
+ return err;
}
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
@@ -647,14 +775,15 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
}
double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
- s64 end, const char **path)
+ s64 end, const char **path, u64 *nr_samples)
{
struct source_line *src_line = notes->src->lines;
double percent = 0.0;
+ *nr_samples = 0;
if (src_line) {
size_t sizeof_src_line = sizeof(*src_line) +
- sizeof(src_line->p) * (src_line->nr_pcnt - 1);
+ sizeof(src_line->samples) * (src_line->nr_pcnt - 1);
while (offset < end) {
src_line = (void *)notes->src->lines +
@@ -663,7 +792,8 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
if (*path == NULL)
*path = src_line->path;
- percent += src_line->p[evidx].percent;
+ percent += src_line->samples[evidx].percent;
+ *nr_samples += src_line->samples[evidx].nr;
offset++;
}
} else {
@@ -673,8 +803,10 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
while (offset < end)
hits += h->addr[offset++];
- if (h->sum)
+ if (h->sum) {
+ *nr_samples = hits;
percent = 100.0 * hits / h->sum;
+ }
}
return percent;
@@ -689,8 +821,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
if (dl->offset != -1) {
const char *path = NULL;
+ u64 nr_samples;
double percent, max_percent = 0.0;
double *ppercents = &percent;
+ u64 *psamples = &nr_samples;
int i, nr_percent = 1;
const char *color;
struct annotation *notes = symbol__annotation(sym);
@@ -703,8 +837,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
if (perf_evsel__is_group_event(evsel)) {
nr_percent = evsel->nr_members;
ppercents = calloc(nr_percent, sizeof(double));
- if (ppercents == NULL)
+ psamples = calloc(nr_percent, sizeof(u64));
+ if (ppercents == NULL || psamples == NULL) {
return -1;
+ }
}
for (i = 0; i < nr_percent; i++) {
@@ -712,9 +848,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
notes->src->lines ? i : evsel->idx + i,
offset,
next ? next->offset : (s64) len,
- &path);
+ &path, &nr_samples);
ppercents[i] = percent;
+ psamples[i] = nr_samples;
if (percent > max_percent)
max_percent = percent;
}
@@ -752,8 +889,14 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
for (i = 0; i < nr_percent; i++) {
percent = ppercents[i];
+ nr_samples = psamples[i];
color = get_percent_color(percent);
- color_fprintf(stdout, color, " %7.2f", percent);
+
+ if (symbol_conf.show_total_period)
+ color_fprintf(stdout, color, " %7" PRIu64,
+ nr_samples);
+ else
+ color_fprintf(stdout, color, " %7.2f", percent);
}
printf(" : ");
@@ -763,6 +906,9 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
if (ppercents != &percent)
free(ppercents);
+ if (psamples != &nr_samples)
+ free(psamples);
+
} else if (max_lines && printed >= max_lines)
return 1;
else {
@@ -980,6 +1126,7 @@ fallback:
dso->annotate_warned = 1;
pr_err("Can't annotate %s:\n\n"
"No vmlinux file%s\nwas found in the path.\n\n"
+ "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
"Please use:\n\n"
" perf buildid-cache -vu vmlinux\n\n"
"or:\n\n"
@@ -1096,7 +1243,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
ret = strcmp(iter->path, src_line->path);
if (ret == 0) {
for (i = 0; i < src_line->nr_pcnt; i++)
- iter->p[i].percent_sum += src_line->p[i].percent;
+ iter->samples[i].percent_sum += src_line->samples[i].percent;
return;
}
@@ -1107,7 +1254,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
}
for (i = 0; i < src_line->nr_pcnt; i++)
- src_line->p[i].percent_sum = src_line->p[i].percent;
+ src_line->samples[i].percent_sum = src_line->samples[i].percent;
rb_link_node(&src_line->node, parent, p);
rb_insert_color(&src_line->node, root);
@@ -1118,9 +1265,9 @@ static int cmp_source_line(struct source_line *a, struct source_line *b)
int i;
for (i = 0; i < a->nr_pcnt; i++) {
- if (a->p[i].percent_sum == b->p[i].percent_sum)
+ if (a->samples[i].percent_sum == b->samples[i].percent_sum)
continue;
- return a->p[i].percent_sum > b->p[i].percent_sum;
+ return a->samples[i].percent_sum > b->samples[i].percent_sum;
}
return 0;
@@ -1172,7 +1319,7 @@ static void symbol__free_source_line(struct symbol *sym, int len)
int i;
sizeof_src_line = sizeof(*src_line) +
- (sizeof(src_line->p) * (src_line->nr_pcnt - 1));
+ (sizeof(src_line->samples) * (src_line->nr_pcnt - 1));
for (i = 0; i < len; i++) {
free_srcline(src_line->path);
@@ -1204,7 +1351,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
h_sum += h->sum;
}
nr_pcnt = evsel->nr_members;
- sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->p);
+ sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples);
}
if (!h_sum)
@@ -1224,10 +1371,10 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
for (k = 0; k < nr_pcnt; k++) {
h = annotation__histogram(notes, evidx + k);
- src_line->p[k].percent = 100.0 * h->addr[i] / h->sum;
+ src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum;
- if (src_line->p[k].percent > percent_max)
- percent_max = src_line->p[k].percent;
+ if (src_line->samples[k].percent > percent_max)
+ percent_max = src_line->samples[k].percent;
}
if (percent_max <= 0.5)
@@ -1267,7 +1414,7 @@ static void print_summary(struct rb_root *root, const char *filename)
src_line = rb_entry(node, struct source_line, node);
for (i = 0; i < src_line->nr_pcnt; i++) {
- percent = src_line->p[i].percent_sum;
+ percent = src_line->samples[i].percent_sum;
color = get_percent_color(percent);
color_fprintf(stdout, color, " %7.2f", percent);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index cadbdc9..e999609 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -59,6 +59,8 @@ struct disasm_line {
char *name;
struct ins *ins;
int line_nr;
+ float ipc;
+ u64 cycles;
struct ins_operands ops;
};
@@ -72,23 +74,35 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
size_t disasm__fprintf(struct list_head *head, FILE *fp);
double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
- s64 end, const char **path);
+ s64 end, const char **path, u64 *nr_samples);
struct sym_hist {
u64 sum;
u64 addr[0];
};
-struct source_line_percent {
+struct cyc_hist {
+ u64 start;
+ u64 cycles;
+ u64 cycles_aggr;
+ u32 num;
+ u32 num_aggr;
+ u8 have_start;
+ /* 1 byte padding */
+ u16 reset;
+};
+
+struct source_line_samples {
double percent;
double percent_sum;
+ double nr;
};
struct source_line {
struct rb_node node;
char *path;
int nr_pcnt;
- struct source_line_percent p[1];
+ struct source_line_samples samples[1];
};
/** struct annotated_source - symbols with hits have this attached as in sannotation
@@ -96,6 +110,7 @@ struct source_line {
* @histogram: Array of addr hit histograms per event being monitored
* @lines: If 'print_lines' is specified, per source code line percentages
* @source: source parsed from a disassembler like objdump -dS
+ * @cyc_hist: Average cycles per basic block
*
* lines is allocated, percentages calculated and all sorted by percentage
* when the annotation is about to be presented, so the percentages are for
@@ -108,6 +123,7 @@ struct annotated_source {
struct source_line *lines;
int nr_histograms;
int sizeof_sym_hist;
+ struct cyc_hist *cycles_hist;
struct sym_hist histograms[0];
};
@@ -129,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+ struct addr_map_symbol *start,
+ unsigned cycles);
+
int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
int symbol__alloc_hist(struct symbol *sym);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
new file mode 100644
index 0000000..a980e7c
--- /dev/null
+++ b/tools/perf/util/auxtrace.c
@@ -0,0 +1,1370 @@
+/*
+ * auxtrace.c: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <linux/list.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "evlist.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+
+#include <linux/hash.h>
+
+#include "event.h"
+#include "session.h"
+#include "debug.h"
+#include "parse-options.h"
+
+#include "intel-pt.h"
+#include "intel-bts.h"
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd)
+{
+ struct perf_event_mmap_page *pc = userpg;
+
+ WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
+
+ mm->userpg = userpg;
+ mm->mask = mp->mask;
+ mm->len = mp->len;
+ mm->prev = 0;
+ mm->idx = mp->idx;
+ mm->tid = mp->tid;
+ mm->cpu = mp->cpu;
+
+ if (!mp->len) {
+ mm->base = NULL;
+ return 0;
+ }
+
+#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+ pr_err("Cannot use AUX area tracing mmaps\n");
+ return -1;
+#endif
+
+ pc->aux_offset = mp->offset;
+ pc->aux_size = mp->len;
+
+ mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
+ if (mm->base == MAP_FAILED) {
+ pr_debug2("failed to mmap AUX area\n");
+ mm->base = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
+{
+ if (mm->base) {
+ munmap(mm->base, mm->len);
+ mm->base = NULL;
+ }
+}
+
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite)
+{
+ if (auxtrace_pages) {
+ mp->offset = auxtrace_offset;
+ mp->len = auxtrace_pages * (size_t)page_size;
+ mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0;
+ mp->prot = PROT_READ | (auxtrace_overwrite ? 0 : PROT_WRITE);
+ pr_debug2("AUX area mmap length %zu\n", mp->len);
+ } else {
+ mp->len = 0;
+ }
+}
+
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct perf_evlist *evlist, int idx,
+ bool per_cpu)
+{
+ mp->idx = idx;
+
+ if (per_cpu) {
+ mp->cpu = evlist->cpus->map[idx];
+ if (evlist->threads)
+ mp->tid = thread_map__pid(evlist->threads, 0);
+ else
+ mp->tid = -1;
+ } else {
+ mp->cpu = -1;
+ mp->tid = thread_map__pid(evlist->threads, idx);
+ }
+}
+
+#define AUXTRACE_INIT_NR_QUEUES 32
+
+static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues)
+{
+ struct auxtrace_queue *queue_array;
+ unsigned int max_nr_queues, i;
+
+ max_nr_queues = UINT_MAX / sizeof(struct auxtrace_queue);
+ if (nr_queues > max_nr_queues)
+ return NULL;
+
+ queue_array = calloc(nr_queues, sizeof(struct auxtrace_queue));
+ if (!queue_array)
+ return NULL;
+
+ for (i = 0; i < nr_queues; i++) {
+ INIT_LIST_HEAD(&queue_array[i].head);
+ queue_array[i].priv = NULL;
+ }
+
+ return queue_array;
+}
+
+int auxtrace_queues__init(struct auxtrace_queues *queues)
+{
+ queues->nr_queues = AUXTRACE_INIT_NR_QUEUES;
+ queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues);
+ if (!queues->queue_array)
+ return -ENOMEM;
+ return 0;
+}
+
+static int auxtrace_queues__grow(struct auxtrace_queues *queues,
+ unsigned int new_nr_queues)
+{
+ unsigned int nr_queues = queues->nr_queues;
+ struct auxtrace_queue *queue_array;
+ unsigned int i;
+
+ if (!nr_queues)
+ nr_queues = AUXTRACE_INIT_NR_QUEUES;
+
+ while (nr_queues && nr_queues < new_nr_queues)
+ nr_queues <<= 1;
+
+ if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues)
+ return -EINVAL;
+
+ queue_array = auxtrace_alloc_queue_array(nr_queues);
+ if (!queue_array)
+ return -ENOMEM;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ list_splice_tail(&queues->queue_array[i].head,
+ &queue_array[i].head);
+ queue_array[i].priv = queues->queue_array[i].priv;
+ }
+
+ queues->nr_queues = nr_queues;
+ queues->queue_array = queue_array;
+
+ return 0;
+}
+
+static void *auxtrace_copy_data(u64 size, struct perf_session *session)
+{
+ int fd = perf_data_file__fd(session->file);
+ void *p;
+ ssize_t ret;
+
+ if (size > SSIZE_MAX)
+ return NULL;
+
+ p = malloc(size);
+ if (!p)
+ return NULL;
+
+ ret = readn(fd, p, size);
+ if (ret != (ssize_t)size) {
+ free(p);
+ return NULL;
+ }
+
+ return p;
+}
+
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer)
+{
+ struct auxtrace_queue *queue;
+ int err;
+
+ if (idx >= queues->nr_queues) {
+ err = auxtrace_queues__grow(queues, idx + 1);
+ if (err)
+ return err;
+ }
+
+ queue = &queues->queue_array[idx];
+
+ if (!queue->set) {
+ queue->set = true;
+ queue->tid = buffer->tid;
+ queue->cpu = buffer->cpu;
+ } else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) {
+ pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n",
+ queue->cpu, queue->tid, buffer->cpu, buffer->tid);
+ return -EINVAL;
+ }
+
+ buffer->buffer_nr = queues->next_buffer_nr++;
+
+ list_add_tail(&buffer->list, &queue->head);
+
+ queues->new_data = true;
+ queues->populated = true;
+
+ return 0;
+}
+
+/* Limit buffers to 32MiB on 32-bit */
+#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)
+
+static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer)
+{
+ u64 sz = buffer->size;
+ bool consecutive = false;
+ struct auxtrace_buffer *b;
+ int err;
+
+ while (sz > BUFFER_LIMIT_FOR_32_BIT) {
+ b = memdup(buffer, sizeof(struct auxtrace_buffer));
+ if (!b)
+ return -ENOMEM;
+ b->size = BUFFER_LIMIT_FOR_32_BIT;
+ b->consecutive = consecutive;
+ err = auxtrace_queues__add_buffer(queues, idx, b);
+ if (err) {
+ auxtrace_buffer__free(b);
+ return err;
+ }
+ buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT;
+ sz -= BUFFER_LIMIT_FOR_32_BIT;
+ consecutive = true;
+ }
+
+ buffer->size = sz;
+ buffer->consecutive = consecutive;
+
+ return 0;
+}
+
+static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer)
+{
+ if (session->one_mmap) {
+ buffer->data = buffer->data_offset - session->one_mmap_offset +
+ session->one_mmap_addr;
+ } else if (perf_data_file__is_pipe(session->file)) {
+ buffer->data = auxtrace_copy_data(buffer->size, session);
+ if (!buffer->data)
+ return -ENOMEM;
+ buffer->data_needs_freeing = true;
+ } else if (BITS_PER_LONG == 32 &&
+ buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
+ int err;
+
+ err = auxtrace_queues__split_buffer(queues, idx, buffer);
+ if (err)
+ return err;
+ }
+
+ return auxtrace_queues__add_buffer(queues, idx, buffer);
+}
+
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ union perf_event *event, off_t data_offset,
+ struct auxtrace_buffer **buffer_ptr)
+{
+ struct auxtrace_buffer *buffer;
+ unsigned int idx;
+ int err;
+
+ buffer = zalloc(sizeof(struct auxtrace_buffer));
+ if (!buffer)
+ return -ENOMEM;
+
+ buffer->pid = -1;
+ buffer->tid = event->auxtrace.tid;
+ buffer->cpu = event->auxtrace.cpu;
+ buffer->data_offset = data_offset;
+ buffer->offset = event->auxtrace.offset;
+ buffer->reference = event->auxtrace.reference;
+ buffer->size = event->auxtrace.size;
+ idx = event->auxtrace.idx;
+
+ err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer);
+ if (err)
+ goto out_err;
+
+ if (buffer_ptr)
+ *buffer_ptr = buffer;
+
+ return 0;
+
+out_err:
+ auxtrace_buffer__free(buffer);
+ return err;
+}
+
+static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ off_t file_offset, size_t sz)
+{
+ union perf_event *event;
+ int err;
+ char buf[PERF_SAMPLE_MAX_SIZE];
+
+ err = perf_session__peek_event(session, file_offset, buf,
+ PERF_SAMPLE_MAX_SIZE, &event, NULL);
+ if (err)
+ return err;
+
+ if (event->header.type == PERF_RECORD_AUXTRACE) {
+ if (event->header.size < sizeof(struct auxtrace_event) ||
+ event->header.size != sz) {
+ err = -EINVAL;
+ goto out;
+ }
+ file_offset += event->header.size;
+ err = auxtrace_queues__add_event(queues, session, event,
+ file_offset, NULL);
+ }
+out:
+ return err;
+}
+
+void auxtrace_queues__free(struct auxtrace_queues *queues)
+{
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ while (!list_empty(&queues->queue_array[i].head)) {
+ struct auxtrace_buffer *buffer;
+
+ buffer = list_entry(queues->queue_array[i].head.next,
+ struct auxtrace_buffer, list);
+ list_del(&buffer->list);
+ auxtrace_buffer__free(buffer);
+ }
+ }
+
+ zfree(&queues->queue_array);
+ queues->nr_queues = 0;
+}
+
+static void auxtrace_heapify(struct auxtrace_heap_item *heap_array,
+ unsigned int pos, unsigned int queue_nr,
+ u64 ordinal)
+{
+ unsigned int parent;
+
+ while (pos) {
+ parent = (pos - 1) >> 1;
+ if (heap_array[parent].ordinal <= ordinal)
+ break;
+ heap_array[pos] = heap_array[parent];
+ pos = parent;
+ }
+ heap_array[pos].queue_nr = queue_nr;
+ heap_array[pos].ordinal = ordinal;
+}
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+ u64 ordinal)
+{
+ struct auxtrace_heap_item *heap_array;
+
+ if (queue_nr >= heap->heap_sz) {
+ unsigned int heap_sz = AUXTRACE_INIT_NR_QUEUES;
+
+ while (heap_sz <= queue_nr)
+ heap_sz <<= 1;
+ heap_array = realloc(heap->heap_array,
+ heap_sz * sizeof(struct auxtrace_heap_item));
+ if (!heap_array)
+ return -ENOMEM;
+ heap->heap_array = heap_array;
+ heap->heap_sz = heap_sz;
+ }
+
+ auxtrace_heapify(heap->heap_array, heap->heap_cnt++, queue_nr, ordinal);
+
+ return 0;
+}
+
+void auxtrace_heap__free(struct auxtrace_heap *heap)
+{
+ zfree(&heap->heap_array);
+ heap->heap_cnt = 0;
+ heap->heap_sz = 0;
+}
+
+void auxtrace_heap__pop(struct auxtrace_heap *heap)
+{
+ unsigned int pos, last, heap_cnt = heap->heap_cnt;
+ struct auxtrace_heap_item *heap_array;
+
+ if (!heap_cnt)
+ return;
+
+ heap->heap_cnt -= 1;
+
+ heap_array = heap->heap_array;
+
+ pos = 0;
+ while (1) {
+ unsigned int left, right;
+
+ left = (pos << 1) + 1;
+ if (left >= heap_cnt)
+ break;
+ right = left + 1;
+ if (right >= heap_cnt) {
+ heap_array[pos] = heap_array[left];
+ return;
+ }
+ if (heap_array[left].ordinal < heap_array[right].ordinal) {
+ heap_array[pos] = heap_array[left];
+ pos = left;
+ } else {
+ heap_array[pos] = heap_array[right];
+ pos = right;
+ }
+ }
+
+ last = heap_cnt - 1;
+ auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr,
+ heap_array[last].ordinal);
+}
+
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
+{
+ if (itr)
+ return itr->info_priv_size(itr);
+ return 0;
+}
+
+static int auxtrace_not_supported(void)
+{
+ pr_err("AUX area tracing is not supported on this architecture\n");
+ return -EINVAL;
+}
+
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size)
+{
+ if (itr)
+ return itr->info_fill(itr, session, auxtrace_info, priv_size);
+ return auxtrace_not_supported();
+}
+
+void auxtrace_record__free(struct auxtrace_record *itr)
+{
+ if (itr)
+ itr->free(itr);
+}
+
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr)
+{
+ if (itr && itr->snapshot_start)
+ return itr->snapshot_start(itr);
+ return 0;
+}
+
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr)
+{
+ if (itr && itr->snapshot_finish)
+ return itr->snapshot_finish(itr);
+ return 0;
+}
+
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm,
+ unsigned char *data, u64 *head, u64 *old)
+{
+ if (itr && itr->find_snapshot)
+ return itr->find_snapshot(itr, idx, mm, data, head, old);
+ return 0;
+}
+
+int auxtrace_record__options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ if (itr)
+ return itr->recording_options(itr, evlist, opts);
+ return 0;
+}
+
+u64 auxtrace_record__reference(struct auxtrace_record *itr)
+{
+ if (itr)
+ return itr->reference(itr);
+ return 0;
+}
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts, const char *str)
+{
+ if (!str)
+ return 0;
+
+ if (itr)
+ return itr->parse_snapshot_options(itr, opts, str);
+
+ pr_err("No AUX area tracing to snapshot\n");
+ return -EINVAL;
+}
+
+struct auxtrace_record *__weak
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
+{
+ *err = 0;
+ return NULL;
+}
+
+static int auxtrace_index__alloc(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+
+ auxtrace_index = malloc(sizeof(struct auxtrace_index));
+ if (!auxtrace_index)
+ return -ENOMEM;
+
+ auxtrace_index->nr = 0;
+ INIT_LIST_HEAD(&auxtrace_index->list);
+
+ list_add_tail(&auxtrace_index->list, head);
+
+ return 0;
+}
+
+void auxtrace_index__free(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index, *n;
+
+ list_for_each_entry_safe(auxtrace_index, n, head, list) {
+ list_del(&auxtrace_index->list);
+ free(auxtrace_index);
+ }
+}
+
+static struct auxtrace_index *auxtrace_index__last(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+ int err;
+
+ if (list_empty(head)) {
+ err = auxtrace_index__alloc(head);
+ if (err)
+ return NULL;
+ }
+
+ auxtrace_index = list_entry(head->prev, struct auxtrace_index, list);
+
+ if (auxtrace_index->nr >= PERF_AUXTRACE_INDEX_ENTRY_COUNT) {
+ err = auxtrace_index__alloc(head);
+ if (err)
+ return NULL;
+ auxtrace_index = list_entry(head->prev, struct auxtrace_index,
+ list);
+ }
+
+ return auxtrace_index;
+}
+
+int auxtrace_index__auxtrace_event(struct list_head *head,
+ union perf_event *event, off_t file_offset)
+{
+ struct auxtrace_index *auxtrace_index;
+ size_t nr;
+
+ auxtrace_index = auxtrace_index__last(head);
+ if (!auxtrace_index)
+ return -ENOMEM;
+
+ nr = auxtrace_index->nr;
+ auxtrace_index->entries[nr].file_offset = file_offset;
+ auxtrace_index->entries[nr].sz = event->header.size;
+ auxtrace_index->nr += 1;
+
+ return 0;
+}
+
+static int auxtrace_index__do_write(int fd,
+ struct auxtrace_index *auxtrace_index)
+{
+ struct auxtrace_index_entry ent;
+ size_t i;
+
+ for (i = 0; i < auxtrace_index->nr; i++) {
+ ent.file_offset = auxtrace_index->entries[i].file_offset;
+ ent.sz = auxtrace_index->entries[i].sz;
+ if (writen(fd, &ent, sizeof(ent)) != sizeof(ent))
+ return -errno;
+ }
+ return 0;
+}
+
+int auxtrace_index__write(int fd, struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+ u64 total = 0;
+ int err;
+
+ list_for_each_entry(auxtrace_index, head, list)
+ total += auxtrace_index->nr;
+
+ if (writen(fd, &total, sizeof(total)) != sizeof(total))
+ return -errno;
+
+ list_for_each_entry(auxtrace_index, head, list) {
+ err = auxtrace_index__do_write(fd, auxtrace_index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int auxtrace_index__process_entry(int fd, struct list_head *head,
+ bool needs_swap)
+{
+ struct auxtrace_index *auxtrace_index;
+ struct auxtrace_index_entry ent;
+ size_t nr;
+
+ if (readn(fd, &ent, sizeof(ent)) != sizeof(ent))
+ return -1;
+
+ auxtrace_index = auxtrace_index__last(head);
+ if (!auxtrace_index)
+ return -1;
+
+ nr = auxtrace_index->nr;
+ if (needs_swap) {
+ auxtrace_index->entries[nr].file_offset =
+ bswap_64(ent.file_offset);
+ auxtrace_index->entries[nr].sz = bswap_64(ent.sz);
+ } else {
+ auxtrace_index->entries[nr].file_offset = ent.file_offset;
+ auxtrace_index->entries[nr].sz = ent.sz;
+ }
+
+ auxtrace_index->nr = nr + 1;
+
+ return 0;
+}
+
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+ bool needs_swap)
+{
+ struct list_head *head = &session->auxtrace_index;
+ u64 nr;
+
+ if (readn(fd, &nr, sizeof(u64)) != sizeof(u64))
+ return -1;
+
+ if (needs_swap)
+ nr = bswap_64(nr);
+
+ if (sizeof(u64) + nr * sizeof(struct auxtrace_index_entry) > size)
+ return -1;
+
+ while (nr--) {
+ int err;
+
+ err = auxtrace_index__process_entry(fd, head, needs_swap);
+ if (err)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ struct auxtrace_index_entry *ent)
+{
+ return auxtrace_queues__add_indexed_event(queues, session,
+ ent->file_offset, ent->sz);
+}
+
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+ struct perf_session *session)
+{
+ struct auxtrace_index *auxtrace_index;
+ struct auxtrace_index_entry *ent;
+ size_t i;
+ int err;
+
+ list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+ for (i = 0; i < auxtrace_index->nr; i++) {
+ ent = &auxtrace_index->entries[i];
+ err = auxtrace_queues__process_index_entry(queues,
+ session,
+ ent);
+ if (err)
+ return err;
+ }
+ }
+ return 0;
+}
+
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *buffer)
+{
+ if (buffer) {
+ if (list_is_last(&buffer->list, &queue->head))
+ return NULL;
+ return list_entry(buffer->list.next, struct auxtrace_buffer,
+ list);
+ } else {
+ if (list_empty(&queue->head))
+ return NULL;
+ return list_entry(queue->head.next, struct auxtrace_buffer,
+ list);
+ }
+}
+
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+{
+ size_t adj = buffer->data_offset & (page_size - 1);
+ size_t size = buffer->size + adj;
+ off_t file_offset = buffer->data_offset - adj;
+ void *addr;
+
+ if (buffer->data)
+ return buffer->data;
+
+ addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset);
+ if (addr == MAP_FAILED)
+ return NULL;
+
+ buffer->mmap_addr = addr;
+ buffer->mmap_size = size;
+
+ buffer->data = addr + adj;
+
+ return buffer->data;
+}
+
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer)
+{
+ if (!buffer->data || !buffer->mmap_addr)
+ return;
+ munmap(buffer->mmap_addr, buffer->mmap_size);
+ buffer->mmap_addr = NULL;
+ buffer->mmap_size = 0;
+ buffer->data = NULL;
+ buffer->use_data = NULL;
+}
+
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer)
+{
+ auxtrace_buffer__put_data(buffer);
+ if (buffer->data_needs_freeing) {
+ buffer->data_needs_freeing = false;
+ zfree(&buffer->data);
+ buffer->use_data = NULL;
+ buffer->size = 0;
+ }
+}
+
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
+{
+ auxtrace_buffer__drop_data(buffer);
+ free(buffer);
+}
+
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+ int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+ const char *msg)
+{
+ size_t size;
+
+ memset(auxtrace_error, 0, sizeof(struct auxtrace_error_event));
+
+ auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR;
+ auxtrace_error->type = type;
+ auxtrace_error->code = code;
+ auxtrace_error->cpu = cpu;
+ auxtrace_error->pid = pid;
+ auxtrace_error->tid = tid;
+ auxtrace_error->ip = ip;
+ strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
+
+ size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
+ strlen(auxtrace_error->msg) + 1;
+ auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64));
+}
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+ struct perf_tool *tool,
+ struct perf_session *session,
+ perf_event__handler_t process)
+{
+ union perf_event *ev;
+ size_t priv_size;
+ int err;
+
+ pr_debug2("Synthesizing auxtrace information\n");
+ priv_size = auxtrace_record__info_priv_size(itr);
+ ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
+ if (!ev)
+ return -ENOMEM;
+
+ ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO;
+ ev->auxtrace_info.header.size = sizeof(struct auxtrace_info_event) +
+ priv_size;
+ err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info,
+ priv_size);
+ if (err)
+ goto out_free;
+
+ err = process(tool, ev, NULL, NULL);
+out_free:
+ free(ev);
+ return err;
+}
+
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+ return !session->itrace_synth_opts ||
+ session->itrace_synth_opts->dont_decode;
+}
+
+int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ enum auxtrace_type type = event->auxtrace_info.type;
+
+ if (dump_trace)
+ fprintf(stdout, " type: %u\n", type);
+
+ switch (type) {
+ case PERF_AUXTRACE_INTEL_PT:
+ return intel_pt_process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_INTEL_BTS:
+ return intel_bts_process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_UNKNOWN:
+ default:
+ return -EINVAL;
+ }
+}
+
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ s64 err;
+
+ if (dump_trace)
+ fprintf(stdout, " size: %#"PRIx64" offset: %#"PRIx64" ref: %#"PRIx64" idx: %u tid: %d cpu: %d\n",
+ event->auxtrace.size, event->auxtrace.offset,
+ event->auxtrace.reference, event->auxtrace.idx,
+ event->auxtrace.tid, event->auxtrace.cpu);
+
+ if (auxtrace__dont_decode(session))
+ return event->auxtrace.size;
+
+ if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
+ return -EINVAL;
+
+ err = session->auxtrace->process_auxtrace_event(session, event, tool);
+ if (err < 0)
+ return err;
+
+ return event->auxtrace.size;
+}
+
+#define PERF_ITRACE_DEFAULT_PERIOD_TYPE PERF_ITRACE_PERIOD_NANOSECS
+#define PERF_ITRACE_DEFAULT_PERIOD 100000
+#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16
+#define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024
+
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+{
+ synth_opts->instructions = true;
+ synth_opts->branches = true;
+ synth_opts->transactions = true;
+ synth_opts->errors = true;
+ synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+ synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+}
+
+/*
+ * Please check tools/perf/Documentation/perf-script.txt for information
+ * about the options parsed here, which is introduced after this cset,
+ * when support in 'perf script' for these options is introduced.
+ */
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+ int unset)
+{
+ struct itrace_synth_opts *synth_opts = opt->value;
+ const char *p;
+ char *endptr;
+ bool period_type_set = false;
+
+ synth_opts->set = true;
+
+ if (unset) {
+ synth_opts->dont_decode = true;
+ return 0;
+ }
+
+ if (!str) {
+ itrace_synth_opts__set_default(synth_opts);
+ return 0;
+ }
+
+ for (p = str; *p;) {
+ switch (*p++) {
+ case 'i':
+ synth_opts->instructions = true;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ if (isdigit(*p)) {
+ synth_opts->period = strtoull(p, &endptr, 10);
+ p = endptr;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ switch (*p++) {
+ case 'i':
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_INSTRUCTIONS;
+ period_type_set = true;
+ break;
+ case 't':
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_TICKS;
+ period_type_set = true;
+ break;
+ case 'm':
+ synth_opts->period *= 1000;
+ /* Fall through */
+ case 'u':
+ synth_opts->period *= 1000;
+ /* Fall through */
+ case 'n':
+ if (*p++ != 's')
+ goto out_err;
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_NANOSECS;
+ period_type_set = true;
+ break;
+ case '\0':
+ goto out;
+ default:
+ goto out_err;
+ }
+ }
+ break;
+ case 'b':
+ synth_opts->branches = true;
+ break;
+ case 'x':
+ synth_opts->transactions = true;
+ break;
+ case 'e':
+ synth_opts->errors = true;
+ break;
+ case 'd':
+ synth_opts->log = true;
+ break;
+ case 'c':
+ synth_opts->branches = true;
+ synth_opts->calls = true;
+ break;
+ case 'r':
+ synth_opts->branches = true;
+ synth_opts->returns = true;
+ break;
+ case 'g':
+ synth_opts->callchain = true;
+ synth_opts->callchain_sz =
+ PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ if (isdigit(*p)) {
+ unsigned int val;
+
+ val = strtoul(p, &endptr, 10);
+ p = endptr;
+ if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ)
+ goto out_err;
+ synth_opts->callchain_sz = val;
+ }
+ break;
+ case ' ':
+ case ',':
+ break;
+ default:
+ goto out_err;
+ }
+ }
+out:
+ if (synth_opts->instructions) {
+ if (!period_type_set)
+ synth_opts->period_type =
+ PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+ if (!synth_opts->period)
+ synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ }
+
+ return 0;
+
+out_err:
+ pr_err("Bad Instruction Tracing options '%s'\n", str);
+ return -EINVAL;
+}
+
+static const char * const auxtrace_error_type_name[] = {
+ [PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
+};
+
+static const char *auxtrace_error_name(int type)
+{
+ const char *error_type_name = NULL;
+
+ if (type < PERF_AUXTRACE_ERROR_MAX)
+ error_type_name = auxtrace_error_type_name[type];
+ if (!error_type_name)
+ error_type_name = "unknown AUX";
+ return error_type_name;
+}
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
+{
+ struct auxtrace_error_event *e = &event->auxtrace_error;
+ int ret;
+
+ ret = fprintf(fp, " %s error type %u",
+ auxtrace_error_name(e->type), e->type);
+ ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n",
+ e->cpu, e->pid, e->tid, e->ip, e->code, e->msg);
+ return ret;
+}
+
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+ union perf_event *event)
+{
+ struct auxtrace_error_event *e = &event->auxtrace_error;
+
+ if (e->type < PERF_AUXTRACE_ERROR_MAX)
+ session->evlist->stats.nr_auxtrace_errors[e->type] += 1;
+}
+
+void events_stats__auxtrace_error_warn(const struct events_stats *stats)
+{
+ int i;
+
+ for (i = 0; i < PERF_AUXTRACE_ERROR_MAX; i++) {
+ if (!stats->nr_auxtrace_errors[i])
+ continue;
+ ui__warning("%u %s errors\n",
+ stats->nr_auxtrace_errors[i],
+ auxtrace_error_name(i));
+ }
+}
+
+int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ if (auxtrace__dont_decode(session))
+ return 0;
+
+ perf_event__fprintf_auxtrace_error(event, stdout);
+ return 0;
+}
+
+static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ bool snapshot, size_t snapshot_size)
+{
+ u64 head, old = mm->prev, offset, ref;
+ unsigned char *data = mm->base;
+ size_t size, head_off, old_off, len1, len2, padding;
+ union perf_event ev;
+ void *data1, *data2;
+
+ if (snapshot) {
+ head = auxtrace_mmap__read_snapshot_head(mm);
+ if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data,
+ &head, &old))
+ return -1;
+ } else {
+ head = auxtrace_mmap__read_head(mm);
+ }
+
+ if (old == head)
+ return 0;
+
+ pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n",
+ mm->idx, old, head, head - old);
+
+ if (mm->mask) {
+ head_off = head & mm->mask;
+ old_off = old & mm->mask;
+ } else {
+ head_off = head % mm->len;
+ old_off = old % mm->len;
+ }
+
+ if (head_off > old_off)
+ size = head_off - old_off;
+ else
+ size = mm->len - (old_off - head_off);
+
+ if (snapshot && size > snapshot_size)
+ size = snapshot_size;
+
+ ref = auxtrace_record__reference(itr);
+
+ if (head > old || size <= head || mm->mask) {
+ offset = head - size;
+ } else {
+ /*
+ * When the buffer size is not a power of 2, 'head' wraps at the
+ * highest multiple of the buffer size, so we have to subtract
+ * the remainder here.
+ */
+ u64 rem = (0ULL - mm->len) % mm->len;
+
+ offset = head - size - rem;
+ }
+
+ if (size > head_off) {
+ len1 = size - head_off;
+ data1 = &data[mm->len - len1];
+ len2 = head_off;
+ data2 = &data[0];
+ } else {
+ len1 = size;
+ data1 = &data[head_off - len1];
+ len2 = 0;
+ data2 = NULL;
+ }
+
+ if (itr->alignment) {
+ unsigned int unwanted = len1 % itr->alignment;
+
+ len1 -= unwanted;
+ size -= unwanted;
+ }
+
+ /* padding must be written by fn() e.g. record__process_auxtrace() */
+ padding = size & 7;
+ if (padding)
+ padding = 8 - padding;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
+ ev.auxtrace.header.size = sizeof(ev.auxtrace);
+ ev.auxtrace.size = size + padding;
+ ev.auxtrace.offset = offset;
+ ev.auxtrace.reference = ref;
+ ev.auxtrace.idx = mm->idx;
+ ev.auxtrace.tid = mm->tid;
+ ev.auxtrace.cpu = mm->cpu;
+
+ if (fn(tool, &ev, data1, len1, data2, len2))
+ return -1;
+
+ mm->prev = head;
+
+ if (!snapshot) {
+ auxtrace_mmap__write_tail(mm, head);
+ if (itr->read_finish) {
+ int err;
+
+ err = itr->read_finish(itr, mm->idx);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 1;
+}
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn)
+{
+ return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0);
+}
+
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ size_t snapshot_size)
+{
+ return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size);
+}
+
+/**
+ * struct auxtrace_cache - hash table to implement a cache
+ * @hashtable: the hashtable
+ * @sz: hashtable size (number of hlists)
+ * @entry_size: size of an entry
+ * @limit: limit the number of entries to this maximum, when reached the cache
+ * is dropped and caching begins again with an empty cache
+ * @cnt: current number of entries
+ * @bits: hashtable size (@sz = 2^@bits)
+ */
+struct auxtrace_cache {
+ struct hlist_head *hashtable;
+ size_t sz;
+ size_t entry_size;
+ size_t limit;
+ size_t cnt;
+ unsigned int bits;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+ unsigned int limit_percent)
+{
+ struct auxtrace_cache *c;
+ struct hlist_head *ht;
+ size_t sz, i;
+
+ c = zalloc(sizeof(struct auxtrace_cache));
+ if (!c)
+ return NULL;
+
+ sz = 1UL << bits;
+
+ ht = calloc(sz, sizeof(struct hlist_head));
+ if (!ht)
+ goto out_free;
+
+ for (i = 0; i < sz; i++)
+ INIT_HLIST_HEAD(&ht[i]);
+
+ c->hashtable = ht;
+ c->sz = sz;
+ c->entry_size = entry_size;
+ c->limit = (c->sz * limit_percent) / 100;
+ c->bits = bits;
+
+ return c;
+
+out_free:
+ free(c);
+ return NULL;
+}
+
+static void auxtrace_cache__drop(struct auxtrace_cache *c)
+{
+ struct auxtrace_cache_entry *entry;
+ struct hlist_node *tmp;
+ size_t i;
+
+ if (!c)
+ return;
+
+ for (i = 0; i < c->sz; i++) {
+ hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) {
+ hlist_del(&entry->hash);
+ auxtrace_cache__free_entry(c, entry);
+ }
+ }
+
+ c->cnt = 0;
+}
+
+void auxtrace_cache__free(struct auxtrace_cache *c)
+{
+ if (!c)
+ return;
+
+ auxtrace_cache__drop(c);
+ free(c->hashtable);
+ free(c);
+}
+
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c)
+{
+ return malloc(c->entry_size);
+}
+
+void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused,
+ void *entry)
+{
+ free(entry);
+}
+
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+ struct auxtrace_cache_entry *entry)
+{
+ if (c->limit && ++c->cnt > c->limit)
+ auxtrace_cache__drop(c);
+
+ entry->key = key;
+ hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]);
+
+ return 0;
+}
+
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)
+{
+ struct auxtrace_cache_entry *entry;
+ struct hlist_head *hlist;
+
+ if (!c)
+ return NULL;
+
+ hlist = &c->hashtable[hash_32(key, c->bits)];
+ hlist_for_each_entry(entry, hlist, hash) {
+ if (entry->key == key)
+ return entry;
+ }
+
+ return NULL;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
new file mode 100644
index 0000000..bf72b77
--- /dev/null
+++ b/tools/perf/util/auxtrace.h
@@ -0,0 +1,646 @@
+/*
+ * auxtrace.h: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_AUXTRACE_H
+#define __PERF_AUXTRACE_H
+
+#include <sys/types.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "../perf.h"
+#include "event.h"
+#include "session.h"
+#include "debug.h"
+
+union perf_event;
+struct perf_session;
+struct perf_evlist;
+struct perf_tool;
+struct option;
+struct record_opts;
+struct auxtrace_info_event;
+struct events_stats;
+
+enum auxtrace_type {
+ PERF_AUXTRACE_UNKNOWN,
+ PERF_AUXTRACE_INTEL_PT,
+ PERF_AUXTRACE_INTEL_BTS,
+};
+
+enum itrace_period_type {
+ PERF_ITRACE_PERIOD_INSTRUCTIONS,
+ PERF_ITRACE_PERIOD_TICKS,
+ PERF_ITRACE_PERIOD_NANOSECS,
+};
+
+/**
+ * struct itrace_synth_opts - AUX area tracing synthesis options.
+ * @set: indicates whether or not options have been set
+ * @inject: indicates the event (not just the sample) must be fully synthesized
+ * because 'perf inject' will write it out
+ * @instructions: whether to synthesize 'instructions' events
+ * @branches: whether to synthesize 'branches' events
+ * @transactions: whether to synthesize events for transactions
+ * @errors: whether to synthesize decoder error events
+ * @dont_decode: whether to skip decoding entirely
+ * @log: write a decoding log
+ * @calls: limit branch samples to calls (can be combined with @returns)
+ * @returns: limit branch samples to returns (can be combined with @calls)
+ * @callchain: add callchain to 'instructions' events
+ * @callchain_sz: maximum callchain size
+ * @period: 'instructions' events period
+ * @period_type: 'instructions' events period type
+ */
+struct itrace_synth_opts {
+ bool set;
+ bool inject;
+ bool instructions;
+ bool branches;
+ bool transactions;
+ bool errors;
+ bool dont_decode;
+ bool log;
+ bool calls;
+ bool returns;
+ bool callchain;
+ unsigned int callchain_sz;
+ unsigned long long period;
+ enum itrace_period_type period_type;
+};
+
+/**
+ * struct auxtrace_index_entry - indexes a AUX area tracing event within a
+ * perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+ u64 file_offset;
+ u64 sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ * file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+ struct list_head list;
+ size_t nr;
+ struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+/**
+ * struct auxtrace - session callbacks to allow AUX area data decoding.
+ * @process_event: lets the decoder see all session events
+ * @flush_events: process any remaining data
+ * @free_events: free resources associated with event processing
+ * @free: free resources associated with the session
+ */
+struct auxtrace {
+ int (*process_event)(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool);
+ int (*process_auxtrace_event)(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool);
+ int (*flush_events)(struct perf_session *session,
+ struct perf_tool *tool);
+ void (*free_events)(struct perf_session *session);
+ void (*free)(struct perf_session *session);
+};
+
+/**
+ * struct auxtrace_buffer - a buffer containing AUX area tracing data.
+ * @list: buffers are queued in a list held by struct auxtrace_queue
+ * @size: size of the buffer in bytes
+ * @pid: in per-thread mode, the pid this buffer is associated with
+ * @tid: in per-thread mode, the tid this buffer is associated with
+ * @cpu: in per-cpu mode, the cpu this buffer is associated with
+ * @data: actual buffer data (can be null if the data has not been loaded)
+ * @data_offset: file offset at which the buffer can be read
+ * @mmap_addr: mmap address at which the buffer can be read
+ * @mmap_size: size of the mmap at @mmap_addr
+ * @data_needs_freeing: @data was malloc'd so free it when it is no longer
+ * needed
+ * @consecutive: the original data was split up and this buffer is consecutive
+ * to the previous buffer
+ * @offset: offset as determined by aux_head / aux_tail members of struct
+ * perf_event_mmap_page
+ * @reference: an implementation-specific reference determined when the data is
+ * recorded
+ * @buffer_nr: used to number each buffer
+ * @use_size: implementation actually only uses this number of bytes
+ * @use_data: implementation actually only uses data starting at this address
+ */
+struct auxtrace_buffer {
+ struct list_head list;
+ size_t size;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+ void *data;
+ off_t data_offset;
+ void *mmap_addr;
+ size_t mmap_size;
+ bool data_needs_freeing;
+ bool consecutive;
+ u64 offset;
+ u64 reference;
+ u64 buffer_nr;
+ size_t use_size;
+ void *use_data;
+};
+
+/**
+ * struct auxtrace_queue - a queue of AUX area tracing data buffers.
+ * @head: head of buffer list
+ * @tid: in per-thread mode, the tid this queue is associated with
+ * @cpu: in per-cpu mode, the cpu this queue is associated with
+ * @set: %true once this queue has been dedicated to a specific thread or cpu
+ * @priv: implementation-specific data
+ */
+struct auxtrace_queue {
+ struct list_head head;
+ pid_t tid;
+ int cpu;
+ bool set;
+ void *priv;
+};
+
+/**
+ * struct auxtrace_queues - an array of AUX area tracing queues.
+ * @queue_array: array of queues
+ * @nr_queues: number of queues
+ * @new_data: set whenever new data is queued
+ * @populated: queues have been fully populated using the auxtrace_index
+ * @next_buffer_nr: used to number each buffer
+ */
+struct auxtrace_queues {
+ struct auxtrace_queue *queue_array;
+ unsigned int nr_queues;
+ bool new_data;
+ bool populated;
+ u64 next_buffer_nr;
+};
+
+/**
+ * struct auxtrace_heap_item - element of struct auxtrace_heap.
+ * @queue_nr: queue number
+ * @ordinal: value used for sorting (lowest ordinal is top of the heap) expected
+ * to be a timestamp
+ */
+struct auxtrace_heap_item {
+ unsigned int queue_nr;
+ u64 ordinal;
+};
+
+/**
+ * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues.
+ * @heap_array: the heap
+ * @heap_cnt: the number of elements in the heap
+ * @heap_sz: maximum number of elements (grows as needed)
+ */
+struct auxtrace_heap {
+ struct auxtrace_heap_item *heap_array;
+ unsigned int heap_cnt;
+ unsigned int heap_sz;
+};
+
+/**
+ * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
+ * @base: address of mapped area
+ * @userpg: pointer to buffer's perf_event_mmap_page
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @len: size of mapped area
+ * @prev: previous aux_head
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ * mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap {
+ void *base;
+ void *userpg;
+ size_t mask;
+ size_t len;
+ u64 prev;
+ int idx;
+ pid_t tid;
+ int cpu;
+};
+
+/**
+ * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @offset: file offset of mapped area
+ * @len: size of mapped area
+ * @prot: mmap memory protection
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ * mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap_params {
+ size_t mask;
+ off_t offset;
+ size_t len;
+ int prot;
+ int idx;
+ pid_t tid;
+ int cpu;
+};
+
+/**
+ * struct auxtrace_record - callbacks for recording AUX area data.
+ * @recording_options: validate and process recording options
+ * @info_priv_size: return the size of the private data in auxtrace_info_event
+ * @info_fill: fill-in the private data in auxtrace_info_event
+ * @free: free this auxtrace record structure
+ * @snapshot_start: starting a snapshot
+ * @snapshot_finish: finishing a snapshot
+ * @find_snapshot: find data to snapshot within auxtrace mmap
+ * @parse_snapshot_options: parse snapshot options
+ * @reference: provide a 64-bit reference number for auxtrace_event
+ * @read_finish: called after reading from an auxtrace mmap
+ */
+struct auxtrace_record {
+ int (*recording_options)(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts);
+ size_t (*info_priv_size)(struct auxtrace_record *itr);
+ int (*info_fill)(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size);
+ void (*free)(struct auxtrace_record *itr);
+ int (*snapshot_start)(struct auxtrace_record *itr);
+ int (*snapshot_finish)(struct auxtrace_record *itr);
+ int (*find_snapshot)(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old);
+ int (*parse_snapshot_options)(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str);
+ u64 (*reference)(struct auxtrace_record *itr);
+ int (*read_finish)(struct auxtrace_record *itr, int idx);
+ unsigned int alignment;
+};
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+/*
+ * In snapshot mode the mmapped page is read-only which makes using
+ * __sync_val_compare_and_swap() problematic. However, snapshot mode expects
+ * the buffer is not updated while the snapshot is made (e.g. Intel PT disables
+ * the event) so there is not a race anyway.
+ */
+static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+ u64 head = ACCESS_ONCE(pc->aux_head);
+
+ /* Ensure all reads are done after we read the head */
+ rmb();
+ return head;
+}
+
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+ u64 head = ACCESS_ONCE(pc->aux_head);
+#else
+ u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0);
+#endif
+
+ /* Ensure all reads are done after we read the head */
+ rmb();
+ return head;
+}
+
+static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+ u64 old_tail;
+#endif
+
+ /* Ensure all reads are done before we write the tail out */
+ mb();
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+ pc->aux_tail = tail;
+#else
+ do {
+ old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0);
+ } while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail));
+#endif
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct perf_evlist *evlist, int idx,
+ bool per_cpu);
+
+typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+ union perf_event *event, void *data1,
+ size_t len1, void *data2, size_t len2);
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn);
+
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ size_t snapshot_size);
+
+int auxtrace_queues__init(struct auxtrace_queues *queues);
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ union perf_event *event, off_t data_offset,
+ struct auxtrace_buffer **buffer_ptr);
+void auxtrace_queues__free(struct auxtrace_queues *queues);
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+ struct perf_session *session);
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *buffer);
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+ u64 ordinal);
+void auxtrace_heap__pop(struct auxtrace_heap *heap);
+void auxtrace_heap__free(struct auxtrace_heap *heap);
+
+struct auxtrace_cache_entry {
+ struct hlist_node hash;
+ u32 key;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+ unsigned int limit_percent);
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache);
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c);
+void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry);
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+ struct auxtrace_cache_entry *entry);
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+ int *err);
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str);
+int auxtrace_record__options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr);
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size);
+void auxtrace_record__free(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr);
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm,
+ unsigned char *data, u64 *head, u64 *old);
+u64 auxtrace_record__reference(struct auxtrace_record *itr);
+
+int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
+ off_t file_offset);
+int auxtrace_index__write(int fd, struct list_head *head);
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+ bool needs_swap);
+void auxtrace_index__free(struct list_head *head);
+
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+ int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+ const char *msg);
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+ struct perf_tool *tool,
+ struct perf_session *session,
+ perf_event__handler_t process);
+int perf_event__process_auxtrace_info(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+int perf_event__process_auxtrace_error(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+ int unset);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+ union perf_event *event);
+void events_stats__auxtrace_error_warn(const struct events_stats *stats);
+
+static inline int auxtrace__process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ if (!session->auxtrace)
+ return 0;
+
+ return session->auxtrace->process_event(session, event, sample, tool);
+}
+
+static inline int auxtrace__flush_events(struct perf_session *session,
+ struct perf_tool *tool)
+{
+ if (!session->auxtrace)
+ return 0;
+
+ return session->auxtrace->flush_events(session, tool);
+}
+
+static inline void auxtrace__free_events(struct perf_session *session)
+{
+ if (!session->auxtrace)
+ return;
+
+ return session->auxtrace->free_events(session);
+}
+
+static inline void auxtrace__free(struct perf_session *session)
+{
+ if (!session->auxtrace)
+ return;
+
+ return session->auxtrace->free(session);
+}
+
+#else
+
+static inline struct auxtrace_record *
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
+ int *err __maybe_unused)
+{
+ *err = 0;
+ return NULL;
+}
+
+static inline
+void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
+{
+}
+
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+ struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ perf_event__handler_t process __maybe_unused)
+{
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ struct record_opts *opts __maybe_unused)
+{
+ return 0;
+}
+
+#define perf_event__process_auxtrace_info 0
+#define perf_event__process_auxtrace 0
+#define perf_event__process_auxtrace_error 0
+
+static inline
+void perf_session__auxtrace_error_inc(struct perf_session *session
+ __maybe_unused,
+ union perf_event *event
+ __maybe_unused)
+{
+}
+
+static inline
+void events_stats__auxtrace_error_warn(const struct events_stats *stats
+ __maybe_unused)
+{
+}
+
+static inline
+int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+ struct record_opts *opts __maybe_unused,
+ const char *str)
+{
+ if (!str)
+ return 0;
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
+int auxtrace__process_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+int auxtrace__flush_events(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+void auxtrace__free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
+{
+}
+
+static inline
+void auxtrace__free(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_index__write(int fd __maybe_unused,
+ struct list_head *head __maybe_unused)
+{
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_index__process(int fd __maybe_unused,
+ u64 size __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ bool needs_swap __maybe_unused)
+{
+ return -EINVAL;
+}
+
+static inline
+void auxtrace_index__free(struct list_head *head __maybe_unused)
+{
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct perf_evlist *evlist, int idx,
+ bool per_cpu);
+
+#endif
+
+#endif
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 61867df..d909459 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -43,6 +43,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
if (al.map != NULL)
al.map->dso->hit = 1;
+ thread__put(thread);
return 0;
}
@@ -59,8 +60,10 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
event->fork.ppid, event->fork.ptid);
- if (thread)
+ if (thread) {
machine__remove_thread(machine, thread);
+ thread__put(thread);
+ }
return 0;
}
@@ -90,6 +93,38 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf)
return raw - build_id;
}
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
+{
+ char notes[PATH_MAX];
+ u8 build_id[BUILD_ID_SIZE];
+ int ret;
+
+ if (!root_dir)
+ root_dir = "";
+
+ scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+ ret = sysfs__read_build_id(notes, build_id, sizeof(build_id));
+ if (ret < 0)
+ return ret;
+
+ return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+}
+
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
+{
+ u8 build_id[BUILD_ID_SIZE];
+ int ret;
+
+ ret = filename__read_build_id(pathname, build_id, sizeof(build_id));
+ if (ret < 0)
+ return ret;
+ else if (ret != sizeof(build_id))
+ return -EINVAL;
+
+ return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+}
+
/* asnprintf consolidates asprintf and snprintf */
static int asnprintf(char **strp, size_t size, const char *fmt, ...)
{
@@ -121,7 +156,7 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size)
char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
{
- char build_id_hex[BUILD_ID_SIZE * 2 + 1];
+ char build_id_hex[SBUILD_ID_SIZE];
if (!dso->has_build_id)
return NULL;
@@ -159,15 +194,20 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id,
return write_padded(fd, name, name_len + 1, len);
}
-static int __dsos__write_buildid_table(struct list_head *head,
- struct machine *machine,
- pid_t pid, u16 misc, int fd)
+static int machine__write_buildid_table(struct machine *machine, int fd)
{
+ int err = 0;
char nm[PATH_MAX];
struct dso *pos;
+ u16 kmisc = PERF_RECORD_MISC_KERNEL,
+ umisc = PERF_RECORD_MISC_USER;
+
+ if (!machine__is_host(machine)) {
+ kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+ umisc = PERF_RECORD_MISC_GUEST_USER;
+ }
- dsos__for_each_with_build_id(pos, head) {
- int err;
+ dsos__for_each_with_build_id(pos, &machine->dsos.head) {
const char *name;
size_t name_len;
@@ -186,32 +226,12 @@ static int __dsos__write_buildid_table(struct list_head *head,
name_len = pos->long_name_len + 1;
}
- err = write_buildid(name, name_len, pos->build_id,
- pid, misc, fd);
+ err = write_buildid(name, name_len, pos->build_id, machine->pid,
+ pos->kernel ? kmisc : umisc, fd);
if (err)
- return err;
- }
-
- return 0;
-}
-
-static int machine__write_buildid_table(struct machine *machine, int fd)
-{
- int err;
- u16 kmisc = PERF_RECORD_MISC_KERNEL,
- umisc = PERF_RECORD_MISC_USER;
-
- if (!machine__is_host(machine)) {
- kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
- umisc = PERF_RECORD_MISC_GUEST_USER;
+ break;
}
- err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine,
- machine->pid, kmisc, fd);
- if (err == 0)
- err = __dsos__write_buildid_table(&machine->user_dsos.head,
- machine, machine->pid, umisc,
- fd);
return err;
}
@@ -244,13 +264,7 @@ static int __dsos__hit_all(struct list_head *head)
static int machine__hit_all_dsos(struct machine *machine)
{
- int err;
-
- err = __dsos__hit_all(&machine->kernel_dsos.head);
- if (err)
- return err;
-
- return __dsos__hit_all(&machine->user_dsos.head);
+ return __dsos__hit_all(&machine->dsos.head);
}
int dsos__hit_all(struct perf_session *session)
@@ -309,7 +323,7 @@ int build_id_cache__list_build_ids(const char *pathname,
struct dirent *d;
int ret = 0;
- list = strlist__new(true, NULL);
+ list = strlist__new(NULL, NULL);
dir_name = build_id_cache__dirname_from_path(pathname, false, false);
if (!list || !dir_name) {
ret = -ENOMEM;
@@ -402,7 +416,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
const char *name, bool is_kallsyms,
bool is_vdso)
{
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
build_id__sprintf(build_id, build_id_size, sbuild_id);
@@ -490,9 +504,7 @@ static int __dsos__cache_build_ids(struct list_head *head,
static int machine__cache_build_ids(struct machine *machine)
{
- int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine);
- ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine);
- return ret;
+ return __dsos__cache_build_ids(&machine->dsos.head, machine);
}
int perf_session__cache_build_ids(struct perf_session *session)
@@ -517,11 +529,7 @@ int perf_session__cache_build_ids(struct perf_session *session)
static bool machine__read_build_ids(struct machine *machine, bool with_hits)
{
- bool ret;
-
- ret = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits);
- ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits);
- return ret;
+ return __dsos__read_build_ids(&machine->dsos.head, with_hits);
}
bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 8501122..27a14a8 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -1,7 +1,8 @@
#ifndef PERF_BUILD_ID_H_
#define PERF_BUILD_ID_H_ 1
-#define BUILD_ID_SIZE 20
+#define BUILD_ID_SIZE 20
+#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)
#include "tool.h"
#include "strlist.h"
@@ -11,6 +12,9 @@ extern struct perf_tool build_id__mark_dso_hit_ops;
struct dso;
int build_id__sprintf(const u8 *build_id, int len, char *bf);
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+
char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index fbcca21..c861373 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -30,7 +30,6 @@ extern const char *perf_config_dirname(const char *, const char *);
/* pager.c */
extern void setup_pager(void);
-extern const char *pager_program;
extern int pager_in_use(void);
extern int pager_use_color;
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9f643ee..773fe13 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -25,96 +25,9 @@
__thread struct callchain_cursor callchain_cursor;
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-static int get_stack_size(const char *str, unsigned long *_size)
-{
- char *endptr;
- unsigned long size;
- unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
-
- size = strtoul(str, &endptr, 0);
-
- do {
- if (*endptr)
- break;
-
- size = round_up(size, sizeof(u64));
- if (!size || size > max_size)
- break;
-
- *_size = size;
- return 0;
-
- } while (0);
-
- pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
- max_size, str);
- return -1;
-}
-#endif /* HAVE_DWARF_UNWIND_SUPPORT */
-
-int parse_callchain_record_opt(const char *arg)
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
- char *tok, *name, *saveptr = NULL;
- char *buf;
- int ret = -1;
-
- /* We need buffer that we know we can write to. */
- buf = malloc(strlen(arg) + 1);
- if (!buf)
- return -ENOMEM;
-
- strcpy(buf, arg);
-
- tok = strtok_r((char *)buf, ",", &saveptr);
- name = tok ? : (char *)buf;
-
- do {
- /* Framepointer style */
- if (!strncmp(name, "fp", sizeof("fp"))) {
- if (!strtok_r(NULL, ",", &saveptr)) {
- callchain_param.record_mode = CALLCHAIN_FP;
- ret = 0;
- } else
- pr_err("callchain: No more arguments "
- "needed for --call-graph fp\n");
- break;
-
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
- /* Dwarf style */
- } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
- const unsigned long default_stack_dump_size = 8192;
-
- ret = 0;
- callchain_param.record_mode = CALLCHAIN_DWARF;
- callchain_param.dump_size = default_stack_dump_size;
-
- tok = strtok_r(NULL, ",", &saveptr);
- if (tok) {
- unsigned long size = 0;
-
- ret = get_stack_size(tok, &size);
- callchain_param.dump_size = size;
- }
-#endif /* HAVE_DWARF_UNWIND_SUPPORT */
- } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
- if (!strtok_r(NULL, ",", &saveptr)) {
- callchain_param.record_mode = CALLCHAIN_LBR;
- ret = 0;
- } else
- pr_err("callchain: No more arguments "
- "needed for --call-graph lbr\n");
- break;
- } else {
- pr_err("callchain: Unknown --call-graph option "
- "value: %s\n", arg);
- break;
- }
-
- } while (0);
-
- free(buf);
- return ret;
+ return parse_callchain_record(arg, param);
}
static int parse_callchain_mode(const char *value)
@@ -219,7 +132,7 @@ int perf_callchain_config(const char *var, const char *value)
var += sizeof("call-graph.") - 1;
if (!strcmp(var, "record-mode"))
- return parse_callchain_record_opt(value);
+ return parse_callchain_record_opt(value, &callchain_param);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
if (!strcmp(var, "dump-size")) {
unsigned long size = 0;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 6033a0a..acee2b3 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -72,6 +72,10 @@ extern struct callchain_param callchain_param;
struct callchain_list {
u64 ip;
struct map_symbol ms;
+ struct /* for TUI */ {
+ bool unfolded;
+ bool has_children;
+ };
char *srcline;
struct list_head list;
};
@@ -173,7 +177,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
bool hide_unresolved);
extern const char record_callchain_help[];
-int parse_callchain_record_opt(const char *arg);
+extern int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
int parse_callchain_report_opt(const char *arg);
int perf_callchain_config(const char *var, const char *value);
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 88f7be3..32e12ecf 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -115,23 +115,19 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
goto found;
n++;
}
- if (cgrp->refcnt == 0)
+ if (atomic_read(&cgrp->refcnt) == 0)
free(cgrp);
return -1;
found:
- cgrp->refcnt++;
+ atomic_inc(&cgrp->refcnt);
counter->cgrp = cgrp;
return 0;
}
void close_cgroup(struct cgroup_sel *cgrp)
{
- if (!cgrp)
- return;
-
- /* XXX: not reentrant */
- if (--cgrp->refcnt == 0) {
+ if (cgrp && atomic_dec_and_test(&cgrp->refcnt)) {
close(cgrp->fd);
zfree(&cgrp->name);
free(cgrp);
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index 89acd6d..b4b8cb4 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -1,12 +1,14 @@
#ifndef __CGROUP_H__
#define __CGROUP_H__
+#include <linux/atomic.h>
+
struct option;
struct cgroup_sel {
char *name;
int fd;
- int refcnt;
+ atomic_t refcnt;
};
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index 85b5238..2babdda 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -7,11 +7,15 @@
static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
+#ifdef __GLIBC_PREREQ
+#if !__GLIBC_PREREQ(2, 6)
int __weak sched_getcpu(void)
{
errno = ENOSYS;
return -1;
}
+#endif
+#endif
static int perf_flag_probe(void)
{
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
index 68888c2..3bee677 100644
--- a/tools/perf/util/cloexec.h
+++ b/tools/perf/util/cloexec.h
@@ -4,7 +4,7 @@
unsigned long perf_event_open_cloexec_flag(void);
#ifdef __GLIBC_PREREQ
-#if !__GLIBC_PREREQ(2, 6)
+#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__)
extern int sched_getcpu(void) __THROW;
#endif
#endif
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 55355b3..9b95654 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -67,8 +67,9 @@ static int __color_vsnprintf(char *bf, size_t size, const char *color,
return r;
}
+/* Colors are not included in return value */
static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
- va_list args, const char *trail)
+ va_list args)
{
int r = 0;
@@ -83,12 +84,10 @@ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
}
if (perf_use_color_default && *color)
- r += fprintf(fp, "%s", color);
+ fprintf(fp, "%s", color);
r += vfprintf(fp, fmt, args);
if (perf_use_color_default && *color)
- r += fprintf(fp, "%s", PERF_COLOR_RESET);
- if (trail)
- r += fprintf(fp, "%s", trail);
+ fprintf(fp, "%s", PERF_COLOR_RESET);
return r;
}
@@ -100,7 +99,7 @@ int color_vsnprintf(char *bf, size_t size, const char *color,
int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args)
{
- return __color_vfprintf(fp, color, fmt, args, NULL);
+ return __color_vfprintf(fp, color, fmt, args);
}
int color_snprintf(char *bf, size_t size, const char *color,
@@ -126,16 +125,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
return r;
}
-int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
-{
- va_list args;
- int r;
- va_start(args, fmt);
- r = __color_vfprintf(fp, color, fmt, args, "\n");
- va_end(args);
- return r;
-}
-
/*
* This function splits the buffer by newlines and colors the lines individually.
*
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 38146f9..a93997f 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -35,7 +35,6 @@ int color_vsnprintf(char *bf, size_t size, const char *color,
int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args);
int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
-int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
int value_color_snprintf(char *bf, size_t size, const char *fmt, double value);
int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index b2bb59d..21b7ff3 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -2,24 +2,27 @@
#include "util.h"
#include <stdlib.h>
#include <stdio.h>
+#include <linux/atomic.h>
struct comm_str {
char *str;
struct rb_node rb_node;
- int ref;
+ atomic_t refcnt;
};
/* Should perhaps be moved to struct machine */
static struct rb_root comm_str_root;
-static void comm_str__get(struct comm_str *cs)
+static struct comm_str *comm_str__get(struct comm_str *cs)
{
- cs->ref++;
+ if (cs)
+ atomic_inc(&cs->refcnt);
+ return cs;
}
static void comm_str__put(struct comm_str *cs)
{
- if (!--cs->ref) {
+ if (cs && atomic_dec_and_test(&cs->refcnt)) {
rb_erase(&cs->rb_node, &comm_str_root);
zfree(&cs->str);
free(cs);
@@ -40,6 +43,8 @@ static struct comm_str *comm_str__alloc(const char *str)
return NULL;
}
+ atomic_set(&cs->refcnt, 0);
+
return cs;
}
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index e18f653..2e452ac 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -12,6 +12,7 @@
#include "cache.h"
#include "exec_cmd.h"
#include "util/hist.h" /* perf_hist_config */
+#include "util/llvm-utils.h" /* perf_llvm_config */
#define MAXNAME (256)
@@ -408,6 +409,9 @@ int perf_default_config(const char *var, const char *value,
if (!prefixcmp(var, "call-graph."))
return perf_callchain_config(var, value);
+ if (!prefixcmp(var, "llvm."))
+ return perf_llvm_config(var, value);
+
/* Add other config variables here. */
return 0;
}
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
new file mode 100644
index 0000000..e3fde31
--- /dev/null
+++ b/tools/perf/util/counts.c
@@ -0,0 +1,52 @@
+#include <stdlib.h>
+#include "evsel.h"
+#include "counts.h"
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads)
+{
+ struct perf_counts *counts = zalloc(sizeof(*counts));
+
+ if (counts) {
+ struct xyarray *values;
+
+ values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values));
+ if (!values) {
+ free(counts);
+ return NULL;
+ }
+
+ counts->values = values;
+ }
+
+ return counts;
+}
+
+void perf_counts__delete(struct perf_counts *counts)
+{
+ if (counts) {
+ xyarray__delete(counts->values);
+ free(counts);
+ }
+}
+
+static void perf_counts__reset(struct perf_counts *counts)
+{
+ xyarray__reset(counts->values);
+}
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel)
+{
+ perf_counts__reset(evsel->counts);
+}
+
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+ evsel->counts = perf_counts__new(ncpus, nthreads);
+ return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+ perf_counts__delete(evsel->counts);
+ evsel->counts = NULL;
+}
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
new file mode 100644
index 0000000..34d8baa
--- /dev/null
+++ b/tools/perf/util/counts.h
@@ -0,0 +1,37 @@
+#ifndef __PERF_COUNTS_H
+#define __PERF_COUNTS_H
+
+#include "xyarray.h"
+
+struct perf_counts_values {
+ union {
+ struct {
+ u64 val;
+ u64 ena;
+ u64 run;
+ };
+ u64 values[3];
+ };
+};
+
+struct perf_counts {
+ s8 scaled;
+ struct perf_counts_values aggr;
+ struct xyarray *values;
+};
+
+
+static inline struct perf_counts_values*
+perf_counts(struct perf_counts *counts, int cpu, int thread)
+{
+ return xyarray__entry(counts->values, cpu, thread);
+}
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads);
+void perf_counts__delete(struct perf_counts *counts);
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
+
+#endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index c4e55b7..3667e21 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -5,6 +5,7 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
+#include "asm/bug.h"
static struct cpu_map *cpu_map__default_new(void)
{
@@ -22,6 +23,7 @@ static struct cpu_map *cpu_map__default_new(void)
cpus->map[i] = i;
cpus->nr = nr_cpus;
+ atomic_set(&cpus->refcnt, 1);
}
return cpus;
@@ -35,6 +37,7 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
if (cpus != NULL) {
cpus->nr = nr_cpus;
memcpy(cpus->map, tmp_cpus, payload_size);
+ atomic_set(&cpus->refcnt, 1);
}
return cpus;
@@ -194,14 +197,32 @@ struct cpu_map *cpu_map__dummy_new(void)
if (cpus != NULL) {
cpus->nr = 1;
cpus->map[0] = -1;
+ atomic_set(&cpus->refcnt, 1);
}
return cpus;
}
-void cpu_map__delete(struct cpu_map *map)
+static void cpu_map__delete(struct cpu_map *map)
{
- free(map);
+ if (map) {
+ WARN_ONCE(atomic_read(&map->refcnt) != 0,
+ "cpu_map refcnt unbalanced\n");
+ free(map);
+ }
+}
+
+struct cpu_map *cpu_map__get(struct cpu_map *map)
+{
+ if (map)
+ atomic_inc(&map->refcnt);
+ return map;
+}
+
+void cpu_map__put(struct cpu_map *map)
+{
+ if (map && atomic_dec_and_test(&map->refcnt))
+ cpu_map__delete(map);
}
int cpu_map__get_socket(struct cpu_map *map, int idx)
@@ -263,6 +284,7 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
/* ensure we process id in increasing order */
qsort(c->map, c->nr, sizeof(int), cmp_ids);
+ atomic_set(&cpus->refcnt, 1);
*res = c;
return 0;
}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 61a6548..0af9cec 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -3,18 +3,19 @@
#include <stdio.h>
#include <stdbool.h>
+#include <linux/atomic.h>
#include "perf.h"
#include "util/debug.h"
struct cpu_map {
+ atomic_t refcnt;
int nr;
int map[];
};
struct cpu_map *cpu_map__new(const char *cpu_list);
struct cpu_map *cpu_map__dummy_new(void);
-void cpu_map__delete(struct cpu_map *map);
struct cpu_map *cpu_map__read(FILE *file);
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
int cpu_map__get_socket(struct cpu_map *map, int idx);
@@ -22,6 +23,9 @@ int cpu_map__get_core(struct cpu_map *map, int idx);
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
+struct cpu_map *cpu_map__get(struct cpu_map *map);
+void cpu_map__put(struct cpu_map *map);
+
static inline int cpu_map__socket(struct cpu_map *sock, int s)
{
if (!sock || s > sock->nr || s < 0)
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index dd17c9a..5bfc119 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -14,6 +14,7 @@
#include <babeltrace/ctf-writer/event.h>
#include <babeltrace/ctf-writer/event-types.h>
#include <babeltrace/ctf-writer/event-fields.h>
+#include <babeltrace/ctf-ir/utils.h>
#include <babeltrace/ctf/events.h>
#include <traceevent/event-parse.h>
#include "asm/bug.h"
@@ -38,12 +39,21 @@ struct evsel_priv {
struct bt_ctf_event_class *event_class;
};
+#define MAX_CPUS 4096
+
+struct ctf_stream {
+ struct bt_ctf_stream *stream;
+ int cpu;
+ u32 count;
+};
+
struct ctf_writer {
/* writer primitives */
- struct bt_ctf_writer *writer;
- struct bt_ctf_stream *stream;
- struct bt_ctf_stream_class *stream_class;
- struct bt_ctf_clock *clock;
+ struct bt_ctf_writer *writer;
+ struct ctf_stream **stream;
+ int stream_cnt;
+ struct bt_ctf_stream_class *stream_class;
+ struct bt_ctf_clock *clock;
/* data types */
union {
@@ -65,6 +75,9 @@ struct convert {
u64 events_size;
u64 events_count;
+
+ /* Ordered events configured queue size. */
+ u64 queue_size;
};
static int value_set(struct bt_ctf_field_type *type,
@@ -153,6 +166,43 @@ get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
return cw->data.u32;
}
+static unsigned long long adjust_signedness(unsigned long long value_int, int size)
+{
+ unsigned long long value_mask;
+
+ /*
+ * value_mask = (1 << (size * 8 - 1)) - 1.
+ * Directly set value_mask for code readers.
+ */
+ switch (size) {
+ case 1:
+ value_mask = 0x7fULL;
+ break;
+ case 2:
+ value_mask = 0x7fffULL;
+ break;
+ case 4:
+ value_mask = 0x7fffffffULL;
+ break;
+ case 8:
+ /*
+ * For 64 bit value, return it self. There is no need
+ * to fill high bit.
+ */
+ /* Fall through */
+ default:
+ /* BUG! */
+ return value_int;
+ }
+
+ /* If it is a positive value, don't adjust. */
+ if ((value_int & (~0ULL - value_mask)) == 0)
+ return value_int;
+
+ /* Fill upper part of value_int with 1 to make it a negative long long. */
+ return (value_int & value_mask) | ~value_mask;
+}
+
static int add_tracepoint_field_value(struct ctf_writer *cw,
struct bt_ctf_event_class *event_class,
struct bt_ctf_event *event,
@@ -164,7 +214,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
struct bt_ctf_field *field;
const char *name = fmtf->name;
void *data = sample->raw_data;
- unsigned long long value_int;
unsigned long flags = fmtf->flags;
unsigned int n_items;
unsigned int i;
@@ -172,6 +221,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
unsigned int len;
int ret;
+ name = fmtf->alias;
offset = fmtf->offset;
len = fmtf->size;
if (flags & FIELD_IS_STRING)
@@ -208,11 +258,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
type = get_tracepoint_field_type(cw, fmtf);
for (i = 0; i < n_items; i++) {
- if (!(flags & FIELD_IS_STRING))
- value_int = pevent_read_number(
- fmtf->event->pevent,
- data + offset + i * len, len);
-
if (flags & FIELD_IS_ARRAY)
field = bt_ctf_field_array_get_field(array_field, i);
else
@@ -226,12 +271,21 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
if (flags & FIELD_IS_STRING)
ret = bt_ctf_field_string_set_value(field,
data + offset + i * len);
- else if (!(flags & FIELD_IS_SIGNED))
- ret = bt_ctf_field_unsigned_integer_set_value(
- field, value_int);
- else
- ret = bt_ctf_field_signed_integer_set_value(
- field, value_int);
+ else {
+ unsigned long long value_int;
+
+ value_int = pevent_read_number(
+ fmtf->event->pevent,
+ data + offset + i * len, len);
+
+ if (!(flags & FIELD_IS_SIGNED))
+ ret = bt_ctf_field_unsigned_integer_set_value(
+ field, value_int);
+ else
+ ret = bt_ctf_field_signed_integer_set_value(
+ field, adjust_signedness(value_int, len));
+ }
+
if (ret) {
pr_err("failed to set file value %s\n", name);
goto err_put_field;
@@ -346,12 +400,6 @@ static int add_generic_values(struct ctf_writer *cw,
return -1;
}
- if (type & PERF_SAMPLE_CPU) {
- ret = value_set_u32(cw, event, "perf_cpu", sample->cpu);
- if (ret)
- return -1;
- }
-
if (type & PERF_SAMPLE_PERIOD) {
ret = value_set_u64(cw, event, "perf_period", sample->period);
if (ret)
@@ -381,6 +429,129 @@ static int add_generic_values(struct ctf_writer *cw,
return 0;
}
+static int ctf_stream__flush(struct ctf_stream *cs)
+{
+ int err = 0;
+
+ if (cs) {
+ err = bt_ctf_stream_flush(cs->stream);
+ if (err)
+ pr_err("CTF stream %d flush failed\n", cs->cpu);
+
+ pr("Flush stream for cpu %d (%u samples)\n",
+ cs->cpu, cs->count);
+
+ cs->count = 0;
+ }
+
+ return err;
+}
+
+static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu)
+{
+ struct ctf_stream *cs;
+ struct bt_ctf_field *pkt_ctx = NULL;
+ struct bt_ctf_field *cpu_field = NULL;
+ struct bt_ctf_stream *stream = NULL;
+ int ret;
+
+ cs = zalloc(sizeof(*cs));
+ if (!cs) {
+ pr_err("Failed to allocate ctf stream\n");
+ return NULL;
+ }
+
+ stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class);
+ if (!stream) {
+ pr_err("Failed to create CTF stream\n");
+ goto out;
+ }
+
+ pkt_ctx = bt_ctf_stream_get_packet_context(stream);
+ if (!pkt_ctx) {
+ pr_err("Failed to obtain packet context\n");
+ goto out;
+ }
+
+ cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id");
+ bt_ctf_field_put(pkt_ctx);
+ if (!cpu_field) {
+ pr_err("Failed to obtain cpu field\n");
+ goto out;
+ }
+
+ ret = bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu);
+ if (ret) {
+ pr_err("Failed to update CPU number\n");
+ goto out;
+ }
+
+ bt_ctf_field_put(cpu_field);
+
+ cs->cpu = cpu;
+ cs->stream = stream;
+ return cs;
+
+out:
+ if (cpu_field)
+ bt_ctf_field_put(cpu_field);
+ if (stream)
+ bt_ctf_stream_put(stream);
+
+ free(cs);
+ return NULL;
+}
+
+static void ctf_stream__delete(struct ctf_stream *cs)
+{
+ if (cs) {
+ bt_ctf_stream_put(cs->stream);
+ free(cs);
+ }
+}
+
+static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu)
+{
+ struct ctf_stream *cs = cw->stream[cpu];
+
+ if (!cs) {
+ cs = ctf_stream__create(cw, cpu);
+ cw->stream[cpu] = cs;
+ }
+
+ return cs;
+}
+
+static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ int cpu = 0;
+
+ if (evsel->attr.sample_type & PERF_SAMPLE_CPU)
+ cpu = sample->cpu;
+
+ if (cpu > cw->stream_cnt) {
+ pr_err("Event was recorded for CPU %d, limit is at %d.\n",
+ cpu, cw->stream_cnt);
+ cpu = 0;
+ }
+
+ return cpu;
+}
+
+#define STREAM_FLUSH_COUNT 100000
+
+/*
+ * Currently we have no other way to determine the
+ * time for the stream flush other than keep track
+ * of the number of events and check it against
+ * threshold.
+ */
+static bool is_flush_needed(struct ctf_stream *cs)
+{
+ return cs->count >= STREAM_FLUSH_COUNT;
+}
+
static int process_sample_event(struct perf_tool *tool,
union perf_event *_event __maybe_unused,
struct perf_sample *sample,
@@ -390,6 +561,7 @@ static int process_sample_event(struct perf_tool *tool,
struct convert *c = container_of(tool, struct convert, tool);
struct evsel_priv *priv = evsel->priv;
struct ctf_writer *cw = &c->writer;
+ struct ctf_stream *cs;
struct bt_ctf_event_class *event_class;
struct bt_ctf_event *event;
int ret;
@@ -424,9 +596,93 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
- bt_ctf_stream_append_event(cw->stream, event);
+ cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
+ if (cs) {
+ if (is_flush_needed(cs))
+ ctf_stream__flush(cs);
+
+ cs->count++;
+ bt_ctf_stream_append_event(cs->stream, event);
+ }
+
bt_ctf_event_put(event);
- return 0;
+ return cs ? 0 : -1;
+}
+
+/* If dup < 0, add a prefix. Else, add _dupl_X suffix. */
+static char *change_name(char *name, char *orig_name, int dup)
+{
+ char *new_name = NULL;
+ size_t len;
+
+ if (!name)
+ name = orig_name;
+
+ if (dup >= 10)
+ goto out;
+ /*
+ * Add '_' prefix to potential keywork. According to
+ * Mathieu Desnoyers (https://lkml.org/lkml/2015/1/23/652),
+ * futher CTF spec updating may require us to use '$'.
+ */
+ if (dup < 0)
+ len = strlen(name) + sizeof("_");
+ else
+ len = strlen(orig_name) + sizeof("_dupl_X");
+
+ new_name = malloc(len);
+ if (!new_name)
+ goto out;
+
+ if (dup < 0)
+ snprintf(new_name, len, "_%s", name);
+ else
+ snprintf(new_name, len, "%s_dupl_%d", orig_name, dup);
+
+out:
+ if (name != orig_name)
+ free(name);
+ return new_name;
+}
+
+static int event_class_add_field(struct bt_ctf_event_class *event_class,
+ struct bt_ctf_field_type *type,
+ struct format_field *field)
+{
+ struct bt_ctf_field_type *t = NULL;
+ char *name;
+ int dup = 1;
+ int ret;
+
+ /* alias was already assigned */
+ if (field->alias != field->name)
+ return bt_ctf_event_class_add_field(event_class, type,
+ (char *)field->alias);
+
+ name = field->name;
+
+ /* If 'name' is a keywork, add prefix. */
+ if (bt_ctf_validate_identifier(name))
+ name = change_name(name, field->name, -1);
+
+ if (!name) {
+ pr_err("Failed to fix invalid identifier.");
+ return -1;
+ }
+ while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) {
+ bt_ctf_field_type_put(t);
+ name = change_name(name, field->name, dup++);
+ if (!name) {
+ pr_err("Failed to create dup name for '%s'\n", field->name);
+ return -1;
+ }
+ }
+
+ ret = bt_ctf_event_class_add_field(event_class, type, name);
+ if (!ret)
+ field->alias = name;
+
+ return ret;
}
static int add_tracepoint_fields_types(struct ctf_writer *cw,
@@ -457,14 +713,14 @@ static int add_tracepoint_fields_types(struct ctf_writer *cw,
if (flags & FIELD_IS_ARRAY)
type = bt_ctf_field_type_array_create(type, field->arraylen);
- ret = bt_ctf_event_class_add_field(event_class, type,
- field->name);
+ ret = event_class_add_field(event_class, type, field);
if (flags & FIELD_IS_ARRAY)
bt_ctf_field_type_put(type);
if (ret) {
- pr_err("Failed to add field '%s\n", field->name);
+ pr_err("Failed to add field '%s': %d\n",
+ field->name, ret);
return -1;
}
}
@@ -508,7 +764,7 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
do { \
pr2(" field '%s'\n", n); \
if (bt_ctf_event_class_add_field(cl, t, n)) { \
- pr_err("Failed to add field '%s;\n", n); \
+ pr_err("Failed to add field '%s';\n", n); \
return -1; \
} \
} while (0)
@@ -528,9 +784,6 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
if (type & PERF_SAMPLE_STREAM_ID)
ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
- if (type & PERF_SAMPLE_CPU)
- ADD_FIELD(event_class, cw->data.u32, "perf_cpu");
-
if (type & PERF_SAMPLE_PERIOD)
ADD_FIELD(event_class, cw->data.u64, "perf_period");
@@ -604,6 +857,39 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session)
return 0;
}
+static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
+{
+ struct ctf_stream **stream;
+ struct perf_header *ph = &session->header;
+ int ncpus;
+
+ /*
+ * Try to get the number of cpus used in the data file,
+ * if not present fallback to the MAX_CPUS.
+ */
+ ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
+
+ stream = zalloc(sizeof(*stream) * ncpus);
+ if (!stream) {
+ pr_err("Failed to allocate streams.\n");
+ return -ENOMEM;
+ }
+
+ cw->stream = stream;
+ cw->stream_cnt = ncpus;
+ return 0;
+}
+
+static void free_streams(struct ctf_writer *cw)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < cw->stream_cnt; cpu++)
+ ctf_stream__delete(cw->stream[cpu]);
+
+ free(cw->stream);
+}
+
static int ctf_writer__setup_env(struct ctf_writer *cw,
struct perf_session *session)
{
@@ -713,7 +999,7 @@ static void ctf_writer__cleanup(struct ctf_writer *cw)
ctf_writer__cleanup_data(cw);
bt_ctf_clock_put(cw->clock);
- bt_ctf_stream_put(cw->stream);
+ free_streams(cw);
bt_ctf_stream_class_put(cw->stream_class);
bt_ctf_writer_put(cw->writer);
@@ -725,8 +1011,9 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
{
struct bt_ctf_writer *writer;
struct bt_ctf_stream_class *stream_class;
- struct bt_ctf_stream *stream;
struct bt_ctf_clock *clock;
+ struct bt_ctf_field_type *pkt_ctx_type;
+ int ret;
/* CTF writer */
writer = bt_ctf_writer_create(path);
@@ -767,14 +1054,15 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
if (ctf_writer__init_data(cw))
goto err_cleanup;
- /* CTF stream instance */
- stream = bt_ctf_writer_create_stream(writer, stream_class);
- if (!stream) {
- pr("Failed to create CTF stream.\n");
+ /* Add cpu_id for packet context */
+ pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class);
+ if (!pkt_ctx_type)
goto err_cleanup;
- }
- cw->stream = stream;
+ ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id");
+ bt_ctf_field_type_put(pkt_ctx_type);
+ if (ret)
+ goto err_cleanup;
/* CTF clock writer setup */
if (bt_ctf_writer_add_clock(writer, clock)) {
@@ -791,6 +1079,28 @@ err:
return -1;
}
+static int ctf_writer__flush_streams(struct ctf_writer *cw)
+{
+ int cpu, ret = 0;
+
+ for (cpu = 0; cpu < cw->stream_cnt && !ret; cpu++)
+ ret = ctf_stream__flush(cw->stream[cpu]);
+
+ return ret;
+}
+
+static int convert__config(const char *var, const char *value, void *cb)
+{
+ struct convert *c = cb;
+
+ if (!strcmp(var, "convert.queue-size")) {
+ c->queue_size = perf_config_u64(var, value);
+ return 0;
+ }
+
+ return perf_default_config(var, value, cb);
+}
+
int bt_convert__perf2ctf(const char *input, const char *path, bool force)
{
struct perf_session *session;
@@ -817,6 +1127,8 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
struct ctf_writer *cw = &c.writer;
int err = -1;
+ perf_config(convert__config, &c);
+
/* CTF writer */
if (ctf_writer__init(cw, path))
return -1;
@@ -826,6 +1138,11 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
if (!session)
goto free_writer;
+ if (c.queue_size) {
+ ordered_events__set_alloc_size(&session->ordered_events,
+ c.queue_size);
+ }
+
/* CTF writer env/clock setup */
if (ctf_writer__setup_env(cw, session))
goto free_session;
@@ -834,9 +1151,14 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
if (setup_events(cw, session))
goto free_session;
+ if (setup_streams(cw, session))
+ goto free_session;
+
err = perf_session__process_events(session);
if (!err)
- err = bt_ctf_stream_flush(cw->stream);
+ err = ctf_writer__flush_streams(cw);
+ else
+ pr_err("Error during conversion.\n");
fprintf(stderr,
"[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
@@ -847,11 +1169,15 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
(double) c.events_size / 1024.0 / 1024.0,
c.events_count);
- /* its all good */
-free_session:
perf_session__delete(session);
+ ctf_writer__cleanup(cw);
+
+ return err;
+free_session:
+ perf_session__delete(session);
free_writer:
ctf_writer__cleanup(cw);
+ pr_err("Error during conversion setup.\n");
return err;
}
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index bb39a3f..1c9689e 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -122,6 +122,7 @@ int db_export__machine(struct db_export *dbe, struct machine *machine)
int db_export__thread(struct db_export *dbe, struct thread *thread,
struct machine *machine, struct comm *comm)
{
+ struct thread *main_thread;
u64 main_thread_db_id = 0;
int err;
@@ -131,8 +132,6 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
thread->db_id = ++dbe->thread_last_db_id;
if (thread->pid_ != -1) {
- struct thread *main_thread;
-
if (thread->pid_ == thread->tid) {
main_thread = thread;
} else {
@@ -144,14 +143,16 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
err = db_export__thread(dbe, main_thread, machine,
comm);
if (err)
- return err;
+ goto out_put;
if (comm) {
err = db_export__comm_thread(dbe, comm, thread);
if (err)
- return err;
+ goto out_put;
}
}
main_thread_db_id = main_thread->db_id;
+ if (main_thread != thread)
+ thread__put(main_thread);
}
if (dbe->export_thread)
@@ -159,6 +160,10 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
machine);
return 0;
+
+out_put:
+ thread__put(main_thread);
+ return err;
}
int db_export__comm(struct db_export *dbe, struct comm *comm,
@@ -229,7 +234,7 @@ int db_export__symbol(struct db_export *dbe, struct symbol *sym,
static struct thread *get_main_thread(struct machine *machine, struct thread *thread)
{
if (thread->pid_ == thread->tid)
- return thread;
+ return thread__get(thread);
if (thread->pid_ == -1)
return NULL;
@@ -309,12 +314,12 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
err = db_export__thread(dbe, thread, al->machine, comm);
if (err)
- return err;
+ goto out_put;
if (comm) {
err = db_export__comm(dbe, comm, main_thread);
if (err)
- return err;
+ goto out_put;
es.comm_db_id = comm->db_id;
}
@@ -322,7 +327,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
if (err)
- return err;
+ goto out_put;
if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
sample_addr_correlates_sym(&evsel->attr)) {
@@ -332,20 +337,22 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
&es.addr_sym_db_id, &es.addr_offset);
if (err)
- return err;
+ goto out_put;
if (dbe->crp) {
err = thread_stack__process(thread, comm, sample, al,
&addr_al, es.db_id,
dbe->crp);
if (err)
- return err;
+ goto out_put;
}
}
if (dbe->export_sample)
- return dbe->export_sample(dbe, &es);
+ err = dbe->export_sample(dbe, &es);
- return 0;
+out_put:
+ thread__put(main_thread);
+ return err;
}
static struct {
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 2da5581..86d9c73 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
return ret;
}
+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+ return _eprintf(level, var, fmt, args);
+}
+
int eprintf(int level, int var, const char *fmt, ...)
{
va_list args;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index caac2fd..8b9a088 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);
int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
+int veprintf(int level, int var, const char *fmt, va_list args);
int perf_debug_option(const char *str);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index fc0ddd5..7c0c083 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -4,6 +4,7 @@
#include "symbol.h"
#include "dso.h"
#include "machine.h"
+#include "auxtrace.h"
#include "util.h"
#include "debug.h"
@@ -165,12 +166,28 @@ bool is_supported_compression(const char *ext)
return false;
}
-bool is_kernel_module(const char *pathname)
+bool is_kernel_module(const char *pathname, int cpumode)
{
struct kmod_path m;
-
- if (kmod_path__parse(&m, pathname))
- return NULL;
+ int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ WARN_ONCE(mode != cpumode,
+ "Internal error: passing unmasked cpumode (%x) to is_kernel_module",
+ cpumode);
+
+ switch (mode) {
+ case PERF_RECORD_MISC_USER:
+ case PERF_RECORD_MISC_HYPERVISOR:
+ case PERF_RECORD_MISC_GUEST_USER:
+ return false;
+ /* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */
+ default:
+ if (kmod_path__parse(&m, pathname)) {
+ pr_err("Failed to check whether %s is a kernel module or not. Assume it is.",
+ pathname);
+ return true;
+ }
+ }
return m.kmod;
}
@@ -214,12 +231,33 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
{
const char *name = strrchr(path, '/');
const char *ext = strrchr(path, '.');
+ bool is_simple_name = false;
memset(m, 0x0, sizeof(*m));
name = name ? name + 1 : path;
+ /*
+ * '.' is also a valid character for module name. For example:
+ * [aaa.bbb] is a valid module name. '[' should have higher
+ * priority than '.ko' suffix.
+ *
+ * The kernel names are from machine__mmap_name. Such
+ * name should belong to kernel itself, not kernel module.
+ */
+ if (name[0] == '[') {
+ is_simple_name = true;
+ if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
+ (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
+ (strncmp(name, "[vdso]", 6) == 0) ||
+ (strncmp(name, "[vsyscall]", 10) == 0)) {
+ m->kmod = false;
+
+ } else
+ m->kmod = true;
+ }
+
/* No extension, just return name. */
- if (ext == NULL) {
+ if ((ext == NULL) || is_simple_name) {
if (alloc_name) {
m->name = strdup(name);
return m->name ? 0 : -ENOMEM;
@@ -264,6 +302,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
*/
static LIST_HEAD(dso__data_open);
static long dso__data_open_cnt;
+static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
static void dso__list_add(struct dso *dso)
{
@@ -433,18 +472,12 @@ static void check_data_close(void)
*/
void dso__data_close(struct dso *dso)
{
+ pthread_mutex_lock(&dso__data_open_lock);
close_dso(dso);
+ pthread_mutex_unlock(&dso__data_open_lock);
}
-/**
- * dso__data_fd - Get dso's data file descriptor
- * @dso: dso object
- * @machine: machine object
- *
- * External interface to find dso's file, open it and
- * returns file descriptor.
- */
-int dso__data_fd(struct dso *dso, struct machine *machine)
+static void try_to_open_dso(struct dso *dso, struct machine *machine)
{
enum dso_binary_type binary_type_data[] = {
DSO_BINARY_TYPE__BUILD_ID_CACHE,
@@ -453,11 +486,8 @@ int dso__data_fd(struct dso *dso, struct machine *machine)
};
int i = 0;
- if (dso->data.status == DSO_DATA_STATUS_ERROR)
- return -1;
-
if (dso->data.fd >= 0)
- goto out;
+ return;
if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) {
dso->data.fd = open_dso(dso, machine);
@@ -477,10 +507,38 @@ out:
dso->data.status = DSO_DATA_STATUS_OK;
else
dso->data.status = DSO_DATA_STATUS_ERROR;
+}
+
+/**
+ * dso__data_get_fd - Get dso's data file descriptor
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * External interface to find dso's file, open it and
+ * returns file descriptor. It should be paired with
+ * dso__data_put_fd() if it returns non-negative value.
+ */
+int dso__data_get_fd(struct dso *dso, struct machine *machine)
+{
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ if (pthread_mutex_lock(&dso__data_open_lock) < 0)
+ return -1;
+
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0)
+ pthread_mutex_unlock(&dso__data_open_lock);
return dso->data.fd;
}
+void dso__data_put_fd(struct dso *dso __maybe_unused)
+{
+ pthread_mutex_unlock(&dso__data_open_lock);
+}
+
bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
{
u32 flag = 1 << by;
@@ -494,10 +552,12 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
}
static void
-dso_cache__free(struct rb_root *root)
+dso_cache__free(struct dso *dso)
{
+ struct rb_root *root = &dso->data.cache;
struct rb_node *next = rb_first(root);
+ pthread_mutex_lock(&dso->lock);
while (next) {
struct dso_cache *cache;
@@ -506,10 +566,12 @@ dso_cache__free(struct rb_root *root)
rb_erase(&cache->rb_node, root);
free(cache);
}
+ pthread_mutex_unlock(&dso->lock);
}
-static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset)
+static struct dso_cache *dso_cache__find(struct dso *dso, u64 offset)
{
+ const struct rb_root *root = &dso->data.cache;
struct rb_node * const *p = &root->rb_node;
const struct rb_node *parent = NULL;
struct dso_cache *cache;
@@ -528,17 +590,20 @@ static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset)
else
return cache;
}
+
return NULL;
}
-static void
-dso_cache__insert(struct rb_root *root, struct dso_cache *new)
+static struct dso_cache *
+dso_cache__insert(struct dso *dso, struct dso_cache *new)
{
+ struct rb_root *root = &dso->data.cache;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct dso_cache *cache;
u64 offset = new->offset;
+ pthread_mutex_lock(&dso->lock);
while (*p != NULL) {
u64 end;
@@ -550,10 +615,17 @@ dso_cache__insert(struct rb_root *root, struct dso_cache *new)
p = &(*p)->rb_left;
else if (offset >= end)
p = &(*p)->rb_right;
+ else
+ goto out;
}
rb_link_node(&new->rb_node, parent, p);
rb_insert_color(&new->rb_node, root);
+
+ cache = NULL;
+out:
+ pthread_mutex_unlock(&dso->lock);
+ return cache;
}
static ssize_t
@@ -568,19 +640,33 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
}
static ssize_t
-dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
+dso_cache__read(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
{
struct dso_cache *cache;
+ struct dso_cache *old;
ssize_t ret;
do {
u64 cache_offset;
- ret = -ENOMEM;
-
cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
if (!cache)
+ return -ENOMEM;
+
+ pthread_mutex_lock(&dso__data_open_lock);
+
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0) {
+ ret = -errno;
+ dso->data.status = DSO_DATA_STATUS_ERROR;
break;
+ }
cache_offset = offset & DSO__DATA_CACHE_MASK;
@@ -590,11 +676,20 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
cache->offset = cache_offset;
cache->size = ret;
- dso_cache__insert(&dso->data.cache, cache);
+ } while (0);
- ret = dso_cache__memcpy(cache, offset, data, size);
+ pthread_mutex_unlock(&dso__data_open_lock);
- } while (0);
+ if (ret > 0) {
+ old = dso_cache__insert(dso, cache);
+ if (old) {
+ /* we lose the race */
+ free(cache);
+ cache = old;
+ }
+
+ ret = dso_cache__memcpy(cache, offset, data, size);
+ }
if (ret <= 0)
free(cache);
@@ -602,16 +697,16 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
return ret;
}
-static ssize_t dso_cache_read(struct dso *dso, u64 offset,
- u8 *data, ssize_t size)
+static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
{
struct dso_cache *cache;
- cache = dso_cache__find(&dso->data.cache, offset);
+ cache = dso_cache__find(dso, offset);
if (cache)
return dso_cache__memcpy(cache, offset, data, size);
else
- return dso_cache__read(dso, offset, data, size);
+ return dso_cache__read(dso, machine, offset, data, size);
}
/*
@@ -619,7 +714,8 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset,
* in the rb_tree. Any read to already cached data is served
* by cached data.
*/
-static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
+static ssize_t cached_read(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
{
ssize_t r = 0;
u8 *p = data;
@@ -627,7 +723,7 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
do {
ssize_t ret;
- ret = dso_cache_read(dso, offset, p, size);
+ ret = dso_cache_read(dso, machine, offset, p, size);
if (ret < 0)
return ret;
@@ -647,21 +743,44 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
return r;
}
-static int data_file_size(struct dso *dso)
+static int data_file_size(struct dso *dso, struct machine *machine)
{
+ int ret = 0;
struct stat st;
char sbuf[STRERR_BUFSIZE];
- if (!dso->data.file_size) {
- if (fstat(dso->data.fd, &st)) {
- pr_err("dso mmap failed, fstat: %s\n",
- strerror_r(errno, sbuf, sizeof(sbuf)));
- return -1;
- }
- dso->data.file_size = st.st_size;
+ if (dso->data.file_size)
+ return 0;
+
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ pthread_mutex_lock(&dso__data_open_lock);
+
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0) {
+ ret = -errno;
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ goto out;
}
- return 0;
+ if (fstat(dso->data.fd, &st) < 0) {
+ ret = -errno;
+ pr_err("dso cache fstat failed: %s\n",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ goto out;
+ }
+ dso->data.file_size = st.st_size;
+
+out:
+ pthread_mutex_unlock(&dso__data_open_lock);
+ return ret;
}
/**
@@ -673,23 +792,17 @@ static int data_file_size(struct dso *dso)
*/
off_t dso__data_size(struct dso *dso, struct machine *machine)
{
- int fd;
-
- fd = dso__data_fd(dso, machine);
- if (fd < 0)
- return fd;
-
- if (data_file_size(dso))
+ if (data_file_size(dso, machine))
return -1;
/* For now just estimate dso data size is close to file size */
return dso->data.file_size;
}
-static ssize_t data_read_offset(struct dso *dso, u64 offset,
- u8 *data, ssize_t size)
+static ssize_t data_read_offset(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
{
- if (data_file_size(dso))
+ if (data_file_size(dso, machine))
return -1;
/* Check the offset sanity. */
@@ -699,7 +812,7 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
if (offset + size < offset)
return -1;
- return cached_read(dso, offset, data, size);
+ return cached_read(dso, machine, offset, data, size);
}
/**
@@ -716,10 +829,10 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
u64 offset, u8 *data, ssize_t size)
{
- if (dso__data_fd(dso, machine) < 0)
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
return -1;
- return data_read_offset(dso, offset, data, size);
+ return data_read_offset(dso, machine, offset, data, size);
}
/**
@@ -751,13 +864,13 @@ struct map *dso__new_map(const char *name)
return map;
}
-struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
- const char *short_name, int dso_type)
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+ const char *short_name, int dso_type)
{
/*
* The kernel dso could be created by build_id processing.
*/
- struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name);
+ struct dso *dso = machine__findnew_dso(machine, name);
/*
* We need to run this in all cases, since during the build_id
@@ -776,8 +889,8 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
* Either one of the dso or name parameter must be non-NULL or the
* function will not work.
*/
-static struct dso *dso__findlink_by_longname(struct rb_root *root,
- struct dso *dso, const char *name)
+static struct dso *__dso__findlink_by_longname(struct rb_root *root,
+ struct dso *dso, const char *name)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -824,10 +937,10 @@ static struct dso *dso__findlink_by_longname(struct rb_root *root,
return NULL;
}
-static inline struct dso *
-dso__find_by_longname(const struct rb_root *root, const char *name)
+static inline struct dso *__dso__find_by_longname(struct rb_root *root,
+ const char *name)
{
- return dso__findlink_by_longname((struct rb_root *)root, NULL, name);
+ return __dso__findlink_by_longname(root, NULL, name);
}
void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
@@ -935,6 +1048,8 @@ struct dso *dso__new(const char *name)
RB_CLEAR_NODE(&dso->rb_node);
INIT_LIST_HEAD(&dso->node);
INIT_LIST_HEAD(&dso->data.open_entry);
+ pthread_mutex_init(&dso->lock, NULL);
+ atomic_set(&dso->refcnt, 1);
}
return dso;
@@ -961,12 +1076,27 @@ void dso__delete(struct dso *dso)
}
dso__data_close(dso);
- dso_cache__free(&dso->data.cache);
+ auxtrace_cache__free(dso->auxtrace_cache);
+ dso_cache__free(dso);
dso__free_a2l(dso);
zfree(&dso->symsrc_filename);
+ pthread_mutex_destroy(&dso->lock);
free(dso);
}
+struct dso *dso__get(struct dso *dso)
+{
+ if (dso)
+ atomic_inc(&dso->refcnt);
+ return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+ if (dso && atomic_dec_and_test(&dso->refcnt))
+ dso__delete(dso);
+}
+
void dso__set_build_id(struct dso *dso, void *build_id)
{
memcpy(dso->build_id, build_id, sizeof(dso->build_id));
@@ -1033,14 +1163,41 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
return have_build_id;
}
-void dsos__add(struct dsos *dsos, struct dso *dso)
+void __dsos__add(struct dsos *dsos, struct dso *dso)
{
list_add_tail(&dso->node, &dsos->head);
- dso__findlink_by_longname(&dsos->root, dso, NULL);
+ __dso__findlink_by_longname(&dsos->root, dso, NULL);
+ /*
+ * It is now in the linked list, grab a reference, then garbage collect
+ * this when needing memory, by looking at LRU dso instances in the
+ * list with atomic_read(&dso->refcnt) == 1, i.e. no references
+ * anywhere besides the one for the list, do, under a lock for the
+ * list: remove it from the list, then a dso__put(), that probably will
+ * be the last and will then call dso__delete(), end of life.
+ *
+ * That, or at the end of the 'struct machine' lifetime, when all
+ * 'struct dso' instances will be removed from the list, in
+ * dsos__exit(), if they have no other reference from some other data
+ * structure.
+ *
+ * E.g.: after processing a 'perf.data' file and storing references
+ * to objects instantiated while processing events, we will have
+ * references to the 'thread', 'map', 'dso' structs all from 'struct
+ * hist_entry' instances, but we may not need anything not referenced,
+ * so we might as well call machines__exit()/machines__delete() and
+ * garbage collect it.
+ */
+ dso__get(dso);
+}
+
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+ pthread_rwlock_wrlock(&dsos->lock);
+ __dsos__add(dsos, dso);
+ pthread_rwlock_unlock(&dsos->lock);
}
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
- bool cmp_short)
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
{
struct dso *pos;
@@ -1050,15 +1207,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
return pos;
return NULL;
}
- return dso__find_by_longname(&dsos->root, name);
+ return __dso__find_by_longname(&dsos->root, name);
+}
+
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+ struct dso *dso;
+ pthread_rwlock_rdlock(&dsos->lock);
+ dso = __dsos__find(dsos, name, cmp_short);
+ pthread_rwlock_unlock(&dsos->lock);
+ return dso;
}
-struct dso *dsos__addnew(struct dsos *dsos, const char *name)
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
{
struct dso *dso = dso__new(name);
if (dso != NULL) {
- dsos__add(dsos, dso);
+ __dsos__add(dsos, dso);
dso__set_basename(dso);
}
return dso;
@@ -1066,9 +1232,18 @@ struct dso *dsos__addnew(struct dsos *dsos, const char *name)
struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
{
- struct dso *dso = dsos__find(dsos, name, false);
+ struct dso *dso = __dsos__find(dsos, name, false);
- return dso ? dso : dsos__addnew(dsos, name);
+ return dso ? dso : __dsos__addnew(dsos, name);
+}
+
+struct dso *dsos__findnew(struct dsos *dsos, const char *name)
+{
+ struct dso *dso;
+ pthread_rwlock_wrlock(&dsos->lock);
+ dso = dso__get(__dsos__findnew(dsos, name));
+ pthread_rwlock_unlock(&dsos->lock);
+ return dso;
}
size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
@@ -1130,12 +1305,15 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
enum dso_type dso__type(struct dso *dso, struct machine *machine)
{
int fd;
+ enum dso_type type = DSO__TYPE_UNKNOWN;
- fd = dso__data_fd(dso, machine);
- if (fd < 0)
- return DSO__TYPE_UNKNOWN;
+ fd = dso__data_get_fd(dso, machine);
+ if (fd >= 0) {
+ type = dso__type_fd(fd);
+ dso__data_put_fd(dso);
+ }
- return dso__type_fd(fd);
+ return type;
}
int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index e0901b4..fc8db9c 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -1,9 +1,11 @@
#ifndef __PERF_DSO
#define __PERF_DSO
+#include <linux/atomic.h>
#include <linux/types.h>
#include <linux/rbtree.h>
#include <stdbool.h>
+#include <pthread.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include "map.h"
@@ -124,13 +126,21 @@ struct dso_cache {
struct dsos {
struct list_head head;
struct rb_root root; /* rbtree root sorted by long name */
+ pthread_rwlock_t lock;
};
+struct auxtrace_cache;
+
struct dso {
+ pthread_mutex_t lock;
struct list_head node;
struct rb_node rb_node; /* rbtree node sorted by long name */
struct rb_root symbols[MAP__NR_TYPES];
struct rb_root symbol_names[MAP__NR_TYPES];
+ struct {
+ u64 addr;
+ struct symbol *symbol;
+ } last_find_result[MAP__NR_TYPES];
void *a2l;
char *symsrc_filename;
unsigned int a2l_fails;
@@ -156,6 +166,7 @@ struct dso {
u16 long_name_len;
u16 short_name_len;
void *dwfl; /* DWARF debug info */
+ struct auxtrace_cache *auxtrace_cache;
/* dso data file */
struct {
@@ -173,7 +184,7 @@ struct dso {
void *priv;
u64 db_id;
};
-
+ atomic_t refcnt;
char name[0];
};
@@ -200,6 +211,17 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
int dso__name_len(const struct dso *dso);
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+ dso__put(*dso);
+ *dso = NULL;
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
+
bool dso__loaded(const struct dso *dso, enum map_type type);
bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
@@ -216,7 +238,7 @@ char dso__symtab_origin(const struct dso *dso);
int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
char *root_dir, char *filename, size_t size);
bool is_supported_compression(const char *ext);
-bool is_kernel_module(const char *pathname);
+bool is_kernel_module(const char *pathname, int cpumode);
bool decompress_to_file(const char *ext, const char *filename, int output_fd);
bool dso__needs_decompress(struct dso *dso);
@@ -236,7 +258,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
/*
* The dso__data_* external interface provides following functions:
- * dso__data_fd
+ * dso__data_get_fd
+ * dso__data_put_fd
* dso__data_close
* dso__data_size
* dso__data_read_offset
@@ -253,8 +276,11 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
* The current usage of the dso__data_* interface is as follows:
*
* Get DSO's fd:
- * int fd = dso__data_fd(dso, machine);
- * USE 'fd' SOMEHOW
+ * int fd = dso__data_get_fd(dso, machine);
+ * if (fd >= 0) {
+ * USE 'fd' SOMEHOW
+ * dso__data_put_fd(dso);
+ * }
*
* Read DSO's data:
* n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE);
@@ -273,7 +299,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
*
* TODO
*/
-int dso__data_fd(struct dso *dso, struct machine *machine);
+int dso__data_get_fd(struct dso *dso, struct machine *machine);
+void dso__data_put_fd(struct dso *dso __maybe_unused);
void dso__data_close(struct dso *dso);
off_t dso__data_size(struct dso *dso, struct machine *machine);
@@ -285,16 +312,20 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by);
struct map *dso__new_map(const char *name);
-struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
- const char *short_name, int dso_type);
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+ const char *short_name, int dso_type);
+void __dsos__add(struct dsos *dsos, struct dso *dso);
void dsos__add(struct dsos *dsos, struct dso *dso);
-struct dso *dsos__addnew(struct dsos *dsos, const char *name);
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
- bool cmp_short);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
+struct dso *dsos__findnew(struct dsos *dsos, const char *name);
bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
+void dso__reset_find_symbol_cache(struct dso *dso);
+
size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
size_t __dsos__fprintf(struct list_head *head, FILE *fp);
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index c34e024..a509aa84 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -139,11 +139,27 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
{
const char *name;
+
name = dwarf_diename(dw_die);
return name ? (strcmp(tname, name) == 0) : false;
}
/**
+ * die_match_name - Match diename and glob
+ * @dw_die: a DIE
+ * @glob: a string of target glob pattern
+ *
+ * Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob)
+{
+ const char *name;
+
+ name = dwarf_diename(dw_die);
+ return name ? strglobmatch(name, glob) : false;
+}
+
+/**
* die_get_call_lineno - Get callsite line number of inline-function instance
* @in_die: a DIE of an inlined function instance
*
@@ -417,6 +433,43 @@ struct __addr_die_search_param {
Dwarf_Die *die_mem;
};
+static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct __addr_die_search_param *ad = data;
+ Dwarf_Addr addr = 0;
+
+ if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+ !dwarf_highpc(fn_die, &addr) &&
+ addr == ad->addr) {
+ memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+ return DWARF_CB_ABORT;
+ }
+ return DWARF_CB_OK;
+}
+
+/**
+ * die_find_tailfunc - Search for a non-inlined function with tail call at
+ * given address
+ * @cu_die: a CU DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for a non-inlined function DIE with tail call at @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ struct __addr_die_search_param ad;
+ ad.addr = addr;
+ ad.die_mem = die_mem;
+ /* dwarf_getscopes can't find subprogram. */
+ if (!dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &ad, 0))
+ return NULL;
+ else
+ return die_mem;
+}
+
/* die_find callback for non-inlined function search */
static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
{
@@ -681,15 +734,18 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
Dwarf_Lines *lines;
Dwarf_Line *line;
Dwarf_Addr addr;
- const char *fname;
+ const char *fname, *decf = NULL;
int lineno, ret = 0;
+ int decl = 0, inl;
Dwarf_Die die_mem, *cu_die;
size_t nlines, i;
/* Get the CU die */
- if (dwarf_tag(rt_die) != DW_TAG_compile_unit)
+ if (dwarf_tag(rt_die) != DW_TAG_compile_unit) {
cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
- else
+ dwarf_decl_line(rt_die, &decl);
+ decf = dwarf_decl_file(rt_die);
+ } else
cu_die = rt_die;
if (!cu_die) {
pr_debug2("Failed to get CU from given DIE.\n");
@@ -714,15 +770,21 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
continue;
}
/* Filter lines based on address */
- if (rt_die != cu_die)
+ if (rt_die != cu_die) {
/*
* Address filtering
* The line is included in given function, and
* no inline block includes it.
*/
- if (!dwarf_haspc(rt_die, addr) ||
- die_find_inlinefunc(rt_die, addr, &die_mem))
+ if (!dwarf_haspc(rt_die, addr))
continue;
+ if (die_find_inlinefunc(rt_die, addr, &die_mem)) {
+ dwarf_decl_line(&die_mem, &inl);
+ if (inl != decl ||
+ decf != dwarf_decl_file(&die_mem))
+ continue;
+ }
+ }
/* Get source line */
fname = dwarf_linesrc(line, NULL, NULL);
@@ -832,19 +894,17 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
/**
* die_get_typename - Get the name of given variable DIE
* @vr_die: a variable DIE
- * @buf: a buffer for result type name
- * @len: a max-length of @buf
+ * @buf: a strbuf for result type name
*
- * Get the name of @vr_die and stores it to @buf. Return the actual length
- * of type name if succeeded. Return -E2BIG if @len is not enough long, and
- * Return -ENOENT if failed to find type name.
+ * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded.
+ * and Return -ENOENT if failed to find type name.
* Note that the result will stores typedef name if possible, and stores
* "*(function_type)" if the type is a function pointer.
*/
-int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
{
Dwarf_Die type;
- int tag, ret, ret2;
+ int tag, ret;
const char *tmp = "";
if (__die_get_real_type(vr_die, &type) == NULL)
@@ -855,8 +915,8 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
tmp = "*";
else if (tag == DW_TAG_subroutine_type) {
/* Function pointer */
- ret = snprintf(buf, len, "(function_type)");
- return (ret >= len) ? -E2BIG : ret;
+ strbuf_addf(buf, "(function_type)");
+ return 0;
} else {
if (!dwarf_diename(&type))
return -ENOENT;
@@ -867,39 +927,156 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
else if (tag == DW_TAG_enumeration_type)
tmp = "enum ";
/* Write a base name */
- ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
- return (ret >= len) ? -E2BIG : ret;
- }
- ret = die_get_typename(&type, buf, len);
- if (ret > 0) {
- ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
- ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+ strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
+ return 0;
}
+ ret = die_get_typename(&type, buf);
+ if (ret == 0)
+ strbuf_addf(buf, "%s", tmp);
+
return ret;
}
/**
* die_get_varname - Get the name and type of given variable DIE
* @vr_die: a variable DIE
- * @buf: a buffer for type and variable name
- * @len: the max-length of @buf
+ * @buf: a strbuf for type and variable name
*
* Get the name and type of @vr_die and stores it in @buf as "type\tname".
*/
-int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
{
- int ret, ret2;
+ int ret;
- ret = die_get_typename(vr_die, buf, len);
+ ret = die_get_typename(vr_die, buf);
if (ret < 0) {
pr_debug("Failed to get type, make it unknown.\n");
- ret = snprintf(buf, len, "(unknown_type)");
+ strbuf_addf(buf, "(unknown_type)");
}
- if (ret > 0) {
- ret2 = snprintf(buf + ret, len - ret, "\t%s",
- dwarf_diename(vr_die));
- ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+
+ strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+
+ return 0;
+}
+
+/**
+ * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for variable byte offset range
+ *
+ * Get the innermost scope range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+ struct strbuf *buf)
+{
+ Dwarf_Die *scopes;
+ int count;
+ size_t offset = 0;
+ Dwarf_Addr base;
+ Dwarf_Addr start, end;
+ Dwarf_Addr entry;
+ int ret;
+ bool first = true;
+ const char *name;
+
+ ret = dwarf_entrypc(sp_die, &entry);
+ if (ret)
+ return ret;
+
+ name = dwarf_diename(sp_die);
+ if (!name)
+ return -ENOENT;
+
+ count = dwarf_getscopes_die(vr_die, &scopes);
+
+ /* (*SCOPES)[1] is the DIE for the scope containing that scope */
+ if (count <= 1) {
+ ret = -EINVAL;
+ goto out;
}
+
+ while ((offset = dwarf_ranges(&scopes[1], offset, &base,
+ &start, &end)) > 0) {
+ start -= entry;
+ end -= entry;
+
+ if (first) {
+ strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+ name, start, end);
+ first = false;
+ } else {
+ strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+ start, end);
+ }
+ }
+
+ if (!first)
+ strbuf_addf(buf, "]>");
+
+out:
+ free(scopes);
return ret;
}
+/**
+ * die_get_var_range - Get byte offset range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name and byte offset range
+ *
+ * Get the byte offset range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
+{
+ int ret = 0;
+ Dwarf_Addr base;
+ Dwarf_Addr start, end;
+ Dwarf_Addr entry;
+ Dwarf_Op *op;
+ size_t nops;
+ size_t offset = 0;
+ Dwarf_Attribute attr;
+ bool first = true;
+ const char *name;
+
+ ret = dwarf_entrypc(sp_die, &entry);
+ if (ret)
+ return ret;
+
+ name = dwarf_diename(sp_die);
+ if (!name)
+ return -ENOENT;
+
+ if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+ return -EINVAL;
+
+ while ((offset = dwarf_getlocations(
+ &attr, offset, &base,
+ &start, &end, &op, &nops)) > 0) {
+ if (start == 0) {
+ /* Single Location Descriptions */
+ ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
+ return ret;
+ }
+
+ /* Location Lists */
+ start -= entry;
+ end -= entry;
+ if (first) {
+ strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+ name, start, end);
+ first = false;
+ } else {
+ strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+ start, end);
+ }
+ }
+
+ if (!first)
+ strbuf_addf(buf, "]>");
+
+ return ret;
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index af7dbcd..c42ec36 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -47,6 +47,9 @@ extern bool die_is_func_instance(Dwarf_Die *dw_die);
/* Compare diename and tname */
extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+/* Matching diename with glob pattern */
+extern bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+
/* Get callsite line number of inline-function instance */
extern int die_get_call_lineno(Dwarf_Die *in_die);
@@ -82,6 +85,10 @@ extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
Dwarf_Die *die_mem);
+/* Search a non-inlined function with tail call at given address */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
/* Search the top inlined function including given address */
extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
Dwarf_Die *die_mem);
@@ -114,8 +121,10 @@ extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
Dwarf_Die *die_mem);
/* Get the name of given variable DIE */
-extern int die_get_typename(Dwarf_Die *vr_die, char *buf, int len);
+extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
/* Get the name and type of given variable DIE, stored as "type\tname" */
-extern int die_get_varname(Dwarf_Die *vr_die, char *buf, int len);
+extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+ struct strbuf *buf);
#endif
diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c
index 275b0ee..7405123 100644
--- a/tools/perf/util/environment.c
+++ b/tools/perf/util/environment.c
@@ -5,5 +5,4 @@
*/
#include "cache.h"
-const char *pager_program;
int pager_use_color = 1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index ff866c4..7ff6127 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -23,12 +23,20 @@ static const char *perf_event__names[] = {
[PERF_RECORD_FORK] = "FORK",
[PERF_RECORD_READ] = "READ",
[PERF_RECORD_SAMPLE] = "SAMPLE",
+ [PERF_RECORD_AUX] = "AUX",
+ [PERF_RECORD_ITRACE_START] = "ITRACE_START",
+ [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
+ [PERF_RECORD_SWITCH] = "SWITCH",
+ [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
[PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
[PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
[PERF_RECORD_ID_INDEX] = "ID_INDEX",
+ [PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO",
+ [PERF_RECORD_AUXTRACE] = "AUXTRACE",
+ [PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR",
};
const char *perf_event__name(unsigned int id)
@@ -212,10 +220,14 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
struct machine *machine,
- bool mmap_data)
+ bool mmap_data,
+ unsigned int proc_map_timeout)
{
char filename[PATH_MAX];
FILE *fp;
+ unsigned long long t;
+ bool truncation = false;
+ unsigned long long timeout = proc_map_timeout * 1000000ULL;
int rc = 0;
if (machine__is_default_guest(machine))
@@ -234,6 +246,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
}
event->header.type = PERF_RECORD_MMAP2;
+ t = rdclock();
while (1) {
char bf[BUFSIZ];
@@ -247,6 +260,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
if (fgets(bf, sizeof(bf), fp) == NULL)
break;
+ if ((rdclock() - t) > timeout) {
+ pr_warning("Reading %s time out. "
+ "You may want to increase "
+ "the time limit by --proc-map-timeout\n",
+ filename);
+ truncation = true;
+ goto out;
+ }
+
/* ensure null termination since stack will be reused. */
strcpy(execname, "");
@@ -295,6 +317,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
}
+out:
+ if (truncation)
+ event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
if (!strcmp(execname, ""))
strcpy(execname, anonstr);
@@ -313,6 +339,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
rc = -1;
break;
}
+
+ if (truncation)
+ break;
}
fclose(fp);
@@ -324,8 +353,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
struct machine *machine)
{
int rc = 0;
- struct rb_node *nd;
+ struct map *pos;
struct map_groups *kmaps = &machine->kmaps;
+ struct maps *maps = &kmaps->maps[MAP__FUNCTION];
union perf_event *event = zalloc((sizeof(event->mmap) +
machine->id_hdr_size));
if (event == NULL) {
@@ -345,10 +375,8 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
else
event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
- for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
- nd; nd = rb_next(nd)) {
+ for (pos = maps__first(maps); pos; pos = map__next(pos)) {
size_t size;
- struct map *pos = rb_entry(nd, struct map, rb_node);
if (pos->dso->kernel)
continue;
@@ -381,7 +409,9 @@ static int __event__synthesize_thread(union perf_event *comm_event,
pid_t pid, int full,
perf_event__handler_t process,
struct perf_tool *tool,
- struct machine *machine, bool mmap_data)
+ struct machine *machine,
+ bool mmap_data,
+ unsigned int proc_map_timeout)
{
char filename[PATH_MAX];
DIR *tasks;
@@ -398,7 +428,8 @@ static int __event__synthesize_thread(union perf_event *comm_event,
return -1;
return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
- process, machine, mmap_data);
+ process, machine, mmap_data,
+ proc_map_timeout);
}
if (machine__is_default_guest(machine))
@@ -439,7 +470,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (_pid == pid) {
/* process the parent's maps too */
rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
- process, machine, mmap_data);
+ process, machine, mmap_data, proc_map_timeout);
if (rc)
break;
}
@@ -453,7 +484,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
struct thread_map *threads,
perf_event__handler_t process,
struct machine *machine,
- bool mmap_data)
+ bool mmap_data,
+ unsigned int proc_map_timeout)
{
union perf_event *comm_event, *mmap_event, *fork_event;
int err = -1, thread, j;
@@ -474,9 +506,9 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
for (thread = 0; thread < threads->nr; ++thread) {
if (__event__synthesize_thread(comm_event, mmap_event,
fork_event,
- threads->map[thread], 0,
+ thread_map__pid(threads, thread), 0,
process, tool, machine,
- mmap_data)) {
+ mmap_data, proc_map_timeout)) {
err = -1;
break;
}
@@ -485,12 +517,12 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
* comm.pid is set to thread group id by
* perf_event__synthesize_comm
*/
- if ((int) comm_event->comm.pid != threads->map[thread]) {
+ if ((int) comm_event->comm.pid != thread_map__pid(threads, thread)) {
bool need_leader = true;
/* is thread group leader in thread_map? */
for (j = 0; j < threads->nr; ++j) {
- if ((int) comm_event->comm.pid == threads->map[j]) {
+ if ((int) comm_event->comm.pid == thread_map__pid(threads, j)) {
need_leader = false;
break;
}
@@ -502,7 +534,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
fork_event,
comm_event->comm.pid, 0,
process, tool, machine,
- mmap_data)) {
+ mmap_data, proc_map_timeout)) {
err = -1;
break;
}
@@ -519,7 +551,9 @@ out:
int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
- struct machine *machine, bool mmap_data)
+ struct machine *machine,
+ bool mmap_data,
+ unsigned int proc_map_timeout)
{
DIR *proc;
char proc_path[PATH_MAX];
@@ -559,7 +593,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
* one thread couldn't be synthesized.
*/
__event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
- 1, process, tool, machine, mmap_data);
+ 1, process, tool, machine, mmap_data,
+ proc_map_timeout);
}
err = 0;
@@ -692,6 +727,38 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
return machine__process_lost_event(machine, event, sample);
}
+int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_aux_event(machine, event);
+}
+
+int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_itrace_start_event(machine, event);
+}
+
+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_lost_samples_event(machine, event, sample);
+}
+
+int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_switch_event(machine, event);
+}
+
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -755,6 +822,35 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
return machine__process_exit_event(machine, event, sample);
}
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n",
+ event->aux.aux_offset, event->aux.aux_size,
+ event->aux.flags,
+ event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
+ event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "");
+}
+
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " pid: %u tid: %u\n",
+ event->itrace_start.pid, event->itrace_start.tid);
+}
+
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ const char *in_out = out ? "OUT" : "IN ";
+
+ if (event->header.type == PERF_RECORD_SWITCH)
+ return fprintf(fp, " %s\n", in_out);
+
+ return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n",
+ in_out, out ? "next" : "prev",
+ event->context_switch.next_prev_pid,
+ event->context_switch.next_prev_tid);
+}
+
size_t perf_event__fprintf(union perf_event *event, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -774,6 +870,16 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_MMAP2:
ret += perf_event__fprintf_mmap2(event, fp);
break;
+ case PERF_RECORD_AUX:
+ ret += perf_event__fprintf_aux(event, fp);
+ break;
+ case PERF_RECORD_ITRACE_START:
+ ret += perf_event__fprintf_itrace_start(event, fp);
+ break;
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ ret += perf_event__fprintf_switch(event, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
@@ -877,6 +983,10 @@ void thread__find_addr_location(struct thread *thread,
al->sym = NULL;
}
+/*
+ * Callers need to drop the reference to al->thread, obtained in
+ * machine__findnew_thread()
+ */
int perf_event__preprocess_sample(const union perf_event *event,
struct machine *machine,
struct addr_location *al,
@@ -937,6 +1047,17 @@ int perf_event__preprocess_sample(const union perf_event *event,
return 0;
}
+/*
+ * The preprocess_sample method will return with reference counts for the
+ * in it, when done using (and perhaps getting ref counts if needing to
+ * keep a pointer to one of those entries) it must be paired with
+ * addr_location__put(), so that the refcounts can be decremented.
+ */
+void addr_location__put(struct addr_location *al)
+{
+ thread__zput(al->thread);
+}
+
bool is_bts_event(struct perf_event_attr *attr)
{
return attr->type == PERF_TYPE_HARDWARE &&
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 09b9e8d..f729df5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -52,6 +52,11 @@ struct lost_event {
u64 lost;
};
+struct lost_samples_event {
+ struct perf_event_header header;
+ u64 lost;
+};
+
/*
* PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
*/
@@ -129,7 +134,8 @@ struct branch_flags {
u64 predicted:1;
u64 in_tx:1;
u64 abort:1;
- u64 reserved:60;
+ u64 cycles:16;
+ u64 reserved:44;
};
struct branch_entry {
@@ -157,6 +163,8 @@ enum {
PERF_IP_FLAG_IN_TX = 1ULL << 10,
};
+#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
PERF_IP_FLAG_CALL |\
@@ -215,9 +223,17 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_HEADER_BUILD_ID = 67,
PERF_RECORD_FINISHED_ROUND = 68,
PERF_RECORD_ID_INDEX = 69,
+ PERF_RECORD_AUXTRACE_INFO = 70,
+ PERF_RECORD_AUXTRACE = 71,
+ PERF_RECORD_AUXTRACE_ERROR = 72,
PERF_RECORD_HEADER_MAX
};
+enum auxtrace_error_type {
+ PERF_AUXTRACE_ERROR_ITRACE = 1,
+ PERF_AUXTRACE_ERROR_MAX
+};
+
/*
* The kernel collects the number of events it couldn't send in a stretch and
* when possible sends this number in a PERF_RECORD_LOST event. The number of
@@ -225,6 +241,12 @@ enum perf_user_event_type { /* above any possible kernel type */
* total_lost tells exactly how many events the kernel in fact lost, i.e. it is
* the sum of all struct lost_event.lost fields reported.
*
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct lost_samples_event.lost fields reported.
+ *
* The total_period is needed because by default auto-freq is used, so
* multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
* the total number of low level events, it is necessary to to sum all struct
@@ -234,6 +256,7 @@ struct events_stats {
u64 total_period;
u64 total_non_filtered_period;
u64 total_lost;
+ u64 total_lost_samples;
u64 total_invalid_chains;
u32 nr_events[PERF_RECORD_HEADER_MAX];
u32 nr_non_filtered_samples;
@@ -242,6 +265,8 @@ struct events_stats {
u32 nr_invalid_chains;
u32 nr_unknown_id;
u32 nr_unprocessable_samples;
+ u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+ u32 nr_proc_map_timeout;
};
struct attr_event {
@@ -280,6 +305,56 @@ struct id_index_event {
struct id_index_entry entries[0];
};
+struct auxtrace_info_event {
+ struct perf_event_header header;
+ u32 type;
+ u32 reserved__; /* For alignment */
+ u64 priv[];
+};
+
+struct auxtrace_event {
+ struct perf_event_header header;
+ u64 size;
+ u64 offset;
+ u64 reference;
+ u32 idx;
+ u32 tid;
+ u32 cpu;
+ u32 reserved__; /* For alignment */
+};
+
+#define MAX_AUXTRACE_ERROR_MSG 64
+
+struct auxtrace_error_event {
+ struct perf_event_header header;
+ u32 type;
+ u32 code;
+ u32 cpu;
+ u32 pid;
+ u32 tid;
+ u32 reserved__; /* For alignment */
+ u64 ip;
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
+struct aux_event {
+ struct perf_event_header header;
+ u64 aux_offset;
+ u64 aux_size;
+ u64 flags;
+};
+
+struct itrace_start_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+};
+
+struct context_switch_event {
+ struct perf_event_header header;
+ u32 next_prev_pid;
+ u32 next_prev_tid;
+};
+
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
@@ -287,6 +362,7 @@ union perf_event {
struct comm_event comm;
struct fork_event fork;
struct lost_event lost;
+ struct lost_samples_event lost_samples;
struct read_event read;
struct throttle_event throttle;
struct sample_event sample;
@@ -295,6 +371,12 @@ union perf_event {
struct tracing_data_event tracing_data;
struct build_id_event build_id;
struct id_index_event id_index;
+ struct auxtrace_info_event auxtrace_info;
+ struct auxtrace_event auxtrace;
+ struct auxtrace_error_event auxtrace_error;
+ struct aux_event aux;
+ struct itrace_start_event itrace_start;
+ struct context_switch_event context_switch;
};
void perf_event__print_totals(void);
@@ -310,10 +392,12 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool,
int perf_event__synthesize_thread_map(struct perf_tool *tool,
struct thread_map *threads,
perf_event__handler_t process,
- struct machine *machine, bool mmap_data);
+ struct machine *machine, bool mmap_data,
+ unsigned int proc_map_timeout);
int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
- struct machine *machine, bool mmap_data);
+ struct machine *machine, bool mmap_data,
+ unsigned int proc_map_timeout);
int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
@@ -330,6 +414,22 @@ int perf_event__process_lost(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_lost_samples(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_aux(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_itrace_start(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_switch(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -358,6 +458,8 @@ int perf_event__preprocess_sample(const union perf_event *event,
struct addr_location *al,
struct perf_sample *sample);
+void addr_location__put(struct addr_location *al);
+
struct thread;
bool is_bts_event(struct perf_event_attr *attr);
@@ -381,12 +483,16 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
struct machine *machine,
- bool mmap_data);
+ bool mmap_data,
+ unsigned int proc_map_timeout);
size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 080be93..d51a520 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -98,6 +98,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
evlist__for_each_safe(evlist, n, pos) {
list_del_init(&pos->node);
+ pos->evlist = NULL;
perf_evsel__delete(pos);
}
@@ -114,8 +115,8 @@ void perf_evlist__delete(struct perf_evlist *evlist)
{
perf_evlist__munmap(evlist);
perf_evlist__close(evlist);
- cpu_map__delete(evlist->cpus);
- thread_map__delete(evlist->threads);
+ cpu_map__put(evlist->cpus);
+ thread_map__put(evlist->threads);
evlist->cpus = NULL;
evlist->threads = NULL;
perf_evlist__purge(evlist);
@@ -125,6 +126,7 @@ void perf_evlist__delete(struct perf_evlist *evlist)
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
+ entry->evlist = evlist;
list_add_tail(&entry->node, &evlist->entries);
entry->idx = evlist->nr_entries;
entry->tracking = !entry->idx;
@@ -297,6 +299,8 @@ void perf_evlist__disable(struct perf_evlist *evlist)
PERF_EVENT_IOC_DISABLE, 0);
}
}
+
+ evlist->enabled = false;
}
void perf_evlist__enable(struct perf_evlist *evlist)
@@ -316,6 +320,13 @@ void perf_evlist__enable(struct perf_evlist *evlist)
PERF_EVENT_IOC_ENABLE, 0);
}
}
+
+ evlist->enabled = true;
+}
+
+void perf_evlist__toggle_enable(struct perf_evlist *evlist)
+{
+ (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}
int perf_evlist__disable_event(struct perf_evlist *evlist,
@@ -539,7 +550,7 @@ static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
else
sid->cpu = -1;
if (!evsel->system_wide && evlist->threads && thread >= 0)
- sid->tid = evlist->threads->map[thread];
+ sid->tid = thread_map__pid(evlist->threads, thread);
else
sid->tid = -1;
}
@@ -564,7 +575,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
struct perf_sample_id *sid;
- if (evlist->nr_entries == 1)
+ if (evlist->nr_entries == 1 || !id)
return perf_evlist__first(evlist);
sid = perf_evlist__id2sid(evlist, id);
@@ -634,11 +645,18 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
struct perf_mmap *md = &evlist->mmap[idx];
- u64 head = perf_mmap__read_head(md);
+ u64 head;
u64 old = md->prev;
unsigned char *data = md->base + page_size;
union perf_event *event = NULL;
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!atomic_read(&md->refcnt))
+ return NULL;
+
+ head = perf_mmap__read_head(md);
if (evlist->overwrite) {
/*
* If we're further behind than half the buffer, there's a chance
@@ -695,19 +713,19 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
static bool perf_mmap__empty(struct perf_mmap *md)
{
- return perf_mmap__read_head(md) == md->prev;
+ return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}
static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
- ++evlist->mmap[idx].refcnt;
+ atomic_inc(&evlist->mmap[idx].refcnt);
}
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
- BUG_ON(evlist->mmap[idx].refcnt == 0);
+ BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
- if (--evlist->mmap[idx].refcnt == 0)
+ if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
__perf_evlist__munmap(evlist, idx);
}
@@ -721,17 +739,46 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
perf_mmap__write_tail(md, old);
}
- if (md->refcnt == 1 && perf_mmap__empty(md))
+ if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
perf_evlist__mmap_put(evlist, idx);
}
+int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
+ struct auxtrace_mmap_params *mp __maybe_unused,
+ void *userpg __maybe_unused,
+ int fd __maybe_unused)
+{
+ return 0;
+}
+
+void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__init(
+ struct auxtrace_mmap_params *mp __maybe_unused,
+ off_t auxtrace_offset __maybe_unused,
+ unsigned int auxtrace_pages __maybe_unused,
+ bool auxtrace_overwrite __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__set_idx(
+ struct auxtrace_mmap_params *mp __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ int idx __maybe_unused,
+ bool per_cpu __maybe_unused)
+{
+}
+
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
if (evlist->mmap[idx].base != NULL) {
munmap(evlist->mmap[idx].base, evlist->mmap_len);
evlist->mmap[idx].base = NULL;
- evlist->mmap[idx].refcnt = 0;
+ atomic_set(&evlist->mmap[idx].refcnt, 0);
}
+ auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}
void perf_evlist__munmap(struct perf_evlist *evlist)
@@ -759,6 +806,7 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
struct mmap_params {
int prot;
int mask;
+ struct auxtrace_mmap_params auxtrace_mp;
};
static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
@@ -777,7 +825,7 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
* evlist layer can't just drop it when filtering events in
* perf_evlist__filter_pollfd().
*/
- evlist->mmap[idx].refcnt = 2;
+ atomic_set(&evlist->mmap[idx].refcnt, 2);
evlist->mmap[idx].prev = 0;
evlist->mmap[idx].mask = mp->mask;
evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
@@ -789,6 +837,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
return -1;
}
+ if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
+ &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
+ return -1;
+
return 0;
}
@@ -853,6 +905,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
+ auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
+ true);
+
for (thread = 0; thread < nr_threads; thread++) {
if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
thread, &output))
@@ -878,6 +933,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
+ auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
+ false);
+
if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
&output))
goto out_unmap;
@@ -960,10 +1018,8 @@ static long parse_pages_arg(const char *str, unsigned long min,
return pages;
}
-int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
- int unset __maybe_unused)
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
- unsigned int *mmap_pages = opt->value;
unsigned long max = UINT_MAX;
long pages;
@@ -980,20 +1036,32 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
return 0;
}
+int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ return __perf_evlist__parse_mmap_pages(opt->value, str);
+}
+
/**
- * perf_evlist__mmap - Create mmaps to receive events.
+ * perf_evlist__mmap_ex - Create mmaps to receive events.
* @evlist: list of events
* @pages: map length in pages
* @overwrite: overwrite older events?
+ * @auxtrace_pages - auxtrace map length in pages
+ * @auxtrace_overwrite - overwrite older auxtrace data?
*
* If @overwrite is %false the user needs to signal event consumption using
* perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this
* automatically.
*
+ * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
+ * consumption using auxtrace_mmap__write_tail().
+ *
* Return: %0 on success, negative error code otherwise.
*/
-int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite)
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+ bool overwrite, unsigned int auxtrace_pages,
+ bool auxtrace_overwrite)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
@@ -1013,6 +1081,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
pr_debug("mmap size %zuB\n", evlist->mmap_len);
mp.mask = evlist->mmap_len - page_size - 1;
+ auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
+ auxtrace_pages, auxtrace_overwrite);
+
evlist__for_each(evlist, evsel) {
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
evsel->sample_id == NULL &&
@@ -1026,6 +1097,38 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
return perf_evlist__mmap_per_cpu(evlist, &mp);
}
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
+ bool overwrite)
+{
+ return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
+}
+
+static int perf_evlist__propagate_maps(struct perf_evlist *evlist,
+ bool has_user_cpus)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ /*
+ * We already have cpus for evsel (via PMU sysfs) so
+ * keep it, if there's no target cpu list defined.
+ */
+ if (evsel->cpus && has_user_cpus)
+ cpu_map__put(evsel->cpus);
+
+ if (!evsel->cpus || has_user_cpus)
+ evsel->cpus = cpu_map__get(evlist->cpus);
+
+ evsel->threads = thread_map__get(evlist->threads);
+
+ if ((evlist->cpus && !evsel->cpus) ||
+ (evlist->threads && !evsel->threads))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
evlist->threads = thread_map__new_str(target->pid, target->tid,
@@ -1042,14 +1145,31 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
if (evlist->cpus == NULL)
goto out_delete_threads;
- return 0;
+ return perf_evlist__propagate_maps(evlist, !!target->cpu_list);
out_delete_threads:
- thread_map__delete(evlist->threads);
+ thread_map__put(evlist->threads);
evlist->threads = NULL;
return -1;
}
+int perf_evlist__set_maps(struct perf_evlist *evlist,
+ struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ if (evlist->cpus)
+ cpu_map__put(evlist->cpus);
+
+ evlist->cpus = cpus;
+
+ if (evlist->threads)
+ thread_map__put(evlist->threads);
+
+ evlist->threads = threads;
+
+ return perf_evlist__propagate_maps(evlist, false);
+}
+
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
struct perf_evsel *evsel;
@@ -1061,7 +1181,11 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e
if (evsel->filter == NULL)
continue;
- err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter);
+ /*
+ * filters only work for tracepoint event, which doesn't have cpu limit.
+ * So evlist and evsel should always be same.
+ */
+ err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
if (err) {
*err_evsel = evsel;
break;
@@ -1075,11 +1199,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
struct perf_evsel *evsel;
int err = 0;
- const int ncpus = cpu_map__nr(evlist->cpus),
- nthreads = thread_map__nr(evlist->threads);
evlist__for_each(evlist, evsel) {
- err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter);
+ err = perf_evsel__set_filter(evsel, filter);
if (err)
break;
}
@@ -1157,6 +1279,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
return __perf_evlist__combined_sample_type(evlist);
}
+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ u64 branch_type = 0;
+
+ evlist__for_each(evlist, evsel)
+ branch_type |= evsel->attr.branch_sample_type;
+ return branch_type;
+}
+
bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
@@ -1278,7 +1410,7 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
out:
return err;
out_free_cpus:
- cpu_map__delete(evlist->cpus);
+ cpu_map__put(evlist->cpus);
evlist->cpus = NULL;
goto out;
}
@@ -1400,7 +1532,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar
__func__, __LINE__);
goto out_close_pipes;
}
- evlist->threads->map[0] = evlist->workload.pid;
+ thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
}
close(child_ready_pipe[1]);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index b5cce95..b39a619 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -1,6 +1,7 @@
#ifndef __PERF_EVLIST_H
#define __PERF_EVLIST_H 1
+#include <linux/atomic.h>
#include <linux/list.h>
#include <api/fd/array.h>
#include <stdio.h>
@@ -8,6 +9,7 @@
#include "event.h"
#include "evsel.h"
#include "util.h"
+#include "auxtrace.h"
#include <unistd.h>
struct pollfd;
@@ -26,8 +28,9 @@ struct record_opts;
struct perf_mmap {
void *base;
int mask;
- int refcnt;
+ atomic_t refcnt;
u64 prev;
+ struct auxtrace_mmap auxtrace_mmap;
char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
};
@@ -37,6 +40,8 @@ struct perf_evlist {
int nr_entries;
int nr_groups;
int nr_mmaps;
+ bool overwrite;
+ bool enabled;
size_t mmap_len;
int id_pos;
int is_pos;
@@ -45,13 +50,13 @@ struct perf_evlist {
int cork_fd;
pid_t pid;
} workload;
- bool overwrite;
struct fdarray pollfd;
struct perf_mmap *mmap;
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *selected;
struct events_stats stats;
+ struct perf_env *env;
};
struct perf_evsel_str_handler {
@@ -110,6 +115,8 @@ void perf_evlist__close(struct perf_evlist *evlist);
void perf_evlist__set_id_pos(struct perf_evlist *evlist);
bool perf_can_sample_identifier(void);
+bool perf_can_record_switch_events(void);
+bool perf_can_record_cpu_wide(void);
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
int record_opts__config(struct record_opts *opts);
@@ -122,16 +129,21 @@ int perf_evlist__start_workload(struct perf_evlist *evlist);
struct option;
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
int perf_evlist__parse_mmap_pages(const struct option *opt,
const char *str,
int unset);
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+ bool overwrite, unsigned int auxtrace_pages,
+ bool auxtrace_overwrite);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
bool overwrite);
void perf_evlist__munmap(struct perf_evlist *evlist);
void perf_evlist__disable(struct perf_evlist *evlist);
void perf_evlist__enable(struct perf_evlist *evlist);
+void perf_evlist__toggle_enable(struct perf_evlist *evlist);
int perf_evlist__disable_event(struct perf_evlist *evlist,
struct perf_evsel *evsel);
@@ -143,14 +155,9 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
void perf_evlist__set_selected(struct perf_evlist *evlist,
struct perf_evsel *evsel);
-static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
- struct cpu_map *cpus,
- struct thread_map *threads)
-{
- evlist->cpus = cpus;
- evlist->threads = threads;
-}
-
+int perf_evlist__set_maps(struct perf_evlist *evlist,
+ struct cpu_map *cpus,
+ struct thread_map *threads);
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel);
@@ -160,6 +167,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist);
u64 perf_evlist__read_format(struct perf_evlist *evlist);
u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
@@ -280,5 +288,4 @@ void perf_evlist__to_front(struct perf_evlist *evlist,
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
struct perf_evsel *tracking_evsel);
-
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 33e3fd8..c53f791 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -26,6 +26,7 @@
#include "perf_regs.h"
#include "debug.h"
#include "trace-event.h"
+#include "stat.h"
static struct {
bool sample_id_all;
@@ -205,10 +206,13 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->leader = evsel;
evsel->unit = "";
evsel->scale = 1.0;
+ evsel->evlist = NULL;
INIT_LIST_HEAD(&evsel->node);
+ INIT_LIST_HEAD(&evsel->config_terms);
perf_evsel__object.init(evsel);
evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
perf_evsel__calc_id_pos(evsel);
+ evsel->cmdline_group_boundary = false;
}
struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -542,14 +546,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
static void
perf_evsel__config_callgraph(struct perf_evsel *evsel,
- struct record_opts *opts)
+ struct record_opts *opts,
+ struct callchain_param *param)
{
bool function = perf_evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->attr;
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
- if (callchain_param.record_mode == CALLCHAIN_LBR) {
+ if (param->record_mode == CALLCHAIN_LBR) {
if (!opts->branch_stack) {
if (attr->exclude_user) {
pr_warning("LBR callstack option is only available "
@@ -565,12 +570,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
"Falling back to framepointers.\n");
}
- if (callchain_param.record_mode == CALLCHAIN_DWARF) {
+ if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
perf_evsel__set_sample_bit(evsel, STACK_USER);
attr->sample_regs_user = PERF_REGS_MASK;
- attr->sample_stack_user = callchain_param.dump_size;
+ attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
} else {
pr_info("Cannot use DWARF unwind for function trace event,"
@@ -584,6 +589,97 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
}
}
+static void
+perf_evsel__reset_callgraph(struct perf_evsel *evsel,
+ struct callchain_param *param)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ if (param->record_mode == CALLCHAIN_LBR) {
+ perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK);
+ }
+ if (param->record_mode == CALLCHAIN_DWARF) {
+ perf_evsel__reset_sample_bit(evsel, REGS_USER);
+ perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ }
+}
+
+static void apply_config_terms(struct perf_evsel *evsel,
+ struct record_opts *opts)
+{
+ struct perf_evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ struct perf_event_attr *attr = &evsel->attr;
+ struct callchain_param param;
+ u32 dump_size = 0;
+ char *callgraph_buf = NULL;
+
+ /* callgraph default */
+ param.record_mode = callchain_param.record_mode;
+
+ list_for_each_entry(term, config_terms, list) {
+ switch (term->type) {
+ case PERF_EVSEL__CONFIG_TERM_PERIOD:
+ attr->sample_period = term->val.period;
+ attr->freq = 0;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_FREQ:
+ attr->sample_freq = term->val.freq;
+ attr->freq = 1;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_TIME:
+ if (term->val.time)
+ perf_evsel__set_sample_bit(evsel, TIME);
+ else
+ perf_evsel__reset_sample_bit(evsel, TIME);
+ break;
+ case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+ callgraph_buf = term->val.callgraph;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+ dump_size = term->val.stack_user;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* User explicitly set per-event callgraph, clear the old setting and reset. */
+ if ((callgraph_buf != NULL) || (dump_size > 0)) {
+
+ /* parse callgraph parameters */
+ if (callgraph_buf != NULL) {
+ if (!strcmp(callgraph_buf, "no")) {
+ param.enabled = false;
+ param.record_mode = CALLCHAIN_NONE;
+ } else {
+ param.enabled = true;
+ if (parse_callchain_record(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
+ }
+ }
+ if (dump_size > 0) {
+ dump_size = round_up(dump_size, sizeof(u64));
+ param.dump_size = dump_size;
+ }
+
+ /* If global callgraph set, clear it */
+ if (callchain_param.enabled)
+ perf_evsel__reset_callgraph(evsel, &callchain_param);
+
+ /* set perf-event callgraph */
+ if (param.enabled)
+ perf_evsel__config_callgraph(evsel, opts, &param);
+ }
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -688,10 +784,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
evsel->attr.exclude_callchain_user = 1;
if (callchain_param.enabled && !evsel->no_aux_samples)
- perf_evsel__config_callgraph(evsel, opts);
+ perf_evsel__config_callgraph(evsel, opts, &callchain_param);
if (opts->sample_intr_regs) {
- attr->sample_regs_intr = PERF_REGS_MASK;
+ attr->sample_regs_intr = opts->sample_intr_regs;
perf_evsel__set_sample_bit(evsel, REGS_INTR);
}
@@ -706,7 +802,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
*/
if (opts->sample_time &&
(!perf_missing_features.sample_id_all &&
- (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu)))
+ (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
+ opts->sample_time_set)))
perf_evsel__set_sample_bit(evsel, TIME);
if (opts->raw_samples && !evsel->no_aux_samples) {
@@ -735,6 +832,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
+ if (opts->record_switch_events)
+ attr->context_switch = track;
+
if (opts->sample_transaction)
perf_evsel__set_sample_bit(evsel, TRANSACTION);
@@ -771,6 +871,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
attr->use_clockid = 1;
attr->clockid = opts->clockid;
}
+
+ /*
+ * Apply event specific term settings,
+ * it overloads any global configuration.
+ */
+ apply_config_terms(evsel, opts);
}
static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -814,14 +920,44 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthrea
return 0;
}
-int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
- const char *filter)
+int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
+ const char *filter)
{
return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
PERF_EVENT_IOC_SET_FILTER,
(void *)filter);
}
+int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
+{
+ char *new_filter = strdup(filter);
+
+ if (new_filter != NULL) {
+ free(evsel->filter);
+ evsel->filter = new_filter;
+ return 0;
+ }
+
+ return -1;
+}
+
+int perf_evsel__append_filter(struct perf_evsel *evsel,
+ const char *op, const char *filter)
+{
+ char *new_filter;
+
+ if (evsel->filter == NULL)
+ return perf_evsel__set_filter(evsel, filter);
+
+ if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) {
+ free(evsel->filter);
+ evsel->filter = new_filter;
+ return 0;
+ }
+
+ return -1;
+}
+
int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads)
{
return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
@@ -851,19 +987,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
return 0;
}
-void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
-{
- memset(evsel->counts, 0, (sizeof(*evsel->counts) +
- (ncpus * sizeof(struct perf_counts_values))));
-}
-
-int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
-{
- evsel->counts = zalloc((sizeof(*evsel->counts) +
- (ncpus * sizeof(struct perf_counts_values))));
- return evsel->counts != NULL ? 0 : -ENOMEM;
-}
-
static void perf_evsel__free_fd(struct perf_evsel *evsel)
{
xyarray__delete(evsel->fd);
@@ -877,6 +1000,16 @@ static void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
}
+static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
+{
+ struct perf_evsel_config_term *term, *h;
+
+ list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
+ list_del(&term->list);
+ free(term);
+ }
+}
+
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
@@ -891,17 +1024,16 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
}
}
-void perf_evsel__free_counts(struct perf_evsel *evsel)
-{
- zfree(&evsel->counts);
-}
-
void perf_evsel__exit(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
+ assert(evsel->evlist == NULL);
perf_evsel__free_fd(evsel);
perf_evsel__free_id(evsel);
+ perf_evsel__free_config_terms(evsel);
close_cgroup(evsel->cgrp);
+ cpu_map__put(evsel->cpus);
+ thread_map__put(evsel->threads);
zfree(&evsel->group_name);
zfree(&evsel->name);
perf_evsel__object.fini(evsel);
@@ -913,7 +1045,7 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
-void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu,
+void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values tmp;
@@ -925,8 +1057,8 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu,
tmp = evsel->prev_raw_counts->aggr;
evsel->prev_raw_counts->aggr = *count;
} else {
- tmp = evsel->prev_raw_counts->cpu[cpu];
- evsel->prev_raw_counts->cpu[cpu] = *count;
+ tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
+ *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
}
count->val = count->val - tmp.val;
@@ -954,20 +1086,18 @@ void perf_counts_values__scale(struct perf_counts_values *count,
*pscaled = scaled;
}
-int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread,
- perf_evsel__read_cb_t cb)
+int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count)
{
- struct perf_counts_values count;
-
- memset(&count, 0, sizeof(count));
+ memset(count, 0, sizeof(*count));
if (FD(evsel, cpu, thread) < 0)
return -EINVAL;
- if (readn(FD(evsel, cpu, thread), &count, sizeof(count)) < 0)
+ if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0)
return -errno;
- return cb(evsel, cpu, thread, &count);
+ return 0;
}
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
@@ -979,15 +1109,15 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
if (FD(evsel, cpu, thread) < 0)
return -EINVAL;
- if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
+ if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
return -ENOMEM;
if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
return -errno;
- perf_evsel__compute_deltas(evsel, cpu, &count);
+ perf_evsel__compute_deltas(evsel, cpu, thread, &count);
perf_counts_values__scale(&count, scale, NULL);
- evsel->counts->cpu[cpu] = count;
+ *perf_counts(evsel->counts, cpu, thread) = count;
return 0;
}
@@ -1058,7 +1188,7 @@ static void __p_read_format(char *buf, size_t size, u64 value)
#define BUF_SIZE 1024
-#define p_hex(val) snprintf(buf, BUF_SIZE, "%"PRIx64, (uint64_t)(val))
+#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
@@ -1112,6 +1242,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(mmap2, p_unsigned);
PRINT_ATTRf(comm_exec, p_unsigned);
PRINT_ATTRf(use_clockid, p_unsigned);
+ PRINT_ATTRf(context_switch, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
@@ -1121,6 +1252,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(sample_stack_user, p_unsigned);
PRINT_ATTRf(clockid, p_signed);
PRINT_ATTRf(sample_regs_intr, p_hex);
+ PRINT_ATTRf(aux_watermark, p_unsigned);
return ret;
}
@@ -1183,7 +1315,7 @@ retry_sample_id:
int group_fd;
if (!evsel->cgrp && !evsel->system_wide)
- pid = threads->map[thread];
+ pid = thread_map__pid(threads, thread);
group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
@@ -2091,8 +2223,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
printed += perf_event_attr__fprintf(fp, &evsel->attr,
__print_attr__fprintf, &first);
} else if (details->freq) {
- printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64,
- (u64)evsel->attr.sample_freq);
+ const char *term = "sample_freq";
+
+ if (!evsel->attr.freq)
+ term = "sample_period";
+
+ printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
+ term, (u64)evsel->attr.sample_freq);
}
out:
fputc('\n', fp);
@@ -2148,7 +2285,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
case EMFILE:
return scnprintf(msg, size, "%s",
"Too many events are opened.\n"
- "Try again after reducing the number of events.");
+ "Probably the maximum number of open file descriptors has been reached.\n"
+ "Hint: Try again after reducing the number of events.\n"
+ "Hint: Try increasing the limit with 'ulimit -n <limit>'");
case ENODEV:
if (target->cpu_list)
return scnprintf(msg, size, "%s",
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index e486151..298e6bb 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -8,23 +8,8 @@
#include <linux/types.h>
#include "xyarray.h"
#include "symbol.h"
-
-struct perf_counts_values {
- union {
- struct {
- u64 val;
- u64 ena;
- u64 run;
- };
- u64 values[3];
- };
-};
-
-struct perf_counts {
- s8 scaled;
- struct perf_counts_values aggr;
- struct perf_counts_values cpu[];
-};
+#include "cpumap.h"
+#include "counts.h"
struct perf_evsel;
@@ -46,8 +31,38 @@ struct perf_sample_id {
struct cgroup_sel;
+/*
+ * The 'struct perf_evsel_config_term' is used to pass event
+ * specific configuration data to perf_evsel__config routine.
+ * It is allocated within event parsing and attached to
+ * perf_evsel::config_terms list head.
+*/
+enum {
+ PERF_EVSEL__CONFIG_TERM_PERIOD,
+ PERF_EVSEL__CONFIG_TERM_FREQ,
+ PERF_EVSEL__CONFIG_TERM_TIME,
+ PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+ PERF_EVSEL__CONFIG_TERM_STACK_USER,
+ PERF_EVSEL__CONFIG_TERM_MAX,
+};
+
+struct perf_evsel_config_term {
+ struct list_head list;
+ int type;
+ union {
+ u64 period;
+ u64 freq;
+ bool time;
+ char *callgraph;
+ u64 stack_user;
+ } val;
+};
+
/** struct perf_evsel - event selector
*
+ * @evlist - evlist this evsel is in, if it is in one.
+ * @node - To insert it into evlist->entries or in other list_heads, say in
+ * the event parsing routines.
* @name - Can be set to retain the original event name passed by the user,
* so that when showing results in tools such as 'perf stat', we
* show the name used, not some alias.
@@ -61,6 +76,7 @@ struct cgroup_sel;
*/
struct perf_evsel {
struct list_head node;
+ struct perf_evlist *evlist;
struct perf_event_attr attr;
char *filter;
struct xyarray *fd;
@@ -73,7 +89,6 @@ struct perf_evsel {
char *name;
double scale;
const char *unit;
- bool snapshot;
struct event_format *tp_format;
union {
void *priv;
@@ -83,9 +98,11 @@ struct perf_evsel {
struct cgroup_sel *cgrp;
void *handler;
struct cpu_map *cpus;
+ struct thread_map *threads;
unsigned int sample_size;
int id_pos;
int is_pos;
+ bool snapshot;
bool supported;
bool needs_swap;
bool no_aux_samples;
@@ -93,13 +110,15 @@ struct perf_evsel {
bool system_wide;
bool tracking;
bool per_pkg;
- unsigned long *per_pkg_mask;
/* parse modifier helper */
int exclude_GH;
int nr_members;
int sample_read;
+ unsigned long *per_pkg_mask;
struct perf_evsel *leader;
char *group_name;
+ bool cmdline_group_boundary;
+ struct list_head config_terms;
};
union u64_swap {
@@ -113,10 +132,20 @@ struct thread_map;
struct perf_evlist;
struct record_opts;
+static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
+{
+ return evsel->cpus;
+}
+
+static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
+{
+ return perf_evsel__cpus(evsel)->nr;
+}
+
void perf_counts_values__scale(struct perf_counts_values *count,
bool scale, s8 *pscaled);
-void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu,
+void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
int perf_evsel__object_config(size_t object_size,
@@ -170,9 +199,6 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel);
int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
-int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
-void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
-void perf_evsel__free_counts(struct perf_evsel *evsel);
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
@@ -189,8 +215,11 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
void perf_evsel__set_sample_id(struct perf_evsel *evsel,
bool use_sample_identifier);
-int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
- const char *filter);
+int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__append_filter(struct perf_evsel *evsel,
+ const char *op, const char *filter);
+int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
+ const char *filter);
int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
@@ -236,12 +265,8 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1,
(a)->attr.type == (b)->attr.type && \
(a)->attr.config == (b)->attr.config)
-typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel,
- int cpu, int thread,
- struct perf_counts_values *count);
-
-int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread,
- perf_evsel__read_cb_t cb);
+int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count);
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
int cpu, int thread, bool scale);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 918fd8a..4181454 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -869,6 +869,20 @@ static int write_branch_stack(int fd __maybe_unused,
return 0;
}
+static int write_auxtrace(int fd, struct perf_header *h,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct perf_session *session;
+ int err;
+
+ session = container_of(h, struct perf_session, header);
+
+ err = auxtrace_index__write(fd, &session->auxtrace_index);
+ if (err < 0)
+ pr_err("Failed to write auxtrace index\n");
+ return err;
+}
+
static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
@@ -909,17 +923,13 @@ static void print_cmdline(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
int nr, i;
- char *str;
nr = ph->env.nr_cmdline;
- str = ph->env.cmdline;
fprintf(fp, "# cmdline : ");
- for (i = 0; i < nr; i++) {
- fprintf(fp, "%s ", str);
- str += strlen(str) + 1;
- }
+ for (i = 0; i < nr; i++)
+ fprintf(fp, "%s ", ph->env.cmdline_argv[i]);
fputc('\n', fp);
}
@@ -1049,8 +1059,7 @@ out:
free(buf);
return events;
error:
- if (events)
- free_event_desc(events);
+ free_event_desc(events);
events = NULL;
goto out;
}
@@ -1151,6 +1160,12 @@ static void print_branch_stack(struct perf_header *ph __maybe_unused,
fprintf(fp, "# contains samples with branch stack\n");
}
+static void print_auxtrace(struct perf_header *ph __maybe_unused,
+ int fd __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
+}
+
static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
@@ -1218,9 +1233,8 @@ static int __event_process_build_id(struct build_id_event *bev,
struct perf_session *session)
{
int err = -1;
- struct dsos *dsos;
struct machine *machine;
- u16 misc;
+ u16 cpumode;
struct dso *dso;
enum dso_kernel_type dso_type;
@@ -1228,39 +1242,37 @@ static int __event_process_build_id(struct build_id_event *bev,
if (!machine)
goto out;
- misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ cpumode = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- switch (misc) {
+ switch (cpumode) {
case PERF_RECORD_MISC_KERNEL:
dso_type = DSO_TYPE_KERNEL;
- dsos = &machine->kernel_dsos;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
dso_type = DSO_TYPE_GUEST_KERNEL;
- dsos = &machine->kernel_dsos;
break;
case PERF_RECORD_MISC_USER:
case PERF_RECORD_MISC_GUEST_USER:
dso_type = DSO_TYPE_USER;
- dsos = &machine->user_dsos;
break;
default:
goto out;
}
- dso = __dsos__findnew(dsos, filename);
+ dso = machine__findnew_dso(machine, filename);
if (dso != NULL) {
char sbuild_id[BUILD_ID_SIZE * 2 + 1];
dso__set_build_id(dso, &bev->build_id);
- if (!is_kernel_module(filename))
+ if (!is_kernel_module(filename, cpumode))
dso->kernel = dso_type;
build_id__sprintf(dso->build_id, sizeof(dso->build_id),
sbuild_id);
pr_debug("build id event received for %s: %s\n",
dso->long_name, sbuild_id);
+ dso__put(dso);
}
err = 0;
@@ -1525,14 +1537,13 @@ process_event_desc(struct perf_file_section *section __maybe_unused,
return 0;
}
-static int process_cmdline(struct perf_file_section *section __maybe_unused,
+static int process_cmdline(struct perf_file_section *section,
struct perf_header *ph, int fd,
void *data __maybe_unused)
{
ssize_t ret;
- char *str;
- u32 nr, i;
- struct strbuf sb;
+ char *str, *cmdline = NULL, **argv = NULL;
+ u32 nr, i, len = 0;
ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
@@ -1542,22 +1553,32 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused,
nr = bswap_32(nr);
ph->env.nr_cmdline = nr;
- strbuf_init(&sb, 128);
+
+ cmdline = zalloc(section->size + nr + 1);
+ if (!cmdline)
+ return -1;
+
+ argv = zalloc(sizeof(char *) * (nr + 1));
+ if (!argv)
+ goto error;
for (i = 0; i < nr; i++) {
str = do_read_string(fd, ph);
if (!str)
goto error;
- /* include a NULL character at the end */
- strbuf_add(&sb, str, strlen(str) + 1);
+ argv[i] = cmdline + len;
+ memcpy(argv[i], str, strlen(str) + 1);
+ len += strlen(str) + 1;
free(str);
}
- ph->env.cmdline = strbuf_detach(&sb, NULL);
+ ph->env.cmdline = cmdline;
+ ph->env.cmdline_argv = (const char **) argv;
return 0;
error:
- strbuf_release(&sb);
+ free(argv);
+ free(cmdline);
return -1;
}
@@ -1821,6 +1842,22 @@ out_free:
return ret;
}
+static int process_auxtrace(struct perf_file_section *section,
+ struct perf_header *ph, int fd,
+ void *data __maybe_unused)
+{
+ struct perf_session *session;
+ int err;
+
+ session = container_of(ph, struct perf_session, header);
+
+ err = auxtrace_index__process(fd, section->size, session,
+ ph->needs_swap);
+ if (err < 0)
+ pr_err("Failed to process auxtrace index\n");
+ return err;
+}
+
struct feature_ops {
int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1861,6 +1898,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings),
FEAT_OPP(HEADER_GROUP_DESC, group_desc),
+ FEAT_OPP(HEADER_AUXTRACE, auxtrace),
};
struct header_print_data {
@@ -2476,6 +2514,7 @@ int perf_session__read_header(struct perf_session *session)
if (session->evlist == NULL)
return -ENOMEM;
+ session->evlist->env = &header->env;
if (perf_data_file__is_pipe(file))
return perf_header__read_pipe(session);
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 3bb90ac..396e496 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -30,6 +30,7 @@ enum {
HEADER_BRANCH_STACK,
HEADER_PMU_MAPPINGS,
HEADER_GROUP_DESC,
+ HEADER_AUXTRACE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
@@ -65,7 +66,7 @@ struct perf_header;
int perf_file_header__read(struct perf_file_header *header,
struct perf_header *ph, int fd);
-struct perf_session_env {
+struct perf_env {
char *hostname;
char *os_release;
char *version;
@@ -83,6 +84,7 @@ struct perf_session_env {
int nr_pmu_mappings;
int nr_groups;
char *cmdline;
+ const char **cmdline_argv;
char *sibling_cores;
char *sibling_threads;
char *numa_nodes;
@@ -96,7 +98,7 @@ struct perf_header {
u64 data_size;
u64 feat_offset;
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
- struct perf_session_env env;
+ struct perf_env env;
};
struct perf_evlist;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index cc22b91..08b6cd9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -151,6 +151,12 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+ if (h->srcline)
+ hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline));
+
+ if (h->srcfile)
+ hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile));
+
if (h->transaction)
hists__new_col_len(hists, HISTC_TRANSACTION,
hist_entry__transaction_len());
@@ -313,8 +319,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
memset(&he->stat, 0, sizeof(he->stat));
}
- if (he->ms.map)
- he->ms.map->referenced = true;
+ map__get(he->ms.map);
if (he->branch_info) {
/*
@@ -324,6 +329,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
*/
he->branch_info = malloc(sizeof(*he->branch_info));
if (he->branch_info == NULL) {
+ map__zput(he->ms.map);
free(he->stat_acc);
free(he);
return NULL;
@@ -332,17 +338,13 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
memcpy(he->branch_info, template->branch_info,
sizeof(*he->branch_info));
- if (he->branch_info->from.map)
- he->branch_info->from.map->referenced = true;
- if (he->branch_info->to.map)
- he->branch_info->to.map->referenced = true;
+ map__get(he->branch_info->from.map);
+ map__get(he->branch_info->to.map);
}
if (he->mem_info) {
- if (he->mem_info->iaddr.map)
- he->mem_info->iaddr.map->referenced = true;
- if (he->mem_info->daddr.map)
- he->mem_info->daddr.map->referenced = true;
+ map__get(he->mem_info->iaddr.map);
+ map__get(he->mem_info->daddr.map);
}
if (symbol_conf.use_callchain)
@@ -362,10 +364,10 @@ static u8 symbol__parent_filter(const struct symbol *parent)
return 0;
}
-static struct hist_entry *add_hist_entry(struct hists *hists,
- struct hist_entry *entry,
- struct addr_location *al,
- bool sample_self)
+static struct hist_entry *hists__findnew_entry(struct hists *hists,
+ struct hist_entry *entry,
+ struct addr_location *al,
+ bool sample_self)
{
struct rb_node **p;
struct rb_node *parent = NULL;
@@ -407,9 +409,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
* the history counter to increment.
*/
if (he->ms.map != entry->ms.map) {
- he->ms.map = entry->ms.map;
- if (he->ms.map)
- he->ms.map->referenced = true;
+ map__put(he->ms.map);
+ he->ms.map = map__get(entry->ms.map);
}
goto out;
}
@@ -468,7 +469,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
.transaction = transaction,
};
- return add_hist_entry(hists, &entry, al, sample_self);
+ return hists__findnew_entry(hists, &entry, al, sample_self);
}
static int
@@ -548,9 +549,9 @@ iter_finish_mem_entry(struct hist_entry_iter *iter,
out:
/*
- * We don't need to free iter->priv (mem_info) here since
- * the mem info was either already freed in add_hist_entry() or
- * passed to a new hist entry by hist_entry__new().
+ * We don't need to free iter->priv (mem_info) here since the mem info
+ * was either already freed in hists__findnew_entry() or passed to a
+ * new hist entry by hist_entry__new().
*/
iter->priv = NULL;
@@ -623,7 +624,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
- 1, 1, 0, true);
+ 1, bi->flags.cycles ? bi->flags.cycles : 1,
+ 0, true);
if (he == NULL)
return -ENOMEM;
@@ -765,6 +767,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
struct hist_entry **he_cache = iter->priv;
struct hist_entry *he;
struct hist_entry he_tmp = {
+ .hists = evsel__hists(evsel),
.cpu = al->cpu,
.thread = al->thread,
.comm = thread__comm(al->thread),
@@ -851,19 +854,15 @@ const struct hist_iter_ops hist_iter_cumulative = {
};
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
- struct perf_evsel *evsel, struct perf_sample *sample,
int max_stack_depth, void *arg)
{
int err, err2;
- err = sample__resolve_callchain(sample, &iter->parent, evsel, al,
- max_stack_depth);
+ err = sample__resolve_callchain(iter->sample, &iter->parent,
+ iter->evsel, al, max_stack_depth);
if (err)
return err;
- iter->evsel = evsel;
- iter->sample = sample;
-
err = iter->ops->prepare_entry(iter, al);
if (err)
goto out;
@@ -937,10 +936,24 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
void hist_entry__delete(struct hist_entry *he)
{
thread__zput(he->thread);
- zfree(&he->branch_info);
- zfree(&he->mem_info);
+ map__zput(he->ms.map);
+
+ if (he->branch_info) {
+ map__zput(he->branch_info->from.map);
+ map__zput(he->branch_info->to.map);
+ zfree(&he->branch_info);
+ }
+
+ if (he->mem_info) {
+ map__zput(he->mem_info->iaddr.map);
+ map__zput(he->mem_info->daddr.map);
+ zfree(&he->mem_info);
+ }
+
zfree(&he->stat_acc);
free_srcline(he->srcline);
+ if (he->srcfile && he->srcfile[0])
+ free(he->srcfile);
free_callchain(he->callchain);
free(he);
}
@@ -1096,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
static void __hists__insert_output_entry(struct rb_root *entries,
struct hist_entry *he,
- u64 min_callchain_hits)
+ u64 min_callchain_hits,
+ bool use_callchain)
{
struct rb_node **p = &entries->rb_node;
struct rb_node *parent = NULL;
struct hist_entry *iter;
- if (symbol_conf.use_callchain)
+ if (use_callchain)
callchain_param.sort(&he->sorted_chain, he->callchain,
min_callchain_hits, &callchain_param);
@@ -1126,6 +1140,13 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
struct rb_node *next;
struct hist_entry *n;
u64 min_callchain_hits;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ bool use_callchain;
+
+ if (evsel && !symbol_conf.show_ref_callgraph)
+ use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+ else
+ use_callchain = symbol_conf.use_callchain;
min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
@@ -1144,7 +1165,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
n = rb_entry(next, struct hist_entry, rb_node_in);
next = rb_next(&n->rb_node_in);
- __hists__insert_output_entry(&hists->entries, n, min_callchain_hits);
+ __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
hists__inc_stats(hists, n);
if (!n->filtered)
@@ -1163,7 +1184,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h
return;
/* force fold unfiltered entry for simplicity */
- h->ms.unfolded = false;
+ h->unfolded = false;
h->row_offset = 0;
h->nr_rows = 0;
@@ -1411,6 +1432,39 @@ int hists__link(struct hists *leader, struct hists *other)
return 0;
}
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample, bool nonany_branch_mode)
+{
+ struct branch_info *bi;
+
+ /* If we have branch cycles always annotate them. */
+ if (bs && bs->nr && bs->entries[0].flags.cycles) {
+ int i;
+
+ bi = sample__resolve_bstack(sample, al);
+ if (bi) {
+ struct addr_map_symbol *prev = NULL;
+
+ /*
+ * Ignore errors, still want to process the
+ * other entries.
+ *
+ * For non standard branch modes always
+ * force no IPC (prev == NULL)
+ *
+ * Note that perf stores branches reversed from
+ * program order!
+ */
+ for (i = bs->nr - 1; i >= 0; i--) {
+ addr_map_symbol__account_cycles(&bi[i].from,
+ nonany_branch_mode ? NULL : prev,
+ bi[i].flags.cycles);
+ prev = &bi[i].to;
+ }
+ free(bi);
+ }
+ }
+}
size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
{
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 9f31b89..de6d58e 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -30,6 +30,7 @@ enum hist_column {
HISTC_PARENT,
HISTC_CPU,
HISTC_SRCLINE,
+ HISTC_SRCFILE,
HISTC_MISPREDICT,
HISTC_IN_TX,
HISTC_ABORT,
@@ -47,6 +48,7 @@ enum hist_column {
HISTC_MEM_SNOOP,
HISTC_MEM_DCACHELINE,
HISTC_TRANSACTION,
+ HISTC_CYCLES,
HISTC_NR_COLS, /* Last entry */
};
@@ -111,7 +113,6 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
u64 weight, u64 transaction,
bool sample_self);
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
- struct perf_evsel *evsel, struct perf_sample *sample,
int max_stack_depth, void *arg);
int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
@@ -312,7 +313,7 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
- struct perf_session_env *env);
+ struct perf_env *env);
int script_browse(const char *script_opt);
#else
static inline
@@ -320,7 +321,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
const char *help __maybe_unused,
struct hist_browser_timer *hbt __maybe_unused,
float min_pcnt __maybe_unused,
- struct perf_session_env *env __maybe_unused)
+ struct perf_env *env __maybe_unused)
{
return 0;
}
@@ -350,6 +351,9 @@ static inline int script_browse(const char *script_opt __maybe_unused)
unsigned int hists__sort_list_width(struct hists *hists);
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample, bool nonany_branch_mode);
+
struct option;
int parse_filter_percentage(const struct option *opt __maybe_unused,
const char *arg, int unset __maybe_unused);
diff --git a/tools/perf/util/include/linux/poison.h b/tools/perf/util/include/linux/poison.h
deleted file mode 100644
index fef6dbc..0000000
--- a/tools/perf/util/include/linux/poison.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../../include/linux/poison.h"
diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h
deleted file mode 100644
index 2a030c5..0000000
--- a/tools/perf/util/include/linux/rbtree.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <stdbool.h>
-#include "../../../../include/linux/rbtree.h"
diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h
deleted file mode 100644
index 9d6fcdf..0000000
--- a/tools/perf/util/include/linux/rbtree_augmented.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <stdbool.h>
-#include "../../../../include/linux/rbtree_augmented.h"
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 0000000..ea76862
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,933 @@
+/*
+ * intel-bts.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "cpumap.h"
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "util.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "debug.h"
+#include "tsc.h"
+#include "auxtrace.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-bts.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+#define INTEL_BTS_ERR_NOINSN 5
+#define INTEL_BTS_ERR_LOST 9
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le64_to_cpu bswap_64
+#else
+#define le64_to_cpu
+#endif
+
+struct intel_bts {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ bool sampling_mode;
+ bool snapshot_mode;
+ bool data_queued;
+ u32 pmu_type;
+ struct perf_tsc_conversion tc;
+ bool cap_user_time_zero;
+ struct itrace_synth_opts synth_opts;
+ bool sample_branches;
+ u32 branches_filter;
+ u64 branches_sample_type;
+ u64 branches_id;
+ size_t branches_event_size;
+ bool synth_needs_swap;
+};
+
+struct intel_bts_queue {
+ struct intel_bts *bts;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ bool on_heap;
+ bool done;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+ u64 time;
+ struct intel_pt_insn intel_pt_insn;
+ u32 sample_flags;
+};
+
+struct branch {
+ u64 from;
+ u64 to;
+ u64 misc;
+};
+
+static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct branch *branch;
+ size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
+ const char *color = PERF_COLOR_BLUE;
+
+ color_fprintf(stdout, color,
+ ". ... Intel BTS data: size %zu bytes\n",
+ len);
+
+ while (len) {
+ if (len >= br_sz)
+ sz = br_sz;
+ else
+ sz = len;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < sz; i++)
+ color_fprintf(stdout, color, " %02x", buf[i]);
+ for (; i < br_sz; i++)
+ color_fprintf(stdout, color, " ");
+ if (len >= br_sz) {
+ branch = (struct branch *)buf;
+ color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
+ le64_to_cpu(branch->from),
+ le64_to_cpu(branch->to),
+ le64_to_cpu(branch->misc) & 0x10 ?
+ "pred" : "miss");
+ } else {
+ color_fprintf(stdout, color, " Bad record!\n");
+ }
+ pos += sz;
+ buf += sz;
+ len -= sz;
+ }
+}
+
+static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ intel_bts_dump(bts, buf, len);
+}
+
+static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
+{
+ union perf_event event;
+ int err;
+
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
+ sample->tid, 0, "Lost trace data");
+
+ err = perf_session__deliver_synth_event(bts->session, &event, NULL);
+ if (err)
+ pr_err("Intel BTS: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
+ unsigned int queue_nr)
+{
+ struct intel_bts_queue *btsq;
+
+ btsq = zalloc(sizeof(struct intel_bts_queue));
+ if (!btsq)
+ return NULL;
+
+ btsq->bts = bts;
+ btsq->queue_nr = queue_nr;
+ btsq->pid = -1;
+ btsq->tid = -1;
+ btsq->cpu = -1;
+
+ return btsq;
+}
+
+static int intel_bts_setup_queue(struct intel_bts *bts,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct intel_bts_queue *btsq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (!btsq) {
+ btsq = intel_bts_alloc_queue(bts, queue_nr);
+ if (!btsq)
+ return -ENOMEM;
+ queue->priv = btsq;
+
+ if (queue->cpu != -1)
+ btsq->cpu = queue->cpu;
+ btsq->tid = queue->tid;
+ }
+
+ if (bts->sampling_mode)
+ return 0;
+
+ if (!btsq->on_heap && !btsq->buffer) {
+ int ret;
+
+ btsq->buffer = auxtrace_buffer__next(queue, NULL);
+ if (!btsq->buffer)
+ return 0;
+
+ ret = auxtrace_heap__add(&bts->heap, queue_nr,
+ btsq->buffer->reference);
+ if (ret)
+ return ret;
+ btsq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int intel_bts_setup_queues(struct intel_bts *bts)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < bts->queues.nr_queues; i++) {
+ ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
+ i);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static inline int intel_bts_update_queues(struct intel_bts *bts)
+{
+ if (bts->queues.new_data) {
+ bts->queues.new_data = false;
+ return intel_bts_setup_queues(bts);
+ }
+ return 0;
+}
+
+static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b)
+{
+ size_t offs, len;
+
+ if (len_a > len_b)
+ offs = len_a - len_b;
+ else
+ offs = 0;
+
+ for (; offs < len_a; offs += sizeof(struct branch)) {
+ len = len_a - offs;
+ if (!memcmp(buf_a + offs, buf_b, len))
+ return buf_b + len;
+ }
+
+ return buf_b;
+}
+
+static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *b)
+{
+ struct auxtrace_buffer *a;
+ void *start;
+
+ if (b->list.prev == &queue->head)
+ return 0;
+ a = list_entry(b->list.prev, struct auxtrace_buffer, list);
+ start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
+ if (!start)
+ return -EINVAL;
+ b->use_size = b->data + b->size - start;
+ b->use_data = start;
+ return 0;
+}
+
+static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
+ struct branch *branch)
+{
+ int ret;
+ struct intel_bts *bts = btsq->bts;
+ union perf_event event;
+ struct perf_sample sample = { .ip = 0, };
+
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = PERF_RECORD_MISC_USER;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
+ sample.ip = le64_to_cpu(branch->from);
+ sample.pid = btsq->pid;
+ sample.tid = btsq->tid;
+ sample.addr = le64_to_cpu(branch->to);
+ sample.id = btsq->bts->branches_id;
+ sample.stream_id = btsq->bts->branches_id;
+ sample.period = 1;
+ sample.cpu = btsq->cpu;
+ sample.flags = btsq->sample_flags;
+ sample.insn_len = btsq->intel_pt_insn.length;
+
+ if (bts->synth_opts.inject) {
+ event.sample.header.size = bts->branches_event_size;
+ ret = perf_event__synthesize_sample(&event,
+ bts->branches_sample_type,
+ 0, &sample,
+ bts->synth_needs_swap);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
+ if (ret)
+ pr_err("Intel BTS: failed to deliver branch event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
+{
+ struct machine *machine = btsq->bts->machine;
+ struct thread *thread;
+ struct addr_location al;
+ unsigned char buf[1024];
+ size_t bufsz;
+ ssize_t len;
+ int x86_64;
+ uint8_t cpumode;
+ int err = -1;
+
+ bufsz = intel_pt_insn_max_size();
+
+ if (machine__kernel_ip(machine, ip))
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ else
+ cpumode = PERF_RECORD_MISC_USER;
+
+ thread = machine__find_thread(machine, -1, btsq->tid);
+ if (!thread)
+ return -1;
+
+ thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
+ if (!al.map || !al.map->dso)
+ goto out_put;
+
+ len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz);
+ if (len <= 0)
+ goto out_put;
+
+ /* Load maps to ensure dso->is_64_bit has been updated */
+ map__load(al.map, machine->symbol_filter);
+
+ x86_64 = al.map->dso->is_64_bit;
+
+ if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
+ goto out_put;
+
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
+ pid_t tid, u64 ip)
+{
+ union perf_event event;
+ int err;
+
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
+ "Failed to get instruction");
+
+ err = perf_session__deliver_synth_event(bts->session, &event, NULL);
+ if (err)
+ pr_err("Intel BTS: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
+ struct branch *branch)
+{
+ int err;
+
+ if (!branch->from) {
+ if (branch->to)
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+ else
+ btsq->sample_flags = 0;
+ btsq->intel_pt_insn.length = 0;
+ } else if (!branch->to) {
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ btsq->intel_pt_insn.length = 0;
+ } else {
+ err = intel_bts_get_next_insn(btsq, branch->from);
+ if (err) {
+ btsq->sample_flags = 0;
+ btsq->intel_pt_insn.length = 0;
+ if (!btsq->bts->synth_opts.errors)
+ return 0;
+ err = intel_bts_synth_error(btsq->bts, btsq->cpu,
+ btsq->pid, btsq->tid,
+ branch->from);
+ return err;
+ }
+ btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
+ /* Check for an async branch into the kernel */
+ if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
+ machine__kernel_ip(btsq->bts->machine, branch->to) &&
+ btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET))
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ }
+
+ return 0;
+}
+
+static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
+ struct auxtrace_buffer *buffer)
+{
+ struct branch *branch;
+ size_t sz, bsz = sizeof(struct branch);
+ u32 filter = btsq->bts->branches_filter;
+ int err = 0;
+
+ if (buffer->use_data) {
+ sz = buffer->use_size;
+ branch = buffer->use_data;
+ } else {
+ sz = buffer->size;
+ branch = buffer->data;
+ }
+
+ if (!btsq->bts->sample_branches)
+ return 0;
+
+ for (; sz > bsz; branch += 1, sz -= bsz) {
+ if (!branch->from && !branch->to)
+ continue;
+ intel_bts_get_branch_type(btsq, branch);
+ if (filter && !(filter & btsq->sample_flags))
+ continue;
+ err = intel_bts_synth_branch_sample(btsq, branch);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
+{
+ struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
+ struct auxtrace_queue *queue;
+ struct thread *thread;
+ int err;
+
+ if (btsq->done)
+ return 1;
+
+ if (btsq->pid == -1) {
+ thread = machine__find_thread(btsq->bts->machine, -1,
+ btsq->tid);
+ if (thread)
+ btsq->pid = thread->pid_;
+ } else {
+ thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
+ btsq->tid);
+ }
+
+ queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
+
+ if (!buffer)
+ buffer = auxtrace_buffer__next(queue, NULL);
+
+ if (!buffer) {
+ if (!btsq->bts->sampling_mode)
+ btsq->done = 1;
+ err = 1;
+ goto out_put;
+ }
+
+ /* Currently there is no support for split buffers */
+ if (buffer->consecutive) {
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ if (!buffer->data) {
+ int fd = perf_data_file__fd(btsq->bts->session->file);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+ }
+
+ if (btsq->bts->snapshot_mode && !buffer->consecutive &&
+ intel_bts_do_fix_overlap(queue, buffer)) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+
+ if (!btsq->bts->synth_opts.callchain && thread &&
+ (!old_buffer || btsq->bts->sampling_mode ||
+ (btsq->bts->snapshot_mode && !buffer->consecutive)))
+ thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+
+ err = intel_bts_process_buffer(btsq, buffer);
+
+ auxtrace_buffer__drop_data(buffer);
+
+ btsq->buffer = auxtrace_buffer__next(queue, buffer);
+ if (btsq->buffer) {
+ if (timestamp)
+ *timestamp = btsq->buffer->reference;
+ } else {
+ if (!btsq->bts->sampling_mode)
+ btsq->done = 1;
+ }
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
+{
+ u64 ts = 0;
+ int ret;
+
+ while (1) {
+ ret = intel_bts_process_queue(btsq, &ts);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ break;
+ }
+ return 0;
+}
+
+static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
+{
+ struct auxtrace_queues *queues = &bts->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &bts->queues.queue_array[i];
+ struct intel_bts_queue *btsq = queue->priv;
+
+ if (btsq && btsq->tid == tid)
+ return intel_bts_flush_queue(btsq);
+ }
+ return 0;
+}
+
+static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
+{
+ while (1) {
+ unsigned int queue_nr;
+ struct auxtrace_queue *queue;
+ struct intel_bts_queue *btsq;
+ u64 ts = 0;
+ int ret;
+
+ if (!bts->heap.heap_cnt)
+ return 0;
+
+ if (bts->heap.heap_array[0].ordinal > timestamp)
+ return 0;
+
+ queue_nr = bts->heap.heap_array[0].queue_nr;
+ queue = &bts->queues.queue_array[queue_nr];
+ btsq = queue->priv;
+
+ auxtrace_heap__pop(&bts->heap);
+
+ ret = intel_bts_process_queue(btsq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&bts->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ btsq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int intel_bts_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ u64 timestamp;
+ int err;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("Intel BTS requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && sample->time != (u64)-1)
+ timestamp = perf_time_to_tsc(sample->time, &bts->tc);
+ else
+ timestamp = 0;
+
+ err = intel_bts_update_queues(bts);
+ if (err)
+ return err;
+
+ err = intel_bts_process_queues(bts, timestamp);
+ if (err)
+ return err;
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = intel_bts_process_tid_exit(bts, event->comm.tid);
+ if (err)
+ return err;
+ }
+
+ if (event->header.type == PERF_RECORD_AUX &&
+ (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+ bts->synth_opts.errors)
+ err = intel_bts_lost(bts, sample);
+
+ return err;
+}
+
+static int intel_bts_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ if (bts->sampling_mode)
+ return 0;
+
+ if (!bts->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data_file__fd(session->file);
+ int err;
+
+ if (perf_data_file__is_pipe(session->file)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&bts->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here now we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ intel_bts_dump_event(bts, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int intel_bts_flush(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ int ret;
+
+ if (dump_trace || bts->sampling_mode)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = intel_bts_update_queues(bts);
+ if (ret < 0)
+ return ret;
+
+ return intel_bts_process_queues(bts, MAX_TIMESTAMP);
+}
+
+static void intel_bts_free_queue(void *priv)
+{
+ struct intel_bts_queue *btsq = priv;
+
+ if (!btsq)
+ return;
+ free(btsq);
+}
+
+static void intel_bts_free_events(struct perf_session *session)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ struct auxtrace_queues *queues = &bts->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ intel_bts_free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+ auxtrace_queues__free(queues);
+}
+
+static void intel_bts_free(struct perf_session *session)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ auxtrace_heap__free(&bts->heap);
+ intel_bts_free_events(session);
+ session->auxtrace = NULL;
+ free(bts);
+}
+
+struct intel_bts_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int intel_bts_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct intel_bts_synth *intel_bts_synth =
+ container_of(tool, struct intel_bts_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(intel_bts_synth->session,
+ event, NULL);
+}
+
+static int intel_bts_synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct intel_bts_synth intel_bts_synth;
+
+ memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
+ intel_bts_synth.session = session;
+
+ return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
+ &id, intel_bts_event_synth);
+}
+
+static int intel_bts_synth_events(struct intel_bts *bts,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type == bts->pmu_type && evsel->ids) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("There are no selected events with Intel BTS data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+ attr.exclude_user = evsel->attr.exclude_user;
+ attr.exclude_kernel = evsel->attr.exclude_kernel;
+ attr.exclude_hv = evsel->attr.exclude_hv;
+ attr.exclude_host = evsel->attr.exclude_host;
+ attr.exclude_guest = evsel->attr.exclude_guest;
+ attr.sample_id_all = evsel->attr.sample_id_all;
+ attr.read_format = evsel->attr.read_format;
+
+ id = evsel->id[0] + 1000000000;
+ if (!id)
+ id = 1;
+
+ if (bts->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ id, (u64)attr.sample_type);
+ err = intel_bts_synth_event(session, &attr, id);
+ if (err) {
+ pr_err("%s: failed to synthesize 'branches' event type\n",
+ __func__);
+ return err;
+ }
+ bts->sample_branches = true;
+ bts->branches_sample_type = attr.sample_type;
+ bts->branches_id = id;
+ /*
+ * We only use sample types from PERF_SAMPLE_MASK so we can use
+ * __perf_evsel__sample_size() here.
+ */
+ bts->branches_event_size = sizeof(struct sample_event) +
+ __perf_evsel__sample_size(attr.sample_type);
+ }
+
+ bts->synth_needs_swap = evsel->needs_swap;
+
+ return 0;
+}
+
+static const char * const intel_bts_info_fmts[] = {
+ [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
+ [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
+ [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
+ [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
+ [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
+ [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+};
+
+static void intel_bts_print_info(u64 *arr, int start, int finish)
+{
+ int i;
+
+ if (!dump_trace)
+ return;
+
+ for (i = start; i <= finish; i++)
+ fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
+}
+
+u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
+
+int intel_bts_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
+ struct intel_bts *bts;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+ min_sz)
+ return -EINVAL;
+
+ bts = zalloc(sizeof(struct intel_bts));
+ if (!bts)
+ return -ENOMEM;
+
+ err = auxtrace_queues__init(&bts->queues);
+ if (err)
+ goto err_free;
+
+ bts->session = session;
+ bts->machine = &session->machines.host; /* No kvm support */
+ bts->auxtrace_type = auxtrace_info->type;
+ bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
+ bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
+ bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
+ bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
+ bts->cap_user_time_zero =
+ auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
+ bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
+
+ bts->sampling_mode = false;
+
+ bts->auxtrace.process_event = intel_bts_process_event;
+ bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
+ bts->auxtrace.flush_events = intel_bts_flush;
+ bts->auxtrace.free_events = intel_bts_free_events;
+ bts->auxtrace.free = intel_bts_free;
+ session->auxtrace = &bts->auxtrace;
+
+ intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
+ INTEL_BTS_SNAPSHOT_MODE);
+
+ if (dump_trace)
+ return 0;
+
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+ bts->synth_opts = *session->itrace_synth_opts;
+ else
+ itrace_synth_opts__set_default(&bts->synth_opts);
+
+ if (bts->synth_opts.calls)
+ bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ if (bts->synth_opts.returns)
+ bts->branches_filter |= PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_TRACE_BEGIN;
+
+ err = intel_bts_synth_events(bts, session);
+ if (err)
+ goto err_free_queues;
+
+ err = auxtrace_queues__process_index(&bts->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ if (bts->queues.populated)
+ bts->data_queued = true;
+
+ return 0;
+
+err_free_queues:
+ auxtrace_queues__free(&bts->queues);
+ session->auxtrace = NULL;
+err_free:
+ free(bts);
+ return err;
+}
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
new file mode 100644
index 0000000..ca65e21
--- /dev/null
+++ b/tools/perf/util/intel-bts.h
@@ -0,0 +1,43 @@
+/*
+ * intel-bts.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__PERF_INTEL_BTS_H__
+#define INCLUDE__PERF_INTEL_BTS_H__
+
+#define INTEL_BTS_PMU_NAME "intel_bts"
+
+enum {
+ INTEL_BTS_PMU_TYPE,
+ INTEL_BTS_TIME_SHIFT,
+ INTEL_BTS_TIME_MULT,
+ INTEL_BTS_TIME_ZERO,
+ INTEL_BTS_CAP_USER_TIME_ZERO,
+ INTEL_BTS_SNAPSHOT_MODE,
+ INTEL_BTS_AUXTRACE_PRIV_MAX,
+};
+
+#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+
+struct auxtrace_record *intel_bts_recording_init(int *err);
+
+int intel_bts_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
new file mode 100644
index 0000000..2386322
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -0,0 +1,12 @@
+libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
+
+inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
+inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
+
+$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+ $(call rule_mkdir)
+ @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
+
+$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+
+CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init
diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
new file mode 100644
index 0000000..51756734
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
@@ -0,0 +1,386 @@
+#!/bin/awk -f
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Awk implementation sanity check
+function check_awk_implement() {
+ if (sprintf("%x", 0) != "0")
+ return "Your awk has a printf-format problem."
+ return ""
+}
+
+# Clear working vars
+function clear_vars() {
+ delete table
+ delete lptable2
+ delete lptable1
+ delete lptable3
+ eid = -1 # escape id
+ gid = -1 # group id
+ aid = -1 # AVX id
+ tname = ""
+}
+
+BEGIN {
+ # Implementation error checking
+ awkchecked = check_awk_implement()
+ if (awkchecked != "") {
+ print "Error: " awkchecked > "/dev/stderr"
+ print "Please try to use gawk." > "/dev/stderr"
+ exit 1
+ }
+
+ # Setup generating tables
+ print "/* x86 opcode map generated from x86-opcode-map.txt */"
+ print "/* Do not change this code. */\n"
+ ggid = 1
+ geid = 1
+ gaid = 0
+ delete etable
+ delete gtable
+ delete atable
+
+ opnd_expr = "^[A-Za-z/]"
+ ext_expr = "^\\("
+ sep_expr = "^\\|$"
+ group_expr = "^Grp[0-9A-Za-z]+"
+
+ imm_expr = "^[IJAOL][a-z]"
+ imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+ imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+ imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+ imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+ imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+ imm_flag["Ob"] = "INAT_MOFFSET"
+ imm_flag["Ov"] = "INAT_MOFFSET"
+ imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+
+ modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
+ force64_expr = "\\([df]64\\)"
+ rex_expr = "^REX(\\.[XRWB]+)*"
+ fpu_expr = "^ESC" # TODO
+
+ lprefix1_expr = "\\((66|!F3)\\)"
+ lprefix2_expr = "\\(F3\\)"
+ lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
+ lprefix_expr = "\\((66|F2|F3)\\)"
+ max_lprefix = 4
+
+ # All opcodes starting with lower-case 'v' or with (v1) superscript
+ # accepts VEX prefix
+ vexok_opcode_expr = "^v.*"
+ vexok_expr = "\\(v1\\)"
+ # All opcodes with (v) superscript supports *only* VEX prefix
+ vexonly_expr = "\\(v\\)"
+
+ prefix_expr = "\\(Prefix\\)"
+ prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+ prefix_num["REPNE"] = "INAT_PFX_REPNE"
+ prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+ prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
+ prefix_num["XRELEASE"] = "INAT_PFX_REPE"
+ prefix_num["LOCK"] = "INAT_PFX_LOCK"
+ prefix_num["SEG=CS"] = "INAT_PFX_CS"
+ prefix_num["SEG=DS"] = "INAT_PFX_DS"
+ prefix_num["SEG=ES"] = "INAT_PFX_ES"
+ prefix_num["SEG=FS"] = "INAT_PFX_FS"
+ prefix_num["SEG=GS"] = "INAT_PFX_GS"
+ prefix_num["SEG=SS"] = "INAT_PFX_SS"
+ prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+ prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+ prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+
+ clear_vars()
+}
+
+function semantic_error(msg) {
+ print "Semantic error at " NR ": " msg > "/dev/stderr"
+ exit 1
+}
+
+function debug(msg) {
+ print "DEBUG: " msg
+}
+
+function array_size(arr, i,c) {
+ c = 0
+ for (i in arr)
+ c++
+ return c
+}
+
+/^Table:/ {
+ print "/* " $0 " */"
+ if (tname != "")
+ semantic_error("Hit Table: before EndTable:.");
+}
+
+/^Referrer:/ {
+ if (NF != 1) {
+ # escape opcode table
+ ref = ""
+ for (i = 2; i <= NF; i++)
+ ref = ref $i
+ eid = escape[ref]
+ tname = sprintf("inat_escape_table_%d", eid)
+ }
+}
+
+/^AVXcode:/ {
+ if (NF != 1) {
+ # AVX/escape opcode table
+ aid = $2
+ if (gaid <= aid)
+ gaid = aid + 1
+ if (tname == "") # AVX only opcode table
+ tname = sprintf("inat_avx_table_%d", $2)
+ }
+ if (aid == -1 && eid == -1) # primary opcode table
+ tname = "inat_primary_table"
+}
+
+/^GrpTable:/ {
+ print "/* " $0 " */"
+ if (!($2 in group))
+ semantic_error("No group: " $2 )
+ gid = group[$2]
+ tname = "inat_group_table_" gid
+}
+
+function print_table(tbl,name,fmt,n)
+{
+ print "const insn_attr_t " name " = {"
+ for (i = 0; i < n; i++) {
+ id = sprintf(fmt, i)
+ if (tbl[id])
+ print " [" id "] = " tbl[id] ","
+ }
+ print "};"
+}
+
+/^EndTable/ {
+ if (gid != -1) {
+ # print group tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,3] = tname "_3"
+ }
+ } else {
+ # print primary/escaped tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,0] = tname
+ if (aid >= 0)
+ atable[aid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,1] = tname "_1"
+ if (aid >= 0)
+ atable[aid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,2] = tname "_2"
+ if (aid >= 0)
+ atable[aid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,3] = tname "_3"
+ if (aid >= 0)
+ atable[aid,3] = tname "_3"
+ }
+ }
+ print ""
+ clear_vars()
+}
+
+function add_flags(old,new) {
+ if (old && new)
+ return old " | " new
+ else if (old)
+ return old
+ else
+ return new
+}
+
+# convert operands to flags.
+function convert_operands(count,opnd, i,j,imm,mod)
+{
+ imm = null
+ mod = null
+ for (j = 1; j <= count; j++) {
+ i = opnd[j]
+ if (match(i, imm_expr) == 1) {
+ if (!imm_flag[i])
+ semantic_error("Unknown imm opnd: " i)
+ if (imm) {
+ if (i != "Ib")
+ semantic_error("Second IMM error")
+ imm = add_flags(imm, "INAT_SCNDIMM")
+ } else
+ imm = imm_flag[i]
+ } else if (match(i, modrm_expr))
+ mod = "INAT_MODRM"
+ }
+ return add_flags(imm, mod)
+}
+
+/^[0-9a-f]+\:/ {
+ if (NR == 1)
+ next
+ # get index
+ idx = "0x" substr($1, 1, index($1,":") - 1)
+ if (idx in table)
+ semantic_error("Redefine " idx " in " tname)
+
+ # check if escaped opcode
+ if ("escape" == $2) {
+ if ($3 != "#")
+ semantic_error("No escaped name")
+ ref = ""
+ for (i = 4; i <= NF; i++)
+ ref = ref $i
+ if (ref in escape)
+ semantic_error("Redefine escape (" ref ")")
+ escape[ref] = geid
+ geid++
+ table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+ next
+ }
+
+ variant = null
+ # converts
+ i = 2
+ while (i <= NF) {
+ opcode = $(i++)
+ delete opnds
+ ext = null
+ flags = null
+ opnd = null
+ # parse one opcode
+ if (match($i, opnd_expr)) {
+ opnd = $i
+ count = split($(i++), opnds, ",")
+ flags = convert_operands(count, opnds)
+ }
+ if (match($i, ext_expr))
+ ext = $(i++)
+ if (match($i, sep_expr))
+ i++
+ else if (i < NF)
+ semantic_error($i " is not a separator")
+
+ # check if group opcode
+ if (match(opcode, group_expr)) {
+ if (!(opcode in group)) {
+ group[opcode] = ggid
+ ggid++
+ }
+ flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+ }
+ # check force(or default) 64bit
+ if (match(ext, force64_expr))
+ flags = add_flags(flags, "INAT_FORCE64")
+
+ # check REX prefix
+ if (match(opcode, rex_expr))
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
+
+ # check coprocessor escape : TODO
+ if (match(opcode, fpu_expr))
+ flags = add_flags(flags, "INAT_MODRM")
+
+ # check VEX codes
+ if (match(ext, vexonly_expr))
+ flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
+ else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
+ flags = add_flags(flags, "INAT_VEXOK")
+
+ # check prefixes
+ if (match(ext, prefix_expr)) {
+ if (!prefix_num[opcode])
+ semantic_error("Unknown prefix: " opcode)
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+ }
+ if (length(flags) == 0)
+ continue
+ # check if last prefix
+ if (match(ext, lprefix1_expr)) {
+ lptable1[idx] = add_flags(lptable1[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix2_expr)) {
+ lptable2[idx] = add_flags(lptable2[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix3_expr)) {
+ lptable3[idx] = add_flags(lptable3[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (!match(ext, lprefix_expr)){
+ table[idx] = add_flags(table[idx],flags)
+ }
+ }
+ if (variant)
+ table[idx] = add_flags(table[idx],variant)
+}
+
+END {
+ if (awkchecked != "")
+ exit 1
+ # print escape opcode map's array
+ print "/* Escape opcode map array */"
+ print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print " ["i"]["j"] = "etable[i,j]","
+ print "};\n"
+ # print group opcode map's array
+ print "/* Group opcode map array */"
+ print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print " ["i"]["j"] = "gtable[i,j]","
+ print "};\n"
+ # print AVX opcode map's array
+ print "/* AVX opcode map array */"
+ print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print " ["i"]["j"] = "atable[i,j]","
+ print "};"
+}
diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c
new file mode 100644
index 0000000..906d94a
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat.c
@@ -0,0 +1,96 @@
+/*
+ * x86 instruction attribute tables
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "insn.h"
+
+/* Attribute tables are generated from opcode map */
+#include "inat-tables.c"
+
+/* Attribute search APIs */
+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
+{
+ return inat_primary_table[opcode];
+}
+
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+ insn_attr_t lpfx_attr;
+
+ lpfx_attr = inat_get_opcode_attribute(last_pfx);
+ return inat_last_prefix_id(lpfx_attr);
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
+ insn_attr_t esc_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_escape_id(esc_attr);
+
+ table = inat_escape_tables[n][0];
+ if (!table)
+ return 0;
+ if (inat_has_variant(table[opcode]) && lpfx_id) {
+ table = inat_escape_tables[n][lpfx_id];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
+
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
+ insn_attr_t grp_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_group_id(grp_attr);
+
+ table = inat_group_tables[n][0];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+ table = inat_group_tables[n][lpfx_id];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ }
+ return table[X86_MODRM_REG(modrm)] |
+ inat_group_common_attribute(grp_attr);
+}
+
+insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
+ insn_byte_t vex_p)
+{
+ const insn_attr_t *table;
+ if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
+ return 0;
+ /* At first, this checks the master table */
+ table = inat_avx_tables[vex_m][0];
+ if (!table)
+ return 0;
+ if (!inat_is_group(table[opcode]) && vex_p) {
+ /* If this is not a group, get attribute directly */
+ table = inat_avx_tables[vex_m][vex_p];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
new file mode 100644
index 0000000..611645e
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -0,0 +1,221 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "inat_types.h"
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK 4 /* 0xF0 */
+#define INAT_PFX_CS 5 /* 0x2E */
+#define INAT_PFX_DS 6 /* 0x3E */
+#define INAT_PFX_ES 7 /* 0x26 */
+#define INAT_PFX_FS 8 /* 0x64 */
+#define INAT_PFX_GS 9 /* 0x65 */
+#define INAT_PFX_SS 10 /* 0x36 */
+#define INAT_PFX_ADDRSZ 11 /* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX 12 /* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
+#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
+
+#define INAT_LSTPFX_MAX 3
+#define INAT_LGCPFX_MAX 11
+
+/* Immediate size */
+#define INAT_IMM_BYTE 1
+#define INAT_IMM_WORD 2
+#define INAT_IMM_DWORD 3
+#define INAT_IMM_QWORD 4
+#define INAT_IMM_PTR 5
+#define INAT_IMM_VWORD32 6
+#define INAT_IMM_VWORD 7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS 0
+#define INAT_PFX_BITS 4
+#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS 2
+#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS 5
+#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS 3
+#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+ insn_byte_t vex_m,
+ insn_byte_t vex_pp);
+
+/* Attribute checking functions */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr && attr <= INAT_LGCPFX_MAX;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+ if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
+ return 0;
+ else
+ return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+ return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+ return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+ return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+ return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+ return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+ return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+ return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+ return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+ return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+ return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+ return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+ return attr & INAT_VARIANT;
+}
+
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXOK;
+}
+
+static inline int inat_must_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXONLY;
+}
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h
new file mode 100644
index 0000000..cb3c20c
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
new file mode 100644
index 0000000..47314a6
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -0,0 +1,594 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif
+#include "inat.h"
+#include "insn.h"
+
+/* Verify next sizeof(t) bytes can be on the same instruction */
+#define validate_next(t, insn, n) \
+ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
+
+#define __get_next(t, insn) \
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+
+#define __peek_nbyte_next(t, insn, n) \
+ ({ t r = *(t*)((insn)->next_byte + n); r; })
+
+#define get_next(t, insn) \
+ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
+
+#define peek_nbyte_next(t, insn, n) \
+ ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
+
+#define peek_next(t, insn) peek_nbyte_next(t, insn, 0)
+
+/**
+ * insn_init() - initialize struct insn
+ * @insn: &struct insn to be initialized
+ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
+ * @x86_64: !0 for 64-bit kernel or 64-bit app
+ */
+void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
+{
+ /*
+ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+ * even if the input buffer is long enough to hold them.
+ */
+ if (buf_len > MAX_INSN_SIZE)
+ buf_len = MAX_INSN_SIZE;
+
+ memset(insn, 0, sizeof(*insn));
+ insn->kaddr = kaddr;
+ insn->end_kaddr = kaddr + buf_len;
+ insn->next_byte = kaddr;
+ insn->x86_64 = x86_64 ? 1 : 0;
+ insn->opnd_bytes = 4;
+ if (x86_64)
+ insn->addr_bytes = 8;
+ else
+ insn->addr_bytes = 4;
+}
+
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn: &struct insn containing instruction
+ *
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode. No effect if @insn->prefixes.got
+ * is already set.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+ struct insn_field *prefixes = &insn->prefixes;
+ insn_attr_t attr;
+ insn_byte_t b, lb;
+ int i, nb;
+
+ if (prefixes->got)
+ return;
+
+ nb = 0;
+ lb = 0;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ while (inat_is_legacy_prefix(attr)) {
+ /* Skip if same prefix */
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == b)
+ goto found;
+ if (nb == 4)
+ /* Invalid instruction */
+ break;
+ prefixes->bytes[nb++] = b;
+ if (inat_is_address_size_prefix(attr)) {
+ /* address size switches 2/4 or 4/8 */
+ if (insn->x86_64)
+ insn->addr_bytes ^= 12;
+ else
+ insn->addr_bytes ^= 6;
+ } else if (inat_is_operand_size_prefix(attr)) {
+ /* oprand size switches 2/4 */
+ insn->opnd_bytes ^= 6;
+ }
+found:
+ prefixes->nbytes++;
+ insn->next_byte++;
+ lb = b;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ }
+ /* Set the last prefix */
+ if (lb && lb != insn->prefixes.bytes[3]) {
+ if (unlikely(insn->prefixes.bytes[3])) {
+ /* Swap the last prefix */
+ b = insn->prefixes.bytes[3];
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == lb)
+ prefixes->bytes[i] = b;
+ }
+ insn->prefixes.bytes[3] = lb;
+ }
+
+ /* Decode REX prefix */
+ if (insn->x86_64) {
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_rex_prefix(attr)) {
+ insn->rex_prefix.value = b;
+ insn->rex_prefix.nbytes = 1;
+ insn->next_byte++;
+ if (X86_REX_W(b))
+ /* REX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ }
+ }
+ insn->rex_prefix.got = 1;
+
+ /* Decode VEX prefix */
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_vex_prefix(attr)) {
+ insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
+ if (!insn->x86_64) {
+ /*
+ * In 32-bits mode, if the [7:6] bits (mod bits of
+ * ModRM) on the second byte are not 11b, it is
+ * LDS or LES.
+ */
+ if (X86_MODRM_MOD(b2) != 3)
+ goto vex_end;
+ }
+ insn->vex_prefix.bytes[0] = b;
+ insn->vex_prefix.bytes[1] = b2;
+ if (inat_is_vex3_prefix(attr)) {
+ b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+ insn->vex_prefix.bytes[2] = b2;
+ insn->vex_prefix.nbytes = 3;
+ insn->next_byte += 3;
+ if (insn->x86_64 && X86_VEX_W(b2))
+ /* VEX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ } else {
+ /*
+ * For VEX2, fake VEX3-like byte#2.
+ * Makes it easier to decode vex.W, vex.vvvv,
+ * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+ */
+ insn->vex_prefix.bytes[2] = b2 & 0x7f;
+ insn->vex_prefix.nbytes = 2;
+ insn->next_byte += 2;
+ }
+ }
+vex_end:
+ insn->vex_prefix.got = 1;
+
+ prefixes->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->opcode, updates @insn->next_byte to point past the
+ * opcode byte(s), and set @insn->attr (except for groups).
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
+ * is already 1.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+ struct insn_field *opcode = &insn->opcode;
+ insn_byte_t op;
+ int pfx_id;
+ if (opcode->got)
+ return;
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+
+ /* Get first opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[0] = op;
+ opcode->nbytes = 1;
+
+ /* Check if there is VEX prefix or not */
+ if (insn_is_avx(insn)) {
+ insn_byte_t m, p;
+ m = insn_vex_m_bits(insn);
+ p = insn_vex_p_bits(insn);
+ insn->attr = inat_get_avx_attribute(op, m, p);
+ if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
+ insn->attr = 0; /* This instruction is bad */
+ goto end; /* VEX has only 1 byte for opcode */
+ }
+
+ insn->attr = inat_get_opcode_attribute(op);
+ while (inat_is_escape(insn->attr)) {
+ /* Get escaped opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[opcode->nbytes++] = op;
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+ }
+ if (inat_must_vex(insn->attr))
+ insn->attr = 0; /* This instruction is bad */
+end:
+ opcode->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_modrm - collect ModRM byte, if any
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any. If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+ insn_byte_t pfx_id, mod;
+ if (modrm->got)
+ return;
+ if (!insn->opcode.got)
+ insn_get_opcode(insn);
+
+ if (inat_has_modrm(insn->attr)) {
+ mod = get_next(insn_byte_t, insn);
+ modrm->value = mod;
+ modrm->nbytes = 1;
+ if (inat_is_group(insn->attr)) {
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
+ insn->attr);
+ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+ insn->attr = 0; /* This is bad */
+ }
+ }
+
+ if (insn->x86_64 && inat_is_force64(insn->attr))
+ insn->opnd_bytes = 8;
+ modrm->got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte. No effect if @insn->x86_64 is 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+
+ if (!insn->x86_64)
+ return 0;
+ if (!modrm->got)
+ insn_get_modrm(insn);
+ /*
+ * For rip-relative instructions, the mod field (top 2 bits)
+ * is zero and the r/m field (bottom 3 bits) is 0x5.
+ */
+ return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+}
+
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
+ */
+void insn_get_sib(struct insn *insn)
+{
+ insn_byte_t modrm;
+
+ if (insn->sib.got)
+ return;
+ if (!insn->modrm.got)
+ insn_get_modrm(insn);
+ if (insn->modrm.nbytes) {
+ modrm = (insn_byte_t)insn->modrm.value;
+ if (insn->addr_bytes != 2 &&
+ X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
+ insn->sib.value = get_next(insn_byte_t, insn);
+ insn->sib.nbytes = 1;
+ }
+ }
+ insn->sib.got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * Displacement value is sign-expanded.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+ insn_byte_t mod, rm, base;
+
+ if (insn->displacement.got)
+ return;
+ if (!insn->sib.got)
+ insn_get_sib(insn);
+ if (insn->modrm.nbytes) {
+ /*
+ * Interpreting the modrm byte:
+ * mod = 00 - no displacement fields (exceptions below)
+ * mod = 01 - 1-byte displacement field
+ * mod = 10 - displacement field is 4 bytes, or 2 bytes if
+ * address size = 2 (0x67 prefix in 32-bit mode)
+ * mod = 11 - no memory operand
+ *
+ * If address size = 2...
+ * mod = 00, r/m = 110 - displacement field is 2 bytes
+ *
+ * If address size != 2...
+ * mod != 11, r/m = 100 - SIB byte exists
+ * mod = 00, SIB base = 101 - displacement field is 4 bytes
+ * mod = 00, r/m = 101 - rip-relative addressing, displacement
+ * field is 4 bytes
+ */
+ mod = X86_MODRM_MOD(insn->modrm.value);
+ rm = X86_MODRM_RM(insn->modrm.value);
+ base = X86_SIB_BASE(insn->sib.value);
+ if (mod == 3)
+ goto out;
+ if (mod == 1) {
+ insn->displacement.value = get_next(char, insn);
+ insn->displacement.nbytes = 1;
+ } else if (insn->addr_bytes == 2) {
+ if ((mod == 0 && rm == 6) || mod == 2) {
+ insn->displacement.value =
+ get_next(short, insn);
+ insn->displacement.nbytes = 2;
+ }
+ } else {
+ if ((mod == 0 && rm == 5) || mod == 2 ||
+ (mod == 0 && base == 5)) {
+ insn->displacement.value = get_next(int, insn);
+ insn->displacement.nbytes = 4;
+ }
+ }
+ }
+out:
+ insn->displacement.got = 1;
+
+err_out:
+ return;
+}
+
+/* Decode moffset16/32/64. Return 0 if failed */
+static int __get_moffset(struct insn *insn)
+{
+ switch (insn->addr_bytes) {
+ case 2:
+ insn->moffset1.value = get_next(short, insn);
+ insn->moffset1.nbytes = 2;
+ break;
+ case 4:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ break;
+ case 8:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ insn->moffset2.value = get_next(int, insn);
+ insn->moffset2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->moffset1.got = insn->moffset2.got = 1;
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v32(Iz). Return 0 if failed */
+static int __get_immv32(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case 4:
+ case 8:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v64(Iv/Ov), Return 0 if failed */
+static int __get_immv(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/* Decode ptr16:16/32(Ap) */
+static int __get_immptr(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+ /* ptr16:64 is not exist (no segment) */
+ return 0;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate2.value = get_next(unsigned short, insn);
+ insn->immediate2.nbytes = 2;
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Basically, most of immediates are sign-expanded. Unsigned-value can be
+ * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ */
+void insn_get_immediate(struct insn *insn)
+{
+ if (insn->immediate.got)
+ return;
+ if (!insn->displacement.got)
+ insn_get_displacement(insn);
+
+ if (inat_has_moffset(insn->attr)) {
+ if (!__get_moffset(insn))
+ goto err_out;
+ goto done;
+ }
+
+ if (!inat_has_immediate(insn->attr))
+ /* no immediates */
+ goto done;
+
+ switch (inat_immediate_size(insn->attr)) {
+ case INAT_IMM_BYTE:
+ insn->immediate.value = get_next(char, insn);
+ insn->immediate.nbytes = 1;
+ break;
+ case INAT_IMM_WORD:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case INAT_IMM_DWORD:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ case INAT_IMM_QWORD:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ case INAT_IMM_PTR:
+ if (!__get_immptr(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD32:
+ if (!__get_immv32(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD:
+ if (!__get_immv(insn))
+ goto err_out;
+ break;
+ default:
+ /* Here, insn must have an immediate, but failed */
+ goto err_out;
+ }
+ if (inat_has_second_immediate(insn->attr)) {
+ insn->immediate2.value = get_next(char, insn);
+ insn->immediate2.nbytes = 1;
+ }
+done:
+ insn->immediate.got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+ */
+void insn_get_length(struct insn *insn)
+{
+ if (insn->length)
+ return;
+ if (!insn->immediate.got)
+ insn_get_immediate(insn);
+ insn->length = (unsigned char)((unsigned long)insn->next_byte
+ - (unsigned long)insn->kaddr);
+}
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
new file mode 100644
index 0000000..dd12da0
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -0,0 +1,201 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include "inat.h"
+
+struct insn_field {
+ union {
+ insn_value_t value;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+struct insn {
+ struct insn_field prefixes; /*
+ * Prefixes
+ * prefixes.bytes[3]: last prefix
+ */
+ struct insn_field rex_prefix; /* REX prefix */
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field opcode; /*
+ * opcode.bytes[0]: opcode1
+ * opcode.bytes[1]: opcode2
+ * opcode.bytes[2]: opcode3
+ */
+ struct insn_field modrm;
+ struct insn_field sib;
+ struct insn_field displacement;
+ union {
+ struct insn_field immediate;
+ struct insn_field moffset1; /* for 64bit MOV */
+ struct insn_field immediate1; /* for 64bit imm or off16/32 */
+ };
+ union {
+ struct insn_field moffset2; /* for 64bit MOV */
+ struct insn_field immediate2; /* for 64bit imm or seg16 */
+ };
+
+ insn_attr_t attr;
+ unsigned char opnd_bytes;
+ unsigned char addr_bytes;
+ unsigned char length;
+ unsigned char x86_64;
+
+ const insn_byte_t *kaddr; /* kernel address of insn to analyze */
+ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
+ const insn_byte_t *next_byte;
+};
+
+#define MAX_INSN_SIZE 15
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags */
+#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
+#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
+#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
+#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
+#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
+/* VEX bit fields */
+#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
+#define X86_VEX2_M 1 /* VEX2.M always 1 */
+#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+
+extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+ insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn,
+ const void *kaddr, int buf_len)
+{
+#ifdef CONFIG_X86_64
+ insn_init(insn, kaddr, buf_len, 1);
+#else /* CONFIG_X86_32 */
+ insn_init(insn, kaddr, buf_len, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.value != 0);
+}
+
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+ return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+ insn->displacement.got && insn->immediate.got;
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX2_M;
+ else
+ return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX_P(insn->vex_prefix.bytes[1]);
+ else
+ return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+ if (insn_is_avx(insn))
+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+
+ if (insn->prefixes.bytes[3])
+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+ return 0;
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+ return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+ return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+ return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+ return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+ return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+ return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+ return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+#endif /* _ASM_X86_INSN_H */
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
new file mode 100644
index 0000000..22ba502
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -0,0 +1,2345 @@
+/*
+ * intel_pt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "../cache.h"
+#include "../util.h"
+
+#include "intel-pt-insn-decoder.h"
+#include "intel-pt-pkt-decoder.h"
+#include "intel-pt-decoder.h"
+#include "intel-pt-log.h"
+
+#define INTEL_PT_BLK_SIZE 1024
+
+#define BIT63 (((uint64_t)1 << 63))
+
+#define INTEL_PT_RETURN 1
+
+/* Maximum number of loops with no packets consumed i.e. stuck in a loop */
+#define INTEL_PT_MAX_LOOPS 10000
+
+struct intel_pt_blk {
+ struct intel_pt_blk *prev;
+ uint64_t ip[INTEL_PT_BLK_SIZE];
+};
+
+struct intel_pt_stack {
+ struct intel_pt_blk *blk;
+ struct intel_pt_blk *spare;
+ int pos;
+};
+
+enum intel_pt_pkt_state {
+ INTEL_PT_STATE_NO_PSB,
+ INTEL_PT_STATE_NO_IP,
+ INTEL_PT_STATE_ERR_RESYNC,
+ INTEL_PT_STATE_IN_SYNC,
+ INTEL_PT_STATE_TNT,
+ INTEL_PT_STATE_TIP,
+ INTEL_PT_STATE_TIP_PGD,
+ INTEL_PT_STATE_FUP,
+ INTEL_PT_STATE_FUP_NO_TIP,
+};
+
+#ifdef INTEL_PT_STRICT
+#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB
+#else
+#define INTEL_PT_STATE_ERR1 (decoder->pkt_state)
+#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP
+#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC
+#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC
+#endif
+
+struct intel_pt_decoder {
+ int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+ int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+ uint64_t max_insn_cnt, void *data);
+ void *data;
+ struct intel_pt_state state;
+ const unsigned char *buf;
+ size_t len;
+ bool return_compression;
+ bool mtc_insn;
+ bool pge;
+ bool have_tma;
+ bool have_cyc;
+ uint64_t pos;
+ uint64_t last_ip;
+ uint64_t ip;
+ uint64_t cr3;
+ uint64_t timestamp;
+ uint64_t tsc_timestamp;
+ uint64_t ref_timestamp;
+ uint64_t ret_addr;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t cycle_cnt;
+ uint64_t cyc_ref_timestamp;
+ uint32_t last_mtc;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+ uint32_t tsc_ctc_mult;
+ uint32_t tsc_slip;
+ uint32_t ctc_rem_mask;
+ int mtc_shift;
+ struct intel_pt_stack stack;
+ enum intel_pt_pkt_state pkt_state;
+ struct intel_pt_pkt packet;
+ struct intel_pt_pkt tnt;
+ int pkt_step;
+ int pkt_len;
+ int last_packet_type;
+ unsigned int cbr;
+ unsigned int max_non_turbo_ratio;
+ double max_non_turbo_ratio_fp;
+ double cbr_cyc_to_tsc;
+ double calc_cyc_to_tsc;
+ bool have_calc_cyc_to_tsc;
+ int exec_mode;
+ unsigned int insn_bytes;
+ uint64_t sign_bit;
+ uint64_t sign_bits;
+ uint64_t period;
+ enum intel_pt_period_type period_type;
+ uint64_t tot_insn_cnt;
+ uint64_t period_insn_cnt;
+ uint64_t period_mask;
+ uint64_t period_ticks;
+ uint64_t last_masked_timestamp;
+ bool continuous_period;
+ bool overflow;
+ bool set_fup_tx_flags;
+ unsigned int fup_tx_flags;
+ unsigned int tx_flags;
+ uint64_t timestamp_insn_cnt;
+ uint64_t stuck_ip;
+ int no_progress;
+ int stuck_ip_prd;
+ int stuck_ip_cnt;
+ const unsigned char *next_buf;
+ size_t next_len;
+ unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
+};
+
+static uint64_t intel_pt_lower_power_of_2(uint64_t x)
+{
+ int i;
+
+ for (i = 0; x != 1; i++)
+ x >>= 1;
+
+ return x << i;
+}
+
+static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
+{
+ if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
+ uint64_t period;
+
+ period = intel_pt_lower_power_of_2(decoder->period);
+ decoder->period_mask = ~(period - 1);
+ decoder->period_ticks = period;
+ }
+}
+
+static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
+{
+ if (!d)
+ return 0;
+ return (t / d) * n + ((t % d) * n) / d;
+}
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
+{
+ struct intel_pt_decoder *decoder;
+
+ if (!params->get_trace || !params->walk_insn)
+ return NULL;
+
+ decoder = zalloc(sizeof(struct intel_pt_decoder));
+ if (!decoder)
+ return NULL;
+
+ decoder->get_trace = params->get_trace;
+ decoder->walk_insn = params->walk_insn;
+ decoder->data = params->data;
+ decoder->return_compression = params->return_compression;
+
+ decoder->sign_bit = (uint64_t)1 << 47;
+ decoder->sign_bits = ~(((uint64_t)1 << 48) - 1);
+
+ decoder->period = params->period;
+ decoder->period_type = params->period_type;
+
+ decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
+ decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
+
+ intel_pt_setup_period(decoder);
+
+ decoder->mtc_shift = params->mtc_period;
+ decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
+
+ decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
+ decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
+
+ if (!decoder->tsc_ctc_ratio_n)
+ decoder->tsc_ctc_ratio_d = 0;
+
+ if (decoder->tsc_ctc_ratio_d) {
+ if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
+ decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
+ decoder->tsc_ctc_ratio_d;
+
+ /*
+ * Allow for timestamps appearing to backwards because a TSC
+ * packet has slipped past a MTC packet, so allow 2 MTC ticks
+ * or ...
+ */
+ decoder->tsc_slip = multdiv(2 << decoder->mtc_shift,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+ /* ... or 0x100 paranoia */
+ if (decoder->tsc_slip < 0x100)
+ decoder->tsc_slip = 0x100;
+
+ intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
+ intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
+ intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
+ intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
+ intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+
+ return decoder;
+}
+
+static void intel_pt_pop_blk(struct intel_pt_stack *stack)
+{
+ struct intel_pt_blk *blk = stack->blk;
+
+ stack->blk = blk->prev;
+ if (!stack->spare)
+ stack->spare = blk;
+ else
+ free(blk);
+}
+
+static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
+{
+ if (!stack->pos) {
+ if (!stack->blk)
+ return 0;
+ intel_pt_pop_blk(stack);
+ if (!stack->blk)
+ return 0;
+ stack->pos = INTEL_PT_BLK_SIZE;
+ }
+ return stack->blk->ip[--stack->pos];
+}
+
+static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
+{
+ struct intel_pt_blk *blk;
+
+ if (stack->spare) {
+ blk = stack->spare;
+ stack->spare = NULL;
+ } else {
+ blk = malloc(sizeof(struct intel_pt_blk));
+ if (!blk)
+ return -ENOMEM;
+ }
+
+ blk->prev = stack->blk;
+ stack->blk = blk;
+ stack->pos = 0;
+ return 0;
+}
+
+static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
+{
+ int err;
+
+ if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
+ err = intel_pt_alloc_blk(stack);
+ if (err)
+ return err;
+ }
+
+ stack->blk->ip[stack->pos++] = ip;
+ return 0;
+}
+
+static void intel_pt_clear_stack(struct intel_pt_stack *stack)
+{
+ while (stack->blk)
+ intel_pt_pop_blk(stack);
+ stack->pos = 0;
+}
+
+static void intel_pt_free_stack(struct intel_pt_stack *stack)
+{
+ intel_pt_clear_stack(stack);
+ zfree(&stack->blk);
+ zfree(&stack->spare);
+}
+
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
+{
+ intel_pt_free_stack(&decoder->stack);
+ free(decoder);
+}
+
+static int intel_pt_ext_err(int code)
+{
+ switch (code) {
+ case -ENOMEM:
+ return INTEL_PT_ERR_NOMEM;
+ case -ENOSYS:
+ return INTEL_PT_ERR_INTERN;
+ case -EBADMSG:
+ return INTEL_PT_ERR_BADPKT;
+ case -ENODATA:
+ return INTEL_PT_ERR_NODATA;
+ case -EILSEQ:
+ return INTEL_PT_ERR_NOINSN;
+ case -ENOENT:
+ return INTEL_PT_ERR_MISMAT;
+ case -EOVERFLOW:
+ return INTEL_PT_ERR_OVR;
+ case -ENOSPC:
+ return INTEL_PT_ERR_LOST;
+ case -ELOOP:
+ return INTEL_PT_ERR_NELOOP;
+ default:
+ return INTEL_PT_ERR_UNK;
+ }
+}
+
+static const char *intel_pt_err_msgs[] = {
+ [INTEL_PT_ERR_NOMEM] = "Memory allocation failed",
+ [INTEL_PT_ERR_INTERN] = "Internal error",
+ [INTEL_PT_ERR_BADPKT] = "Bad packet",
+ [INTEL_PT_ERR_NODATA] = "No more data",
+ [INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
+ [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
+ [INTEL_PT_ERR_OVR] = "Overflow packet",
+ [INTEL_PT_ERR_LOST] = "Lost trace data",
+ [INTEL_PT_ERR_UNK] = "Unknown error!",
+ [INTEL_PT_ERR_NELOOP] = "Never-ending loop",
+};
+
+int intel_pt__strerror(int code, char *buf, size_t buflen)
+{
+ if (code < 1 || code > INTEL_PT_ERR_MAX)
+ code = INTEL_PT_ERR_UNK;
+ strlcpy(buf, intel_pt_err_msgs[code], buflen);
+ return 0;
+}
+
+static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
+ const struct intel_pt_pkt *packet,
+ uint64_t last_ip)
+{
+ uint64_t ip;
+
+ switch (packet->count) {
+ case 2:
+ ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
+ packet->payload;
+ break;
+ case 4:
+ ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
+ packet->payload;
+ break;
+ case 6:
+ ip = packet->payload;
+ break;
+ default:
+ return 0;
+ }
+
+ if (ip & decoder->sign_bit)
+ return ip | decoder->sign_bits;
+
+ return ip;
+}
+
+static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
+{
+ decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
+ decoder->last_ip);
+}
+
+static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
+{
+ intel_pt_set_last_ip(decoder);
+ decoder->ip = decoder->last_ip;
+}
+
+static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
+ decoder->buf);
+}
+
+static int intel_pt_bug(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log("ERROR: Internal error\n");
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ return -ENOSYS;
+}
+
+static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
+{
+ decoder->tx_flags = 0;
+}
+
+static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
+{
+ decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
+}
+
+static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
+{
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ decoder->pkt_len = 1;
+ decoder->pkt_step = 1;
+ intel_pt_decoder_log_packet(decoder);
+ if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
+ intel_pt_log("ERROR: Bad packet\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR1;
+ }
+ return -EBADMSG;
+}
+
+static int intel_pt_get_data(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_buffer buffer = { .buf = 0, };
+ int ret;
+
+ decoder->pkt_step = 0;
+
+ intel_pt_log("Getting more data\n");
+ ret = decoder->get_trace(&buffer, decoder->data);
+ if (ret)
+ return ret;
+ decoder->buf = buffer.buf;
+ decoder->len = buffer.len;
+ if (!decoder->len) {
+ intel_pt_log("No more data\n");
+ return -ENODATA;
+ }
+ if (!buffer.consecutive) {
+ decoder->ip = 0;
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ decoder->ref_timestamp = buffer.ref_timestamp;
+ decoder->timestamp = 0;
+ decoder->have_tma = false;
+ decoder->state.trace_nr = buffer.trace_nr;
+ intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
+ decoder->ref_timestamp);
+ return -ENOLINK;
+ }
+
+ return 0;
+}
+
+static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
+{
+ if (!decoder->next_buf)
+ return intel_pt_get_data(decoder);
+
+ decoder->buf = decoder->next_buf;
+ decoder->len = decoder->next_len;
+ decoder->next_buf = 0;
+ decoder->next_len = 0;
+ return 0;
+}
+
+static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
+{
+ unsigned char *buf = decoder->temp_buf;
+ size_t old_len, len, n;
+ int ret;
+
+ old_len = decoder->len;
+ len = decoder->len;
+ memcpy(buf, decoder->buf, len);
+
+ ret = intel_pt_get_data(decoder);
+ if (ret) {
+ decoder->pos += old_len;
+ return ret < 0 ? ret : -EINVAL;
+ }
+
+ n = INTEL_PT_PKT_MAX_SZ - len;
+ if (n > decoder->len)
+ n = decoder->len;
+ memcpy(buf + len, decoder->buf, n);
+ len += n;
+
+ ret = intel_pt_get_packet(buf, len, &decoder->packet);
+ if (ret < (int)old_len) {
+ decoder->next_buf = decoder->buf;
+ decoder->next_len = decoder->len;
+ decoder->buf = buf;
+ decoder->len = old_len;
+ return intel_pt_bad_packet(decoder);
+ }
+
+ decoder->next_buf = decoder->buf + (ret - old_len);
+ decoder->next_len = decoder->len - (ret - old_len);
+
+ decoder->buf = buf;
+ decoder->len = ret;
+
+ return ret;
+}
+
+struct intel_pt_pkt_info {
+ struct intel_pt_decoder *decoder;
+ struct intel_pt_pkt packet;
+ uint64_t pos;
+ int pkt_len;
+ int last_packet_type;
+ void *data;
+};
+
+typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
+
+/* Lookahead packets in current buffer */
+static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
+ intel_pt_pkt_cb_t cb, void *data)
+{
+ struct intel_pt_pkt_info pkt_info;
+ const unsigned char *buf = decoder->buf;
+ size_t len = decoder->len;
+ int ret;
+
+ pkt_info.decoder = decoder;
+ pkt_info.pos = decoder->pos;
+ pkt_info.pkt_len = decoder->pkt_step;
+ pkt_info.last_packet_type = decoder->last_packet_type;
+ pkt_info.data = data;
+
+ while (1) {
+ do {
+ pkt_info.pos += pkt_info.pkt_len;
+ buf += pkt_info.pkt_len;
+ len -= pkt_info.pkt_len;
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+ if (!ret)
+ return INTEL_PT_NEED_MORE_BYTES;
+ if (ret < 0)
+ return ret;
+
+ pkt_info.pkt_len = ret;
+ } while (pkt_info.packet.type == INTEL_PT_PAD);
+
+ ret = cb(&pkt_info);
+ if (ret)
+ return 0;
+
+ pkt_info.last_packet_type = pkt_info.packet.type;
+ }
+}
+
+struct intel_pt_calc_cyc_to_tsc_info {
+ uint64_t cycle_cnt;
+ unsigned int cbr;
+ uint32_t last_mtc;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t tsc_timestamp;
+ uint64_t timestamp;
+ bool have_tma;
+ bool from_mtc;
+ double cbr_cyc_to_tsc;
+};
+
+static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_decoder *decoder = pkt_info->decoder;
+ struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
+ uint64_t timestamp;
+ double cyc_to_tsc;
+ unsigned int cbr;
+ uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PIP:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PAD:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ return 0;
+
+ case INTEL_PT_MTC:
+ if (!data->have_tma)
+ return 0;
+
+ mtc = pkt_info->packet.payload;
+ if (mtc > data->last_mtc)
+ mtc_delta = mtc - data->last_mtc;
+ else
+ mtc_delta = mtc + 256 - data->last_mtc;
+ data->ctc_delta += mtc_delta << decoder->mtc_shift;
+ data->last_mtc = mtc;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = data->ctc_timestamp +
+ data->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = data->ctc_timestamp +
+ multdiv(data->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < data->timestamp)
+ return 1;
+
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ data->timestamp = timestamp;
+ return 0;
+ }
+
+ break;
+
+ case INTEL_PT_TSC:
+ timestamp = pkt_info->packet.payload |
+ (data->timestamp & (0xffULL << 56));
+ if (data->from_mtc && timestamp < data->timestamp &&
+ data->timestamp - timestamp < decoder->tsc_slip)
+ return 1;
+ while (timestamp < data->timestamp)
+ timestamp += (1ULL << 56);
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ if (data->from_mtc)
+ return 1;
+ data->tsc_timestamp = timestamp;
+ data->timestamp = timestamp;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_TMA:
+ if (data->from_mtc)
+ return 1;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return 0;
+
+ ctc = pkt_info->packet.payload;
+ fc = pkt_info->packet.count;
+ ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+ data->ctc_timestamp = data->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ data->ctc_timestamp -=
+ multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ data->ctc_delta = 0;
+ data->have_tma = true;
+
+ return 0;
+
+ case INTEL_PT_CYC:
+ data->cycle_cnt += pkt_info->packet.payload;
+ return 0;
+
+ case INTEL_PT_CBR:
+ cbr = pkt_info->packet.payload;
+ if (data->cbr && data->cbr != cbr)
+ return 1;
+ data->cbr = cbr;
+ data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+ return 0;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD: /* Does not happen */
+ default:
+ return 1;
+ }
+
+ if (!data->cbr && decoder->cbr) {
+ data->cbr = decoder->cbr;
+ data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
+ }
+
+ if (!data->cycle_cnt)
+ return 1;
+
+ cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
+
+ if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
+ cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ return 1;
+ }
+
+ decoder->calc_cyc_to_tsc = cyc_to_tsc;
+ decoder->have_calc_cyc_to_tsc = true;
+
+ if (data->cbr) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ } else {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
+ cyc_to_tsc, pkt_info->pos);
+ }
+
+ return 1;
+}
+
+static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
+ bool from_mtc)
+{
+ struct intel_pt_calc_cyc_to_tsc_info data = {
+ .cycle_cnt = 0,
+ .cbr = 0,
+ .last_mtc = decoder->last_mtc,
+ .ctc_timestamp = decoder->ctc_timestamp,
+ .ctc_delta = decoder->ctc_delta,
+ .tsc_timestamp = decoder->tsc_timestamp,
+ .timestamp = decoder->timestamp,
+ .have_tma = decoder->have_tma,
+ .from_mtc = from_mtc,
+ .cbr_cyc_to_tsc = 0,
+ };
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
+}
+
+static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
+{
+ int ret;
+
+ decoder->last_packet_type = decoder->packet.type;
+
+ do {
+ decoder->pos += decoder->pkt_step;
+ decoder->buf += decoder->pkt_step;
+ decoder->len -= decoder->pkt_step;
+
+ if (!decoder->len) {
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+ }
+
+ ret = intel_pt_get_packet(decoder->buf, decoder->len,
+ &decoder->packet);
+ if (ret == INTEL_PT_NEED_MORE_BYTES &&
+ decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
+ ret = intel_pt_get_split_packet(decoder);
+ if (ret < 0)
+ return ret;
+ }
+ if (ret <= 0)
+ return intel_pt_bad_packet(decoder);
+
+ decoder->pkt_len = ret;
+ decoder->pkt_step = ret;
+ intel_pt_decoder_log_packet(decoder);
+ } while (decoder->packet.type == INTEL_PT_PAD);
+
+ return 0;
+}
+
+static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp, masked_timestamp;
+
+ timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (decoder->continuous_period) {
+ if (masked_timestamp != decoder->last_masked_timestamp)
+ return 1;
+ } else {
+ timestamp += 1;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (masked_timestamp != decoder->last_masked_timestamp) {
+ decoder->last_masked_timestamp = masked_timestamp;
+ decoder->continuous_period = true;
+ }
+ }
+ return decoder->period_ticks - (timestamp - masked_timestamp);
+}
+
+static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
+{
+ switch (decoder->period_type) {
+ case INTEL_PT_PERIOD_INSTRUCTIONS:
+ return decoder->period - decoder->period_insn_cnt;
+ case INTEL_PT_PERIOD_TICKS:
+ return intel_pt_next_period(decoder);
+ case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
+ default:
+ return 0;
+ }
+}
+
+static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp, masked_timestamp;
+
+ switch (decoder->period_type) {
+ case INTEL_PT_PERIOD_INSTRUCTIONS:
+ decoder->period_insn_cnt = 0;
+ break;
+ case INTEL_PT_PERIOD_TICKS:
+ timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+ masked_timestamp = timestamp & decoder->period_mask;
+ decoder->last_masked_timestamp = masked_timestamp;
+ break;
+ case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
+ default:
+ break;
+ }
+
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+}
+
+static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
+ struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ uint64_t max_insn_cnt, insn_cnt = 0;
+ int err;
+
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
+ max_insn_cnt = intel_pt_next_sample(decoder);
+
+ err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
+ max_insn_cnt, decoder->data);
+
+ decoder->tot_insn_cnt += insn_cnt;
+ decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->period_insn_cnt += insn_cnt;
+
+ if (err) {
+ decoder->no_progress = 0;
+ decoder->pkt_state = INTEL_PT_STATE_ERR2;
+ intel_pt_log_at("ERROR: Failed to get instruction",
+ decoder->ip);
+ if (err == -ENOENT)
+ return -ENOLINK;
+ return -EILSEQ;
+ }
+
+ if (ip && decoder->ip == ip) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+ intel_pt_sample_insn(decoder);
+
+ if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->ip += intel_pt_insn->length;
+ err = INTEL_PT_RETURN;
+ goto out;
+ }
+
+ if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
+ /* Zero-length calls are excluded */
+ if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
+ intel_pt_insn->rel) {
+ err = intel_pt_push(&decoder->stack, decoder->ip +
+ intel_pt_insn->length);
+ if (err)
+ goto out;
+ }
+ } else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
+ decoder->ret_addr = intel_pt_pop(&decoder->stack);
+ }
+
+ if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
+ int cnt = decoder->no_progress++;
+
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip += intel_pt_insn->length +
+ intel_pt_insn->rel;
+ decoder->state.to_ip = decoder->ip;
+ err = INTEL_PT_RETURN;
+
+ /*
+ * Check for being stuck in a loop. This can happen if a
+ * decoder error results in the decoder erroneously setting the
+ * ip to an address that is itself in an infinite loop that
+ * consumes no packets. When that happens, there must be an
+ * unconditional branch.
+ */
+ if (cnt) {
+ if (cnt == 1) {
+ decoder->stuck_ip = decoder->state.to_ip;
+ decoder->stuck_ip_prd = 1;
+ decoder->stuck_ip_cnt = 1;
+ } else if (cnt > INTEL_PT_MAX_LOOPS ||
+ decoder->state.to_ip == decoder->stuck_ip) {
+ intel_pt_log_at("ERROR: Never-ending loop",
+ decoder->state.to_ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ err = -ELOOP;
+ goto out;
+ } else if (!--decoder->stuck_ip_cnt) {
+ decoder->stuck_ip_prd += 1;
+ decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
+ decoder->stuck_ip = decoder->state.to_ip;
+ }
+ }
+ goto out_no_progress;
+ }
+out:
+ decoder->no_progress = 0;
+out_no_progress:
+ decoder->state.insn_op = intel_pt_insn->op;
+ decoder->state.insn_len = intel_pt_insn->length;
+
+ if (decoder->tx_flags & INTEL_PT_IN_TX)
+ decoder->state.flags |= INTEL_PT_IN_TX;
+
+ return err;
+}
+
+static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ uint64_t ip;
+ int err;
+
+ ip = decoder->last_ip;
+
+ while (1) {
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err == -EAGAIN) {
+ if (decoder->set_fup_tx_flags) {
+ decoder->set_fup_tx_flags = false;
+ decoder->tx_flags = decoder->fup_tx_flags;
+ decoder->state.type = INTEL_PT_TRANSACTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.flags = decoder->fup_tx_flags;
+ return 0;
+ }
+ return err;
+ }
+ decoder->set_fup_tx_flags = false;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ intel_pt_log_at("ERROR: Unexpected indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ intel_pt_log_at("ERROR: Unexpected conditional branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ intel_pt_bug(decoder);
+ }
+}
+
+static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ if (decoder->packet.count != 0)
+ decoder->ip = decoder->last_ip;
+ } else {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ }
+ }
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ return intel_pt_bug(decoder);
+}
+
+static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ while (1) {
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.op == INTEL_PT_OP_RET) {
+ if (!decoder->return_compression) {
+ intel_pt_log_at("ERROR: RET when expecting conditional branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ if (!decoder->ret_addr) {
+ intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ if (!(decoder->tnt.payload & BIT63)) {
+ intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ decoder->tnt.count -= 1;
+ if (!decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->tnt.payload <<= 1;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip = decoder->ret_addr;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ /* Handle deferred TIPs */
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type != INTEL_PT_TIP ||
+ decoder->packet.count == 0) {
+ intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
+ return -ENOENT;
+ }
+ intel_pt_set_last_ip(decoder);
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ decoder->tnt.count -= 1;
+ if (!decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ if (decoder->tnt.payload & BIT63) {
+ decoder->tnt.payload <<= 1;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip += intel_pt_insn.length +
+ intel_pt_insn.rel;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+ /* Instruction sample for a non-taken branch */
+ if (decoder->state.type & INTEL_PT_INSTRUCTION) {
+ decoder->tnt.payload <<= 1;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->ip += intel_pt_insn.length;
+ return 0;
+ }
+ decoder->ip += intel_pt_insn.length;
+ if (!decoder->tnt.count)
+ return -EAGAIN;
+ decoder->tnt.payload <<= 1;
+ continue;
+ }
+
+ return intel_pt_bug(decoder);
+ }
+}
+
+static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
+{
+ unsigned int fup_tx_flags;
+ int err;
+
+ fup_tx_flags = decoder->packet.payload &
+ (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->fup_tx_flags = fup_tx_flags;
+ decoder->set_fup_tx_flags = true;
+ if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
+ *no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
+ decoder->pos);
+ intel_pt_update_in_tx(decoder);
+ }
+ return 0;
+}
+
+static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+
+ decoder->have_tma = false;
+
+ if (decoder->ref_timestamp) {
+ timestamp = decoder->packet.payload |
+ (decoder->ref_timestamp & (0xffULL << 56));
+ if (timestamp < decoder->ref_timestamp) {
+ if (decoder->ref_timestamp - timestamp > (1ULL << 55))
+ timestamp += (1ULL << 56);
+ } else {
+ if (timestamp - decoder->ref_timestamp > (1ULL << 55))
+ timestamp -= (1ULL << 56);
+ }
+ decoder->tsc_timestamp = timestamp;
+ decoder->timestamp = timestamp;
+ decoder->ref_timestamp = 0;
+ decoder->timestamp_insn_cnt = 0;
+ } else if (decoder->timestamp) {
+ timestamp = decoder->packet.payload |
+ (decoder->timestamp & (0xffULL << 56));
+ decoder->tsc_timestamp = timestamp;
+ if (timestamp < decoder->timestamp &&
+ decoder->timestamp - timestamp < decoder->tsc_slip) {
+ intel_pt_log_to("Suppressing backwards timestamp",
+ timestamp);
+ timestamp = decoder->timestamp;
+ }
+ while (timestamp < decoder->timestamp) {
+ intel_pt_log_to("Wraparound timestamp", timestamp);
+ timestamp += (1ULL << 56);
+ decoder->tsc_timestamp = timestamp;
+ }
+ decoder->timestamp = timestamp;
+ decoder->timestamp_insn_cnt = 0;
+ }
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, false);
+ }
+
+ intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static int intel_pt_overflow(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log("ERROR: Buffer overflow\n");
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ decoder->cbr = 0;
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ decoder->overflow = true;
+ return -EOVERFLOW;
+}
+
+static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
+{
+ uint32_t ctc = decoder->packet.payload;
+ uint32_t fc = decoder->packet.count;
+ uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return;
+
+ decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+ decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ decoder->ctc_timestamp -= multdiv(ctc_rem,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+ decoder->ctc_delta = 0;
+ decoder->have_tma = true;
+ intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
+ decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
+}
+
+static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+ uint32_t mtc, mtc_delta;
+
+ if (!decoder->have_tma)
+ return;
+
+ mtc = decoder->packet.payload;
+
+ if (mtc > decoder->last_mtc)
+ mtc_delta = mtc - decoder->last_mtc;
+ else
+ mtc_delta = mtc + 256 - decoder->last_mtc;
+
+ decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = decoder->ctc_timestamp +
+ decoder->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = decoder->ctc_timestamp +
+ multdiv(decoder->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
+ decoder->last_mtc = mtc;
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, true);
+ }
+}
+
+static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+{
+ unsigned int cbr = decoder->packet.payload;
+
+ if (decoder->cbr == cbr)
+ return;
+
+ decoder->cbr = cbr;
+ decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+}
+
+static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp = decoder->cyc_ref_timestamp;
+
+ decoder->have_cyc = true;
+
+ decoder->cycle_cnt += decoder->packet.payload;
+
+ if (!decoder->cyc_ref_timestamp)
+ return;
+
+ if (decoder->have_calc_cyc_to_tsc)
+ timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
+ else if (decoder->cbr)
+ timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
+ else
+ return;
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+}
+
+/* Walk PSB+ packets when already in sync. */
+static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_PSBEND:
+ return 0;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TNT:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_BAD:
+ case INTEL_PT_PSB:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ return -EAGAIN;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_FUP:
+ decoder->pge = true;
+ intel_pt_set_last_ip(decoder);
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
+ decoder->tx_flags = 0;
+ decoder->state.flags &= ~INTEL_PT_IN_TX;
+ decoder->state.flags |= INTEL_PT_ABORT_TX;
+ } else {
+ decoder->state.flags |= INTEL_PT_ASYNC;
+ }
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_FUP:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_CBR:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_BAD:
+ case INTEL_PT_PSBEND:
+ intel_pt_log("ERROR: Missing TIP after FUP\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TIP_PGD:
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ if (decoder->packet.count != 0) {
+ intel_pt_set_ip(decoder);
+ intel_pt_log("Omitting PGD ip " x64_fmt "\n",
+ decoder->ip);
+ }
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ return 0;
+
+ case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ intel_pt_log("Omitting PGE ip " x64_fmt "\n",
+ decoder->ip);
+ decoder->state.from_ip = 0;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ return 0;
+
+ case INTEL_PT_TIP:
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ return 0;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ break;
+
+ default:
+ return intel_pt_bug(decoder);
+ }
+ }
+}
+
+static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
+{
+ bool no_tip = false;
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+next:
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ if (!decoder->packet.count)
+ break;
+ decoder->tnt = decoder->packet;
+ decoder->pkt_state = INTEL_PT_STATE_TNT;
+ err = intel_pt_walk_tnt(decoder);
+ if (err == -EAGAIN)
+ break;
+ return err;
+
+ case INTEL_PT_TIP_PGD:
+ if (decoder->packet.count != 0)
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
+ return intel_pt_walk_tip(decoder);
+
+ case INTEL_PT_TIP_PGE: {
+ decoder->pge = true;
+ if (decoder->packet.count == 0) {
+ intel_pt_log_at("Skipping zero TIP.PGE",
+ decoder->pos);
+ break;
+ }
+ intel_pt_set_ip(decoder);
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TIP:
+ if (decoder->packet.count != 0)
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_TIP;
+ return intel_pt_walk_tip(decoder);
+
+ case INTEL_PT_FUP:
+ if (decoder->packet.count == 0) {
+ intel_pt_log_at("Skipping zero FUP",
+ decoder->pos);
+ no_tip = false;
+ break;
+ }
+ intel_pt_set_last_ip(decoder);
+ err = intel_pt_walk_fup(decoder);
+ if (err != -EAGAIN) {
+ if (err)
+ return err;
+ if (no_tip)
+ decoder->pkt_state =
+ INTEL_PT_STATE_FUP_NO_TIP;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_FUP;
+ return 0;
+ }
+ if (no_tip) {
+ no_tip = false;
+ break;
+ }
+ return intel_pt_walk_fup_tip(decoder);
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ intel_pt_clear_stack(&decoder->stack);
+ err = intel_pt_walk_psbend(decoder);
+ if (err == -EAGAIN)
+ goto next;
+ if (err)
+ return err;
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type != INTEL_PT_PERIOD_MTC)
+ break;
+ /*
+ * Ensure that there has been an instruction since the
+ * last MTC.
+ */
+ if (!decoder->mtc_insn)
+ break;
+ decoder->mtc_insn = false;
+ /* Ensure that there is a timestamp */
+ if (!decoder->timestamp)
+ break;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->mtc_insn = false;
+ return 0;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ /* MODE_TSX need not be followed by FUP */
+ if (!decoder->pge) {
+ intel_pt_update_in_tx(decoder);
+ break;
+ }
+ err = intel_pt_mode_tsx(decoder, &no_tip);
+ if (err)
+ return err;
+ goto next;
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ break;
+
+ default:
+ return intel_pt_bug(decoder);
+ }
+ }
+}
+
+/* Walk PSB+ packets to get in sync. */
+static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->continuous_period = false;
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ intel_pt_log("ERROR: Unexpected packet\n");
+ return -ENOENT;
+
+ case INTEL_PT_FUP:
+ decoder->pge = true;
+ if (decoder->last_ip || decoder->packet.count == 6 ||
+ decoder->packet.count == 0) {
+ uint64_t current_ip = decoder->ip;
+
+ intel_pt_set_ip(decoder);
+ if (current_ip)
+ intel_pt_log_to("Setting IP",
+ decoder->ip);
+ }
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ case INTEL_PT_TNT:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ if (decoder->ip)
+ decoder->pkt_state = INTEL_PT_STATE_ERR4;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_PSBEND:
+ return 0;
+
+ case INTEL_PT_PSB:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->continuous_period = false;
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
+ if (decoder->last_ip || decoder->packet.count == 6 ||
+ decoder->packet.count == 0)
+ intel_pt_set_ip(decoder);
+ if (decoder->ip)
+ return 0;
+ break;
+
+ case INTEL_PT_FUP:
+ if (decoder->overflow) {
+ if (decoder->last_ip ||
+ decoder->packet.count == 6 ||
+ decoder->packet.count == 0)
+ intel_pt_set_ip(decoder);
+ if (decoder->ip)
+ return 0;
+ }
+ if (decoder->packet.count)
+ intel_pt_set_last_ip(decoder);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+ return err;
+ if (decoder->ip) {
+ /* Do not have a sample */
+ decoder->state.type = 0;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_TNT:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ intel_pt_log("Scanning for full IP\n");
+ err = intel_pt_walk_to_ip(decoder);
+ if (err)
+ return err;
+
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->overflow = false;
+
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ intel_pt_log_to("Setting IP", decoder->ip);
+
+ return 0;
+}
+
+static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
+{
+ const unsigned char *end = decoder->buf + decoder->len;
+ size_t i;
+
+ for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
+ if (i > decoder->len)
+ continue;
+ if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
+ return i;
+ }
+ return 0;
+}
+
+static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
+{
+ size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
+ const char *psb = INTEL_PT_PSB_STR;
+
+ if (rest_psb > decoder->len ||
+ memcmp(decoder->buf, psb + part_psb, rest_psb))
+ return 0;
+
+ return rest_psb;
+}
+
+static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
+ int part_psb)
+{
+ int rest_psb, ret;
+
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+
+ rest_psb = intel_pt_rest_psb(decoder, part_psb);
+ if (!rest_psb)
+ return 0;
+
+ decoder->pos -= part_psb;
+ decoder->next_buf = decoder->buf + rest_psb;
+ decoder->next_len = decoder->len - rest_psb;
+ memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ decoder->buf = decoder->temp_buf;
+ decoder->len = INTEL_PT_PSB_LEN;
+
+ return 0;
+}
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
+{
+ unsigned char *next;
+ int ret;
+
+ intel_pt_log("Scanning for PSB\n");
+ while (1) {
+ if (!decoder->len) {
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+ }
+
+ next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
+ INTEL_PT_PSB_LEN);
+ if (!next) {
+ int part_psb;
+
+ part_psb = intel_pt_part_psb(decoder);
+ if (part_psb) {
+ ret = intel_pt_get_split_psb(decoder, part_psb);
+ if (ret)
+ return ret;
+ } else {
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+ }
+ continue;
+ }
+
+ decoder->pkt_step = next - decoder->buf;
+ return intel_pt_get_next_packet(decoder);
+ }
+}
+
+static int intel_pt_sync(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->last_ip = 0;
+ decoder->ip = 0;
+ intel_pt_clear_stack(&decoder->stack);
+
+ err = intel_pt_scan_for_psb(decoder);
+ if (err)
+ return err;
+
+ decoder->pkt_state = INTEL_PT_STATE_NO_IP;
+
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+ return err;
+
+ if (decoder->ip) {
+ decoder->state.type = 0; /* Do not have a sample */
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ } else {
+ return intel_pt_sync_ip(decoder);
+ }
+
+ return 0;
+}
+
+static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t est = decoder->timestamp_insn_cnt << 1;
+
+ if (!decoder->cbr || !decoder->max_non_turbo_ratio)
+ goto out;
+
+ est *= decoder->max_non_turbo_ratio;
+ est /= decoder->cbr;
+out:
+ return decoder->timestamp + est;
+}
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ do {
+ decoder->state.type = INTEL_PT_BRANCH;
+ decoder->state.flags = 0;
+
+ switch (decoder->pkt_state) {
+ case INTEL_PT_STATE_NO_PSB:
+ err = intel_pt_sync(decoder);
+ break;
+ case INTEL_PT_STATE_NO_IP:
+ decoder->last_ip = 0;
+ /* Fall through */
+ case INTEL_PT_STATE_ERR_RESYNC:
+ err = intel_pt_sync_ip(decoder);
+ break;
+ case INTEL_PT_STATE_IN_SYNC:
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_TNT:
+ err = intel_pt_walk_tnt(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_TIP:
+ case INTEL_PT_STATE_TIP_PGD:
+ err = intel_pt_walk_tip(decoder);
+ break;
+ case INTEL_PT_STATE_FUP:
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ err = intel_pt_walk_fup(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_fup_tip(decoder);
+ else if (!err)
+ decoder->pkt_state = INTEL_PT_STATE_FUP;
+ break;
+ case INTEL_PT_STATE_FUP_NO_TIP:
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ err = intel_pt_walk_fup(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_trace(decoder);
+ break;
+ default:
+ err = intel_pt_bug(decoder);
+ break;
+ }
+ } while (err == -ENOLINK);
+
+ decoder->state.err = err ? intel_pt_ext_err(err) : 0;
+ decoder->state.timestamp = decoder->timestamp;
+ decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
+ decoder->state.cr3 = decoder->cr3;
+ decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+
+ if (err)
+ decoder->state.from_ip = decoder->ip;
+
+ return &decoder->state;
+}
+
+static bool intel_pt_at_psb(unsigned char *buf, size_t len)
+{
+ if (len < INTEL_PT_PSB_LEN)
+ return false;
+ return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
+ INTEL_PT_PSB_LEN);
+}
+
+/**
+ * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the next PSB packet if
+ * there is one, otherwise the buffer pointer is unchanged. If @buf is updated,
+ * @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
+{
+ unsigned char *next;
+
+ next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ if (next) {
+ *len -= next - *buf;
+ *buf = next;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_step_psb - move buffer pointer to the start of the following PSB
+ * packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the following PSB packet
+ * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
+ * pointer is unchanged. If @buf is updated, @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
+{
+ unsigned char *next;
+
+ if (!*len)
+ return false;
+
+ next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ if (next) {
+ *len -= next - *buf;
+ *buf = next;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_last_psb - find the last PSB packet in a buffer.
+ * @buf: buffer
+ * @len: size of buffer
+ *
+ * This function finds the last PSB in a buffer.
+ *
+ * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
+ */
+static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
+{
+ const char *n = INTEL_PT_PSB_STR;
+ unsigned char *p;
+ size_t k;
+
+ if (len < INTEL_PT_PSB_LEN)
+ return NULL;
+
+ k = len - INTEL_PT_PSB_LEN + 1;
+ while (1) {
+ p = memrchr(buf, n[0], k);
+ if (!p)
+ return NULL;
+ if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
+ return p;
+ k = p - buf;
+ if (!k)
+ return NULL;
+ }
+}
+
+/**
+ * intel_pt_next_tsc - find and return next TSC.
+ * @buf: buffer
+ * @len: size of buffer
+ * @tsc: TSC value returned
+ *
+ * Find a TSC packet in @buf and return the TSC value. This function assumes
+ * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
+ * PSBEND packet is found.
+ *
+ * Return: %true if TSC is found, false otherwise.
+ */
+static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
+{
+ struct intel_pt_pkt packet;
+ int ret;
+
+ while (len) {
+ ret = intel_pt_get_packet(buf, len, &packet);
+ if (ret <= 0)
+ return false;
+ if (packet.type == INTEL_PT_TSC) {
+ *tsc = packet.payload;
+ return true;
+ }
+ if (packet.type == INTEL_PT_PSBEND)
+ return false;
+ buf += ret;
+ len -= ret;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_tsc_cmp - compare 7-byte TSCs.
+ * @tsc1: first TSC to compare
+ * @tsc2: second TSC to compare
+ *
+ * This function compares 7-byte TSC values allowing for the possibility that
+ * TSC wrapped around. Generally it is not possible to know if TSC has wrapped
+ * around so for that purpose this function assumes the absolute difference is
+ * less than half the maximum difference.
+ *
+ * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
+ * after @tsc2.
+ */
+static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
+{
+ const uint64_t halfway = (1ULL << 55);
+
+ if (tsc1 == tsc2)
+ return 0;
+
+ if (tsc1 < tsc2) {
+ if (tsc2 - tsc1 < halfway)
+ return -1;
+ else
+ return 1;
+ } else {
+ if (tsc1 - tsc2 < halfway)
+ return 1;
+ else
+ return -1;
+ }
+}
+
+/**
+ * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
+ * using TSC.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ *
+ * If the trace contains TSC we can look at the last TSC of @buf_a and the
+ * first TSC of @buf_b in order to determine if the buffers overlap, and then
+ * walk forward in @buf_b until a later TSC is found. A precondition is that
+ * @buf_a and @buf_b are positioned at a PSB.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
+ size_t len_a,
+ unsigned char *buf_b,
+ size_t len_b)
+{
+ uint64_t tsc_a, tsc_b;
+ unsigned char *p;
+ size_t len;
+
+ p = intel_pt_last_psb(buf_a, len_a);
+ if (!p)
+ return buf_b; /* No PSB in buf_a => no overlap */
+
+ len = len_a - (p - buf_a);
+ if (!intel_pt_next_tsc(p, len, &tsc_a)) {
+ /* The last PSB+ in buf_a is incomplete, so go back one more */
+ len_a -= len;
+ p = intel_pt_last_psb(buf_a, len_a);
+ if (!p)
+ return buf_b; /* No full PSB+ => assume no overlap */
+ len = len_a - (p - buf_a);
+ if (!intel_pt_next_tsc(p, len, &tsc_a))
+ return buf_b; /* No TSC in buf_a => assume no overlap */
+ }
+
+ while (1) {
+ /* Ignore PSB+ with no TSC */
+ if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
+ intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
+ return buf_b; /* tsc_a < tsc_b => no overlap */
+
+ if (!intel_pt_step_psb(&buf_b, &len_b))
+ return buf_b + len_b; /* No PSB in buf_b => no data */
+ }
+}
+
+/**
+ * intel_pt_find_overlap - determine start of non-overlapped trace data.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @have_tsc: can use TSC packets to detect overlap
+ *
+ * When trace samples or snapshots are recorded there is the possibility that
+ * the data overlaps. Note that, for the purposes of decoding, data is only
+ * useful if it begins with a PSB packet.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b,
+ bool have_tsc)
+{
+ unsigned char *found;
+
+ /* Buffer 'b' must start at PSB so throw away everything before that */
+ if (!intel_pt_next_psb(&buf_b, &len_b))
+ return buf_b + len_b; /* No PSB */
+
+ if (!intel_pt_next_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+
+ if (have_tsc) {
+ found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
+ if (found)
+ return found;
+ }
+
+ /*
+ * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
+ * we can ignore the first part of buffer 'a'.
+ */
+ while (len_b < len_a) {
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+
+ /* Now len_b >= len_a */
+ if (len_b > len_a) {
+ /* The leftover buffer 'b' must start at a PSB */
+ while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+ }
+
+ while (1) {
+ /* Potential overlap so check the bytes */
+ found = memmem(buf_a, len_a, buf_b, len_a);
+ if (found)
+ return buf_b + len_a;
+
+ /* Try again at next PSB in buffer 'a' */
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+
+ /* The leftover buffer 'b' must start at a PSB */
+ while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+ }
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
new file mode 100644
index 0000000..02c38fe
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -0,0 +1,109 @@
+/*
+ * intel_pt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_DECODER_H__
+#define INCLUDE__INTEL_PT_DECODER_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include "intel-pt-insn-decoder.h"
+
+#define INTEL_PT_IN_TX (1 << 0)
+#define INTEL_PT_ABORT_TX (1 << 1)
+#define INTEL_PT_ASYNC (1 << 2)
+
+enum intel_pt_sample_type {
+ INTEL_PT_BRANCH = 1 << 0,
+ INTEL_PT_INSTRUCTION = 1 << 1,
+ INTEL_PT_TRANSACTION = 1 << 2,
+};
+
+enum intel_pt_period_type {
+ INTEL_PT_PERIOD_NONE,
+ INTEL_PT_PERIOD_INSTRUCTIONS,
+ INTEL_PT_PERIOD_TICKS,
+ INTEL_PT_PERIOD_MTC,
+};
+
+enum {
+ INTEL_PT_ERR_NOMEM = 1,
+ INTEL_PT_ERR_INTERN,
+ INTEL_PT_ERR_BADPKT,
+ INTEL_PT_ERR_NODATA,
+ INTEL_PT_ERR_NOINSN,
+ INTEL_PT_ERR_MISMAT,
+ INTEL_PT_ERR_OVR,
+ INTEL_PT_ERR_LOST,
+ INTEL_PT_ERR_UNK,
+ INTEL_PT_ERR_NELOOP,
+ INTEL_PT_ERR_MAX,
+};
+
+struct intel_pt_state {
+ enum intel_pt_sample_type type;
+ int err;
+ uint64_t from_ip;
+ uint64_t to_ip;
+ uint64_t cr3;
+ uint64_t tot_insn_cnt;
+ uint64_t timestamp;
+ uint64_t est_timestamp;
+ uint64_t trace_nr;
+ uint32_t flags;
+ enum intel_pt_insn_op insn_op;
+ int insn_len;
+};
+
+struct intel_pt_insn;
+
+struct intel_pt_buffer {
+ const unsigned char *buf;
+ size_t len;
+ bool consecutive;
+ uint64_t ref_timestamp;
+ uint64_t trace_nr;
+};
+
+struct intel_pt_params {
+ int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+ int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+ uint64_t max_insn_cnt, void *data);
+ void *data;
+ bool return_compression;
+ uint64_t period;
+ enum intel_pt_period_type period_type;
+ unsigned max_non_turbo_ratio;
+ unsigned int mtc_period;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+};
+
+struct intel_pt_decoder;
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
+
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b,
+ bool have_tsc);
+
+int intel_pt__strerror(int code, char *buf, size_t buflen);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
new file mode 100644
index 0000000..d23138c
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -0,0 +1,249 @@
+/*
+ * intel_pt_insn_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#include "event.h"
+
+#include "insn.h"
+
+#include "inat.c"
+#include "insn.c"
+
+#include "intel-pt-insn-decoder.h"
+
+/* Based on branch_type() from perf_event_intel_lbr.c */
+static void intel_pt_insn_decoder(struct insn *insn,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
+ enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
+ int ext;
+
+ if (insn_is_avx(insn)) {
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
+ intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
+ intel_pt_insn->length = insn->length;
+ return;
+ }
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xf:
+ switch (insn->opcode.bytes[1]) {
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ op = INTEL_PT_OP_SYSCALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ op = INTEL_PT_OP_SYSRET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0x80 ... 0x8f: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x70 ... 0x7f: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ op = INTEL_PT_OP_RET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xcf: /* iret */
+ op = INTEL_PT_OP_IRET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xcc ... 0xce: /* int */
+ op = INTEL_PT_OP_INT;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xe8: /* call near rel */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_UNCONDITIONAL;
+ break;
+ case 0x9a: /* call far absolute */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xe0 ... 0xe2: /* loop */
+ op = INTEL_PT_OP_LOOP;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xe3: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xe9: /* jmp */
+ case 0xeb: /* jmp */
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_UNCONDITIONAL;
+ break;
+ case 0xea: /* far jmp */
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xff: /* call near absolute, call far absolute ind */
+ ext = (insn->modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 4:
+ case 5:
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ intel_pt_insn->op = op;
+ intel_pt_insn->branch = branch;
+ intel_pt_insn->length = insn->length;
+
+ if (branch == INTEL_PT_BR_CONDITIONAL ||
+ branch == INTEL_PT_BR_UNCONDITIONAL) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ switch (insn->immediate.nbytes) {
+ case 1:
+ intel_pt_insn->rel = insn->immediate.value;
+ break;
+ case 2:
+ intel_pt_insn->rel =
+ bswap_16((short)insn->immediate.value);
+ break;
+ case 4:
+ intel_pt_insn->rel = bswap_32(insn->immediate.value);
+ break;
+ default:
+ intel_pt_insn->rel = 0;
+ break;
+ }
+#else
+ intel_pt_insn->rel = insn->immediate.value;
+#endif
+ }
+}
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ struct insn insn;
+
+ insn_init(&insn, buf, len, x86_64);
+ insn_get_length(&insn);
+ if (!insn_complete(&insn) || insn.length > len)
+ return -1;
+ intel_pt_insn_decoder(&insn, intel_pt_insn);
+ if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ)
+ memcpy(intel_pt_insn->buf, buf, insn.length);
+ else
+ memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ);
+ return 0;
+}
+
+const char *branch_name[] = {
+ [INTEL_PT_OP_OTHER] = "Other",
+ [INTEL_PT_OP_CALL] = "Call",
+ [INTEL_PT_OP_RET] = "Ret",
+ [INTEL_PT_OP_JCC] = "Jcc",
+ [INTEL_PT_OP_JMP] = "Jmp",
+ [INTEL_PT_OP_LOOP] = "Loop",
+ [INTEL_PT_OP_IRET] = "IRet",
+ [INTEL_PT_OP_INT] = "Int",
+ [INTEL_PT_OP_SYSCALL] = "Syscall",
+ [INTEL_PT_OP_SYSRET] = "Sysret",
+};
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op)
+{
+ return branch_name[op];
+}
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+ size_t buf_len)
+{
+ switch (intel_pt_insn->branch) {
+ case INTEL_PT_BR_CONDITIONAL:
+ case INTEL_PT_BR_UNCONDITIONAL:
+ return snprintf(buf, buf_len, "%s %s%d",
+ intel_pt_insn_name(intel_pt_insn->op),
+ intel_pt_insn->rel > 0 ? "+" : "",
+ intel_pt_insn->rel);
+ case INTEL_PT_BR_NO_BRANCH:
+ case INTEL_PT_BR_INDIRECT:
+ return snprintf(buf, buf_len, "%s",
+ intel_pt_insn_name(intel_pt_insn->op));
+ default:
+ break;
+ }
+ return 0;
+}
+
+size_t intel_pt_insn_max_size(void)
+{
+ return MAX_INSN_SIZE;
+}
+
+int intel_pt_insn_type(enum intel_pt_insn_op op)
+{
+ switch (op) {
+ case INTEL_PT_OP_OTHER:
+ return 0;
+ case INTEL_PT_OP_CALL:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
+ case INTEL_PT_OP_RET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
+ case INTEL_PT_OP_JCC:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+ case INTEL_PT_OP_JMP:
+ return PERF_IP_FLAG_BRANCH;
+ case INTEL_PT_OP_LOOP:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+ case INTEL_PT_OP_IRET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_INTERRUPT;
+ case INTEL_PT_OP_INT:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_INTERRUPT;
+ case INTEL_PT_OP_SYSCALL:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET;
+ case INTEL_PT_OP_SYSRET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_SYSCALLRET;
+ default:
+ return 0;
+ }
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
new file mode 100644
index 0000000..b0adbf3
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -0,0 +1,65 @@
+/*
+ * intel_pt_insn_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
+#define INCLUDE__INTEL_PT_INSN_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_INSN_DESC_MAX 32
+#define INTEL_PT_INSN_DBG_BUF_SZ 16
+
+enum intel_pt_insn_op {
+ INTEL_PT_OP_OTHER,
+ INTEL_PT_OP_CALL,
+ INTEL_PT_OP_RET,
+ INTEL_PT_OP_JCC,
+ INTEL_PT_OP_JMP,
+ INTEL_PT_OP_LOOP,
+ INTEL_PT_OP_IRET,
+ INTEL_PT_OP_INT,
+ INTEL_PT_OP_SYSCALL,
+ INTEL_PT_OP_SYSRET,
+};
+
+enum intel_pt_insn_branch {
+ INTEL_PT_BR_NO_BRANCH,
+ INTEL_PT_BR_INDIRECT,
+ INTEL_PT_BR_CONDITIONAL,
+ INTEL_PT_BR_UNCONDITIONAL,
+};
+
+struct intel_pt_insn {
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
+ int length;
+ int32_t rel;
+ unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ];
+};
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+ struct intel_pt_insn *intel_pt_insn);
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op);
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+ size_t buf_len);
+
+size_t intel_pt_insn_max_size(void);
+
+int intel_pt_insn_type(enum intel_pt_insn_op op);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
new file mode 100644
index 0000000..d09c7d9
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -0,0 +1,155 @@
+/*
+ * intel_pt_log.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "intel-pt-log.h"
+#include "intel-pt-insn-decoder.h"
+
+#include "intel-pt-pkt-decoder.h"
+
+#define MAX_LOG_NAME 256
+
+static FILE *f;
+static char log_name[MAX_LOG_NAME];
+static bool enable_logging;
+
+void intel_pt_log_enable(void)
+{
+ enable_logging = true;
+}
+
+void intel_pt_log_disable(void)
+{
+ if (f)
+ fflush(f);
+ enable_logging = false;
+}
+
+void intel_pt_log_set_name(const char *name)
+{
+ strncpy(log_name, name, MAX_LOG_NAME - 5);
+ strcat(log_name, ".log");
+}
+
+static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
+ int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ fprintf(f, " ");
+
+ fprintf(f, " %08" PRIx64 ": ", pos);
+ for (i = 0; i < len; i++)
+ fprintf(f, " %02x", buf[i]);
+ for (; i < 16; i++)
+ fprintf(f, " ");
+ fprintf(f, " ");
+}
+
+static void intel_pt_print_no_data(uint64_t pos, int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ fprintf(f, " ");
+
+ fprintf(f, " %08" PRIx64 ": ", pos);
+ for (i = 0; i < 16; i++)
+ fprintf(f, " ");
+ fprintf(f, " ");
+}
+
+static int intel_pt_log_open(void)
+{
+ if (!enable_logging)
+ return -1;
+
+ if (f)
+ return 0;
+
+ if (!log_name[0])
+ return -1;
+
+ f = fopen(log_name, "w+");
+ if (!f) {
+ enable_logging = false;
+ return -1;
+ }
+
+ return 0;
+}
+
+void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+ uint64_t pos, const unsigned char *buf)
+{
+ char desc[INTEL_PT_PKT_DESC_MAX];
+
+ if (intel_pt_log_open())
+ return;
+
+ intel_pt_print_data(buf, pkt_len, pos, 0);
+ intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
+ fprintf(f, "%s\n", desc);
+}
+
+void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ char desc[INTEL_PT_INSN_DESC_MAX];
+ size_t len = intel_pt_insn->length;
+
+ if (intel_pt_log_open())
+ return;
+
+ if (len > INTEL_PT_INSN_DBG_BUF_SZ)
+ len = INTEL_PT_INSN_DBG_BUF_SZ;
+ intel_pt_print_data(intel_pt_insn->buf, len, ip, 8);
+ if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+ fprintf(f, "%s\n", desc);
+ else
+ fprintf(f, "Bad instruction!\n");
+}
+
+void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ char desc[INTEL_PT_INSN_DESC_MAX];
+
+ if (intel_pt_log_open())
+ return;
+
+ intel_pt_print_no_data(ip, 8);
+ if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+ fprintf(f, "%s\n", desc);
+ else
+ fprintf(f, "Bad instruction!\n");
+}
+
+void intel_pt_log(const char *fmt, ...)
+{
+ va_list args;
+
+ if (intel_pt_log_open())
+ return;
+
+ va_start(args, fmt);
+ vfprintf(f, fmt, args);
+ va_end(args);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
new file mode 100644
index 0000000..db3942f
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -0,0 +1,52 @@
+/*
+ * intel_pt_log.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_LOG_H__
+#define INCLUDE__INTEL_PT_LOG_H__
+
+#include <stdint.h>
+#include <inttypes.h>
+
+struct intel_pt_pkt;
+
+void intel_pt_log_enable(void);
+void intel_pt_log_disable(void);
+void intel_pt_log_set_name(const char *name);
+
+void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+ uint64_t pos, const unsigned char *buf);
+
+struct intel_pt_insn;
+
+void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
+void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip);
+
+__attribute__((format(printf, 1, 2)))
+void intel_pt_log(const char *fmt, ...);
+
+#define x64_fmt "0x%" PRIx64
+
+static inline void intel_pt_log_at(const char *msg, uint64_t u)
+{
+ intel_pt_log("%s at " x64_fmt "\n", msg, u);
+}
+
+static inline void intel_pt_log_to(const char *msg, uint64_t u)
+{
+ intel_pt_log("%s to " x64_fmt "\n", msg, u);
+}
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
new file mode 100644
index 0000000..b1257c8
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -0,0 +1,518 @@
+/*
+ * intel_pt_pkt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#include "intel-pt-pkt-decoder.h"
+
+#define BIT(n) (1 << (n))
+
+#define BIT63 ((uint64_t)1 << 63)
+
+#define NR_FLAG BIT63
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+ memcpy((d), (s), (n)); \
+ *(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const packet_name[] = {
+ [INTEL_PT_BAD] = "Bad Packet!",
+ [INTEL_PT_PAD] = "PAD",
+ [INTEL_PT_TNT] = "TNT",
+ [INTEL_PT_TIP_PGD] = "TIP.PGD",
+ [INTEL_PT_TIP_PGE] = "TIP.PGE",
+ [INTEL_PT_TSC] = "TSC",
+ [INTEL_PT_TMA] = "TMA",
+ [INTEL_PT_MODE_EXEC] = "MODE.Exec",
+ [INTEL_PT_MODE_TSX] = "MODE.TSX",
+ [INTEL_PT_MTC] = "MTC",
+ [INTEL_PT_TIP] = "TIP",
+ [INTEL_PT_FUP] = "FUP",
+ [INTEL_PT_CYC] = "CYC",
+ [INTEL_PT_VMCS] = "VMCS",
+ [INTEL_PT_PSB] = "PSB",
+ [INTEL_PT_PSBEND] = "PSBEND",
+ [INTEL_PT_CBR] = "CBR",
+ [INTEL_PT_TRACESTOP] = "TraceSTOP",
+ [INTEL_PT_PIP] = "PIP",
+ [INTEL_PT_OVF] = "OVF",
+ [INTEL_PT_MNT] = "MNT",
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
+{
+ return packet_name[type];
+}
+
+static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ uint64_t payload;
+ int count;
+
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ payload = le64_to_cpu(*(uint64_t *)buf);
+
+ for (count = 47; count; count--) {
+ if (payload & BIT63)
+ break;
+ payload <<= 1;
+ }
+
+ packet->type = INTEL_PT_TNT;
+ packet->count = count;
+ packet->payload = payload << 1;
+ return 8;
+}
+
+static int intel_pt_get_pip(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ uint64_t payload = 0;
+
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_PIP;
+ memcpy_le64(&payload, buf + 2, 6);
+ packet->payload = payload >> 1;
+ if (payload & 1)
+ packet->payload |= NR_FLAG;
+
+ return 8;
+}
+
+static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_TRACESTOP;
+ return 2;
+}
+
+static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_CBR;
+ packet->payload = buf[2];
+ return 4;
+}
+
+static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ unsigned int count = (52 - 5) >> 3;
+
+ if (count < 1 || count > 7)
+ return INTEL_PT_BAD_PACKET;
+
+ if (len < count + 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_VMCS;
+ packet->count = count;
+ memcpy_le64(&packet->payload, buf + 2, count);
+
+ return count + 2;
+}
+
+static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_OVF;
+ return 2;
+}
+
+static int intel_pt_get_psb(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int i;
+
+ if (len < 16)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ for (i = 2; i < 16; i += 2) {
+ if (buf[i] != 2 || buf[i + 1] != 0x82)
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ packet->type = INTEL_PT_PSB;
+ return 16;
+}
+
+static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_PSBEND;
+ return 2;
+}
+
+static int intel_pt_get_tma(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_TMA;
+ packet->payload = buf[2] | (buf[3] << 8);
+ packet->count = buf[5] | ((buf[6] & BIT(0)) << 8);
+ return 7;
+}
+
+static int intel_pt_get_pad(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_PAD;
+ return 1;
+}
+
+static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MNT;
+ memcpy_le64(&packet->payload, buf + 3, 8);
+ return 11
+;
+}
+
+static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[2]) {
+ case 0x88: /* MNT */
+ return intel_pt_get_mnt(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[1]) {
+ case 0xa3: /* Long TNT */
+ return intel_pt_get_long_tnt(buf, len, packet);
+ case 0x43: /* PIP */
+ return intel_pt_get_pip(buf, len, packet);
+ case 0x83: /* TraceStop */
+ return intel_pt_get_tracestop(packet);
+ case 0x03: /* CBR */
+ return intel_pt_get_cbr(buf, len, packet);
+ case 0xc8: /* VMCS */
+ return intel_pt_get_vmcs(buf, len, packet);
+ case 0xf3: /* OVF */
+ return intel_pt_get_ovf(packet);
+ case 0x82: /* PSB */
+ return intel_pt_get_psb(buf, len, packet);
+ case 0x23: /* PSBEND */
+ return intel_pt_get_psbend(packet);
+ case 0x73: /* TMA */
+ return intel_pt_get_tma(buf, len, packet);
+ case 0xC3: /* 3-byte header */
+ return intel_pt_get_3byte(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_short_tnt(unsigned int byte,
+ struct intel_pt_pkt *packet)
+{
+ int count;
+
+ for (count = 6; count; count--) {
+ if (byte & BIT(7))
+ break;
+ byte <<= 1;
+ }
+
+ packet->type = INTEL_PT_TNT;
+ packet->count = count;
+ packet->payload = (uint64_t)byte << 57;
+
+ return 1;
+}
+
+static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
+ size_t len, struct intel_pt_pkt *packet)
+{
+ unsigned int offs = 1, shift;
+ uint64_t payload = byte >> 3;
+
+ byte >>= 2;
+ len -= 1;
+ for (shift = 5; byte & 1; shift += 7) {
+ if (offs > 9)
+ return INTEL_PT_BAD_PACKET;
+ if (len < offs)
+ return INTEL_PT_NEED_MORE_BYTES;
+ byte = buf[offs++];
+ payload |= (byte >> 1) << shift;
+ }
+
+ packet->type = INTEL_PT_CYC;
+ packet->payload = payload;
+ return offs;
+}
+
+static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
+ const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ switch (byte >> 5) {
+ case 0:
+ packet->count = 0;
+ break;
+ case 1:
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->count = 2;
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
+ break;
+ case 2:
+ if (len < 5)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->count = 4;
+ packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
+ break;
+ case 3:
+ case 6:
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->count = 6;
+ memcpy_le64(&packet->payload, buf + 1, 6);
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ packet->type = type;
+
+ return packet->count + 1;
+}
+
+static int intel_pt_get_mode(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[1] >> 5) {
+ case 0:
+ packet->type = INTEL_PT_MODE_EXEC;
+ switch (buf[1] & 3) {
+ case 0:
+ packet->payload = 16;
+ break;
+ case 1:
+ packet->payload = 64;
+ break;
+ case 2:
+ packet->payload = 32;
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+ break;
+ case 1:
+ packet->type = INTEL_PT_MODE_TSX;
+ if ((buf[1] & 3) == 3)
+ return INTEL_PT_BAD_PACKET;
+ packet->payload = buf[1] & 3;
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ return 2;
+}
+
+static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_TSC;
+ memcpy_le64(&packet->payload, buf + 1, 7);
+ return 8;
+}
+
+static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MTC;
+ packet->payload = buf[1];
+ return 2;
+}
+
+static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ unsigned int byte;
+
+ memset(packet, 0, sizeof(struct intel_pt_pkt));
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ byte = buf[0];
+ if (!(byte & BIT(0))) {
+ if (byte == 0)
+ return intel_pt_get_pad(packet);
+ if (byte == 2)
+ return intel_pt_get_ext(buf, len, packet);
+ return intel_pt_get_short_tnt(byte, packet);
+ }
+
+ if ((byte & 2))
+ return intel_pt_get_cyc(byte, buf, len, packet);
+
+ switch (byte & 0x1f) {
+ case 0x0D:
+ return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
+ case 0x11:
+ return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
+ packet);
+ case 0x01:
+ return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
+ packet);
+ case 0x1D:
+ return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
+ case 0x19:
+ switch (byte) {
+ case 0x99:
+ return intel_pt_get_mode(buf, len, packet);
+ case 0x19:
+ return intel_pt_get_tsc(buf, len, packet);
+ case 0x59:
+ return intel_pt_get_mtc(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int ret;
+
+ ret = intel_pt_do_get_packet(buf, len, packet);
+ if (ret > 0) {
+ while (ret < 8 && len > (size_t)ret && !buf[ret])
+ ret += 1;
+ }
+ return ret;
+}
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
+ size_t buf_len)
+{
+ int ret, i, nr;
+ unsigned long long payload = packet->payload;
+ const char *name = intel_pt_pkt_name(packet->type);
+
+ switch (packet->type) {
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_OVF:
+ return snprintf(buf, buf_len, "%s", name);
+ case INTEL_PT_TNT: {
+ size_t blen = buf_len;
+
+ ret = snprintf(buf, blen, "%s ", name);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ blen -= ret;
+ for (i = 0; i < packet->count; i++) {
+ if (payload & BIT63)
+ ret = snprintf(buf, blen, "T");
+ else
+ ret = snprintf(buf, blen, "N");
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ blen -= ret;
+ payload <<= 1;
+ }
+ ret = snprintf(buf, blen, " (%d)", packet->count);
+ if (ret < 0)
+ return ret;
+ blen -= ret;
+ return buf_len - blen;
+ }
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ if (!(packet->count))
+ return snprintf(buf, buf_len, "%s no ip", name);
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MTC:
+ case INTEL_PT_MNT:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TSC:
+ return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
+ case INTEL_PT_TMA:
+ return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
+ (unsigned)payload, packet->count);
+ case INTEL_PT_MODE_EXEC:
+ return snprintf(buf, buf_len, "%s %lld", name, payload);
+ case INTEL_PT_MODE_TSX:
+ return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
+ name, (unsigned)(payload >> 1) & 1,
+ (unsigned)payload & 1);
+ case INTEL_PT_PIP:
+ nr = packet->payload & NR_FLAG ? 1 : 0;
+ payload &= ~NR_FLAG;
+ ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
+ name, payload, nr);
+ return ret;
+ default:
+ break;
+ }
+ return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+ name, payload, packet->count);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
new file mode 100644
index 0000000..781bb79
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -0,0 +1,70 @@
+/*
+ * intel_pt_pkt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
+#define INCLUDE__INTEL_PT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_PKT_DESC_MAX 256
+
+#define INTEL_PT_NEED_MORE_BYTES -1
+#define INTEL_PT_BAD_PACKET -2
+
+#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \
+ "\002\202\002\202\002\202\002\202"
+#define INTEL_PT_PSB_LEN 16
+
+#define INTEL_PT_PKT_MAX_SZ 16
+
+enum intel_pt_pkt_type {
+ INTEL_PT_BAD,
+ INTEL_PT_PAD,
+ INTEL_PT_TNT,
+ INTEL_PT_TIP_PGD,
+ INTEL_PT_TIP_PGE,
+ INTEL_PT_TSC,
+ INTEL_PT_TMA,
+ INTEL_PT_MODE_EXEC,
+ INTEL_PT_MODE_TSX,
+ INTEL_PT_MTC,
+ INTEL_PT_TIP,
+ INTEL_PT_FUP,
+ INTEL_PT_CYC,
+ INTEL_PT_VMCS,
+ INTEL_PT_PSB,
+ INTEL_PT_PSBEND,
+ INTEL_PT_CBR,
+ INTEL_PT_TRACESTOP,
+ INTEL_PT_PIP,
+ INTEL_PT_OVF,
+ INTEL_PT_MNT,
+};
+
+struct intel_pt_pkt {
+ enum intel_pt_pkt_type type;
+ int count;
+ uint64_t payload;
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet);
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
new file mode 100644
index 0000000..816488c
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -0,0 +1,970 @@
+# x86 Opcode Maps
+#
+# This is (mostly) based on following documentations.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
+# (#326018-047US, June 2013)
+#
+#<Opcode maps>
+# Table: table-name
+# Referrer: escaped-name
+# AVXcode: avx-code
+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# (or)
+# opcode: escape # escaped-name
+# EndTable
+#
+#<group maps>
+# GrpTable: GrpXXX
+# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# EndTable
+#
+# AVX Superscripts
+# (v): this opcode requires VEX prefix.
+# (v1): this opcode only supports 128bit VEX.
+#
+# Last Prefix Superscripts
+# - (66): the last prefix is 0x66
+# - (F3): the last prefix is 0xF3
+# - (F2): the last prefix is 0xF2
+# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
+# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
+
+Table: one byte opcode
+Referrer:
+AVXcode:
+# 0x00 - 0x0f
+00: ADD Eb,Gb
+01: ADD Ev,Gv
+02: ADD Gb,Eb
+03: ADD Gv,Ev
+04: ADD AL,Ib
+05: ADD rAX,Iz
+06: PUSH ES (i64)
+07: POP ES (i64)
+08: OR Eb,Gb
+09: OR Ev,Gv
+0a: OR Gb,Eb
+0b: OR Gv,Ev
+0c: OR AL,Ib
+0d: OR rAX,Iz
+0e: PUSH CS (i64)
+0f: escape # 2-byte escape
+# 0x10 - 0x1f
+10: ADC Eb,Gb
+11: ADC Ev,Gv
+12: ADC Gb,Eb
+13: ADC Gv,Ev
+14: ADC AL,Ib
+15: ADC rAX,Iz
+16: PUSH SS (i64)
+17: POP SS (i64)
+18: SBB Eb,Gb
+19: SBB Ev,Gv
+1a: SBB Gb,Eb
+1b: SBB Gv,Ev
+1c: SBB AL,Ib
+1d: SBB rAX,Iz
+1e: PUSH DS (i64)
+1f: POP DS (i64)
+# 0x20 - 0x2f
+20: AND Eb,Gb
+21: AND Ev,Gv
+22: AND Gb,Eb
+23: AND Gv,Ev
+24: AND AL,Ib
+25: AND rAx,Iz
+26: SEG=ES (Prefix)
+27: DAA (i64)
+28: SUB Eb,Gb
+29: SUB Ev,Gv
+2a: SUB Gb,Eb
+2b: SUB Gv,Ev
+2c: SUB AL,Ib
+2d: SUB rAX,Iz
+2e: SEG=CS (Prefix)
+2f: DAS (i64)
+# 0x30 - 0x3f
+30: XOR Eb,Gb
+31: XOR Ev,Gv
+32: XOR Gb,Eb
+33: XOR Gv,Ev
+34: XOR AL,Ib
+35: XOR rAX,Iz
+36: SEG=SS (Prefix)
+37: AAA (i64)
+38: CMP Eb,Gb
+39: CMP Ev,Gv
+3a: CMP Gb,Eb
+3b: CMP Gv,Ev
+3c: CMP AL,Ib
+3d: CMP rAX,Iz
+3e: SEG=DS (Prefix)
+3f: AAS (i64)
+# 0x40 - 0x4f
+40: INC eAX (i64) | REX (o64)
+41: INC eCX (i64) | REX.B (o64)
+42: INC eDX (i64) | REX.X (o64)
+43: INC eBX (i64) | REX.XB (o64)
+44: INC eSP (i64) | REX.R (o64)
+45: INC eBP (i64) | REX.RB (o64)
+46: INC eSI (i64) | REX.RX (o64)
+47: INC eDI (i64) | REX.RXB (o64)
+48: DEC eAX (i64) | REX.W (o64)
+49: DEC eCX (i64) | REX.WB (o64)
+4a: DEC eDX (i64) | REX.WX (o64)
+4b: DEC eBX (i64) | REX.WXB (o64)
+4c: DEC eSP (i64) | REX.WR (o64)
+4d: DEC eBP (i64) | REX.WRB (o64)
+4e: DEC eSI (i64) | REX.WRX (o64)
+4f: DEC eDI (i64) | REX.WRXB (o64)
+# 0x50 - 0x5f
+50: PUSH rAX/r8 (d64)
+51: PUSH rCX/r9 (d64)
+52: PUSH rDX/r10 (d64)
+53: PUSH rBX/r11 (d64)
+54: PUSH rSP/r12 (d64)
+55: PUSH rBP/r13 (d64)
+56: PUSH rSI/r14 (d64)
+57: PUSH rDI/r15 (d64)
+58: POP rAX/r8 (d64)
+59: POP rCX/r9 (d64)
+5a: POP rDX/r10 (d64)
+5b: POP rBX/r11 (d64)
+5c: POP rSP/r12 (d64)
+5d: POP rBP/r13 (d64)
+5e: POP rSI/r14 (d64)
+5f: POP rDI/r15 (d64)
+# 0x60 - 0x6f
+60: PUSHA/PUSHAD (i64)
+61: POPA/POPAD (i64)
+62: BOUND Gv,Ma (i64)
+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
+64: SEG=FS (Prefix)
+65: SEG=GS (Prefix)
+66: Operand-Size (Prefix)
+67: Address-Size (Prefix)
+68: PUSH Iz (d64)
+69: IMUL Gv,Ev,Iz
+6a: PUSH Ib (d64)
+6b: IMUL Gv,Ev,Ib
+6c: INS/INSB Yb,DX
+6d: INS/INSW/INSD Yz,DX
+6e: OUTS/OUTSB DX,Xb
+6f: OUTS/OUTSW/OUTSD DX,Xz
+# 0x70 - 0x7f
+70: JO Jb
+71: JNO Jb
+72: JB/JNAE/JC Jb
+73: JNB/JAE/JNC Jb
+74: JZ/JE Jb
+75: JNZ/JNE Jb
+76: JBE/JNA Jb
+77: JNBE/JA Jb
+78: JS Jb
+79: JNS Jb
+7a: JP/JPE Jb
+7b: JNP/JPO Jb
+7c: JL/JNGE Jb
+7d: JNL/JGE Jb
+7e: JLE/JNG Jb
+7f: JNLE/JG Jb
+# 0x80 - 0x8f
+80: Grp1 Eb,Ib (1A)
+81: Grp1 Ev,Iz (1A)
+82: Grp1 Eb,Ib (1A),(i64)
+83: Grp1 Ev,Ib (1A)
+84: TEST Eb,Gb
+85: TEST Ev,Gv
+86: XCHG Eb,Gb
+87: XCHG Ev,Gv
+88: MOV Eb,Gb
+89: MOV Ev,Gv
+8a: MOV Gb,Eb
+8b: MOV Gv,Ev
+8c: MOV Ev,Sw
+8d: LEA Gv,M
+8e: MOV Sw,Ew
+8f: Grp1A (1A) | POP Ev (d64)
+# 0x90 - 0x9f
+90: NOP | PAUSE (F3) | XCHG r8,rAX
+91: XCHG rCX/r9,rAX
+92: XCHG rDX/r10,rAX
+93: XCHG rBX/r11,rAX
+94: XCHG rSP/r12,rAX
+95: XCHG rBP/r13,rAX
+96: XCHG rSI/r14,rAX
+97: XCHG rDI/r15,rAX
+98: CBW/CWDE/CDQE
+99: CWD/CDQ/CQO
+9a: CALLF Ap (i64)
+9b: FWAIT/WAIT
+9c: PUSHF/D/Q Fv (d64)
+9d: POPF/D/Q Fv (d64)
+9e: SAHF
+9f: LAHF
+# 0xa0 - 0xaf
+a0: MOV AL,Ob
+a1: MOV rAX,Ov
+a2: MOV Ob,AL
+a3: MOV Ov,rAX
+a4: MOVS/B Yb,Xb
+a5: MOVS/W/D/Q Yv,Xv
+a6: CMPS/B Xb,Yb
+a7: CMPS/W/D Xv,Yv
+a8: TEST AL,Ib
+a9: TEST rAX,Iz
+aa: STOS/B Yb,AL
+ab: STOS/W/D/Q Yv,rAX
+ac: LODS/B AL,Xb
+ad: LODS/W/D/Q rAX,Xv
+ae: SCAS/B AL,Yb
+# Note: The May 2011 Intel manual shows Xv for the second parameter of the
+# next instruction but Yv is correct
+af: SCAS/W/D/Q rAX,Yv
+# 0xb0 - 0xbf
+b0: MOV AL/R8L,Ib
+b1: MOV CL/R9L,Ib
+b2: MOV DL/R10L,Ib
+b3: MOV BL/R11L,Ib
+b4: MOV AH/R12L,Ib
+b5: MOV CH/R13L,Ib
+b6: MOV DH/R14L,Ib
+b7: MOV BH/R15L,Ib
+b8: MOV rAX/r8,Iv
+b9: MOV rCX/r9,Iv
+ba: MOV rDX/r10,Iv
+bb: MOV rBX/r11,Iv
+bc: MOV rSP/r12,Iv
+bd: MOV rBP/r13,Iv
+be: MOV rSI/r14,Iv
+bf: MOV rDI/r15,Iv
+# 0xc0 - 0xcf
+c0: Grp2 Eb,Ib (1A)
+c1: Grp2 Ev,Ib (1A)
+c2: RETN Iw (f64)
+c3: RETN
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c6: Grp11A Eb,Ib (1A)
+c7: Grp11B Ev,Iz (1A)
+c8: ENTER Iw,Ib
+c9: LEAVE (d64)
+ca: RETF Iw
+cb: RETF
+cc: INT3
+cd: INT Ib
+ce: INTO (i64)
+cf: IRET/D/Q
+# 0xd0 - 0xdf
+d0: Grp2 Eb,1 (1A)
+d1: Grp2 Ev,1 (1A)
+d2: Grp2 Eb,CL (1A)
+d3: Grp2 Ev,CL (1A)
+d4: AAM Ib (i64)
+d5: AAD Ib (i64)
+d6:
+d7: XLAT/XLATB
+d8: ESC
+d9: ESC
+da: ESC
+db: ESC
+dc: ESC
+dd: ESC
+de: ESC
+df: ESC
+# 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
+e0: LOOPNE/LOOPNZ Jb (f64)
+e1: LOOPE/LOOPZ Jb (f64)
+e2: LOOP Jb (f64)
+e3: JrCXZ Jb (f64)
+e4: IN AL,Ib
+e5: IN eAX,Ib
+e6: OUT Ib,AL
+e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
+e8: CALL Jz (f64)
+e9: JMP-near Jz (f64)
+ea: JMP-far Ap (i64)
+eb: JMP-short Jb (f64)
+ec: IN AL,DX
+ed: IN eAX,DX
+ee: OUT DX,AL
+ef: OUT DX,eAX
+# 0xf0 - 0xff
+f0: LOCK (Prefix)
+f1:
+f2: REPNE (Prefix) | XACQUIRE (Prefix)
+f3: REP/REPE (Prefix) | XRELEASE (Prefix)
+f4: HLT
+f5: CMC
+f6: Grp3_1 Eb (1A)
+f7: Grp3_2 Ev (1A)
+f8: CLC
+f9: STC
+fa: CLI
+fb: STI
+fc: CLD
+fd: STD
+fe: Grp4 (1A)
+ff: Grp5 (1A)
+EndTable
+
+Table: 2-byte opcode (0x0f)
+Referrer: 2-byte escape
+AVXcode: 1
+# 0x0f 0x00-0x0f
+00: Grp6 (1A)
+01: Grp7 (1A)
+02: LAR Gv,Ew
+03: LSL Gv,Ew
+04:
+05: SYSCALL (o64)
+06: CLTS
+07: SYSRET (o64)
+08: INVD
+09: WBINVD
+0a:
+0b: UD2 (1B)
+0c:
+# AMD's prefetch group. Intel supports prefetchw(/1) only.
+0d: GrpP
+0e: FEMMS
+# 3DNow! uses the last imm byte as opcode extension.
+0f: 3DNow! Pq,Qq,Ib
+# 0x0f 0x10-0x1f
+# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands
+# but it actually has operands. And also, vmovss and vmovsd only accept 128bit.
+# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form.
+# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming
+# Reference A.1
+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
+18: Grp16 (1A)
+19:
+1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv
+1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv
+1c:
+1d:
+1e:
+1f: NOP Ev
+# 0x0f 0x20-0x2f
+20: MOV Rd,Cd
+21: MOV Rd,Dd
+22: MOV Cd,Rd
+23: MOV Dd,Rd
+24:
+25:
+26:
+27:
+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
+2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
+2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
+# 0x0f 0x30-0x3f
+30: WRMSR
+31: RDTSC
+32: RDMSR
+33: RDPMC
+34: SYSENTER
+35: SYSEXIT
+36:
+37: GETSEC
+38: escape # 3-byte escape 1
+39:
+3a: escape # 3-byte escape 2
+3b:
+3c:
+3d:
+3e:
+3f:
+# 0x0f 0x40-0x4f
+40: CMOVO Gv,Ev
+41: CMOVNO Gv,Ev
+42: CMOVB/C/NAE Gv,Ev
+43: CMOVAE/NB/NC Gv,Ev
+44: CMOVE/Z Gv,Ev
+45: CMOVNE/NZ Gv,Ev
+46: CMOVBE/NA Gv,Ev
+47: CMOVA/NBE Gv,Ev
+48: CMOVS Gv,Ev
+49: CMOVNS Gv,Ev
+4a: CMOVP/PE Gv,Ev
+4b: CMOVNP/PO Gv,Ev
+4c: CMOVL/NGE Gv,Ev
+4d: CMOVNL/GE Gv,Ev
+4e: CMOVLE/NG Gv,Ev
+4f: CMOVNLE/G Gv,Ev
+# 0x0f 0x50-0x5f
+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
+5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
+# 0x0f 0x60-0x6f
+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
+# 0x0f 0x70-0x7f
+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
+71: Grp12 (1A)
+72: Grp13 (1A)
+73: Grp14 (1A)
+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
+# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
+77: emms | vzeroupper | vzeroall
+78: VMREAD Ey,Gy
+79: VMWRITE Gy,Ey
+7a:
+7b:
+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
+# 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+80: JO Jz (f64)
+81: JNO Jz (f64)
+82: JB/JC/JNAE Jz (f64)
+83: JAE/JNB/JNC Jz (f64)
+84: JE/JZ Jz (f64)
+85: JNE/JNZ Jz (f64)
+86: JBE/JNA Jz (f64)
+87: JA/JNBE Jz (f64)
+88: JS Jz (f64)
+89: JNS Jz (f64)
+8a: JP/JPE Jz (f64)
+8b: JNP/JPO Jz (f64)
+8c: JL/JNGE Jz (f64)
+8d: JNL/JGE Jz (f64)
+8e: JLE/JNG Jz (f64)
+8f: JNLE/JG Jz (f64)
+# 0x0f 0x90-0x9f
+90: SETO Eb
+91: SETNO Eb
+92: SETB/C/NAE Eb
+93: SETAE/NB/NC Eb
+94: SETE/Z Eb
+95: SETNE/NZ Eb
+96: SETBE/NA Eb
+97: SETA/NBE Eb
+98: SETS Eb
+99: SETNS Eb
+9a: SETP/PE Eb
+9b: SETNP/PO Eb
+9c: SETL/NGE Eb
+9d: SETNL/GE Eb
+9e: SETLE/NG Eb
+9f: SETNLE/G Eb
+# 0x0f 0xa0-0xaf
+a0: PUSH FS (d64)
+a1: POP FS (d64)
+a2: CPUID
+a3: BT Ev,Gv
+a4: SHLD Ev,Gv,Ib
+a5: SHLD Ev,Gv,CL
+a6: GrpPDLK
+a7: GrpRNG
+a8: PUSH GS (d64)
+a9: POP GS (d64)
+aa: RSM
+ab: BTS Ev,Gv
+ac: SHRD Ev,Gv,Ib
+ad: SHRD Ev,Gv,CL
+ae: Grp15 (1A),(1C)
+af: IMUL Gv,Ev
+# 0x0f 0xb0-0xbf
+b0: CMPXCHG Eb,Gb
+b1: CMPXCHG Ev,Gv
+b2: LSS Gv,Mp
+b3: BTR Ev,Gv
+b4: LFS Gv,Mp
+b5: LGS Gv,Mp
+b6: MOVZX Gv,Eb
+b7: MOVZX Gv,Ew
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
+b9: Grp10 (1A)
+ba: Grp8 Ev,Ib (1A)
+bb: BTC Ev,Gv
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
+be: MOVSX Gv,Eb
+bf: MOVSX Gv,Ew
+# 0x0f 0xc0-0xcf
+c0: XADD Eb,Gb
+c1: XADD Ev,Gv
+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
+c3: movnti My,Gy
+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
+c7: Grp9 (1A)
+c8: BSWAP RAX/EAX/R8/R8D
+c9: BSWAP RCX/ECX/R9/R9D
+ca: BSWAP RDX/EDX/R10/R10D
+cb: BSWAP RBX/EBX/R11/R11D
+cc: BSWAP RSP/ESP/R12/R12D
+cd: BSWAP RBP/EBP/R13/R13D
+ce: BSWAP RSI/ESI/R14/R14D
+cf: BSWAP RDI/EDI/R15/R15D
+# 0x0f 0xd0-0xdf
+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
+# 0x0f 0xe0-0xef
+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
+e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
+# 0x0f 0xf0-0xff
+f0: vlddqu Vx,Mx (F2)
+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+ff:
+EndTable
+
+Table: 3-byte opcode 1 (0x0f 0x38)
+Referrer: 3-byte escape 1
+AVXcode: 2
+# 0x0f 0x38 0x00-0x0f
+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
+0c: vpermilps Vx,Hx,Wx (66),(v)
+0d: vpermilpd Vx,Hx,Wx (66),(v)
+0e: vtestps Vx,Wx (66),(v)
+0f: vtestpd Vx,Wx (66),(v)
+# 0x0f 0x38 0x10-0x1f
+10: pblendvb Vdq,Wdq (66)
+11:
+12:
+13: vcvtph2ps Vx,Wx,Ib (66),(v)
+14: blendvps Vdq,Wdq (66)
+15: blendvpd Vdq,Wdq (66)
+16: vpermps Vqq,Hqq,Wqq (66),(v)
+17: vptest Vx,Wx (66)
+18: vbroadcastss Vx,Wd (66),(v)
+19: vbroadcastsd Vqq,Wq (66),(v)
+1a: vbroadcastf128 Vqq,Mdq (66),(v)
+1b:
+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
+1f:
+# 0x0f 0x38 0x20-0x2f
+20: vpmovsxbw Vx,Ux/Mq (66),(v1)
+21: vpmovsxbd Vx,Ux/Md (66),(v1)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1)
+24: vpmovsxwq Vx,Ux/Md (66),(v1)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1)
+26:
+27:
+28: vpmuldq Vx,Hx,Wx (66),(v1)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1)
+2a: vmovntdqa Vx,Mx (66),(v1)
+2b: vpackusdw Vx,Hx,Wx (66),(v1)
+2c: vmaskmovps Vx,Hx,Mx (66),(v)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v)
+2e: vmaskmovps Mx,Hx,Vx (66),(v)
+2f: vmaskmovpd Mx,Hx,Vx (66),(v)
+# 0x0f 0x38 0x30-0x3f
+30: vpmovzxbw Vx,Ux/Mq (66),(v1)
+31: vpmovzxbd Vx,Ux/Md (66),(v1)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1)
+34: vpmovzxwq Vx,Ux/Md (66),(v1)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1)
+36: vpermd Vqq,Hqq,Wqq (66),(v)
+37: vpcmpgtq Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1)
+39: vpminsd Vx,Hx,Wx (66),(v1)
+3a: vpminuw Vx,Hx,Wx (66),(v1)
+3b: vpminud Vx,Hx,Wx (66),(v1)
+3c: vpmaxsb Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1)
+3e: vpmaxuw Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1)
+# 0x0f 0x38 0x40-0x8f
+40: vpmulld Vx,Hx,Wx (66),(v1)
+41: vphminposuw Vdq,Wdq (66),(v1)
+42:
+43:
+44:
+45: vpsrlvd/q Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v)
+47: vpsllvd/q Vx,Hx,Wx (66),(v)
+# Skip 0x48-0x57
+58: vpbroadcastd Vx,Wx (66),(v)
+59: vpbroadcastq Vx,Wx (66),(v)
+5a: vbroadcasti128 Vqq,Mdq (66),(v)
+# Skip 0x5b-0x77
+78: vpbroadcastb Vx,Wx (66),(v)
+79: vpbroadcastw Vx,Wx (66),(v)
+# Skip 0x7a-0x7f
+80: INVEPT Gy,Mdq (66)
+81: INVPID Gy,Mdq (66)
+82: INVPCID Gy,Mdq (66)
+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+# 0x0f 0x38 0x90-0xbf (FMA)
+90: vgatherdd/q Vx,Hx,Wx (66),(v)
+91: vgatherqd/q Vx,Hx,Wx (66),(v)
+92: vgatherdps/d Vx,Hx,Wx (66),(v)
+93: vgatherqps/d Vx,Hx,Wx (66),(v)
+94:
+95:
+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
+98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
+9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+# 0x0f 0x38 0xc0-0xff
+db: VAESIMC Vdq,Wdq (66),(v1)
+dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
+de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
+f2: ANDN Gy,By,Ey (v)
+f3: Grp17 (1A)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+EndTable
+
+Table: 3-byte opcode 2 (0x0f 0x3a)
+Referrer: 3-byte escape 2
+AVXcode: 3
+# 0x0f 0x3a 0x00-0xff
+00: vpermq Vqq,Wqq,Ib (66),(v)
+01: vpermpd Vqq,Wqq,Ib (66),(v)
+02: vpblendd Vx,Hx,Wx,Ib (66),(v)
+03:
+04: vpermilps Vx,Wx,Ib (66),(v)
+05: vpermilpd Vx,Wx,Ib (66),(v)
+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
+07:
+08: vroundps Vx,Wx,Ib (66)
+09: vroundpd Vx,Wx,Ib (66)
+0a: vroundss Vss,Wss,Ib (66),(v1)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1)
+0c: vblendps Vx,Hx,Wx,Ib (66)
+0d: vblendpd Vx,Hx,Wx,Ib (66)
+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
+16: vpextrd/q Ey,Vdq,Ib (66),(v1)
+17: vextractps Ed,Vdq,Ib (66),(v1)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
+19: vextractf128 Wdq,Vqq,Ib (66),(v)
+1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
+21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
+39: vextracti128 Wdq,Vqq,Ib (66),(v)
+40: vdpps Vx,Hx,Wx,Ib (66)
+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
+4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
+61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
+63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
+f0: RORX Gy,Ey,Ib (F2),(v)
+EndTable
+
+GrpTable: Grp1
+0: ADD
+1: OR
+2: ADC
+3: SBB
+4: AND
+5: SUB
+6: XOR
+7: CMP
+EndTable
+
+GrpTable: Grp1A
+0: POP
+EndTable
+
+GrpTable: Grp2
+0: ROL
+1: ROR
+2: RCL
+3: RCR
+4: SHL/SAL
+5: SHR
+6:
+7: SAR
+EndTable
+
+GrpTable: Grp3_1
+0: TEST Eb,Ib
+1:
+2: NOT Eb
+3: NEG Eb
+4: MUL AL,Eb
+5: IMUL AL,Eb
+6: DIV AL,Eb
+7: IDIV AL,Eb
+EndTable
+
+GrpTable: Grp3_2
+0: TEST Ev,Iz
+1:
+2: NOT Ev
+3: NEG Ev
+4: MUL rAX,Ev
+5: IMUL rAX,Ev
+6: DIV rAX,Ev
+7: IDIV rAX,Ev
+EndTable
+
+GrpTable: Grp4
+0: INC Eb
+1: DEC Eb
+EndTable
+
+GrpTable: Grp5
+0: INC Ev
+1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+2: CALLN Ev (f64)
+3: CALLF Ep
+4: JMPN Ev (f64)
+5: JMPF Mp
+6: PUSH Ev (d64)
+7:
+EndTable
+
+GrpTable: Grp6
+0: SLDT Rv/Mw
+1: STR Rv/Mw
+2: LLDT Ew
+3: LTR Ew
+4: VERR Ew
+5: VERW Ew
+EndTable
+
+GrpTable: Grp7
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
+3: LIDT Ms
+4: SMSW Mw/Rv
+5:
+6: LMSW Ew
+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
+EndTable
+
+GrpTable: Grp8
+4: BT
+5: BTS
+6: BTR
+7: BTC
+EndTable
+
+GrpTable: Grp9
+1: CMPXCHG8B/16B Mq/Mdq
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
+EndTable
+
+GrpTable: Grp10
+EndTable
+
+# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+GrpTable: Grp11A
+0: MOV Eb,Ib
+7: XABORT Ib (000),(11B)
+EndTable
+
+GrpTable: Grp11B
+0: MOV Eb,Iz
+7: XBEGIN Jz (000),(11B)
+EndTable
+
+GrpTable: Grp12
+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp13
+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp14
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp15
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
+4: XSAVE
+5: XRSTOR | lfence (11B)
+6: XSAVEOPT | mfence (11B)
+7: clflush | sfence (11B)
+EndTable
+
+GrpTable: Grp16
+0: prefetch NTA
+1: prefetch T0
+2: prefetch T1
+3: prefetch T2
+EndTable
+
+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
+# AMD's Prefetch Group
+GrpTable: GrpP
+0: PREFETCH
+1: PREFETCHW
+EndTable
+
+GrpTable: GrpPDLK
+0: MONTMUL
+1: XSHA1
+2: XSHA2
+EndTable
+
+GrpTable: GrpRNG
+0: xstore-rng
+1: xcrypt-ecb
+2: xcrypt-cbc
+4: xcrypt-cfb
+5: xcrypt-ofb
+EndTable
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
new file mode 100644
index 0000000..bb41c20
--- /dev/null
+++ b/tools/perf/util/intel-pt.c
@@ -0,0 +1,1956 @@
+/*
+ * intel_pt.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "../perf.h"
+#include "session.h"
+#include "machine.h"
+#include "tool.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "map.h"
+#include "color.h"
+#include "util.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "symbol.h"
+#include "callchain.h"
+#include "dso.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "tsc.h"
+#include "intel-pt.h"
+
+#include "intel-pt-decoder/intel-pt-log.h"
+#include "intel-pt-decoder/intel-pt-decoder.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+struct intel_pt {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ struct perf_evsel *switch_evsel;
+ struct thread *unknown_thread;
+ bool timeless_decoding;
+ bool sampling_mode;
+ bool snapshot_mode;
+ bool per_cpu_mmaps;
+ bool have_tsc;
+ bool data_queued;
+ bool est_tsc;
+ bool sync_switch;
+ int have_sched_switch;
+ u32 pmu_type;
+ u64 kernel_start;
+ u64 switch_ip;
+ u64 ptss_ip;
+
+ struct perf_tsc_conversion tc;
+ bool cap_user_time_zero;
+
+ struct itrace_synth_opts synth_opts;
+
+ bool sample_instructions;
+ u64 instructions_sample_type;
+ u64 instructions_sample_period;
+ u64 instructions_id;
+
+ bool sample_branches;
+ u32 branches_filter;
+ u64 branches_sample_type;
+ u64 branches_id;
+
+ bool sample_transactions;
+ u64 transactions_sample_type;
+ u64 transactions_id;
+
+ bool synth_needs_swap;
+
+ u64 tsc_bit;
+ u64 mtc_bit;
+ u64 mtc_freq_bits;
+ u32 tsc_ctc_ratio_n;
+ u32 tsc_ctc_ratio_d;
+ u64 cyc_bit;
+ u64 noretcomp_bit;
+ unsigned max_non_turbo_ratio;
+};
+
+enum switch_state {
+ INTEL_PT_SS_NOT_TRACING,
+ INTEL_PT_SS_UNKNOWN,
+ INTEL_PT_SS_TRACING,
+ INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
+ INTEL_PT_SS_EXPECTING_SWITCH_IP,
+};
+
+struct intel_pt_queue {
+ struct intel_pt *pt;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ void *decoder;
+ const struct intel_pt_state *state;
+ struct ip_callchain *chain;
+ union perf_event *event_buf;
+ bool on_heap;
+ bool stop;
+ bool step_through_buffers;
+ bool use_buffer_pid_tid;
+ pid_t pid, tid;
+ int cpu;
+ int switch_state;
+ pid_t next_tid;
+ struct thread *thread;
+ bool exclude_kernel;
+ bool have_sample;
+ u64 time;
+ u64 timestamp;
+ u32 flags;
+ u16 insn_len;
+ u64 last_insn_cnt;
+};
+
+static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct intel_pt_pkt packet;
+ size_t pos = 0;
+ int ret, pkt_len, i;
+ char desc[INTEL_PT_PKT_DESC_MAX];
+ const char *color = PERF_COLOR_BLUE;
+
+ color_fprintf(stdout, color,
+ ". ... Intel Processor Trace data: size %zu bytes\n",
+ len);
+
+ while (len) {
+ ret = intel_pt_get_packet(buf, len, &packet);
+ if (ret > 0)
+ pkt_len = ret;
+ else
+ pkt_len = 1;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < pkt_len; i++)
+ color_fprintf(stdout, color, " %02x", buf[i]);
+ for (; i < 16; i++)
+ color_fprintf(stdout, color, " ");
+ if (ret > 0) {
+ ret = intel_pt_pkt_desc(&packet, desc,
+ INTEL_PT_PKT_DESC_MAX);
+ if (ret > 0)
+ color_fprintf(stdout, color, " %s\n", desc);
+ } else {
+ color_fprintf(stdout, color, " Bad packet!\n");
+ }
+ pos += pkt_len;
+ buf += pkt_len;
+ len -= pkt_len;
+ }
+}
+
+static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ intel_pt_dump(pt, buf, len);
+}
+
+static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
+ struct auxtrace_buffer *b)
+{
+ void *start;
+
+ start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
+ pt->have_tsc);
+ if (!start)
+ return -EINVAL;
+ b->use_size = b->data + b->size - start;
+ b->use_data = start;
+ return 0;
+}
+
+static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
+ struct auxtrace_queue *queue,
+ struct auxtrace_buffer *buffer)
+{
+ if (queue->cpu == -1 && buffer->cpu != -1)
+ ptq->cpu = buffer->cpu;
+
+ ptq->pid = buffer->pid;
+ ptq->tid = buffer->tid;
+
+ intel_pt_log("queue %u cpu %d pid %d tid %d\n",
+ ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+ thread__zput(ptq->thread);
+
+ if (ptq->tid != -1) {
+ if (ptq->pid != -1)
+ ptq->thread = machine__findnew_thread(ptq->pt->machine,
+ ptq->pid,
+ ptq->tid);
+ else
+ ptq->thread = machine__find_thread(ptq->pt->machine, -1,
+ ptq->tid);
+ }
+}
+
+/* This function assumes data is processed sequentially only */
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
+ struct auxtrace_queue *queue;
+
+ if (ptq->stop) {
+ b->len = 0;
+ return 0;
+ }
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ ptq->buffer = buffer;
+
+ if (!buffer->data) {
+ int fd = perf_data_file__fd(ptq->pt->session->file);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ }
+
+ if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
+ intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
+ return -ENOMEM;
+
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+
+ if (buffer->use_data) {
+ b->len = buffer->use_size;
+ b->buf = buffer->use_data;
+ } else {
+ b->len = buffer->size;
+ b->buf = buffer->data;
+ }
+ b->ref_timestamp = buffer->reference;
+
+ if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
+ !buffer->consecutive)) {
+ b->consecutive = false;
+ b->trace_nr = buffer->buffer_nr + 1;
+ } else {
+ b->consecutive = true;
+ }
+
+ if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
+ ptq->tid != buffer->tid))
+ intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
+
+ if (ptq->step_through_buffers)
+ ptq->stop = true;
+
+ if (!b->len)
+ return intel_pt_get_trace(b, data);
+
+ return 0;
+}
+
+struct intel_pt_cache_entry {
+ struct auxtrace_cache_entry entry;
+ u64 insn_cnt;
+ u64 byte_cnt;
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
+ int length;
+ int32_t rel;
+};
+
+static int intel_pt_config_div(const char *var, const char *value, void *data)
+{
+ int *d = data;
+ long val;
+
+ if (!strcmp(var, "intel-pt.cache-divisor")) {
+ val = strtol(value, NULL, 0);
+ if (val > 0 && val <= INT_MAX)
+ *d = val;
+ }
+
+ return 0;
+}
+
+static int intel_pt_cache_divisor(void)
+{
+ static int d;
+
+ if (d)
+ return d;
+
+ perf_config(intel_pt_config_div, &d);
+
+ if (!d)
+ d = 64;
+
+ return d;
+}
+
+static unsigned int intel_pt_cache_size(struct dso *dso,
+ struct machine *machine)
+{
+ off_t size;
+
+ size = dso__data_size(dso, machine);
+ size /= intel_pt_cache_divisor();
+ if (size < 1000)
+ return 10;
+ if (size > (1 << 21))
+ return 21;
+ return 32 - __builtin_clz(size);
+}
+
+static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
+ struct machine *machine)
+{
+ struct auxtrace_cache *c;
+ unsigned int bits;
+
+ if (dso->auxtrace_cache)
+ return dso->auxtrace_cache;
+
+ bits = intel_pt_cache_size(dso, machine);
+
+ /* Ignoring cache creation failure */
+ c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
+
+ dso->auxtrace_cache = c;
+
+ return c;
+}
+
+static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
+ u64 offset, u64 insn_cnt, u64 byte_cnt,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+ struct intel_pt_cache_entry *e;
+ int err;
+
+ if (!c)
+ return -ENOMEM;
+
+ e = auxtrace_cache__alloc_entry(c);
+ if (!e)
+ return -ENOMEM;
+
+ e->insn_cnt = insn_cnt;
+ e->byte_cnt = byte_cnt;
+ e->op = intel_pt_insn->op;
+ e->branch = intel_pt_insn->branch;
+ e->length = intel_pt_insn->length;
+ e->rel = intel_pt_insn->rel;
+
+ err = auxtrace_cache__add(c, offset, &e->entry);
+ if (err)
+ auxtrace_cache__free_entry(c, e);
+
+ return err;
+}
+
+static struct intel_pt_cache_entry *
+intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+ if (!c)
+ return NULL;
+
+ return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
+}
+
+static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip,
+ uint64_t to_ip, uint64_t max_insn_cnt,
+ void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct machine *machine = ptq->pt->machine;
+ struct thread *thread;
+ struct addr_location al;
+ unsigned char buf[1024];
+ size_t bufsz;
+ ssize_t len;
+ int x86_64;
+ u8 cpumode;
+ u64 offset, start_offset, start_ip;
+ u64 insn_cnt = 0;
+ bool one_map = true;
+
+ if (to_ip && *ip == to_ip)
+ goto out_no_cache;
+
+ bufsz = intel_pt_insn_max_size();
+
+ if (*ip >= ptq->pt->kernel_start)
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ else
+ cpumode = PERF_RECORD_MISC_USER;
+
+ thread = ptq->thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_KERNEL)
+ return -EINVAL;
+ thread = ptq->pt->unknown_thread;
+ }
+
+ while (1) {
+ thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
+ if (!al.map || !al.map->dso)
+ return -EINVAL;
+
+ if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
+ dso__data_status_seen(al.map->dso,
+ DSO_DATA_STATUS_SEEN_ITRACE))
+ return -ENOENT;
+
+ offset = al.map->map_ip(al.map, *ip);
+
+ if (!to_ip && one_map) {
+ struct intel_pt_cache_entry *e;
+
+ e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+ if (e &&
+ (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
+ *insn_cnt_ptr = e->insn_cnt;
+ *ip += e->byte_cnt;
+ intel_pt_insn->op = e->op;
+ intel_pt_insn->branch = e->branch;
+ intel_pt_insn->length = e->length;
+ intel_pt_insn->rel = e->rel;
+ intel_pt_log_insn_no_data(intel_pt_insn, *ip);
+ return 0;
+ }
+ }
+
+ start_offset = offset;
+ start_ip = *ip;
+
+ /* Load maps to ensure dso->is_64_bit has been updated */
+ map__load(al.map, machine->symbol_filter);
+
+ x86_64 = al.map->dso->is_64_bit;
+
+ while (1) {
+ len = dso__data_read_offset(al.map->dso, machine,
+ offset, buf, bufsz);
+ if (len <= 0)
+ return -EINVAL;
+
+ if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
+ return -EINVAL;
+
+ intel_pt_log_insn(intel_pt_insn, *ip);
+
+ insn_cnt += 1;
+
+ if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
+ goto out;
+
+ if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+ goto out_no_cache;
+
+ *ip += intel_pt_insn->length;
+
+ if (to_ip && *ip == to_ip)
+ goto out_no_cache;
+
+ if (*ip >= al.map->end)
+ break;
+
+ offset += intel_pt_insn->length;
+ }
+ one_map = false;
+ }
+out:
+ *insn_cnt_ptr = insn_cnt;
+
+ if (!one_map)
+ goto out_no_cache;
+
+ /*
+ * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
+ * entries.
+ */
+ if (to_ip) {
+ struct intel_pt_cache_entry *e;
+
+ e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
+ if (e)
+ return 0;
+ }
+
+ /* Ignore cache errors */
+ intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
+ *ip - start_ip, intel_pt_insn);
+
+ return 0;
+
+out_no_cache:
+ *insn_cnt_ptr = insn_cnt;
+ return 0;
+}
+
+static bool intel_pt_get_config(struct intel_pt *pt,
+ struct perf_event_attr *attr, u64 *config)
+{
+ if (attr->type == pt->pmu_type) {
+ if (config)
+ *config = attr->config;
+ return true;
+ }
+
+ return false;
+}
+
+static bool intel_pt_exclude_kernel(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
+ !evsel->attr.exclude_kernel)
+ return false;
+ }
+ return true;
+}
+
+static bool intel_pt_return_compression(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ u64 config;
+
+ if (!pt->noretcomp_bit)
+ return true;
+
+ evlist__for_each(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config) &&
+ (config & pt->noretcomp_bit))
+ return false;
+ }
+ return true;
+}
+
+static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ unsigned int shift;
+ u64 config;
+
+ if (!pt->mtc_freq_bits)
+ return 0;
+
+ for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
+ config >>= 1;
+
+ evlist__for_each(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config))
+ return (config & pt->mtc_freq_bits) >> shift;
+ }
+ return 0;
+}
+
+static bool intel_pt_timeless_decoding(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ bool timeless_decoding = true;
+ u64 config;
+
+ if (!pt->tsc_bit || !pt->cap_user_time_zero)
+ return true;
+
+ evlist__for_each(pt->session->evlist, evsel) {
+ if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
+ return true;
+ if (intel_pt_get_config(pt, &evsel->attr, &config)) {
+ if (config & pt->tsc_bit)
+ timeless_decoding = false;
+ else
+ return true;
+ }
+ }
+ return timeless_decoding;
+}
+
+static bool intel_pt_tracing_kernel(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
+ !evsel->attr.exclude_kernel)
+ return true;
+ }
+ return false;
+}
+
+static bool intel_pt_have_tsc(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ bool have_tsc = false;
+ u64 config;
+
+ if (!pt->tsc_bit)
+ return false;
+
+ evlist__for_each(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config)) {
+ if (config & pt->tsc_bit)
+ have_tsc = true;
+ else
+ return false;
+ }
+ }
+ return have_tsc;
+}
+
+static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
+{
+ u64 quot, rem;
+
+ quot = ns / pt->tc.time_mult;
+ rem = ns % pt->tc.time_mult;
+ return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
+ pt->tc.time_mult;
+}
+
+static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
+ unsigned int queue_nr)
+{
+ struct intel_pt_params params = { .get_trace = 0, };
+ struct intel_pt_queue *ptq;
+
+ ptq = zalloc(sizeof(struct intel_pt_queue));
+ if (!ptq)
+ return NULL;
+
+ if (pt->synth_opts.callchain) {
+ size_t sz = sizeof(struct ip_callchain);
+
+ sz += pt->synth_opts.callchain_sz * sizeof(u64);
+ ptq->chain = zalloc(sz);
+ if (!ptq->chain)
+ goto out_free;
+ }
+
+ ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!ptq->event_buf)
+ goto out_free;
+
+ ptq->pt = pt;
+ ptq->queue_nr = queue_nr;
+ ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
+ ptq->pid = -1;
+ ptq->tid = -1;
+ ptq->cpu = -1;
+ ptq->next_tid = -1;
+
+ params.get_trace = intel_pt_get_trace;
+ params.walk_insn = intel_pt_walk_next_insn;
+ params.data = ptq;
+ params.return_compression = intel_pt_return_compression(pt);
+ params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
+ params.mtc_period = intel_pt_mtc_period(pt);
+ params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
+ params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+
+ if (pt->synth_opts.instructions) {
+ if (pt->synth_opts.period) {
+ switch (pt->synth_opts.period_type) {
+ case PERF_ITRACE_PERIOD_INSTRUCTIONS:
+ params.period_type =
+ INTEL_PT_PERIOD_INSTRUCTIONS;
+ params.period = pt->synth_opts.period;
+ break;
+ case PERF_ITRACE_PERIOD_TICKS:
+ params.period_type = INTEL_PT_PERIOD_TICKS;
+ params.period = pt->synth_opts.period;
+ break;
+ case PERF_ITRACE_PERIOD_NANOSECS:
+ params.period_type = INTEL_PT_PERIOD_TICKS;
+ params.period = intel_pt_ns_to_ticks(pt,
+ pt->synth_opts.period);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!params.period) {
+ params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
+ params.period = 1000;
+ }
+ }
+
+ ptq->decoder = intel_pt_decoder_new(&params);
+ if (!ptq->decoder)
+ goto out_free;
+
+ return ptq;
+
+out_free:
+ zfree(&ptq->event_buf);
+ zfree(&ptq->chain);
+ free(ptq);
+ return NULL;
+}
+
+static void intel_pt_free_queue(void *priv)
+{
+ struct intel_pt_queue *ptq = priv;
+
+ if (!ptq)
+ return;
+ thread__zput(ptq->thread);
+ intel_pt_decoder_free(ptq->decoder);
+ zfree(&ptq->event_buf);
+ zfree(&ptq->chain);
+ free(ptq);
+}
+
+static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
+ struct auxtrace_queue *queue)
+{
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (queue->tid == -1 || pt->have_sched_switch) {
+ ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
+ thread__zput(ptq->thread);
+ }
+
+ if (!ptq->thread && ptq->tid != -1)
+ ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
+
+ if (ptq->thread) {
+ ptq->pid = ptq->thread->pid_;
+ if (queue->cpu == -1)
+ ptq->cpu = ptq->thread->cpu;
+ }
+}
+
+static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
+{
+ if (ptq->state->flags & INTEL_PT_ABORT_TX) {
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
+ } else if (ptq->state->flags & INTEL_PT_ASYNC) {
+ if (ptq->state->to_ip)
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ else
+ ptq->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ ptq->insn_len = 0;
+ } else {
+ if (ptq->state->from_ip)
+ ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
+ else
+ ptq->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+ if (ptq->state->flags & INTEL_PT_IN_TX)
+ ptq->flags |= PERF_IP_FLAG_IN_TX;
+ ptq->insn_len = ptq->state->insn_len;
+ }
+}
+
+static int intel_pt_setup_queue(struct intel_pt *pt,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (!ptq) {
+ ptq = intel_pt_alloc_queue(pt, queue_nr);
+ if (!ptq)
+ return -ENOMEM;
+ queue->priv = ptq;
+
+ if (queue->cpu != -1)
+ ptq->cpu = queue->cpu;
+ ptq->tid = queue->tid;
+
+ if (pt->sampling_mode) {
+ if (pt->timeless_decoding)
+ ptq->step_through_buffers = true;
+ if (pt->timeless_decoding || !pt->have_sched_switch)
+ ptq->use_buffer_pid_tid = true;
+ }
+ }
+
+ if (!ptq->on_heap &&
+ (!pt->sync_switch ||
+ ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
+ const struct intel_pt_state *state;
+ int ret;
+
+ if (pt->timeless_decoding)
+ return 0;
+
+ intel_pt_log("queue %u getting timestamp\n", queue_nr);
+ intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+ queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+ while (1) {
+ state = intel_pt_decode(ptq->decoder);
+ if (state->err) {
+ if (state->err == INTEL_PT_ERR_NODATA) {
+ intel_pt_log("queue %u has no timestamp\n",
+ queue_nr);
+ return 0;
+ }
+ continue;
+ }
+ if (state->timestamp)
+ break;
+ }
+
+ ptq->timestamp = state->timestamp;
+ intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
+ queue_nr, ptq->timestamp);
+ ptq->state = state;
+ ptq->have_sample = true;
+ intel_pt_sample_flags(ptq);
+ ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
+ if (ret)
+ return ret;
+ ptq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int intel_pt_setup_queues(struct intel_pt *pt)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int intel_pt_inject_event(union perf_event *event,
+ struct perf_sample *sample, u64 type,
+ bool swapped)
+{
+ event->header.size = perf_event__sample_event_size(sample, type, 0);
+ return perf_event__synthesize_sample(event, type, 0, sample, swapped);
+}
+
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+{
+ int ret;
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ if (!pt->timeless_decoding)
+ sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+ sample.ip = ptq->state->from_ip;
+ sample.pid = ptq->pid;
+ sample.tid = ptq->tid;
+ sample.addr = ptq->state->to_ip;
+ sample.id = ptq->pt->branches_id;
+ sample.stream_id = ptq->pt->branches_id;
+ sample.period = 1;
+ sample.cpu = ptq->cpu;
+ sample.flags = ptq->flags;
+ sample.insn_len = ptq->insn_len;
+
+ if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
+ return 0;
+
+ if (pt->synth_opts.inject) {
+ ret = intel_pt_inject_event(event, &sample,
+ pt->branches_sample_type,
+ pt->synth_needs_swap);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(pt->session, event, &sample);
+ if (ret)
+ pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
+{
+ int ret;
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ if (!pt->timeless_decoding)
+ sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+ sample.ip = ptq->state->from_ip;
+ sample.pid = ptq->pid;
+ sample.tid = ptq->tid;
+ sample.addr = ptq->state->to_ip;
+ sample.id = ptq->pt->instructions_id;
+ sample.stream_id = ptq->pt->instructions_id;
+ sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+ sample.cpu = ptq->cpu;
+ sample.flags = ptq->flags;
+ sample.insn_len = ptq->insn_len;
+
+ ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
+
+ if (pt->synth_opts.callchain) {
+ thread_stack__sample(ptq->thread, ptq->chain,
+ pt->synth_opts.callchain_sz, sample.ip);
+ sample.callchain = ptq->chain;
+ }
+
+ if (pt->synth_opts.inject) {
+ ret = intel_pt_inject_event(event, &sample,
+ pt->instructions_sample_type,
+ pt->synth_needs_swap);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(pt->session, event, &sample);
+ if (ret)
+ pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+{
+ int ret;
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ if (!pt->timeless_decoding)
+ sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+ sample.ip = ptq->state->from_ip;
+ sample.pid = ptq->pid;
+ sample.tid = ptq->tid;
+ sample.addr = ptq->state->to_ip;
+ sample.id = ptq->pt->transactions_id;
+ sample.stream_id = ptq->pt->transactions_id;
+ sample.period = 1;
+ sample.cpu = ptq->cpu;
+ sample.flags = ptq->flags;
+ sample.insn_len = ptq->insn_len;
+
+ if (pt->synth_opts.callchain) {
+ thread_stack__sample(ptq->thread, ptq->chain,
+ pt->synth_opts.callchain_sz, sample.ip);
+ sample.callchain = ptq->chain;
+ }
+
+ if (pt->synth_opts.inject) {
+ ret = intel_pt_inject_event(event, &sample,
+ pt->transactions_sample_type,
+ pt->synth_needs_swap);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(pt->session, event, &sample);
+ if (ret)
+ pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
+ pid_t pid, pid_t tid, u64 ip)
+{
+ union perf_event event;
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+ int err;
+
+ intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
+
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ code, cpu, pid, tid, ip, msg);
+
+ err = perf_session__deliver_synth_event(pt->session, &event, NULL);
+ if (err)
+ pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
+{
+ struct auxtrace_queue *queue;
+ pid_t tid = ptq->next_tid;
+ int err;
+
+ if (tid == -1)
+ return 0;
+
+ intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
+
+ err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
+
+ queue = &pt->queues.queue_array[ptq->queue_nr];
+ intel_pt_set_pid_tid_cpu(pt, queue);
+
+ ptq->next_tid = -1;
+
+ return err;
+}
+
+static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
+{
+ struct intel_pt *pt = ptq->pt;
+
+ return ip == pt->switch_ip &&
+ (ptq->flags & PERF_IP_FLAG_BRANCH) &&
+ !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
+}
+
+static int intel_pt_sample(struct intel_pt_queue *ptq)
+{
+ const struct intel_pt_state *state = ptq->state;
+ struct intel_pt *pt = ptq->pt;
+ int err;
+
+ if (!ptq->have_sample)
+ return 0;
+
+ ptq->have_sample = false;
+
+ if (pt->sample_instructions &&
+ (state->type & INTEL_PT_INSTRUCTION)) {
+ err = intel_pt_synth_instruction_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (pt->sample_transactions &&
+ (state->type & INTEL_PT_TRANSACTION)) {
+ err = intel_pt_synth_transaction_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (!(state->type & INTEL_PT_BRANCH))
+ return 0;
+
+ if (pt->synth_opts.callchain)
+ thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+ state->to_ip, ptq->insn_len,
+ state->trace_nr);
+ else
+ thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+
+ if (pt->sample_branches) {
+ err = intel_pt_synth_branch_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (!pt->sync_switch)
+ return 0;
+
+ if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ err = intel_pt_next_tid(pt, ptq);
+ if (err)
+ return err;
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ break;
+ default:
+ ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
+ return 1;
+ }
+ } else if (!state->to_ip) {
+ ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+ } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
+ ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+ } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+ state->to_ip == pt->ptss_ip &&
+ (ptq->flags & PERF_IP_FLAG_CALL)) {
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ }
+
+ return 0;
+}
+
+static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
+{
+ struct map *map;
+ struct symbol *sym, *start;
+ u64 ip, switch_ip = 0;
+
+ if (ptss_ip)
+ *ptss_ip = 0;
+
+ map = machine__kernel_map(machine, MAP__FUNCTION);
+ if (!map)
+ return 0;
+
+ if (map__load(map, machine->symbol_filter))
+ return 0;
+
+ start = dso__first_symbol(map->dso, MAP__FUNCTION);
+
+ for (sym = start; sym; sym = dso__next_symbol(sym)) {
+ if (sym->binding == STB_GLOBAL &&
+ !strcmp(sym->name, "__switch_to")) {
+ ip = map->unmap_ip(map, sym->start);
+ if (ip >= map->start && ip < map->end) {
+ switch_ip = ip;
+ break;
+ }
+ }
+ }
+
+ if (!switch_ip || !ptss_ip)
+ return 0;
+
+ for (sym = start; sym; sym = dso__next_symbol(sym)) {
+ if (!strcmp(sym->name, "perf_trace_sched_switch")) {
+ ip = map->unmap_ip(map, sym->start);
+ if (ip >= map->start && ip < map->end) {
+ *ptss_ip = ip;
+ break;
+ }
+ }
+ }
+
+ return switch_ip;
+}
+
+static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
+{
+ const struct intel_pt_state *state = ptq->state;
+ struct intel_pt *pt = ptq->pt;
+ int err;
+
+ if (!pt->kernel_start) {
+ pt->kernel_start = machine__kernel_start(pt->machine);
+ if (pt->per_cpu_mmaps && pt->have_sched_switch &&
+ !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
+ !pt->sampling_mode) {
+ pt->switch_ip = intel_pt_switch_ip(pt->machine,
+ &pt->ptss_ip);
+ if (pt->switch_ip) {
+ intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
+ pt->switch_ip, pt->ptss_ip);
+ pt->sync_switch = true;
+ }
+ }
+ }
+
+ intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+ ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+ while (1) {
+ err = intel_pt_sample(ptq);
+ if (err)
+ return err;
+
+ state = intel_pt_decode(ptq->decoder);
+ if (state->err) {
+ if (state->err == INTEL_PT_ERR_NODATA)
+ return 1;
+ if (pt->sync_switch &&
+ state->from_ip >= pt->kernel_start) {
+ pt->sync_switch = false;
+ intel_pt_next_tid(pt, ptq);
+ }
+ if (pt->synth_opts.errors) {
+ err = intel_pt_synth_error(pt, state->err,
+ ptq->cpu, ptq->pid,
+ ptq->tid,
+ state->from_ip);
+ if (err)
+ return err;
+ }
+ continue;
+ }
+
+ ptq->state = state;
+ ptq->have_sample = true;
+ intel_pt_sample_flags(ptq);
+
+ /* Use estimated TSC upon return to user space */
+ if (pt->est_tsc &&
+ (state->from_ip >= pt->kernel_start || !state->from_ip) &&
+ state->to_ip && state->to_ip < pt->kernel_start) {
+ intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+ state->timestamp, state->est_timestamp);
+ ptq->timestamp = state->est_timestamp;
+ /* Use estimated TSC in unknown switch state */
+ } else if (pt->sync_switch &&
+ ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+ intel_pt_is_switch_ip(ptq, state->to_ip) &&
+ ptq->next_tid == -1) {
+ intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+ state->timestamp, state->est_timestamp);
+ ptq->timestamp = state->est_timestamp;
+ } else if (state->timestamp > ptq->timestamp) {
+ ptq->timestamp = state->timestamp;
+ }
+
+ if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
+ *timestamp = ptq->timestamp;
+ return 0;
+ }
+ }
+ return 0;
+}
+
+static inline int intel_pt_update_queues(struct intel_pt *pt)
+{
+ if (pt->queues.new_data) {
+ pt->queues.new_data = false;
+ return intel_pt_setup_queues(pt);
+ }
+ return 0;
+}
+
+static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct intel_pt_queue *ptq;
+
+ if (!pt->heap.heap_cnt)
+ return 0;
+
+ if (pt->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = pt->heap.heap_array[0].queue_nr;
+ queue = &pt->queues.queue_array[queue_nr];
+ ptq = queue->priv;
+
+ intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
+ queue_nr, pt->heap.heap_array[0].ordinal,
+ timestamp);
+
+ auxtrace_heap__pop(&pt->heap);
+
+ if (pt->heap.heap_cnt) {
+ ts = pt->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ intel_pt_set_pid_tid_cpu(pt, queue);
+
+ ret = intel_pt_run_decoder(ptq, &ts);
+
+ if (ret < 0) {
+ auxtrace_heap__add(&pt->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ ptq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
+ u64 time_)
+{
+ struct auxtrace_queues *queues = &pt->queues;
+ unsigned int i;
+ u64 ts = 0;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq && (tid == -1 || ptq->tid == tid)) {
+ ptq->time = time_;
+ intel_pt_set_pid_tid_cpu(pt, queue);
+ intel_pt_run_decoder(ptq, &ts);
+ }
+ }
+ return 0;
+}
+
+static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
+{
+ return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
+ sample->pid, sample->tid, 0);
+}
+
+static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
+{
+ unsigned i, j;
+
+ if (cpu < 0 || !pt->queues.nr_queues)
+ return NULL;
+
+ if ((unsigned)cpu >= pt->queues.nr_queues)
+ i = pt->queues.nr_queues - 1;
+ else
+ i = cpu;
+
+ if (pt->queues.queue_array[i].cpu == cpu)
+ return pt->queues.queue_array[i].priv;
+
+ for (j = 0; i > 0; j++) {
+ if (pt->queues.queue_array[--i].cpu == cpu)
+ return pt->queues.queue_array[i].priv;
+ }
+
+ for (; j < pt->queues.nr_queues; j++) {
+ if (pt->queues.queue_array[j].cpu == cpu)
+ return pt->queues.queue_array[j].priv;
+ }
+
+ return NULL;
+}
+
+static int intel_pt_process_switch(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct intel_pt_queue *ptq;
+ struct perf_evsel *evsel;
+ pid_t tid;
+ int cpu, err;
+
+ evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
+ if (evsel != pt->switch_evsel)
+ return 0;
+
+ tid = perf_evsel__intval(evsel, sample, "next_pid");
+ cpu = sample->cpu;
+
+ intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ cpu, tid, sample->time, perf_time_to_tsc(sample->time,
+ &pt->tc));
+
+ if (!pt->sync_switch)
+ goto out;
+
+ ptq = intel_pt_cpu_to_ptq(pt, cpu);
+ if (!ptq)
+ goto out;
+
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_NOT_TRACING:
+ ptq->next_tid = -1;
+ break;
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_TRACING:
+ ptq->next_tid = tid;
+ ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
+ return 0;
+ case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+ if (!ptq->on_heap) {
+ ptq->timestamp = perf_time_to_tsc(sample->time,
+ &pt->tc);
+ err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
+ ptq->timestamp);
+ if (err)
+ return err;
+ ptq->on_heap = true;
+ }
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ break;
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ ptq->next_tid = tid;
+ intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
+ break;
+ default:
+ break;
+ }
+out:
+ return machine__set_current_tid(pt->machine, cpu, -1, tid);
+}
+
+static int intel_pt_process_itrace_start(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (!pt->per_cpu_mmaps)
+ return 0;
+
+ intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ sample->cpu, event->itrace_start.pid,
+ event->itrace_start.tid, sample->time,
+ perf_time_to_tsc(sample->time, &pt->tc));
+
+ return machine__set_current_tid(pt->machine, sample->cpu,
+ event->itrace_start.pid,
+ event->itrace_start.tid);
+}
+
+static int intel_pt_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ u64 timestamp;
+ int err = 0;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("Intel Processor Trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && sample->time != (u64)-1)
+ timestamp = perf_time_to_tsc(sample->time, &pt->tc);
+ else
+ timestamp = 0;
+
+ if (timestamp || pt->timeless_decoding) {
+ err = intel_pt_update_queues(pt);
+ if (err)
+ return err;
+ }
+
+ if (pt->timeless_decoding) {
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = intel_pt_process_timeless_queues(pt,
+ event->comm.tid,
+ sample->time);
+ }
+ } else if (timestamp) {
+ err = intel_pt_process_queues(pt, timestamp);
+ }
+ if (err)
+ return err;
+
+ if (event->header.type == PERF_RECORD_AUX &&
+ (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+ pt->synth_opts.errors) {
+ err = intel_pt_lost(pt, sample);
+ if (err)
+ return err;
+ }
+
+ if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
+ err = intel_pt_process_switch(pt, sample);
+ else if (event->header.type == PERF_RECORD_ITRACE_START)
+ err = intel_pt_process_itrace_start(pt, event, sample);
+
+ intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
+ perf_event__name(event->header.type), event->header.type,
+ sample->cpu, sample->time, timestamp);
+
+ return err;
+}
+
+static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ int ret;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = intel_pt_update_queues(pt);
+ if (ret < 0)
+ return ret;
+
+ if (pt->timeless_decoding)
+ return intel_pt_process_timeless_queues(pt, -1,
+ MAX_TIMESTAMP - 1);
+
+ return intel_pt_process_queues(pt, MAX_TIMESTAMP);
+}
+
+static void intel_pt_free_events(struct perf_session *session)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ struct auxtrace_queues *queues = &pt->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ intel_pt_free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+ intel_pt_log_disable();
+ auxtrace_queues__free(queues);
+}
+
+static void intel_pt_free(struct perf_session *session)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ auxtrace_heap__free(&pt->heap);
+ intel_pt_free_events(session);
+ session->auxtrace = NULL;
+ thread__delete(pt->unknown_thread);
+ free(pt);
+}
+
+static int intel_pt_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ if (pt->sampling_mode)
+ return 0;
+
+ if (!pt->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data_file__fd(session->file);
+ int err;
+
+ if (perf_data_file__is_pipe(session->file)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&pt->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here now we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ intel_pt_dump_event(pt, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ }
+
+ return 0;
+}
+
+struct intel_pt_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int intel_pt_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct intel_pt_synth *intel_pt_synth =
+ container_of(tool, struct intel_pt_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(intel_pt_synth->session, event,
+ NULL);
+}
+
+static int intel_pt_synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct intel_pt_synth intel_pt_synth;
+
+ memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
+ intel_pt_synth.session = session;
+
+ return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
+ &id, intel_pt_event_synth);
+}
+
+static int intel_pt_synth_events(struct intel_pt *pt,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type == pt->pmu_type && evsel->ids) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("There are no selected events with Intel Processor Trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ if (pt->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+ if (!pt->per_cpu_mmaps)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+ attr.exclude_user = evsel->attr.exclude_user;
+ attr.exclude_kernel = evsel->attr.exclude_kernel;
+ attr.exclude_hv = evsel->attr.exclude_hv;
+ attr.exclude_host = evsel->attr.exclude_host;
+ attr.exclude_guest = evsel->attr.exclude_guest;
+ attr.sample_id_all = evsel->attr.sample_id_all;
+ attr.read_format = evsel->attr.read_format;
+
+ id = evsel->id[0] + 1000000000;
+ if (!id)
+ id = 1;
+
+ if (pt->synth_opts.instructions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+ attr.sample_period =
+ intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+ else
+ attr.sample_period = pt->synth_opts.period;
+ pt->instructions_sample_period = attr.sample_period;
+ if (pt->synth_opts.callchain)
+ attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+ pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ id, (u64)attr.sample_type);
+ err = intel_pt_synth_event(session, &attr, id);
+ if (err) {
+ pr_err("%s: failed to synthesize 'instructions' event type\n",
+ __func__);
+ return err;
+ }
+ pt->sample_instructions = true;
+ pt->instructions_sample_type = attr.sample_type;
+ pt->instructions_id = id;
+ id += 1;
+ }
+
+ if (pt->synth_opts.transactions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = 1;
+ if (pt->synth_opts.callchain)
+ attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+ pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ id, (u64)attr.sample_type);
+ err = intel_pt_synth_event(session, &attr, id);
+ if (err) {
+ pr_err("%s: failed to synthesize 'transactions' event type\n",
+ __func__);
+ return err;
+ }
+ pt->sample_transactions = true;
+ pt->transactions_id = id;
+ id += 1;
+ evlist__for_each(evlist, evsel) {
+ if (evsel->id && evsel->id[0] == pt->transactions_id) {
+ if (evsel->name)
+ zfree(&evsel->name);
+ evsel->name = strdup("transactions");
+ break;
+ }
+ }
+ }
+
+ if (pt->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
+ pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ id, (u64)attr.sample_type);
+ err = intel_pt_synth_event(session, &attr, id);
+ if (err) {
+ pr_err("%s: failed to synthesize 'branches' event type\n",
+ __func__);
+ return err;
+ }
+ pt->sample_branches = true;
+ pt->branches_sample_type = attr.sample_type;
+ pt->branches_id = id;
+ }
+
+ pt->synth_needs_swap = evsel->needs_swap;
+
+ return 0;
+}
+
+static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_reverse(evlist, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "sched:sched_switch"))
+ return evsel;
+ }
+
+ return NULL;
+}
+
+static const char * const intel_pt_info_fmts[] = {
+ [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
+ [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
+ [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
+ [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
+ [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
+ [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
+ [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
+ [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
+ [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+ [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
+ [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
+ [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
+ [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
+ [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
+};
+
+static void intel_pt_print_info(u64 *arr, int start, int finish)
+{
+ int i;
+
+ if (!dump_trace)
+ return;
+
+ for (i = start; i <= finish; i++)
+ fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
+}
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
+ struct intel_pt *pt;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+ min_sz)
+ return -EINVAL;
+
+ pt = zalloc(sizeof(struct intel_pt));
+ if (!pt)
+ return -ENOMEM;
+
+ err = auxtrace_queues__init(&pt->queues);
+ if (err)
+ goto err_free;
+
+ intel_pt_log_set_name(INTEL_PT_PMU_NAME);
+
+ pt->session = session;
+ pt->machine = &session->machines.host; /* No kvm support */
+ pt->auxtrace_type = auxtrace_info->type;
+ pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
+ pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
+ pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
+ pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
+ pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
+ pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
+ pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
+ pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
+ pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
+ pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
+ intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
+ INTEL_PT_PER_CPU_MMAPS);
+
+ if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
+ (sizeof(u64) * INTEL_PT_CYC_BIT)) {
+ pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
+ pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
+ pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
+ pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
+ pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
+ intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
+ INTEL_PT_CYC_BIT);
+ }
+
+ pt->timeless_decoding = intel_pt_timeless_decoding(pt);
+ pt->have_tsc = intel_pt_have_tsc(pt);
+ pt->sampling_mode = false;
+ pt->est_tsc = !pt->timeless_decoding;
+
+ pt->unknown_thread = thread__new(999999999, 999999999);
+ if (!pt->unknown_thread) {
+ err = -ENOMEM;
+ goto err_free_queues;
+ }
+ err = thread__set_comm(pt->unknown_thread, "unknown", 0);
+ if (err)
+ goto err_delete_thread;
+ if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
+ err = -ENOMEM;
+ goto err_delete_thread;
+ }
+
+ pt->auxtrace.process_event = intel_pt_process_event;
+ pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
+ pt->auxtrace.flush_events = intel_pt_flush;
+ pt->auxtrace.free_events = intel_pt_free_events;
+ pt->auxtrace.free = intel_pt_free;
+ session->auxtrace = &pt->auxtrace;
+
+ if (dump_trace)
+ return 0;
+
+ if (pt->have_sched_switch == 1) {
+ pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
+ if (!pt->switch_evsel) {
+ pr_err("%s: missing sched_switch event\n", __func__);
+ goto err_delete_thread;
+ }
+ }
+
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
+ pt->synth_opts = *session->itrace_synth_opts;
+ } else {
+ itrace_synth_opts__set_default(&pt->synth_opts);
+ if (use_browser != -1) {
+ pt->synth_opts.branches = false;
+ pt->synth_opts.callchain = true;
+ }
+ }
+
+ if (pt->synth_opts.log)
+ intel_pt_log_enable();
+
+ /* Maximum non-turbo ratio is TSC freq / 100 MHz */
+ if (pt->tc.time_mult) {
+ u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
+
+ pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
+ intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
+ intel_pt_log("Maximum non-turbo ratio %u\n",
+ pt->max_non_turbo_ratio);
+ }
+
+ if (pt->synth_opts.calls)
+ pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ if (pt->synth_opts.returns)
+ pt->branches_filter |= PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_TRACE_BEGIN;
+
+ if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
+ symbol_conf.use_callchain = true;
+ if (callchain_register_param(&callchain_param) < 0) {
+ symbol_conf.use_callchain = false;
+ pt->synth_opts.callchain = false;
+ }
+ }
+
+ err = intel_pt_synth_events(pt, session);
+ if (err)
+ goto err_delete_thread;
+
+ err = auxtrace_queues__process_index(&pt->queues, session);
+ if (err)
+ goto err_delete_thread;
+
+ if (pt->queues.populated)
+ pt->data_queued = true;
+
+ if (pt->timeless_decoding)
+ pr_debug2("Intel PT decoding without timestamps\n");
+
+ return 0;
+
+err_delete_thread:
+ thread__delete(pt->unknown_thread);
+err_free_queues:
+ intel_pt_log_disable();
+ auxtrace_queues__free(&pt->queues);
+ session->auxtrace = NULL;
+err_free:
+ free(pt);
+ return err;
+}
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
new file mode 100644
index 0000000..0065949
--- /dev/null
+++ b/tools/perf/util/intel-pt.h
@@ -0,0 +1,56 @@
+/*
+ * intel_pt.h: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__PERF_INTEL_PT_H__
+#define INCLUDE__PERF_INTEL_PT_H__
+
+#define INTEL_PT_PMU_NAME "intel_pt"
+
+enum {
+ INTEL_PT_PMU_TYPE,
+ INTEL_PT_TIME_SHIFT,
+ INTEL_PT_TIME_MULT,
+ INTEL_PT_TIME_ZERO,
+ INTEL_PT_CAP_USER_TIME_ZERO,
+ INTEL_PT_TSC_BIT,
+ INTEL_PT_NORETCOMP_BIT,
+ INTEL_PT_HAVE_SCHED_SWITCH,
+ INTEL_PT_SNAPSHOT_MODE,
+ INTEL_PT_PER_CPU_MMAPS,
+ INTEL_PT_MTC_BIT,
+ INTEL_PT_MTC_FREQ_BITS,
+ INTEL_PT_TSC_CTC_N,
+ INTEL_PT_TSC_CTC_D,
+ INTEL_PT_CYC_BIT,
+ INTEL_PT_AUXTRACE_PRIV_MAX,
+};
+
+#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+struct perf_event_attr;
+struct perf_pmu;
+
+struct auxtrace_record *intel_pt_recording_init(int *err);
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
+
+#endif
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
new file mode 100644
index 0000000..4f6a478
--- /dev/null
+++ b/tools/perf/util/llvm-utils.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+
+#include <stdio.h>
+#include <sys/utsname.h>
+#include "util.h"
+#include "debug.h"
+#include "llvm-utils.h"
+#include "cache.h"
+
+#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \
+ "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS " \
+ "$KERNEL_INC_OPTIONS -Wno-unused-value " \
+ "-Wno-pointer-sign -working-directory " \
+ "$WORKING_DIR -c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
+
+struct llvm_param llvm_param = {
+ .clang_path = "clang",
+ .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE,
+ .clang_opt = NULL,
+ .kbuild_dir = NULL,
+ .kbuild_opts = NULL,
+ .user_set_param = false,
+};
+
+int perf_llvm_config(const char *var, const char *value)
+{
+ if (prefixcmp(var, "llvm."))
+ return 0;
+ var += sizeof("llvm.") - 1;
+
+ if (!strcmp(var, "clang-path"))
+ llvm_param.clang_path = strdup(value);
+ else if (!strcmp(var, "clang-bpf-cmd-template"))
+ llvm_param.clang_bpf_cmd_template = strdup(value);
+ else if (!strcmp(var, "clang-opt"))
+ llvm_param.clang_opt = strdup(value);
+ else if (!strcmp(var, "kbuild-dir"))
+ llvm_param.kbuild_dir = strdup(value);
+ else if (!strcmp(var, "kbuild-opts"))
+ llvm_param.kbuild_opts = strdup(value);
+ else
+ return -1;
+ llvm_param.user_set_param = true;
+ return 0;
+}
+
+static int
+search_program(const char *def, const char *name,
+ char *output)
+{
+ char *env, *path, *tmp = NULL;
+ char buf[PATH_MAX];
+ int ret;
+
+ output[0] = '\0';
+ if (def && def[0] != '\0') {
+ if (def[0] == '/') {
+ if (access(def, F_OK) == 0) {
+ strlcpy(output, def, PATH_MAX);
+ return 0;
+ }
+ } else if (def[0] != '\0')
+ name = def;
+ }
+
+ env = getenv("PATH");
+ if (!env)
+ return -1;
+ env = strdup(env);
+ if (!env)
+ return -1;
+
+ ret = -ENOENT;
+ path = strtok_r(env, ":", &tmp);
+ while (path) {
+ scnprintf(buf, sizeof(buf), "%s/%s", path, name);
+ if (access(buf, F_OK) == 0) {
+ strlcpy(output, buf, PATH_MAX);
+ ret = 0;
+ break;
+ }
+ path = strtok_r(NULL, ":", &tmp);
+ }
+
+ free(env);
+ return ret;
+}
+
+#define READ_SIZE 4096
+static int
+read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
+{
+ int err = 0;
+ void *buf = NULL;
+ FILE *file = NULL;
+ size_t read_sz = 0, buf_sz = 0;
+
+ file = popen(cmd, "r");
+ if (!file) {
+ pr_err("ERROR: unable to popen cmd: %s\n",
+ strerror(errno));
+ return -EINVAL;
+ }
+
+ while (!feof(file) && !ferror(file)) {
+ /*
+ * Make buf_sz always have obe byte extra space so we
+ * can put '\0' there.
+ */
+ if (buf_sz - read_sz < READ_SIZE + 1) {
+ void *new_buf;
+
+ buf_sz = read_sz + READ_SIZE + 1;
+ new_buf = realloc(buf, buf_sz);
+
+ if (!new_buf) {
+ pr_err("ERROR: failed to realloc memory\n");
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ buf = new_buf;
+ }
+ read_sz += fread(buf + read_sz, 1, READ_SIZE, file);
+ }
+
+ if (buf_sz - read_sz < 1) {
+ pr_err("ERROR: internal error\n");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (ferror(file)) {
+ pr_err("ERROR: error occurred when reading from pipe: %s\n",
+ strerror(errno));
+ err = -EIO;
+ goto errout;
+ }
+
+ err = WEXITSTATUS(pclose(file));
+ file = NULL;
+ if (err) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ /*
+ * If buf is string, give it terminal '\0' to make our life
+ * easier. If buf is not string, that '\0' is out of space
+ * indicated by read_sz so caller won't even notice it.
+ */
+ ((char *)buf)[read_sz] = '\0';
+
+ if (!p_buf)
+ free(buf);
+ else
+ *p_buf = buf;
+
+ if (p_read_sz)
+ *p_read_sz = read_sz;
+ return 0;
+
+errout:
+ if (file)
+ pclose(file);
+ free(buf);
+ if (p_buf)
+ *p_buf = NULL;
+ if (p_read_sz)
+ *p_read_sz = 0;
+ return err;
+}
+
+static inline void
+force_set_env(const char *var, const char *value)
+{
+ if (value) {
+ setenv(var, value, 1);
+ pr_debug("set env: %s=%s\n", var, value);
+ } else {
+ unsetenv(var);
+ pr_debug("unset env: %s\n", var);
+ }
+}
+
+static void
+version_notice(void)
+{
+ pr_err(
+" \tLLVM 3.7 or newer is required. Which can be found from http://llvm.org\n"
+" \tYou may want to try git trunk:\n"
+" \t\tgit clone http://llvm.org/git/llvm.git\n"
+" \t\t and\n"
+" \t\tgit clone http://llvm.org/git/clang.git\n\n"
+" \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n"
+" \tdebian/ubuntu:\n"
+" \t\thttp://llvm.org/apt\n\n"
+" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
+" \toption in [llvm] section of ~/.perfconfig to:\n\n"
+" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n"
+" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
+" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
+" \t(Replace /path/to/llc with path to your llc)\n\n"
+);
+}
+
+static int detect_kbuild_dir(char **kbuild_dir)
+{
+ const char *test_dir = llvm_param.kbuild_dir;
+ const char *prefix_dir = "";
+ const char *suffix_dir = "";
+
+ char *autoconf_path;
+ struct utsname utsname;
+
+ int err;
+
+ if (!test_dir) {
+ err = uname(&utsname);
+ if (err) {
+ pr_warning("uname failed: %s\n", strerror(errno));
+ return -EINVAL;
+ }
+
+ test_dir = utsname.release;
+ prefix_dir = "/lib/modules/";
+ suffix_dir = "/build";
+ }
+
+ err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h",
+ prefix_dir, test_dir, suffix_dir);
+ if (err < 0)
+ return -ENOMEM;
+
+ if (access(autoconf_path, R_OK) == 0) {
+ free(autoconf_path);
+
+ err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir,
+ suffix_dir);
+ if (err < 0)
+ return -ENOMEM;
+ return 0;
+ }
+ free(autoconf_path);
+ return -ENOENT;
+}
+
+static const char *kinc_fetch_script =
+"#!/usr/bin/env sh\n"
+"if ! test -d \"$KBUILD_DIR\"\n"
+"then\n"
+" exit -1\n"
+"fi\n"
+"if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
+"then\n"
+" exit -1\n"
+"fi\n"
+"TMPDIR=`mktemp -d`\n"
+"if test -z \"$TMPDIR\"\n"
+"then\n"
+" exit -1\n"
+"fi\n"
+"cat << EOF > $TMPDIR/Makefile\n"
+"obj-y := dummy.o\n"
+"\\$(obj)/%.o: \\$(src)/%.c\n"
+"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n"
+"EOF\n"
+"touch $TMPDIR/dummy.c\n"
+"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n"
+"RET=$?\n"
+"rm -rf $TMPDIR\n"
+"exit $RET\n";
+
+static inline void
+get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts)
+{
+ int err;
+
+ if (!kbuild_dir || !kbuild_include_opts)
+ return;
+
+ *kbuild_dir = NULL;
+ *kbuild_include_opts = NULL;
+
+ if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) {
+ pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n");
+ pr_debug("Skip kbuild options detection.\n");
+ return;
+ }
+
+ err = detect_kbuild_dir(kbuild_dir);
+ if (err) {
+ pr_warning(
+"WARNING:\tunable to get correct kernel building directory.\n"
+"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n"
+" \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n"
+" \tdetection.\n\n");
+ return;
+ }
+
+ pr_debug("Kernel build dir is set to %s\n", *kbuild_dir);
+ force_set_env("KBUILD_DIR", *kbuild_dir);
+ force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts);
+ err = read_from_pipe(kinc_fetch_script,
+ (void **)kbuild_include_opts,
+ NULL);
+ if (err) {
+ pr_warning(
+"WARNING:\tunable to get kernel include directories from '%s'\n"
+"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n"
+" \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n"
+" \toption in [llvm] to \"\" to suppress this detection.\n\n",
+ *kbuild_dir);
+
+ free(*kbuild_dir);
+ *kbuild_dir = NULL;
+ return;
+ }
+
+ pr_debug("include option is set to %s\n", *kbuild_include_opts);
+}
+
+int llvm__compile_bpf(const char *path, void **p_obj_buf,
+ size_t *p_obj_buf_sz)
+{
+ int err;
+ char clang_path[PATH_MAX];
+ const char *clang_opt = llvm_param.clang_opt;
+ const char *template = llvm_param.clang_bpf_cmd_template;
+ char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
+ void *obj_buf = NULL;
+ size_t obj_buf_sz;
+
+ if (!template)
+ template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
+
+ err = search_program(llvm_param.clang_path,
+ "clang", clang_path);
+ if (err) {
+ pr_err(
+"ERROR:\tunable to find clang.\n"
+"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
+" \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n");
+ version_notice();
+ return -ENOENT;
+ }
+
+ /*
+ * This is an optional work. Even it fail we can continue our
+ * work. Needn't to check error return.
+ */
+ get_kbuild_opts(&kbuild_dir, &kbuild_include_opts);
+
+ force_set_env("CLANG_EXEC", clang_path);
+ force_set_env("CLANG_OPTIONS", clang_opt);
+ force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
+ force_set_env("WORKING_DIR", kbuild_dir ? : ".");
+
+ /*
+ * Since we may reset clang's working dir, path of source file
+ * should be transferred into absolute path, except we want
+ * stdin to be source file (testing).
+ */
+ force_set_env("CLANG_SOURCE",
+ (path[0] == '-') ? path :
+ make_nonrelative_path(path));
+
+ pr_debug("llvm compiling command template: %s\n", template);
+ err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
+ if (err) {
+ pr_err("ERROR:\tunable to compile %s\n", path);
+ pr_err("Hint:\tCheck error message shown above.\n");
+ pr_err("Hint:\tYou can also pre-compile it into .o using:\n");
+ pr_err(" \t\tclang -target bpf -O2 -c %s\n", path);
+ pr_err(" \twith proper -I and -D options.\n");
+ goto errout;
+ }
+
+ free(kbuild_dir);
+ free(kbuild_include_opts);
+ if (!p_obj_buf)
+ free(obj_buf);
+ else
+ *p_obj_buf = obj_buf;
+
+ if (p_obj_buf_sz)
+ *p_obj_buf_sz = obj_buf_sz;
+ return 0;
+errout:
+ free(kbuild_dir);
+ free(kbuild_include_opts);
+ free(obj_buf);
+ if (p_obj_buf)
+ *p_obj_buf = NULL;
+ if (p_obj_buf_sz)
+ *p_obj_buf_sz = 0;
+ return err;
+}
+
+int llvm__search_clang(void)
+{
+ char clang_path[PATH_MAX];
+
+ return search_program(llvm_param.clang_path, "clang", clang_path);
+}
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
new file mode 100644
index 0000000..5b3cf1c
--- /dev/null
+++ b/tools/perf/util/llvm-utils.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __LLVM_UTILS_H
+#define __LLVM_UTILS_H
+
+#include "debug.h"
+
+struct llvm_param {
+ /* Path of clang executable */
+ const char *clang_path;
+ /*
+ * Template of clang bpf compiling. 5 env variables
+ * can be used:
+ * $CLANG_EXEC: Path to clang.
+ * $CLANG_OPTIONS: Extra options to clang.
+ * $KERNEL_INC_OPTIONS: Kernel include directories.
+ * $WORKING_DIR: Kernel source directory.
+ * $CLANG_SOURCE: Source file to be compiled.
+ */
+ const char *clang_bpf_cmd_template;
+ /* Will be filled in $CLANG_OPTIONS */
+ const char *clang_opt;
+ /* Where to find kbuild system */
+ const char *kbuild_dir;
+ /*
+ * Arguments passed to make, like 'ARCH=arm' if doing cross
+ * compiling. Should not be used for dynamic compiling.
+ */
+ const char *kbuild_opts;
+ /*
+ * Default is false. If one of the above fields is set by user
+ * explicitly then user_set_llvm is set to true. This is used
+ * for perf test. If user doesn't set anything in .perfconfig
+ * and clang is not found, don't trigger llvm test.
+ */
+ bool user_set_param;
+};
+
+extern struct llvm_param llvm_param;
+extern int perf_llvm_config(const char *var, const char *value);
+
+extern int llvm__compile_bpf(const char *path, void **p_obj_buf,
+ size_t *p_obj_buf_sz);
+
+/* This function is for test__llvm() use only */
+extern int llvm__search_clang(void);
+#endif
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 527e032..6309f7c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -14,20 +14,23 @@
#include "unwind.h"
#include "linux/hash.h"
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
+
static void dsos__init(struct dsos *dsos)
{
INIT_LIST_HEAD(&dsos->head);
dsos->root = RB_ROOT;
+ pthread_rwlock_init(&dsos->lock, NULL);
}
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
- dsos__init(&machine->user_dsos);
- dsos__init(&machine->kernel_dsos);
+ dsos__init(&machine->dsos);
machine->threads = RB_ROOT;
+ pthread_rwlock_init(&machine->threads_lock, NULL);
INIT_LIST_HEAD(&machine->dead_threads);
machine->last_match = NULL;
@@ -54,6 +57,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
snprintf(comm, sizeof(comm), "[guest/%d]", pid);
thread__set_comm(thread, comm, 0);
+ thread__put(thread);
}
machine->current_tid = NULL;
@@ -78,37 +82,50 @@ out_delete:
return NULL;
}
-static void dsos__delete(struct dsos *dsos)
+static void dsos__purge(struct dsos *dsos)
{
struct dso *pos, *n;
+ pthread_rwlock_wrlock(&dsos->lock);
+
list_for_each_entry_safe(pos, n, &dsos->head, node) {
RB_CLEAR_NODE(&pos->rb_node);
- list_del(&pos->node);
- dso__delete(pos);
+ list_del_init(&pos->node);
+ dso__put(pos);
}
+
+ pthread_rwlock_unlock(&dsos->lock);
+}
+
+static void dsos__exit(struct dsos *dsos)
+{
+ dsos__purge(dsos);
+ pthread_rwlock_destroy(&dsos->lock);
}
void machine__delete_threads(struct machine *machine)
{
- struct rb_node *nd = rb_first(&machine->threads);
+ struct rb_node *nd;
+ pthread_rwlock_wrlock(&machine->threads_lock);
+ nd = rb_first(&machine->threads);
while (nd) {
struct thread *t = rb_entry(nd, struct thread, rb_node);
nd = rb_next(nd);
- machine__remove_thread(machine, t);
+ __machine__remove_thread(machine, t, false);
}
+ pthread_rwlock_unlock(&machine->threads_lock);
}
void machine__exit(struct machine *machine)
{
map_groups__exit(&machine->kmaps);
- dsos__delete(&machine->user_dsos);
- dsos__delete(&machine->kernel_dsos);
- vdso__exit(machine);
+ dsos__exit(&machine->dsos);
+ machine__exit_vdso(machine);
zfree(&machine->root_dir);
zfree(&machine->current_tid);
+ pthread_rwlock_destroy(&machine->threads_lock);
}
void machine__delete(struct machine *machine)
@@ -233,7 +250,7 @@ struct machine *machines__findnew(struct machines *machines, pid_t pid)
static struct strlist *seen;
if (!seen)
- seen = strlist__new(true, NULL);
+ seen = strlist__new(NULL, NULL);
if (!strlist__has_entry(seen, path)) {
pr_err("Can't access file %s\n", path);
@@ -303,7 +320,7 @@ static void machine__update_thread_pid(struct machine *machine,
if (th->pid_ == th->tid)
return;
- leader = machine__findnew_thread(machine, th->pid_, th->pid_);
+ leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
if (!leader)
goto out_err;
@@ -325,7 +342,7 @@ static void machine__update_thread_pid(struct machine *machine,
if (!map_groups__empty(th->mg))
pr_err("Discarding thread maps for %d:%d\n",
th->pid_, th->tid);
- map_groups__delete(th->mg);
+ map_groups__put(th->mg);
}
th->mg = map_groups__get(leader->mg);
@@ -336,9 +353,9 @@ out_err:
pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
}
-static struct thread *__machine__findnew_thread(struct machine *machine,
- pid_t pid, pid_t tid,
- bool create)
+static struct thread *____machine__findnew_thread(struct machine *machine,
+ pid_t pid, pid_t tid,
+ bool create)
{
struct rb_node **p = &machine->threads.rb_node;
struct rb_node *parent = NULL;
@@ -356,7 +373,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
return th;
}
- thread__zput(machine->last_match);
+ machine->last_match = NULL;
}
while (*p != NULL) {
@@ -364,7 +381,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
th = rb_entry(parent, struct thread, rb_node);
if (th->tid == tid) {
- machine->last_match = thread__get(th);
+ machine->last_match = th;
machine__update_thread_pid(machine, th, pid);
return th;
}
@@ -392,7 +409,8 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
* leader and that would screwed the rb tree.
*/
if (thread__init_map_groups(th, machine)) {
- rb_erase(&th->rb_node, &machine->threads);
+ rb_erase_init(&th->rb_node, &machine->threads);
+ RB_CLEAR_NODE(&th->rb_node);
thread__delete(th);
return NULL;
}
@@ -400,22 +418,36 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
* It is now in the rbtree, get a ref
*/
thread__get(th);
- machine->last_match = thread__get(th);
+ machine->last_match = th;
}
return th;
}
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
+{
+ return ____machine__findnew_thread(machine, pid, tid, true);
+}
+
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
- return __machine__findnew_thread(machine, pid, tid, true);
+ struct thread *th;
+
+ pthread_rwlock_wrlock(&machine->threads_lock);
+ th = thread__get(__machine__findnew_thread(machine, pid, tid));
+ pthread_rwlock_unlock(&machine->threads_lock);
+ return th;
}
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
- return __machine__findnew_thread(machine, pid, tid, false);
+ struct thread *th;
+ pthread_rwlock_rdlock(&machine->threads_lock);
+ th = thread__get(____machine__findnew_thread(machine, pid, tid, false));
+ pthread_rwlock_unlock(&machine->threads_lock);
+ return th;
}
struct comm *machine__thread_exec_comm(struct machine *machine,
@@ -434,6 +466,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
event->comm.pid,
event->comm.tid);
bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+ int err = 0;
if (exec)
machine->comm_exec = true;
@@ -444,10 +477,12 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
if (thread == NULL ||
__thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
- return -1;
+ err = -1;
}
- return 0;
+ thread__put(thread);
+
+ return err;
}
int machine__process_lost_event(struct machine *machine __maybe_unused,
@@ -458,17 +493,27 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
return 0;
}
-static struct dso*
-machine__module_dso(struct machine *machine, struct kmod_path *m,
- const char *filename)
+int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
+ union perf_event *event, struct perf_sample *sample)
+{
+ dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
+ sample->id, event->lost_samples.lost);
+ return 0;
+}
+
+static struct dso *machine__findnew_module_dso(struct machine *machine,
+ struct kmod_path *m,
+ const char *filename)
{
struct dso *dso;
- dso = dsos__find(&machine->kernel_dsos, m->name, true);
+ pthread_rwlock_wrlock(&machine->dsos.lock);
+
+ dso = __dsos__find(&machine->dsos, m->name, true);
if (!dso) {
- dso = dsos__addnew(&machine->kernel_dsos, m->name);
+ dso = __dsos__addnew(&machine->dsos, m->name);
if (dso == NULL)
- return NULL;
+ goto out_unlock;
if (machine__is_host(machine))
dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
@@ -483,11 +528,38 @@ machine__module_dso(struct machine *machine, struct kmod_path *m,
dso__set_long_name(dso, strdup(filename), true);
}
+ dso__get(dso);
+out_unlock:
+ pthread_rwlock_unlock(&machine->dsos.lock);
return dso;
}
-struct map *machine__new_module(struct machine *machine, u64 start,
- const char *filename)
+int machine__process_aux_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_aux(event, stdout);
+ return 0;
+}
+
+int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_itrace_start(event, stdout);
+ return 0;
+}
+
+int machine__process_switch_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_switch(event, stdout);
+ return 0;
+}
+
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+ const char *filename)
{
struct map *map = NULL;
struct dso *dso;
@@ -501,7 +573,7 @@ struct map *machine__new_module(struct machine *machine, u64 start,
if (map)
goto out;
- dso = machine__module_dso(machine, &m, filename);
+ dso = machine__findnew_module_dso(machine, &m, filename);
if (dso == NULL)
goto out;
@@ -519,13 +591,11 @@ out:
size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
{
struct rb_node *nd;
- size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) +
- __dsos__fprintf(&machines->host.user_dsos.head, fp);
+ size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
- ret += __dsos__fprintf(&pos->kernel_dsos.head, fp);
- ret += __dsos__fprintf(&pos->user_dsos.head, fp);
+ ret += __dsos__fprintf(&pos->dsos.head, fp);
}
return ret;
@@ -534,8 +604,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm)
{
- return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) +
- __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm);
+ return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
}
size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -575,12 +644,16 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
size_t ret = 0;
struct rb_node *nd;
+ pthread_rwlock_rdlock(&machine->threads_lock);
+
for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
struct thread *pos = rb_entry(nd, struct thread, rb_node);
ret += thread__fprintf(pos, fp);
}
+ pthread_rwlock_unlock(&machine->threads_lock);
+
return ret;
}
@@ -594,9 +667,8 @@ static struct dso *machine__get_kernel(struct machine *machine)
if (!vmlinux_name)
vmlinux_name = "[kernel.kallsyms]";
- kernel = dso__kernel_findnew(machine, vmlinux_name,
- "[kernel]",
- DSO_TYPE_KERNEL);
+ kernel = machine__findnew_kernel(machine, vmlinux_name,
+ "[kernel]", DSO_TYPE_KERNEL);
} else {
char bf[PATH_MAX];
@@ -606,9 +678,9 @@ static struct dso *machine__get_kernel(struct machine *machine)
vmlinux_name = machine__mmap_name(machine, bf,
sizeof(bf));
- kernel = dso__kernel_findnew(machine, vmlinux_name,
- "[guest.kernel]",
- DSO_TYPE_GUEST_KERNEL);
+ kernel = machine__findnew_kernel(machine, vmlinux_name,
+ "[guest.kernel]",
+ DSO_TYPE_GUEST_KERNEL);
}
if (kernel != NULL && (!kernel->has_build_id))
@@ -713,7 +785,6 @@ void machine__destroy_kernel_maps(struct machine *machine)
kmap->ref_reloc_sym = NULL;
}
- map__delete(machine->vmlinux_maps[type]);
machine->vmlinux_maps[type] = NULL;
}
}
@@ -970,7 +1041,7 @@ static int machine__create_module(void *arg, const char *name, u64 start)
struct machine *machine = arg;
struct map *map;
- map = machine__new_module(machine, start, name);
+ map = machine__findnew_module_map(machine, start, name);
if (map == NULL)
return -1;
@@ -1062,7 +1133,7 @@ static bool machine__uses_kcore(struct machine *machine)
{
struct dso *dso;
- list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
+ list_for_each_entry(dso, &machine->dsos.head, node) {
if (dso__is_kcore(dso))
return true;
}
@@ -1093,8 +1164,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
strlen(kmmap_prefix) - 1) == 0;
if (event->mmap.filename[0] == '/' ||
(!is_kernel_mmap && event->mmap.filename[0] == '[')) {
- map = machine__new_module(machine, event->mmap.start,
- event->mmap.filename);
+ map = machine__findnew_module_map(machine, event->mmap.start,
+ event->mmap.filename);
if (map == NULL)
goto out_problem;
@@ -1109,23 +1180,48 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
struct dso *kernel = NULL;
struct dso *dso;
- list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
- if (is_kernel_module(dso->long_name))
+ pthread_rwlock_rdlock(&machine->dsos.lock);
+
+ list_for_each_entry(dso, &machine->dsos.head, node) {
+
+ /*
+ * The cpumode passed to is_kernel_module is not the
+ * cpumode of *this* event. If we insist on passing
+ * correct cpumode to is_kernel_module, we should
+ * record the cpumode when we adding this dso to the
+ * linked list.
+ *
+ * However we don't really need passing correct
+ * cpumode. We know the correct cpumode must be kernel
+ * mode (if not, we should not link it onto kernel_dsos
+ * list).
+ *
+ * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
+ * is_kernel_module() treats it as a kernel cpumode.
+ */
+
+ if (!dso->kernel ||
+ is_kernel_module(dso->long_name,
+ PERF_RECORD_MISC_CPUMODE_UNKNOWN))
continue;
+
kernel = dso;
break;
}
+ pthread_rwlock_unlock(&machine->dsos.lock);
+
if (kernel == NULL)
- kernel = __dsos__findnew(&machine->kernel_dsos,
- kmmap_prefix);
+ kernel = machine__findnew_dso(machine, kmmap_prefix);
if (kernel == NULL)
goto out_problem;
kernel->kernel = kernel_type;
- if (__machine__create_kernel_maps(machine, kernel) < 0)
+ if (__machine__create_kernel_maps(machine, kernel) < 0) {
+ dso__put(kernel);
goto out_problem;
+ }
if (strstr(kernel->long_name, "vmlinux"))
dso__set_short_name(kernel, "[kernel.vmlinux]", false);
@@ -1197,11 +1293,15 @@ int machine__process_mmap2_event(struct machine *machine,
event->mmap2.filename, type, thread);
if (map == NULL)
- goto out_problem;
+ goto out_problem_map;
thread__insert_map(thread, map);
+ thread__put(thread);
+ map__put(map);
return 0;
+out_problem_map:
+ thread__put(thread);
out_problem:
dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
return 0;
@@ -1244,31 +1344,46 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
type, thread);
if (map == NULL)
- goto out_problem;
+ goto out_problem_map;
thread__insert_map(thread, map);
+ thread__put(thread);
+ map__put(map);
return 0;
+out_problem_map:
+ thread__put(thread);
out_problem:
dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
return 0;
}
-void machine__remove_thread(struct machine *machine, struct thread *th)
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
{
if (machine->last_match == th)
- thread__zput(machine->last_match);
+ machine->last_match = NULL;
- rb_erase(&th->rb_node, &machine->threads);
+ BUG_ON(atomic_read(&th->refcnt) == 0);
+ if (lock)
+ pthread_rwlock_wrlock(&machine->threads_lock);
+ rb_erase_init(&th->rb_node, &machine->threads);
+ RB_CLEAR_NODE(&th->rb_node);
/*
* Move it first to the dead_threads list, then drop the reference,
* if this is the last reference, then the thread__delete destructor
* will be called and we will remove it from the dead_threads list.
*/
list_add_tail(&th->node, &machine->dead_threads);
+ if (lock)
+ pthread_rwlock_unlock(&machine->threads_lock);
thread__put(th);
}
+void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+ return __machine__remove_thread(machine, th, true);
+}
+
int machine__process_fork_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample)
{
@@ -1278,23 +1393,44 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
+ int err = 0;
+
+ if (dump_trace)
+ perf_event__fprintf_task(event, stdout);
+
+ /*
+ * There may be an existing thread that is not actually the parent,
+ * either because we are processing events out of order, or because the
+ * (fork) event that would have removed the thread was lost. Assume the
+ * latter case and continue on as best we can.
+ */
+ if (parent->pid_ != (pid_t)event->fork.ppid) {
+ dump_printf("removing erroneous parent thread %d/%d\n",
+ parent->pid_, parent->tid);
+ machine__remove_thread(machine, parent);
+ thread__put(parent);
+ parent = machine__findnew_thread(machine, event->fork.ppid,
+ event->fork.ptid);
+ }
/* if a thread currently exists for the thread id remove it */
- if (thread != NULL)
+ if (thread != NULL) {
machine__remove_thread(machine, thread);
+ thread__put(thread);
+ }
thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);
- if (dump_trace)
- perf_event__fprintf_task(event, stdout);
if (thread == NULL || parent == NULL ||
thread__fork(thread, parent, sample->time) < 0) {
dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
- return -1;
+ err = -1;
}
+ thread__put(thread);
+ thread__put(parent);
- return 0;
+ return err;
}
int machine__process_exit_event(struct machine *machine, union perf_event *event,
@@ -1307,8 +1443,10 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
if (dump_trace)
perf_event__fprintf_task(event, stdout);
- if (thread != NULL)
+ if (thread != NULL) {
thread__exited(thread);
+ thread__put(thread);
+ }
return 0;
}
@@ -1331,6 +1469,15 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_exit_event(machine, event, sample); break;
case PERF_RECORD_LOST:
ret = machine__process_lost_event(machine, event, sample); break;
+ case PERF_RECORD_AUX:
+ ret = machine__process_aux_event(machine, event); break;
+ case PERF_RECORD_ITRACE_START:
+ ret = machine__process_itrace_start_event(machine, event); break;
+ case PERF_RECORD_LOST_SAMPLES:
+ ret = machine__process_lost_samples_event(machine, event, sample); break;
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ ret = machine__process_switch_event(machine, event); break;
default:
ret = -1;
break;
@@ -1769,14 +1916,36 @@ int machine__for_each_thread(struct machine *machine,
return rc;
}
+int machines__for_each_thread(struct machines *machines,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv)
+{
+ struct rb_node *nd;
+ int rc = 0;
+
+ rc = machine__for_each_thread(&machines->host, fn, priv);
+ if (rc != 0)
+ return rc;
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+ rc = machine__for_each_thread(machine, fn, priv);
+ if (rc != 0)
+ return rc;
+ }
+ return rc;
+}
+
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
- perf_event__handler_t process, bool data_mmap)
+ perf_event__handler_t process, bool data_mmap,
+ unsigned int proc_map_timeout)
{
if (target__has_task(target))
- return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
+ return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
else if (target__has_cpu(target))
- return perf_event__synthesize_threads(tool, process, machine, data_mmap);
+ return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
/* command specified */
return 0;
}
@@ -1820,6 +1989,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
return -ENOMEM;
thread->cpu = cpu;
+ thread__put(thread);
return 0;
}
@@ -1845,3 +2015,22 @@ int machine__get_kernel_start(struct machine *machine)
}
return err;
}
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
+{
+ return dsos__findnew(&machine->dsos, filename);
+}
+
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
+{
+ struct machine *machine = vmachine;
+ struct map *map;
+ struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map, NULL);
+
+ if (sym == NULL)
+ return NULL;
+
+ *modp = __map__is_kmodule(map) ? (char *)map->dso->short_name : NULL;
+ *addrp = map->unmap_ip(map, sym->start);
+ return sym->name;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 6d64ced..ea5cb4a 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -30,11 +30,11 @@ struct machine {
bool comm_exec;
char *root_dir;
struct rb_root threads;
+ pthread_rwlock_t threads_lock;
struct list_head dead_threads;
struct thread *last_match;
struct vdso_info *vdso_info;
- struct dsos user_dsos;
- struct dsos kernel_dsos;
+ struct dsos dsos;
struct map_groups kmaps;
struct map *vmlinux_maps[MAP__NR_TYPES];
u64 kernel_start;
@@ -81,6 +81,14 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
struct perf_sample *sample);
int machine__process_lost_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
+int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_aux_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_itrace_start_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_switch_event(struct machine *machine __maybe_unused,
+ union perf_event *event);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
@@ -147,8 +155,10 @@ static inline bool machine__is_host(struct machine *machine)
return machine ? machine->pid == HOST_KERNEL_ID : false;
}
-struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
- pid_t tid);
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
size_t machine__fprintf(struct machine *machine, FILE *fp);
@@ -181,8 +191,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
filter);
}
-struct map *machine__new_module(struct machine *machine, u64 start,
- const char *filename);
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+ const char *filename);
int machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type, symbol_filter_t filter);
@@ -208,20 +218,30 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
int machine__for_each_thread(struct machine *machine,
int (*fn)(struct thread *thread, void *p),
void *priv);
+int machines__for_each_thread(struct machines *machines,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv);
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
- perf_event__handler_t process, bool data_mmap);
+ perf_event__handler_t process, bool data_mmap,
+ unsigned int proc_map_timeout);
static inline
int machine__synthesize_threads(struct machine *machine, struct target *target,
- struct thread_map *threads, bool data_mmap)
+ struct thread_map *threads, bool data_mmap,
+ unsigned int proc_map_timeout)
{
return __machine__synthesize_threads(machine, NULL, target, threads,
- perf_event__process, data_mmap);
+ perf_event__process, data_mmap,
+ proc_map_timeout);
}
pid_t machine__get_current_tid(struct machine *machine, int cpu);
int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
pid_t tid);
+/*
+ * For use with libtraceevent's pevent_set_function_resolver()
+ */
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index a14f08f..b1c475d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -16,6 +16,8 @@
#include "machine.h"
#include <linux/string.h>
+static void __maps__insert(struct maps *maps, struct map *map);
+
const char *map_type__name[MAP__NR_TYPES] = {
[MAP__FUNCTION] = "Functions",
[MAP__VARIABLE] = "Variables",
@@ -130,13 +132,13 @@ void map__init(struct map *map, enum map_type type,
map->end = end;
map->pgoff = pgoff;
map->reloc = 0;
- map->dso = dso;
+ map->dso = dso__get(dso);
map->map_ip = map__map_ip;
map->unmap_ip = map__unmap_ip;
RB_CLEAR_NODE(&map->rb_node);
map->groups = NULL;
- map->referenced = false;
map->erange_warned = false;
+ atomic_set(&map->refcnt, 1);
}
struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -175,9 +177,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if (vdso) {
pgoff = 0;
- dso = vdso__dso_findnew(machine, thread);
+ dso = machine__findnew_vdso(machine, thread);
} else
- dso = __dsos__findnew(&machine->user_dsos, filename);
+ dso = machine__findnew_dso(machine, filename);
if (dso == NULL)
goto out_delete;
@@ -195,6 +197,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if (type != MAP__FUNCTION)
dso__set_loaded(dso, map->type);
}
+ dso__put(dso);
}
return map;
out_delete:
@@ -221,11 +224,38 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
return map;
}
+/*
+ * Use this and __map__is_kmodule() for map instances that are in
+ * machine->kmaps, and thus have map->groups->machine all properly set, to
+ * disambiguate between the kernel and modules.
+ *
+ * When the need arises, introduce map__is_{kernel,kmodule)() that
+ * checks (map->groups != NULL && map->groups->machine != NULL &&
+ * map->dso->kernel) before calling __map__is_{kernel,kmodule}())
+ */
+bool __map__is_kernel(const struct map *map)
+{
+ return map->groups->machine->vmlinux_maps[map->type] == map;
+}
+
+static void map__exit(struct map *map)
+{
+ BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
+ dso__zput(map->dso);
+}
+
void map__delete(struct map *map)
{
+ map__exit(map);
free(map);
}
+void map__put(struct map *map)
+{
+ if (map && atomic_dec_and_test(&map->refcnt))
+ map__delete(map);
+}
+
void map__fixup_start(struct map *map)
{
struct rb_root *symbols = &map->dso->symbols[map->type];
@@ -292,6 +322,11 @@ int map__load(struct map *map, symbol_filter_t filter)
return 0;
}
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+ return strcmp(namea, nameb);
+}
+
struct symbol *map__find_symbol(struct map *map, u64 addr,
symbol_filter_t filter)
{
@@ -313,9 +348,18 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
return dso__find_symbol_by_name(map->dso, map->type, name);
}
-struct map *map__clone(struct map *map)
+struct map *map__clone(struct map *from)
{
- return memdup(map, sizeof(*map));
+ struct map *map = memdup(from, sizeof(*map));
+
+ if (map != NULL) {
+ atomic_set(&map->refcnt, 1);
+ RB_CLEAR_NODE(&map->rb_node);
+ dso__get(map->dso);
+ map->groups = NULL;
+ }
+
+ return map;
}
int map__overlap(struct map *l, struct map *r)
@@ -413,48 +457,49 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
return ip + map->reloc;
}
+static void maps__init(struct maps *maps)
+{
+ maps->entries = RB_ROOT;
+ pthread_rwlock_init(&maps->lock, NULL);
+}
+
void map_groups__init(struct map_groups *mg, struct machine *machine)
{
int i;
for (i = 0; i < MAP__NR_TYPES; ++i) {
- mg->maps[i] = RB_ROOT;
- INIT_LIST_HEAD(&mg->removed_maps[i]);
+ maps__init(&mg->maps[i]);
}
mg->machine = machine;
- mg->refcnt = 1;
+ atomic_set(&mg->refcnt, 1);
}
-static void maps__delete(struct rb_root *maps)
+static void __maps__purge(struct maps *maps)
{
- struct rb_node *next = rb_first(maps);
+ struct rb_root *root = &maps->entries;
+ struct rb_node *next = rb_first(root);
while (next) {
struct map *pos = rb_entry(next, struct map, rb_node);
next = rb_next(&pos->rb_node);
- rb_erase(&pos->rb_node, maps);
- map__delete(pos);
+ rb_erase_init(&pos->rb_node, root);
+ map__put(pos);
}
}
-static void maps__delete_removed(struct list_head *maps)
+static void maps__exit(struct maps *maps)
{
- struct map *pos, *n;
-
- list_for_each_entry_safe(pos, n, maps, node) {
- list_del(&pos->node);
- map__delete(pos);
- }
+ pthread_rwlock_wrlock(&maps->lock);
+ __maps__purge(maps);
+ pthread_rwlock_unlock(&maps->lock);
}
void map_groups__exit(struct map_groups *mg)
{
int i;
- for (i = 0; i < MAP__NR_TYPES; ++i) {
- maps__delete(&mg->maps[i]);
- maps__delete_removed(&mg->removed_maps[i]);
- }
+ for (i = 0; i < MAP__NR_TYPES; ++i)
+ maps__exit(&mg->maps[i]);
}
bool map_groups__empty(struct map_groups *mg)
@@ -464,8 +509,6 @@ bool map_groups__empty(struct map_groups *mg)
for (i = 0; i < MAP__NR_TYPES; ++i) {
if (maps__first(&mg->maps[i]))
return false;
- if (!list_empty(&mg->removed_maps[i]))
- return false;
}
return true;
@@ -489,32 +532,10 @@ void map_groups__delete(struct map_groups *mg)
void map_groups__put(struct map_groups *mg)
{
- if (--mg->refcnt == 0)
+ if (mg && atomic_dec_and_test(&mg->refcnt))
map_groups__delete(mg);
}
-void map_groups__flush(struct map_groups *mg)
-{
- int type;
-
- for (type = 0; type < MAP__NR_TYPES; type++) {
- struct rb_root *root = &mg->maps[type];
- struct rb_node *next = rb_first(root);
-
- while (next) {
- struct map *pos = rb_entry(next, struct map, rb_node);
- next = rb_next(&pos->rb_node);
- rb_erase(&pos->rb_node, root);
- /*
- * We may have references to this map, for
- * instance in some hist_entry instances, so
- * just move them to a separate list.
- */
- list_add_tail(&pos->node, &mg->removed_maps[pos->type]);
- }
- }
-}
-
struct symbol *map_groups__find_symbol(struct map_groups *mg,
enum map_type type, u64 addr,
struct map **mapp,
@@ -538,20 +559,28 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
struct map **mapp,
symbol_filter_t filter)
{
+ struct maps *maps = &mg->maps[type];
+ struct symbol *sym;
struct rb_node *nd;
- for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
+ pthread_rwlock_rdlock(&maps->lock);
+
+ for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
- struct symbol *sym = map__find_symbol_by_name(pos, name, filter);
+
+ sym = map__find_symbol_by_name(pos, name, filter);
if (sym == NULL)
continue;
if (mapp != NULL)
*mapp = pos;
- return sym;
+ goto out;
}
- return NULL;
+ sym = NULL;
+out:
+ pthread_rwlock_unlock(&maps->lock);
+ return sym;
}
int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
@@ -571,73 +600,54 @@ int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
return ams->sym ? 0 : -1;
}
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
- FILE *fp)
+static size_t maps__fprintf(struct maps *maps, FILE *fp)
{
- size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
+ size_t printed = 0;
struct rb_node *nd;
- for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
+ pthread_rwlock_rdlock(&maps->lock);
+
+ for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
printed += fprintf(fp, "Map:");
printed += map__fprintf(pos, fp);
if (verbose > 2) {
- printed += dso__fprintf(pos->dso, type, fp);
+ printed += dso__fprintf(pos->dso, pos->type, fp);
printed += fprintf(fp, "--\n");
}
}
- return printed;
-}
+ pthread_rwlock_unlock(&maps->lock);
-static size_t map_groups__fprintf_maps(struct map_groups *mg, FILE *fp)
-{
- size_t printed = 0, i;
- for (i = 0; i < MAP__NR_TYPES; ++i)
- printed += __map_groups__fprintf_maps(mg, i, fp);
return printed;
}
-static size_t __map_groups__fprintf_removed_maps(struct map_groups *mg,
- enum map_type type, FILE *fp)
+size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
+ FILE *fp)
{
- struct map *pos;
- size_t printed = 0;
-
- list_for_each_entry(pos, &mg->removed_maps[type], node) {
- printed += fprintf(fp, "Map:");
- printed += map__fprintf(pos, fp);
- if (verbose > 1) {
- printed += dso__fprintf(pos->dso, type, fp);
- printed += fprintf(fp, "--\n");
- }
- }
- return printed;
+ size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
+ return printed += maps__fprintf(&mg->maps[type], fp);
}
-static size_t map_groups__fprintf_removed_maps(struct map_groups *mg,
- FILE *fp)
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
{
size_t printed = 0, i;
for (i = 0; i < MAP__NR_TYPES; ++i)
- printed += __map_groups__fprintf_removed_maps(mg, i, fp);
+ printed += __map_groups__fprintf_maps(mg, i, fp);
return printed;
}
-size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
-{
- size_t printed = map_groups__fprintf_maps(mg, fp);
- printed += fprintf(fp, "Removed maps:\n");
- return printed + map_groups__fprintf_removed_maps(mg, fp);
-}
-
-int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
- FILE *fp)
+static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
{
- struct rb_root *root = &mg->maps[map->type];
- struct rb_node *next = rb_first(root);
+ struct rb_root *root;
+ struct rb_node *next;
int err = 0;
+ pthread_rwlock_wrlock(&maps->lock);
+
+ root = &maps->entries;
+ next = rb_first(root);
+
while (next) {
struct map *pos = rb_entry(next, struct map, rb_node);
next = rb_next(&pos->rb_node);
@@ -651,7 +661,7 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
map__fprintf(pos, fp);
}
- rb_erase(&pos->rb_node, root);
+ rb_erase_init(&pos->rb_node, root);
/*
* Now check if we need to create new maps for areas not
* overlapped by the new map:
@@ -661,11 +671,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
if (before == NULL) {
err = -ENOMEM;
- goto move_map;
+ goto put_map;
}
before->end = map->start;
- map_groups__insert(mg, before);
+ __maps__insert(maps, before);
if (verbose >= 2)
map__fprintf(before, fp);
}
@@ -675,28 +685,31 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
if (after == NULL) {
err = -ENOMEM;
- goto move_map;
+ goto put_map;
}
after->start = map->end;
- map_groups__insert(mg, after);
+ __maps__insert(maps, after);
if (verbose >= 2)
map__fprintf(after, fp);
}
-move_map:
- /*
- * If we have references, just move them to a separate list.
- */
- if (pos->referenced)
- list_add_tail(&pos->node, &mg->removed_maps[map->type]);
- else
- map__delete(pos);
+put_map:
+ map__put(pos);
if (err)
- return err;
+ goto out;
}
- return 0;
+ err = 0;
+out:
+ pthread_rwlock_unlock(&maps->lock);
+ return err;
+}
+
+int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
+ FILE *fp)
+{
+ return maps__fixup_overlappings(&mg->maps[map->type], map, fp);
}
/*
@@ -705,20 +718,28 @@ move_map:
int map_groups__clone(struct map_groups *mg,
struct map_groups *parent, enum map_type type)
{
- struct rb_node *nd;
- for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
- struct map *map = rb_entry(nd, struct map, rb_node);
+ int err = -ENOMEM;
+ struct map *map;
+ struct maps *maps = &parent->maps[type];
+
+ pthread_rwlock_rdlock(&maps->lock);
+
+ for (map = maps__first(maps); map; map = map__next(map)) {
struct map *new = map__clone(map);
if (new == NULL)
- return -ENOMEM;
+ goto out_unlock;
map_groups__insert(mg, new);
}
- return 0;
+
+ err = 0;
+out_unlock:
+ pthread_rwlock_unlock(&maps->lock);
+ return err;
}
-void maps__insert(struct rb_root *maps, struct map *map)
+static void __maps__insert(struct maps *maps, struct map *map)
{
- struct rb_node **p = &maps->rb_node;
+ struct rb_node **p = &maps->entries.rb_node;
struct rb_node *parent = NULL;
const u64 ip = map->start;
struct map *m;
@@ -733,20 +754,38 @@ void maps__insert(struct rb_root *maps, struct map *map)
}
rb_link_node(&map->rb_node, parent, p);
- rb_insert_color(&map->rb_node, maps);
+ rb_insert_color(&map->rb_node, &maps->entries);
+ map__get(map);
}
-void maps__remove(struct rb_root *maps, struct map *map)
+void maps__insert(struct maps *maps, struct map *map)
{
- rb_erase(&map->rb_node, maps);
+ pthread_rwlock_wrlock(&maps->lock);
+ __maps__insert(maps, map);
+ pthread_rwlock_unlock(&maps->lock);
}
-struct map *maps__find(struct rb_root *maps, u64 ip)
+static void __maps__remove(struct maps *maps, struct map *map)
{
- struct rb_node **p = &maps->rb_node;
- struct rb_node *parent = NULL;
+ rb_erase_init(&map->rb_node, &maps->entries);
+ map__put(map);
+}
+
+void maps__remove(struct maps *maps, struct map *map)
+{
+ pthread_rwlock_wrlock(&maps->lock);
+ __maps__remove(maps, map);
+ pthread_rwlock_unlock(&maps->lock);
+}
+
+struct map *maps__find(struct maps *maps, u64 ip)
+{
+ struct rb_node **p, *parent = NULL;
struct map *m;
+ pthread_rwlock_rdlock(&maps->lock);
+
+ p = &maps->entries.rb_node;
while (*p != NULL) {
parent = *p;
m = rb_entry(parent, struct map, rb_node);
@@ -755,22 +794,25 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
else if (ip >= m->end)
p = &(*p)->rb_right;
else
- return m;
+ goto out;
}
- return NULL;
+ m = NULL;
+out:
+ pthread_rwlock_unlock(&maps->lock);
+ return m;
}
-struct map *maps__first(struct rb_root *maps)
+struct map *maps__first(struct maps *maps)
{
- struct rb_node *first = rb_first(maps);
+ struct rb_node *first = rb_first(&maps->entries);
if (first)
return rb_entry(first, struct map, rb_node);
return NULL;
}
-struct map *maps__next(struct map *map)
+struct map *map__next(struct map *map)
{
struct rb_node *next = rb_next(&map->rb_node);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index ec19c59..57829e8 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -1,9 +1,11 @@
#ifndef __PERF_MAP_H
#define __PERF_MAP_H
+#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/rbtree.h>
+#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>
#include <linux/types.h>
@@ -32,7 +34,6 @@ struct map {
u64 start;
u64 end;
u8 /* enum map_type */ type;
- bool referenced;
bool erange_warned;
u32 priv;
u32 prot;
@@ -50,6 +51,7 @@ struct map {
struct dso *dso;
struct map_groups *groups;
+ atomic_t refcnt;
};
struct kmap {
@@ -57,11 +59,15 @@ struct kmap {
struct map_groups *kmaps;
};
+struct maps {
+ struct rb_root entries;
+ pthread_rwlock_t lock;
+};
+
struct map_groups {
- struct rb_root maps[MAP__NR_TYPES];
- struct list_head removed_maps[MAP__NR_TYPES];
+ struct maps maps[MAP__NR_TYPES];
struct machine *machine;
- int refcnt;
+ atomic_t refcnt;
};
struct map_groups *map_groups__new(struct machine *machine);
@@ -70,7 +76,8 @@ bool map_groups__empty(struct map_groups *mg);
static inline struct map_groups *map_groups__get(struct map_groups *mg)
{
- ++mg->refcnt;
+ if (mg)
+ atomic_inc(&mg->refcnt);
return mg;
}
@@ -124,7 +131,7 @@ struct thread;
*/
#define __map__for_each_symbol_by_name(map, sym_name, pos, filter) \
for (pos = map__find_symbol_by_name(map, sym_name, filter); \
- pos && strcmp(pos->name, sym_name) == 0; \
+ pos && arch__compare_symbol_names(pos->name, sym_name) == 0; \
pos = symbol__next_by_name(pos))
#define map__for_each_symbol_by_name(map, sym_name, pos) \
@@ -132,6 +139,7 @@ struct thread;
typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
+int arch__compare_symbol_names(const char *namea, const char *nameb);
void map__init(struct map *map, enum map_type type,
u64 start, u64 end, u64 pgoff, struct dso *dso);
struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -141,6 +149,24 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
void map__delete(struct map *map);
struct map *map__clone(struct map *map);
+
+static inline struct map *map__get(struct map *map)
+{
+ if (map)
+ atomic_inc(&map->refcnt);
+ return map;
+}
+
+void map__put(struct map *map);
+
+static inline void __map__zput(struct map **map)
+{
+ map__put(*map);
+ *map = NULL;
+}
+
+#define map__zput(map) __map__zput(&map)
+
int map__overlap(struct map *l, struct map *r);
size_t map__fprintf(struct map *map, FILE *fp);
size_t map__fprintf_dsoname(struct map *map, FILE *fp);
@@ -159,11 +185,11 @@ void map__reloc_vmlinux(struct map *map);
size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
FILE *fp);
-void maps__insert(struct rb_root *maps, struct map *map);
-void maps__remove(struct rb_root *maps, struct map *map);
-struct map *maps__find(struct rb_root *maps, u64 addr);
-struct map *maps__first(struct rb_root *maps);
-struct map *maps__next(struct map *map);
+void maps__insert(struct maps *maps, struct map *map);
+void maps__remove(struct maps *maps, struct map *map);
+struct map *maps__find(struct maps *maps, u64 addr);
+struct map *maps__first(struct maps *maps);
+struct map *map__next(struct map *map);
void map_groups__init(struct map_groups *mg, struct machine *machine);
void map_groups__exit(struct map_groups *mg);
int map_groups__clone(struct map_groups *mg,
@@ -198,7 +224,7 @@ static inline struct map *map_groups__first(struct map_groups *mg,
static inline struct map *map_groups__next(struct map *map)
{
- return maps__next(map);
+ return map__next(map);
}
struct symbol *map_groups__find_symbol(struct map_groups *mg,
@@ -230,6 +256,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
struct map *map_groups__find_by_name(struct map_groups *mg,
enum map_type type, const char *name);
-void map_groups__flush(struct map_groups *mg);
+bool __map__is_kernel(const struct map *map);
+
+static inline bool __map__is_kmodule(const struct map *map)
+{
+ return !__map__is_kernel(map);
+}
#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 52be201..b1b9e23 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -220,6 +220,9 @@ static int __ordered_events__flush(struct ordered_events *oe)
else if (last_ts <= limit)
oe->last = list_entry(head->prev, struct ordered_event, list);
+ if (show_progress)
+ ui_progress__finish();
+
return 0;
}
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
index 31ee02d..53ef006 100644
--- a/tools/perf/util/pager.c
+++ b/tools/perf/util/pager.c
@@ -50,11 +50,6 @@ void setup_pager(void)
if (!isatty(1))
return;
- if (!pager) {
- if (!pager_program)
- perf_config(perf_default_config, NULL);
- pager = pager_program;
- }
if (!pager)
pager = getenv("PAGER");
if (!(pager || access("/usr/bin/pager", X_OK)))
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
new file mode 100644
index 0000000..a3b1e13
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.c
@@ -0,0 +1,94 @@
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/parse-options.h"
+#include "util/parse-branch-options.h"
+
+#define BRANCH_OPT(n, m) \
+ { .name = n, .mode = (m) }
+
+#define BRANCH_END { .name = NULL }
+
+struct branch_mode {
+ const char *name;
+ int mode;
+};
+
+static const struct branch_mode branch_modes[] = {
+ BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+ BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+ BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+ BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+ BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+ BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+ BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+ BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+ BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+ BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+ BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
+ BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
+ BRANCH_END
+};
+
+int
+parse_branch_stack(const struct option *opt, const char *str, int unset)
+{
+#define ONLY_PLM \
+ (PERF_SAMPLE_BRANCH_USER |\
+ PERF_SAMPLE_BRANCH_KERNEL |\
+ PERF_SAMPLE_BRANCH_HV)
+
+ uint64_t *mode = (uint64_t *)opt->value;
+ const struct branch_mode *br;
+ char *s, *os = NULL, *p;
+ int ret = -1;
+
+ if (unset)
+ return 0;
+
+ /*
+ * cannot set it twice, -b + --branch-filter for instance
+ */
+ if (*mode)
+ return -1;
+
+ /* str may be NULL in case no arg is passed to -b */
+ if (str) {
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ for (br = branch_modes; br->name; br++) {
+ if (!strcasecmp(s, br->name))
+ break;
+ }
+ if (!br->name) {
+ ui__warning("unknown branch filter %s,"
+ " check man page\n", s);
+ goto error;
+ }
+
+ *mode |= br->mode;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+ }
+ ret = 0;
+
+ /* default to any branch */
+ if ((*mode & ~ONLY_PLM) == 0) {
+ *mode = PERF_SAMPLE_BRANCH_ANY;
+ }
+error:
+ free(os);
+ return ret;
+}
diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h
new file mode 100644
index 0000000..b9d9470
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.h
@@ -0,0 +1,5 @@
+#ifndef _PERF_PARSE_BRANCH_OPTIONS_H
+#define _PERF_PARSE_BRANCH_OPTIONS_H 1
+struct option;
+int parse_branch_stack(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index be06553..d826e6f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -17,6 +17,8 @@
#include "parse-events-flex.h"
#include "pmu.h"
#include "thread_map.h"
+#include "cpumap.h"
+#include "asm/bug.h"
#define MAX_NAME_LEN 100
@@ -274,7 +276,8 @@ const char *event_type(int type)
static struct perf_evsel *
__add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
- char *name, struct cpu_map *cpus)
+ char *name, struct cpu_map *cpus,
+ struct list_head *config_terms)
{
struct perf_evsel *evsel;
@@ -284,17 +287,24 @@ __add_event(struct list_head *list, int *idx,
if (!evsel)
return NULL;
- evsel->cpus = cpus;
+ if (cpus)
+ evsel->cpus = cpu_map__get(cpus);
+
if (name)
evsel->name = strdup(name);
+
+ if (config_terms)
+ list_splice(config_terms, &evsel->config_terms);
+
list_add_tail(&evsel->node, list);
return evsel;
}
static int add_event(struct list_head *list, int *idx,
- struct perf_event_attr *attr, char *name)
+ struct perf_event_attr *attr, char *name,
+ struct list_head *config_terms)
{
- return __add_event(list, idx, attr, name, NULL) ? 0 : -ENOMEM;
+ return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM;
}
static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -373,7 +383,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
memset(&attr, 0, sizeof(attr));
attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
attr.type = PERF_TYPE_HW_CACHE;
- return add_event(list, idx, &attr, name);
+ return add_event(list, idx, &attr, name, NULL);
}
static int add_tracepoint(struct list_head *list, int *idx,
@@ -535,19 +545,43 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
attr.type = PERF_TYPE_BREAKPOINT;
attr.sample_period = 1;
- return add_event(list, idx, &attr, NULL);
+ return add_event(list, idx, &attr, NULL, NULL);
+}
+
+static int check_type_val(struct parse_events_term *term,
+ struct parse_events_error *err,
+ int type)
+{
+ if (type == term->type_val)
+ return 0;
+
+ if (err) {
+ err->idx = term->err_val;
+ if (type == PARSE_EVENTS__TERM_TYPE_NUM)
+ err->str = strdup("expected numeric value");
+ else
+ err->str = strdup("expected string value");
+ }
+ return -EINVAL;
}
static int config_term(struct perf_event_attr *attr,
- struct parse_events_term *term)
+ struct parse_events_term *term,
+ struct parse_events_error *err)
{
-#define CHECK_TYPE_VAL(type) \
-do { \
- if (PARSE_EVENTS__TERM_TYPE_ ## type != term->type_val) \
- return -EINVAL; \
+#define CHECK_TYPE_VAL(type) \
+do { \
+ if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
+ return -EINVAL; \
} while (0)
switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_USER:
+ /*
+ * Always succeed for sysfs terms, as we dont know
+ * at this point what type they need to have.
+ */
+ return 0;
case PARSE_EVENTS__TERM_TYPE_CONFIG:
CHECK_TYPE_VAL(NUM);
attr->config = term->val.num;
@@ -562,7 +596,9 @@ do { \
break;
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
CHECK_TYPE_VAL(NUM);
- attr->sample_period = term->val.num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ CHECK_TYPE_VAL(NUM);
break;
case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
/*
@@ -570,6 +606,20 @@ do { \
* attr->branch_sample_type = term->val.num;
*/
break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ CHECK_TYPE_VAL(NUM);
+ if (term->val.num > 1) {
+ err->str = strdup("expected 0 or 1");
+ err->idx = term->err_val;
+ return -EINVAL;
+ }
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ CHECK_TYPE_VAL(NUM);
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -582,32 +632,83 @@ do { \
}
static int config_attr(struct perf_event_attr *attr,
- struct list_head *head, int fail)
+ struct list_head *head,
+ struct parse_events_error *err)
{
struct parse_events_term *term;
list_for_each_entry(term, head, list)
- if (config_term(attr, term) && fail)
+ if (config_term(attr, term, err))
return -EINVAL;
return 0;
}
-int parse_events_add_numeric(struct list_head *list, int *idx,
+static int get_config_terms(struct list_head *head_config,
+ struct list_head *head_terms __maybe_unused)
+{
+#define ADD_CONFIG_TERM(__type, __name, __val) \
+do { \
+ struct perf_evsel_config_term *__t; \
+ \
+ __t = zalloc(sizeof(*__t)); \
+ if (!__t) \
+ return -ENOMEM; \
+ \
+ INIT_LIST_HEAD(&__t->list); \
+ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
+ __t->val.__name = __val; \
+ list_add_tail(&__t->list, head_terms); \
+} while (0)
+
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ ADD_CONFIG_TERM(PERIOD, period, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ ADD_CONFIG_TERM(FREQ, freq, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ ADD_CONFIG_TERM(TIME, time, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+ break;
+ default:
+ break;
+ }
+ }
+#undef ADD_EVSEL_CONFIG
+ return 0;
+}
+
+int parse_events_add_numeric(struct parse_events_evlist *data,
+ struct list_head *list,
u32 type, u64 config,
struct list_head *head_config)
{
struct perf_event_attr attr;
+ LIST_HEAD(config_terms);
memset(&attr, 0, sizeof(attr));
attr.type = type;
attr.config = config;
- if (head_config &&
- config_attr(&attr, head_config, 1))
- return -EINVAL;
+ if (head_config) {
+ if (config_attr(&attr, head_config, data->error))
+ return -EINVAL;
- return add_event(list, idx, &attr, NULL);
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }
+
+ return add_event(list, &data->idx, &attr, NULL, &config_terms);
}
static int parse_events__is_name_term(struct parse_events_term *term)
@@ -626,13 +727,15 @@ static char *pmu_event_name(struct list_head *head_terms)
return NULL;
}
-int parse_events_add_pmu(struct list_head *list, int *idx,
- char *name, struct list_head *head_config)
+int parse_events_add_pmu(struct parse_events_evlist *data,
+ struct list_head *list, char *name,
+ struct list_head *head_config)
{
struct perf_event_attr attr;
struct perf_pmu_info info;
struct perf_pmu *pmu;
struct perf_evsel *evsel;
+ LIST_HEAD(config_terms);
pmu = perf_pmu__find(name);
if (!pmu)
@@ -647,7 +750,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
if (!head_config) {
attr.type = pmu->type;
- evsel = __add_event(list, idx, &attr, NULL, pmu->cpus);
+ evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus, NULL);
return evsel ? 0 : -ENOMEM;
}
@@ -658,13 +761,18 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
* Configure hardcoded terms first, no need to check
* return value when called with fail == 0 ;)
*/
- config_attr(&attr, head_config, 0);
+ if (config_attr(&attr, head_config, data->error))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
- if (perf_pmu__config(pmu, &attr, head_config))
+ if (perf_pmu__config(pmu, &attr, head_config, data->error))
return -EINVAL;
- evsel = __add_event(list, idx, &attr, pmu_event_name(head_config),
- pmu->cpus);
+ evsel = __add_event(list, &data->idx, &attr,
+ pmu_event_name(head_config), pmu->cpus,
+ &config_terms);
if (evsel) {
evsel->unit = info.unit;
evsel->scale = info.scale;
@@ -1019,11 +1127,13 @@ int parse_events_terms(struct list_head *terms, const char *str)
return ret;
}
-int parse_events(struct perf_evlist *evlist, const char *str)
+int parse_events(struct perf_evlist *evlist, const char *str,
+ struct parse_events_error *err)
{
struct parse_events_evlist data = {
- .list = LIST_HEAD_INIT(data.list),
- .idx = evlist->nr_entries,
+ .list = LIST_HEAD_INIT(data.list),
+ .idx = evlist->nr_entries,
+ .error = err,
};
int ret;
@@ -1031,8 +1141,13 @@ int parse_events(struct perf_evlist *evlist, const char *str)
perf_pmu__parse_cleanup();
if (!ret) {
int entries = data.idx - evlist->nr_entries;
+ struct perf_evsel *last;
+
perf_evlist__splice_list_tail(evlist, &data.list, entries);
evlist->nr_groups += data.nr_groups;
+ last = perf_evlist__last(evlist);
+ last->cmdline_group_boundary = true;
+
return 0;
}
@@ -1044,43 +1159,177 @@ int parse_events(struct perf_evlist *evlist, const char *str)
return ret;
}
+#define MAX_WIDTH 1000
+static int get_term_width(void)
+{
+ struct winsize ws;
+
+ get_term_dimensions(&ws);
+ return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
+}
+
+static void parse_events_print_error(struct parse_events_error *err,
+ const char *event)
+{
+ const char *str = "invalid or unsupported event: ";
+ char _buf[MAX_WIDTH];
+ char *buf = (char *) event;
+ int idx = 0;
+
+ if (err->str) {
+ /* -2 for extra '' in the final fprintf */
+ int width = get_term_width() - 2;
+ int len_event = strlen(event);
+ int len_str, max_len, cut = 0;
+
+ /*
+ * Maximum error index indent, we will cut
+ * the event string if it's bigger.
+ */
+ int max_err_idx = 13;
+
+ /*
+ * Let's be specific with the message when
+ * we have the precise error.
+ */
+ str = "event syntax error: ";
+ len_str = strlen(str);
+ max_len = width - len_str;
+
+ buf = _buf;
+
+ /* We're cutting from the beggining. */
+ if (err->idx > max_err_idx)
+ cut = err->idx - max_err_idx;
+
+ strncpy(buf, event + cut, max_len);
+
+ /* Mark cut parts with '..' on both sides. */
+ if (cut)
+ buf[0] = buf[1] = '.';
+
+ if ((len_event - cut) > max_len) {
+ buf[max_len - 1] = buf[max_len - 2] = '.';
+ buf[max_len] = 0;
+ }
+
+ idx = len_str + err->idx - cut;
+ }
+
+ fprintf(stderr, "%s'%s'\n", str, buf);
+ if (idx) {
+ fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err->str);
+ if (err->help)
+ fprintf(stderr, "\n%s\n", err->help);
+ free(err->str);
+ free(err->help);
+ }
+
+ fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+}
+
+#undef MAX_WIDTH
+
int parse_events_option(const struct option *opt, const char *str,
int unset __maybe_unused)
{
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
- int ret = parse_events(evlist, str);
+ struct parse_events_error err = { .idx = 0, };
+ int ret = parse_events(evlist, str, &err);
+
+ if (ret)
+ parse_events_print_error(&err, str);
- if (ret) {
- fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
- fprintf(stderr, "Run 'perf list' for a list of valid events\n");
- }
return ret;
}
-int parse_filter(const struct option *opt, const char *str,
- int unset __maybe_unused)
+static int
+foreach_evsel_in_last_glob(struct perf_evlist *evlist,
+ int (*func)(struct perf_evsel *evsel,
+ const void *arg),
+ const void *arg)
{
- struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_evsel *last = NULL;
+ int err;
if (evlist->nr_entries > 0)
last = perf_evlist__last(evlist);
- if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
+ do {
+ err = (*func)(last, arg);
+ if (err)
+ return -1;
+ if (!last)
+ return 0;
+
+ if (last->node.prev == &evlist->entries)
+ return 0;
+ last = list_entry(last->node.prev, struct perf_evsel, node);
+ } while (!last->cmdline_group_boundary);
+
+ return 0;
+}
+
+static int set_filter(struct perf_evsel *evsel, const void *arg)
+{
+ const char *str = arg;
+
+ if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
fprintf(stderr,
"--filter option should follow a -e tracepoint option\n");
return -1;
}
- last->filter = strdup(str);
- if (last->filter == NULL) {
- fprintf(stderr, "not enough memory to hold filter string\n");
+ if (perf_evsel__append_filter(evsel, "&&", str) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int parse_filter(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+
+ return foreach_evsel_in_last_glob(evlist, set_filter,
+ (const void *)str);
+}
+
+static int add_exclude_perf_filter(struct perf_evsel *evsel,
+ const void *arg __maybe_unused)
+{
+ char new_filter[64];
+
+ if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ fprintf(stderr,
+ "--exclude-perf option should follow a -e tracepoint option\n");
+ return -1;
+ }
+
+ snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
+
+ if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
return -1;
}
return 0;
}
+int exclude_perf(const struct option *opt,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+
+ return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter,
+ NULL);
+}
+
static const char * const event_type_descriptors[] = {
"Hardware event",
"Software event",
@@ -1460,7 +1709,7 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term)
static int new_term(struct parse_events_term **_term, int type_val,
int type_term, char *config,
- char *str, u64 num)
+ char *str, u64 num, int err_term, int err_val)
{
struct parse_events_term *term;
@@ -1472,6 +1721,8 @@ static int new_term(struct parse_events_term **_term, int type_val,
term->type_val = type_val;
term->type_term = type_term;
term->config = config;
+ term->err_term = err_term;
+ term->err_val = err_val;
switch (type_val) {
case PARSE_EVENTS__TERM_TYPE_NUM:
@@ -1490,17 +1741,29 @@ static int new_term(struct parse_events_term **_term, int type_val,
}
int parse_events_term__num(struct parse_events_term **term,
- int type_term, char *config, u64 num)
+ int type_term, char *config, u64 num,
+ void *loc_term_, void *loc_val_)
{
+ YYLTYPE *loc_term = loc_term_;
+ YYLTYPE *loc_val = loc_val_;
+
return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
- config, NULL, num);
+ config, NULL, num,
+ loc_term ? loc_term->first_column : 0,
+ loc_val ? loc_val->first_column : 0);
}
int parse_events_term__str(struct parse_events_term **term,
- int type_term, char *config, char *str)
+ int type_term, char *config, char *str,
+ void *loc_term_, void *loc_val_)
{
+ YYLTYPE *loc_term = loc_term_;
+ YYLTYPE *loc_val = loc_val_;
+
return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
- config, str, 0);
+ config, str, 0,
+ loc_term ? loc_term->first_column : 0,
+ loc_val ? loc_val->first_column : 0);
}
int parse_events_term__sym_hw(struct parse_events_term **term,
@@ -1514,18 +1777,20 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
if (config)
return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
PARSE_EVENTS__TERM_TYPE_USER, config,
- (char *) sym->symbol, 0);
+ (char *) sym->symbol, 0, 0, 0);
else
return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
PARSE_EVENTS__TERM_TYPE_USER,
- (char *) "event", (char *) sym->symbol, 0);
+ (char *) "event", (char *) sym->symbol,
+ 0, 0, 0);
}
int parse_events_term__clone(struct parse_events_term **new,
struct parse_events_term *term)
{
return new_term(new, term->type_val, term->type_term, term->config,
- term->val.str, term->val.num);
+ term->val.str, term->val.num,
+ term->err_term, term->err_val);
}
void parse_events__free_terms(struct list_head *terms)
@@ -1535,3 +1800,15 @@ void parse_events__free_terms(struct list_head *terms)
list_for_each_entry_safe(term, h, terms, list)
free(term);
}
+
+void parse_events_evlist_error(struct parse_events_evlist *data,
+ int idx, const char *str)
+{
+ struct parse_events_error *err = data->error;
+
+ if (!err)
+ return;
+ err->idx = idx;
+ err->str = strdup(str);
+ WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
+}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 52a2dda..a09b0e2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -12,6 +12,7 @@
struct list_head;
struct perf_evsel;
struct perf_evlist;
+struct parse_events_error;
struct option;
@@ -29,9 +30,11 @@ const char *event_type(int type);
extern int parse_events_option(const struct option *opt, const char *str,
int unset);
-extern int parse_events(struct perf_evlist *evlist, const char *str);
+extern int parse_events(struct perf_evlist *evlist, const char *str,
+ struct parse_events_error *error);
extern int parse_events_terms(struct list_head *terms, const char *str);
extern int parse_filter(const struct option *opt, const char *str, int unset);
+extern int exclude_perf(const struct option *opt, const char *arg, int unset);
#define EVENTS_HELP_MAX (128*1024)
@@ -59,7 +62,11 @@ enum {
PARSE_EVENTS__TERM_TYPE_CONFIG2,
PARSE_EVENTS__TERM_TYPE_NAME,
PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
+ PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
+ PARSE_EVENTS__TERM_TYPE_TIME,
+ PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+ PARSE_EVENTS__TERM_TYPE_STACKSIZE,
};
struct parse_events_term {
@@ -72,12 +79,23 @@ struct parse_events_term {
int type_term;
struct list_head list;
bool used;
+
+ /* error string indexes for within parsed string */
+ int err_term;
+ int err_val;
+};
+
+struct parse_events_error {
+ int idx; /* index in the parsed string */
+ char *str; /* string to display at the index */
+ char *help; /* optional help string */
};
struct parse_events_evlist {
- struct list_head list;
- int idx;
- int nr_groups;
+ struct list_head list;
+ int idx;
+ int nr_groups;
+ struct parse_events_error *error;
};
struct parse_events_terms {
@@ -85,10 +103,12 @@ struct parse_events_terms {
};
int parse_events__is_hardcoded_term(struct parse_events_term *term);
-int parse_events_term__num(struct parse_events_term **_term,
- int type_term, char *config, u64 num);
-int parse_events_term__str(struct parse_events_term **_term,
- int type_term, char *config, char *str);
+int parse_events_term__num(struct parse_events_term **term,
+ int type_term, char *config, u64 num,
+ void *loc_term, void *loc_val);
+int parse_events_term__str(struct parse_events_term **term,
+ int type_term, char *config, char *str,
+ void *loc_term, void *loc_val);
int parse_events_term__sym_hw(struct parse_events_term **term,
char *config, unsigned idx);
int parse_events_term__clone(struct parse_events_term **new,
@@ -99,21 +119,24 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod);
int parse_events_name(struct list_head *list, char *name);
int parse_events_add_tracepoint(struct list_head *list, int *idx,
char *sys, char *event);
-int parse_events_add_numeric(struct list_head *list, int *idx,
+int parse_events_add_numeric(struct parse_events_evlist *data,
+ struct list_head *list,
u32 type, u64 config,
struct list_head *head_config);
int parse_events_add_cache(struct list_head *list, int *idx,
char *type, char *op_result1, char *op_result2);
int parse_events_add_breakpoint(struct list_head *list, int *idx,
void *ptr, char *type, u64 len);
-int parse_events_add_pmu(struct list_head *list, int *idx,
- char *pmu , struct list_head *head_config);
+int parse_events_add_pmu(struct parse_events_evlist *data,
+ struct list_head *list, char *name,
+ struct list_head *head_config);
enum perf_pmu_event_symbol_type
perf_pmu__parse_check(const char *name);
void parse_events__set_leader(char *name, struct list_head *list);
void parse_events_update_lists(struct list_head *list_event,
struct list_head *list_all);
-void parse_events_error(void *data, void *scanner, char const *msg);
+void parse_events_evlist_error(struct parse_events_evlist *data,
+ int idx, const char *str);
void print_events(const char *event_glob, bool name_only);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 8895cf3..936d566 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -3,6 +3,8 @@
%option bison-bridge
%option prefix="parse_events_"
%option stack
+%option bison-locations
+%option yylineno
%{
#include <errno.h>
@@ -51,6 +53,18 @@ static int str(yyscan_t scanner, int token)
return token;
}
+#define REWIND(__alloc) \
+do { \
+ YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \
+ char *text = parse_events_get_text(yyscanner); \
+ \
+ if (__alloc) \
+ __yylval->str = strdup(text); \
+ \
+ yycolumn -= strlen(text); \
+ yyless(0); \
+} while (0)
+
static int pmu_str_check(yyscan_t scanner)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -85,6 +99,13 @@ static int term(yyscan_t scanner, int type)
return PE_TERM;
}
+#define YY_USER_ACTION \
+do { \
+ yylloc->last_column = yylloc->first_column; \
+ yylloc->first_column = yycolumn; \
+ yycolumn += yyleng; \
+} while (0);
+
%}
%x mem
@@ -98,8 +119,8 @@ event [^,{}/]+
num_dec [0-9]+
num_hex 0x[a-fA-F0-9]+
num_raw_hex [a-fA-F0-9]+
-name [a-zA-Z_*?][a-zA-Z0-9_*?]*
-name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?]*
+name [a-zA-Z_*?][a-zA-Z0-9_*?.]*
+name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]*
/* If you add a modifier you need to update check_modifier() */
modifier_event [ukhpGHSDI]+
modifier_bp [rwx]{1,3}
@@ -119,6 +140,12 @@ modifier_bp [rwx]{1,3}
if (start_token) {
parse_events_set_extra(NULL, yyscanner);
+ /*
+ * The flex parser does not init locations variable
+ * via the scan_string interface, so we need do the
+ * init in here.
+ */
+ yycolumn = 0;
return start_token;
}
}
@@ -127,30 +154,39 @@ modifier_bp [rwx]{1,3}
<event>{
{group} {
- BEGIN(INITIAL); yyless(0);
+ BEGIN(INITIAL);
+ REWIND(0);
}
{event_pmu} |
{event} {
- str(yyscanner, PE_EVENT_NAME);
- BEGIN(INITIAL); yyless(0);
+ BEGIN(INITIAL);
+ REWIND(1);
return PE_EVENT_NAME;
}
-. |
<<EOF>> {
- BEGIN(INITIAL); yyless(0);
+ BEGIN(INITIAL);
+ REWIND(0);
}
}
<config>{
+ /*
+ * Please update formats_error_string any time
+ * new static term is added.
+ */
config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
+freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 72def07..591905a 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -2,6 +2,7 @@
%parse-param {void *_data}
%parse-param {void *scanner}
%lex-param {void* scanner}
+%locations
%{
@@ -14,8 +15,6 @@
#include "parse-events.h"
#include "parse-events-bison.h"
-extern int parse_events_lex (YYSTYPE* lvalp, void* scanner);
-
#define ABORT_ON(val) \
do { \
if (val) \
@@ -208,7 +207,7 @@ PE_NAME '/' event_config '/'
struct list_head *list;
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, $3));
+ ABORT_ON(parse_events_add_pmu(data, list, $1, $3));
parse_events__free_terms($3);
$$ = list;
}
@@ -219,7 +218,7 @@ PE_NAME '/' '/'
struct list_head *list;
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, NULL));
+ ABORT_ON(parse_events_add_pmu(data, list, $1, NULL));
$$ = list;
}
|
@@ -232,11 +231,11 @@ PE_KERNEL_PMU_EVENT sep_dc
ALLOC_LIST(head);
ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, 1));
+ $1, 1, &@1, NULL));
list_add_tail(&term->list, head);
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head));
+ ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
parse_events__free_terms(head);
$$ = list;
}
@@ -252,7 +251,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
ALLOC_LIST(head);
ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- &pmu_name, 1));
+ &pmu_name, 1, &@1, NULL));
list_add_tail(&term->list, head);
ALLOC_LIST(list);
@@ -275,8 +274,7 @@ value_sym '/' event_config '/'
int config = $1 & 255;
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_numeric(list, &data->idx,
- type, config, $3));
+ ABORT_ON(parse_events_add_numeric(data, list, type, config, $3));
parse_events__free_terms($3);
$$ = list;
}
@@ -289,8 +287,7 @@ value_sym sep_slash_dc
int config = $1 & 255;
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_numeric(list, &data->idx,
- type, config, NULL));
+ ABORT_ON(parse_events_add_numeric(data, list, type, config, NULL));
$$ = list;
}
@@ -389,7 +386,15 @@ PE_NAME ':' PE_NAME
struct list_head *list;
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_tracepoint(list, &data->idx, $1, $3));
+ if (parse_events_add_tracepoint(list, &data->idx, $1, $3)) {
+ struct parse_events_error *error = data->error;
+
+ if (error) {
+ error->idx = @1.first_column;
+ error->str = strdup("unknown tracepoint");
+ }
+ return -1;
+ }
$$ = list;
}
@@ -400,7 +405,7 @@ PE_VALUE ':' PE_VALUE
struct list_head *list;
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_numeric(list, &data->idx, (u32)$1, $3, NULL));
+ ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL));
$$ = list;
}
@@ -411,8 +416,7 @@ PE_RAW
struct list_head *list;
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_numeric(list, &data->idx,
- PERF_TYPE_RAW, $1, NULL));
+ ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL));
$$ = list;
}
@@ -450,7 +454,7 @@ PE_NAME '=' PE_NAME
struct parse_events_term *term;
ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, $3));
+ $1, $3, &@1, &@3));
$$ = term;
}
|
@@ -459,7 +463,7 @@ PE_NAME '=' PE_VALUE
struct parse_events_term *term;
ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, $3));
+ $1, $3, &@1, &@3));
$$ = term;
}
|
@@ -477,7 +481,7 @@ PE_NAME
struct parse_events_term *term;
ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, 1));
+ $1, 1, &@1, NULL));
$$ = term;
}
|
@@ -494,7 +498,7 @@ PE_TERM '=' PE_NAME
{
struct parse_events_term *term;
- ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3));
+ ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3));
$$ = term;
}
|
@@ -502,7 +506,7 @@ PE_TERM '=' PE_VALUE
{
struct parse_events_term *term;
- ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3));
+ ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, &@1, &@3));
$$ = term;
}
|
@@ -510,7 +514,7 @@ PE_TERM
{
struct parse_events_term *term;
- ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1));
+ ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
$$ = term;
}
@@ -520,7 +524,9 @@ sep_slash_dc: '/' | ':' |
%%
-void parse_events_error(void *data __maybe_unused, void *scanner __maybe_unused,
+void parse_events_error(YYLTYPE *loc, void *data,
+ void *scanner __maybe_unused,
char const *msg __maybe_unused)
{
+ parse_events_evlist_error(data, loc->last_column, "parser error");
}
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 59561fd..367d8b8 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -123,6 +123,10 @@ struct option {
#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) }
+#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
+ { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
+ .value = check_vtype(v, const char **), (a), .help = (h), \
+ .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
#define OPT_DATE(s, l, v, h) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
new file mode 100644
index 0000000..4f2c1c2
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.c
@@ -0,0 +1,71 @@
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/parse-options.h"
+#include "util/parse-regs-options.h"
+
+int
+parse_regs(const struct option *opt, const char *str, int unset)
+{
+ uint64_t *mode = (uint64_t *)opt->value;
+ const struct sample_reg *r;
+ char *s, *os = NULL, *p;
+ int ret = -1;
+
+ if (unset)
+ return 0;
+
+ /*
+ * cannot set it twice
+ */
+ if (*mode)
+ return -1;
+
+ /* str may be NULL in case no arg is passed to -I */
+ if (str) {
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ if (!strcmp(s, "?")) {
+ fprintf(stderr, "available registers: ");
+ for (r = sample_reg_masks; r->name; r++) {
+ fprintf(stderr, "%s ", r->name);
+ }
+ fputc('\n', stderr);
+ /* just printing available regs */
+ return -1;
+ }
+ for (r = sample_reg_masks; r->name; r++) {
+ if (!strcasecmp(s, r->name))
+ break;
+ }
+ if (!r->name) {
+ ui__warning("unknown register %s,"
+ " check man page\n", s);
+ goto error;
+ }
+
+ *mode |= r->mask;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+ }
+ ret = 0;
+
+ /* default to all possible regs */
+ if (*mode == 0)
+ *mode = PERF_REGS_MASK;
+error:
+ free(os);
+ return ret;
+}
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h
new file mode 100644
index 0000000..7d762b1
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.h
@@ -0,0 +1,5 @@
+#ifndef _PERF_PARSE_REGS_OPTIONS_H
+#define _PERF_PARSE_REGS_OPTIONS_H 1
+struct option;
+int parse_regs(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_REGS_OPTIONS_H */
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 43168fb..885e8ac 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -2,6 +2,10 @@
#include "perf_regs.h"
#include "event.h"
+const struct sample_reg __weak sample_reg_masks[] = {
+ SMPL_REG_END
+};
+
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
{
int i, idx = 0;
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 980dbf7..2984dcc 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -5,6 +5,15 @@
struct regs_dump;
+struct sample_reg {
+ const char *name;
+ uint64_t mask;
+};
+#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
+#define SMPL_REG_END { .name = NULL }
+
+extern const struct sample_reg sample_reg_masks[];
+
#ifdef HAVE_PERF_REGS_SUPPORT
#include <perf_regs.h>
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 4841167..89c91a1 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1,4 +1,5 @@
#include <linux/list.h>
+#include <linux/compiler.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
@@ -112,7 +113,11 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
if (sret < 0)
goto error;
- scale[sret] = '\0';
+ if (scale[sret - 1] == '\n')
+ scale[sret - 1] = '\0';
+ else
+ scale[sret] = '\0';
+
/*
* save current locale
*/
@@ -154,7 +159,10 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n
close(fd);
- alias->unit[sret] = '\0';
+ if (alias->unit[sret - 1] == '\n')
+ alias->unit[sret - 1] = '\0';
+ else
+ alias->unit[sret] = '\0';
return 0;
error:
@@ -198,17 +206,12 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
return 0;
}
-static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file)
+static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
+ char *desc __maybe_unused, char *val)
{
struct perf_pmu_alias *alias;
- char buf[256];
int ret;
- ret = fread(buf, 1, sizeof(buf), file);
- if (ret == 0)
- return -EINVAL;
- buf[ret] = 0;
-
alias = malloc(sizeof(*alias));
if (!alias)
return -ENOMEM;
@@ -218,26 +221,43 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI
alias->unit[0] = '\0';
alias->per_pkg = false;
- ret = parse_events_terms(&alias->terms, buf);
+ ret = parse_events_terms(&alias->terms, val);
if (ret) {
+ pr_err("Cannot parse alias %s: %d\n", val, ret);
free(alias);
return ret;
}
alias->name = strdup(name);
- /*
- * load unit name and scale if available
- */
- perf_pmu__parse_unit(alias, dir, name);
- perf_pmu__parse_scale(alias, dir, name);
- perf_pmu__parse_per_pkg(alias, dir, name);
- perf_pmu__parse_snapshot(alias, dir, name);
+ if (dir) {
+ /*
+ * load unit name and scale if available
+ */
+ perf_pmu__parse_unit(alias, dir, name);
+ perf_pmu__parse_scale(alias, dir, name);
+ perf_pmu__parse_per_pkg(alias, dir, name);
+ perf_pmu__parse_snapshot(alias, dir, name);
+ }
list_add_tail(&alias->list, list);
return 0;
}
+static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file)
+{
+ char buf[256];
+ int ret;
+
+ ret = fread(buf, 1, sizeof(buf), file);
+ if (ret == 0)
+ return -EINVAL;
+
+ buf[ret] = 0;
+
+ return __perf_pmu__new_alias(list, dir, name, NULL, buf);
+}
+
static inline bool pmu_alias_info_file(char *name)
{
size_t len;
@@ -429,7 +449,7 @@ static struct cpu_map *pmu_cpumask(const char *name)
return cpus;
}
-struct perf_event_attr *__attribute__((weak))
+struct perf_event_attr * __weak
perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
return NULL;
@@ -518,7 +538,7 @@ struct perf_pmu *perf_pmu__find(const char *name)
}
static struct perf_pmu_format *
-pmu_find_format(struct list_head *formats, char *name)
+pmu_find_format(struct list_head *formats, const char *name)
{
struct perf_pmu_format *format;
@@ -529,6 +549,21 @@ pmu_find_format(struct list_head *formats, char *name)
return NULL;
}
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
+{
+ struct perf_pmu_format *format = pmu_find_format(formats, name);
+ __u64 bits = 0;
+ int fbit;
+
+ if (!format)
+ return 0;
+
+ for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
+ bits |= 1ULL << fbit;
+
+ return bits;
+}
+
/*
* Sets value based on the format definition (format parameter)
* and unformated value (value parameter).
@@ -550,6 +585,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
}
}
+static __u64 pmu_format_max_value(const unsigned long *format)
+{
+ int w;
+
+ w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+ if (!w)
+ return 0;
+ if (w < 64)
+ return (1ULL << w) - 1;
+ return -1;
+}
+
/*
* Term is a string term, and might be a param-term. Try to look up it's value
* in the remaining terms.
@@ -579,6 +626,40 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
return -1;
}
+static char *formats_error_string(struct list_head *formats)
+{
+ struct perf_pmu_format *format;
+ char *err, *str;
+ static const char *static_terms = "config,config1,config2,name,"
+ "period,freq,branch_type,time,"
+ "call-graph,stack-size\n";
+ unsigned i = 0;
+
+ if (!asprintf(&str, "valid terms:"))
+ return NULL;
+
+ /* sysfs exported terms */
+ list_for_each_entry(format, formats, list) {
+ char c = i++ ? ',' : ' ';
+
+ err = str;
+ if (!asprintf(&str, "%s%c%s", err, c, format->name))
+ goto fail;
+ free(err);
+ }
+
+ /* static terms */
+ err = str;
+ if (!asprintf(&str, "%s,%s", err, static_terms))
+ goto fail;
+
+ free(err);
+ return str;
+fail:
+ free(err);
+ return NULL;
+}
+
/*
* Setup one of config[12] attr members based on the
* user input data - term parameter.
@@ -587,11 +668,11 @@ static int pmu_config_term(struct list_head *formats,
struct perf_event_attr *attr,
struct parse_events_term *term,
struct list_head *head_terms,
- bool zero)
+ bool zero, struct parse_events_error *err)
{
struct perf_pmu_format *format;
__u64 *vp;
- __u64 val;
+ __u64 val, max_val;
/*
* If this is a parameter we've already used for parameterized-eval,
@@ -611,6 +692,11 @@ static int pmu_config_term(struct list_head *formats,
if (!format) {
if (verbose)
printf("Invalid event/parameter '%s'\n", term->config);
+ if (err) {
+ err->idx = term->err_term;
+ err->str = strdup("unknown term");
+ err->help = formats_error_string(formats);
+ }
return -EINVAL;
}
@@ -636,9 +722,14 @@ static int pmu_config_term(struct list_head *formats,
val = term->val.num;
else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
if (strcmp(term->val.str, "?")) {
- if (verbose)
+ if (verbose) {
pr_info("Invalid sysfs entry %s=%s\n",
term->config, term->val.str);
+ }
+ if (err) {
+ err->idx = term->err_val;
+ err->str = strdup("expected numeric value");
+ }
return -EINVAL;
}
@@ -647,6 +738,22 @@ static int pmu_config_term(struct list_head *formats,
} else
return -EINVAL;
+ max_val = pmu_format_max_value(format->bits);
+ if (val > max_val) {
+ if (err) {
+ err->idx = term->err_val;
+ if (asprintf(&err->str,
+ "value too big for format, maximum is %llu",
+ (unsigned long long)max_val) < 0)
+ err->str = strdup("value too big for format");
+ return -EINVAL;
+ }
+ /*
+ * Assume we don't care if !err, in which case the value will be
+ * silently truncated.
+ */
+ }
+
pmu_format_value(format->bits, val, vp, zero);
return 0;
}
@@ -654,12 +761,13 @@ static int pmu_config_term(struct list_head *formats,
int perf_pmu__config_terms(struct list_head *formats,
struct perf_event_attr *attr,
struct list_head *head_terms,
- bool zero)
+ bool zero, struct parse_events_error *err)
{
struct parse_events_term *term;
list_for_each_entry(term, head_terms, list) {
- if (pmu_config_term(formats, attr, term, head_terms, zero))
+ if (pmu_config_term(formats, attr, term, head_terms,
+ zero, err))
return -EINVAL;
}
@@ -672,12 +780,14 @@ int perf_pmu__config_terms(struct list_head *formats,
* 2) pmu format definitions - specified by pmu parameter
*/
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
- struct list_head *head_terms)
+ struct list_head *head_terms,
+ struct parse_events_error *err)
{
bool zero = !!pmu->default_config;
attr->type = pmu->type;
- return perf_pmu__config_terms(&pmu->format, attr, head_terms, zero);
+ return perf_pmu__config_terms(&pmu->format, attr, head_terms,
+ zero, err);
}
static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 6b1249f..5d7e844 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -4,6 +4,7 @@
#include <linux/bitmap.h>
#include <linux/perf_event.h>
#include <stdbool.h>
+#include "parse-events.h"
enum {
PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -47,11 +48,13 @@ struct perf_pmu_alias {
struct perf_pmu *perf_pmu__find(const char *name);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
- struct list_head *head_terms);
+ struct list_head *head_terms,
+ struct parse_events_error *error);
int perf_pmu__config_terms(struct list_head *formats,
struct perf_event_attr *attr,
struct list_head *head_terms,
- bool zero);
+ bool zero, struct parse_events_error *error);
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
struct perf_pmu_info *info);
struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d8bb616..eb5f18b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -45,20 +45,18 @@
#include "trace-event.h" /* For __maybe_unused */
#include "probe-event.h"
#include "probe-finder.h"
+#include "probe-file.h"
#include "session.h"
#define MAX_CMDLEN 256
#define PERFPROBE_GROUP "probe"
bool probe_event_dry_run; /* Dry run flag */
+struct probe_conf probe_conf;
#define semantic_error(msg ...) pr_err("Semantic error :" msg)
-/* If there is no space to write, returns -E2BIG. */
-static int e_snprintf(char *str, size_t size, const char *format, ...)
- __attribute__((format(printf, 3, 4)));
-
-static int e_snprintf(char *str, size_t size, const char *format, ...)
+int e_snprintf(char *str, size_t size, const char *format, ...)
{
int ret;
va_list ap;
@@ -71,7 +69,6 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
}
static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
-static void clear_probe_trace_event(struct probe_trace_event *tev);
static struct machine *host_machine;
/* Initialize symbol maps and path of vmlinux/modules */
@@ -161,18 +158,18 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc)
static struct map *kernel_get_module_map(const char *module)
{
- struct rb_node *nd;
struct map_groups *grp = &host_machine->kmaps;
+ struct maps *maps = &grp->maps[MAP__FUNCTION];
+ struct map *pos;
/* A file path -- this is an offline module */
if (module && strchr(module, '/'))
- return machine__new_module(host_machine, 0, module);
+ return machine__findnew_module_map(host_machine, 0, module);
if (!module)
module = "kernel";
- for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
- struct map *pos = rb_entry(nd, struct map, rb_node);
+ for (pos = maps__first(maps); pos; pos = map__next(pos)) {
if (strncmp(pos->dso->short_name + 1, module,
pos->dso->short_name_len - 2) == 0) {
return pos;
@@ -194,52 +191,11 @@ static void put_target_map(struct map *map, bool user)
{
if (map && user) {
/* Only the user map needs to be released */
- dso__delete(map->dso);
- map__delete(map);
+ map__put(map);
}
}
-static struct dso *kernel_get_module_dso(const char *module)
-{
- struct dso *dso;
- struct map *map;
- const char *vmlinux_name;
-
- if (module) {
- list_for_each_entry(dso, &host_machine->kernel_dsos.head,
- node) {
- if (strncmp(dso->short_name + 1, module,
- dso->short_name_len - 2) == 0)
- goto found;
- }
- pr_debug("Failed to find module %s.\n", module);
- return NULL;
- }
-
- map = host_machine->vmlinux_maps[MAP__FUNCTION];
- dso = map->dso;
-
- vmlinux_name = symbol_conf.vmlinux_name;
- if (vmlinux_name) {
- if (dso__load_vmlinux(dso, map, vmlinux_name, false, NULL) <= 0)
- return NULL;
- } else {
- if (dso__load_vmlinux_path(dso, map, NULL) <= 0) {
- pr_debug("Failed to load kernel map.\n");
- return NULL;
- }
- }
-found:
- return dso;
-}
-
-const char *kernel_get_module_path(const char *module)
-{
- struct dso *dso = kernel_get_module_dso(module);
- return (dso) ? dso->long_name : NULL;
-}
-
static int convert_exec_to_group(const char *exec, char **result)
{
char *ptr1, *ptr2, *exec_copy;
@@ -286,7 +242,59 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
clear_probe_trace_event(tevs + i);
}
+static bool kprobe_blacklist__listed(unsigned long address);
+static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
+{
+ u64 etext_addr;
+
+ /* Get the address of _etext for checking non-probable text symbol */
+ etext_addr = kernel_get_symbol_address_by_name("_etext", false);
+
+ if (etext_addr != 0 && etext_addr < address)
+ pr_warning("%s is out of .text, skip it.\n", symbol);
+ else if (kprobe_blacklist__listed(address))
+ pr_warning("%s is blacklisted function, skip it.\n", symbol);
+ else
+ return false;
+
+ return true;
+}
+
#ifdef HAVE_DWARF_SUPPORT
+
+static int kernel_get_module_dso(const char *module, struct dso **pdso)
+{
+ struct dso *dso;
+ struct map *map;
+ const char *vmlinux_name;
+ int ret = 0;
+
+ if (module) {
+ list_for_each_entry(dso, &host_machine->dsos.head, node) {
+ if (!dso->kernel)
+ continue;
+ if (strncmp(dso->short_name + 1, module,
+ dso->short_name_len - 2) == 0)
+ goto found;
+ }
+ pr_debug("Failed to find module %s.\n", module);
+ return -ENOENT;
+ }
+
+ map = host_machine->vmlinux_maps[MAP__FUNCTION];
+ dso = map->dso;
+
+ vmlinux_name = symbol_conf.vmlinux_name;
+ dso->load_errno = 0;
+ if (vmlinux_name)
+ ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL);
+ else
+ ret = dso__load_vmlinux_path(dso, map, NULL);
+found:
+ *pdso = dso;
+ return ret;
+}
+
/*
* Some binaries like glibc have special symbols which are on the symbol
* table, but not in the debuginfo. If we can find the address of the
@@ -344,15 +352,14 @@ out:
static int get_alternative_probe_event(struct debuginfo *dinfo,
struct perf_probe_event *pev,
- struct perf_probe_point *tmp,
- const char *target)
+ struct perf_probe_point *tmp)
{
int ret;
memcpy(tmp, &pev->point, sizeof(*tmp));
memset(&pev->point, 0, sizeof(pev->point));
ret = find_alternative_probe_point(dinfo, tmp, &pev->point,
- target, pev->uprobes);
+ pev->target, pev->uprobes);
if (ret < 0)
memcpy(&pev->point, tmp, sizeof(*tmp));
@@ -390,16 +397,25 @@ static int get_alternative_line_range(struct debuginfo *dinfo,
static struct debuginfo *open_debuginfo(const char *module, bool silent)
{
const char *path = module;
- struct debuginfo *ret;
+ char reason[STRERR_BUFSIZE];
+ struct debuginfo *ret = NULL;
+ struct dso *dso = NULL;
+ int err;
if (!module || !strchr(module, '/')) {
- path = kernel_get_module_path(module);
- if (!path) {
+ err = kernel_get_module_dso(module, &dso);
+ if (err < 0) {
+ if (!dso || dso->load_errno == 0) {
+ if (!strerror_r(-err, reason, STRERR_BUFSIZE))
+ strcpy(reason, "(unknown)");
+ } else
+ dso__strerror_load(dso, reason, STRERR_BUFSIZE);
if (!silent)
- pr_err("Failed to find path of %s module.\n",
- module ?: "kernel");
+ pr_err("Failed to find the path for %s: %s\n",
+ module ?: "kernel", reason);
return NULL;
}
+ path = dso->long_name;
}
ret = debuginfo__new(path);
if (!ret && !silent) {
@@ -413,6 +429,41 @@ static struct debuginfo *open_debuginfo(const char *module, bool silent)
return ret;
}
+/* For caching the last debuginfo */
+static struct debuginfo *debuginfo_cache;
+static char *debuginfo_cache_path;
+
+static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
+{
+ if ((debuginfo_cache_path && !strcmp(debuginfo_cache_path, module)) ||
+ (!debuginfo_cache_path && !module && debuginfo_cache))
+ goto out;
+
+ /* Copy module path */
+ free(debuginfo_cache_path);
+ if (module) {
+ debuginfo_cache_path = strdup(module);
+ if (!debuginfo_cache_path) {
+ debuginfo__delete(debuginfo_cache);
+ debuginfo_cache = NULL;
+ goto out;
+ }
+ }
+
+ debuginfo_cache = open_debuginfo(module, silent);
+ if (!debuginfo_cache)
+ zfree(&debuginfo_cache_path);
+out:
+ return debuginfo_cache;
+}
+
+static void debuginfo_cache__exit(void)
+{
+ debuginfo__delete(debuginfo_cache);
+ debuginfo_cache = NULL;
+ zfree(&debuginfo_cache_path);
+}
+
static int get_text_start_address(const char *exec, unsigned long *address)
{
@@ -464,7 +515,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
if (ret < 0)
goto error;
addr += stext;
- } else {
+ } else if (tp->symbol) {
addr = kernel_get_symbol_address_by_name(tp->symbol, false);
if (addr == 0)
goto error;
@@ -474,12 +525,11 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
tp->module ? : "kernel");
- dinfo = open_debuginfo(tp->module, verbose == 0);
- if (dinfo) {
+ dinfo = debuginfo_cache__open(tp->module, verbose == 0);
+ if (dinfo)
ret = debuginfo__find_probe_point(dinfo,
(unsigned long)addr, pp);
- debuginfo__delete(dinfo);
- } else
+ else
ret = -ENOENT;
if (ret > 0) {
@@ -558,7 +608,7 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
{
struct ref_reloc_sym *reloc_sym;
char *tmp;
- int i;
+ int i, skipped = 0;
if (uprobe)
return add_exec_to_probe_trace_events(tevs, ntevs, module);
@@ -574,31 +624,40 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
}
for (i = 0; i < ntevs; i++) {
- if (tevs[i].point.address && !tevs[i].point.retprobe) {
+ if (!tevs[i].point.address || tevs[i].point.retprobe)
+ continue;
+ /* If we found a wrong one, mark it by NULL symbol */
+ if (kprobe_warn_out_range(tevs[i].point.symbol,
+ tevs[i].point.address)) {
+ tmp = NULL;
+ skipped++;
+ } else {
tmp = strdup(reloc_sym->name);
if (!tmp)
return -ENOMEM;
- free(tevs[i].point.symbol);
- tevs[i].point.symbol = tmp;
- tevs[i].point.offset = tevs[i].point.address -
- reloc_sym->unrelocated_addr;
}
+ /* If we have no realname, use symbol for it */
+ if (!tevs[i].point.realname)
+ tevs[i].point.realname = tevs[i].point.symbol;
+ else
+ free(tevs[i].point.symbol);
+ tevs[i].point.symbol = tmp;
+ tevs[i].point.offset = tevs[i].point.address -
+ reloc_sym->unrelocated_addr;
}
- return 0;
+ return skipped;
}
/* Try to find perf_probe_event with debuginfo */
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
- struct probe_trace_event **tevs,
- int max_tevs, const char *target)
+ struct probe_trace_event **tevs)
{
bool need_dwarf = perf_probe_event_need_dwarf(pev);
struct perf_probe_point tmp;
struct debuginfo *dinfo;
int ntevs, ret = 0;
- dinfo = open_debuginfo(target, !need_dwarf);
-
+ dinfo = open_debuginfo(pev->target, !need_dwarf);
if (!dinfo) {
if (need_dwarf)
return -ENOENT;
@@ -608,13 +667,12 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
pr_debug("Try to find probe point from debuginfo.\n");
/* Searching trace events corresponding to a probe event */
- ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs);
+ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
if (ntevs == 0) { /* Not found, retry with an alternative */
- ret = get_alternative_probe_event(dinfo, pev, &tmp, target);
+ ret = get_alternative_probe_event(dinfo, pev, &tmp);
if (!ret) {
- ntevs = debuginfo__find_trace_events(dinfo, pev,
- tevs, max_tevs);
+ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
/*
* Write back to the original probe_event for
* setting appropriate (user given) event name
@@ -629,12 +687,15 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
if (ntevs > 0) { /* Succeeded to find trace events */
pr_debug("Found %d probe_trace_events.\n", ntevs);
ret = post_process_probe_trace_events(*tevs, ntevs,
- target, pev->uprobes);
- if (ret < 0) {
+ pev->target, pev->uprobes);
+ if (ret < 0 || ret == ntevs) {
clear_probe_trace_events(*tevs, ntevs);
zfree(tevs);
}
- return ret < 0 ? ret : ntevs;
+ if (ret != ntevs)
+ return ret < 0 ? ret : ntevs;
+ ntevs = 0;
+ /* Fall through */
}
if (ntevs == 0) { /* No error but failed to find probe point. */
@@ -644,9 +705,10 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
}
/* Error path : ntevs < 0 */
pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
- if (ntevs == -EBADF) {
- pr_warning("Warning: No dwarf info found in the vmlinux - "
- "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
+ if (ntevs < 0) {
+ if (ntevs == -EBADF)
+ pr_warning("Warning: No dwarf info found in the vmlinux - "
+ "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
if (!need_dwarf) {
pr_debug("Trying to use symbols.\n");
return 0;
@@ -809,8 +871,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user)
static int show_available_vars_at(struct debuginfo *dinfo,
struct perf_probe_event *pev,
- int max_vls, struct strfilter *_filter,
- bool externs, const char *target)
+ struct strfilter *_filter)
{
char *buf;
int ret, i, nvars;
@@ -824,13 +885,12 @@ static int show_available_vars_at(struct debuginfo *dinfo,
return -EINVAL;
pr_debug("Searching variables at %s\n", buf);
- ret = debuginfo__find_available_vars_at(dinfo, pev, &vls,
- max_vls, externs);
+ ret = debuginfo__find_available_vars_at(dinfo, pev, &vls);
if (!ret) { /* Not found, retry with an alternative */
- ret = get_alternative_probe_event(dinfo, pev, &tmp, target);
+ ret = get_alternative_probe_event(dinfo, pev, &tmp);
if (!ret) {
ret = debuginfo__find_available_vars_at(dinfo, pev,
- &vls, max_vls, externs);
+ &vls);
/* Release the old probe_point */
clear_perf_probe_point(&tmp);
}
@@ -877,8 +937,7 @@ end:
/* Show available variables on given probe point */
int show_available_vars(struct perf_probe_event *pevs, int npevs,
- int max_vls, const char *module,
- struct strfilter *_filter, bool externs)
+ struct strfilter *_filter)
{
int i, ret = 0;
struct debuginfo *dinfo;
@@ -887,7 +946,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
if (ret < 0)
return ret;
- dinfo = open_debuginfo(module, false);
+ dinfo = open_debuginfo(pevs->target, false);
if (!dinfo) {
ret = -ENOENT;
goto out;
@@ -896,8 +955,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
setup_pager();
for (i = 0; i < npevs && ret >= 0; i++)
- ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter,
- externs, module);
+ ret = show_available_vars_at(dinfo, &pevs[i], _filter);
debuginfo__delete(dinfo);
out:
@@ -907,6 +965,10 @@ out:
#else /* !HAVE_DWARF_SUPPORT */
+static void debuginfo_cache__exit(void)
+{
+}
+
static int
find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
struct perf_probe_point *pp __maybe_unused,
@@ -916,9 +978,7 @@ find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
}
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
- struct probe_trace_event **tevs __maybe_unused,
- int max_tevs __maybe_unused,
- const char *target __maybe_unused)
+ struct probe_trace_event **tevs __maybe_unused)
{
if (perf_probe_event_need_dwarf(pev)) {
pr_warning("Debuginfo-analysis is not supported.\n");
@@ -937,10 +997,8 @@ int show_line_range(struct line_range *lr __maybe_unused,
}
int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
- int npevs __maybe_unused, int max_vls __maybe_unused,
- const char *module __maybe_unused,
- struct strfilter *filter __maybe_unused,
- bool externs __maybe_unused)
+ int npevs __maybe_unused,
+ struct strfilter *filter __maybe_unused)
{
pr_warning("Debuginfo-analysis is not supported.\n");
return -ENOSYS;
@@ -980,6 +1038,18 @@ static int parse_line_num(char **ptr, int *val, const char *what)
return 0;
}
+/* Check the name is good for event, group or function */
+static bool is_c_func_name(const char *name)
+{
+ if (!isalpha(*name) && *name != '_')
+ return false;
+ while (*++name != '\0') {
+ if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+ return false;
+ }
+ return true;
+}
+
/*
* Stuff 'lr' according to the line range described by 'arg'.
* The line range syntax is described by:
@@ -1048,10 +1118,15 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
goto err;
}
lr->function = name;
- } else if (strchr(name, '.'))
+ } else if (strchr(name, '/') || strchr(name, '.'))
lr->file = name;
- else
+ else if (is_c_func_name(name))/* We reuse it for checking funcname */
lr->function = name;
+ else { /* Invalid name */
+ semantic_error("'%s' is not a valid function name.\n", name);
+ err = -EINVAL;
+ goto err;
+ }
return 0;
err:
@@ -1059,24 +1134,13 @@ err:
return err;
}
-/* Check the name is good for event/group */
-static bool check_event_name(const char *name)
-{
- if (!isalpha(*name) && *name != '_')
- return false;
- while (*++name != '\0') {
- if (!isalpha(*name) && !isdigit(*name) && *name != '_')
- return false;
- }
- return true;
-}
-
/* Parse probepoint definition. */
static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
{
struct perf_probe_point *pp = &pev->point;
char *ptr, *tmp;
char c, nc = 0;
+ bool file_spec = false;
/*
* <Syntax>
* perf probe [EVENT=]SRC[:LN|;PTN]
@@ -1084,6 +1148,8 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
*
* TODO:Group name support
*/
+ if (!arg)
+ return -EINVAL;
ptr = strpbrk(arg, ";=@+%");
if (ptr && *ptr == '=') { /* Event name */
@@ -1093,7 +1159,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
semantic_error("Group name is not supported yet.\n");
return -ENOTSUP;
}
- if (!check_event_name(arg)) {
+ if (!is_c_func_name(arg)) {
semantic_error("%s is bad for event name -it must "
"follow C symbol-naming rule.\n", arg);
return -EINVAL;
@@ -1105,22 +1171,60 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
arg = tmp;
}
+ /*
+ * Check arg is function or file name and copy it.
+ *
+ * We consider arg to be a file spec if and only if it satisfies
+ * all of the below criteria::
+ * - it does not include any of "+@%",
+ * - it includes one of ":;", and
+ * - it has a period '.' in the name.
+ *
+ * Otherwise, we consider arg to be a function specification.
+ */
+ if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) {
+ /* This is a file spec if it includes a '.' before ; or : */
+ if (memchr(arg, '.', ptr - arg))
+ file_spec = true;
+ }
+
ptr = strpbrk(arg, ";:+@%");
if (ptr) {
nc = *ptr;
*ptr++ = '\0';
}
- tmp = strdup(arg);
- if (tmp == NULL)
- return -ENOMEM;
+ if (arg[0] == '\0')
+ tmp = NULL;
+ else {
+ tmp = strdup(arg);
+ if (tmp == NULL)
+ return -ENOMEM;
+ }
- /* Check arg is function or file and copy it */
- if (strchr(tmp, '.')) /* File */
+ if (file_spec)
pp->file = tmp;
- else /* Function */
+ else {
pp->function = tmp;
+ /*
+ * Keep pp->function even if this is absolute address,
+ * so it can mark whether abs_address is valid.
+ * Which make 'perf probe lib.bin 0x0' possible.
+ *
+ * Note that checking length of tmp is not needed
+ * because when we access tmp[1] we know tmp[0] is '0',
+ * so tmp[1] should always valid (but could be '\0').
+ */
+ if (tmp && !strncmp(tmp, "0x", 2)) {
+ pp->abs_address = strtoul(pp->function, &tmp, 0);
+ if (*tmp != '\0') {
+ semantic_error("Invalid absolute address.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
/* Parse other options */
while (ptr) {
arg = ptr;
@@ -1382,8 +1486,7 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
}
/* Parse probe_events event into struct probe_point */
-static int parse_probe_trace_command(const char *cmd,
- struct probe_trace_event *tev)
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
{
struct probe_trace_point *tp = &tev->point;
char pr;
@@ -1438,9 +1541,31 @@ static int parse_probe_trace_command(const char *cmd,
} else
p = argv[1];
fmt1_str = strtok_r(p, "+", &fmt);
- if (fmt1_str[0] == '0') /* only the address started with 0x */
- tp->address = strtoul(fmt1_str, NULL, 0);
- else {
+ /* only the address started with 0x */
+ if (fmt1_str[0] == '0') {
+ /*
+ * Fix a special case:
+ * if address == 0, kernel reports something like:
+ * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax
+ * Newer kernel may fix that, but we want to
+ * support old kernel also.
+ */
+ if (strcmp(fmt1_str, "0x") == 0) {
+ if (!argv[2] || strcmp(argv[2], "(null)")) {
+ ret = -EINVAL;
+ goto out;
+ }
+ tp->address = 0;
+
+ free(argv[2]);
+ for (i = 2; argv[i + 1] != NULL; i++)
+ argv[i] = argv[i + 1];
+
+ argv[i] = NULL;
+ argc -= 1;
+ } else
+ tp->address = strtoul(fmt1_str, NULL, 0);
+ } else {
/* Only the symbol-based probe has offset */
tp->symbol = strdup(fmt1_str);
if (tp->symbol == NULL) {
@@ -1697,14 +1822,29 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
if (len <= 0)
goto error;
- /* Uprobes must have tp->address and tp->module */
- if (tev->uprobes && (!tp->address || !tp->module))
+ /* Uprobes must have tp->module */
+ if (tev->uprobes && !tp->module)
goto error;
+ /*
+ * If tp->address == 0, then this point must be a
+ * absolute address uprobe.
+ * try_to_find_absolute_address() should have made
+ * tp->symbol to "0x0".
+ */
+ if (tev->uprobes && !tp->address) {
+ if (!tp->symbol || strcmp(tp->symbol, "0x0"))
+ goto error;
+ }
/* Use the tp->address for uprobes */
if (tev->uprobes)
ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx",
tp->module, tp->address);
+ else if (!strncmp(tp->symbol, "0x", 2))
+ /* Absolute address. See try_to_find_absolute_address() */
+ ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx",
+ tp->module ?: "", tp->module ? ":" : "",
+ tp->address);
else
ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu",
tp->module ?: "", tp->module ? ":" : "",
@@ -1734,17 +1874,17 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
{
struct symbol *sym = NULL;
struct map *map;
- u64 addr;
+ u64 addr = tp->address;
int ret = -ENOENT;
if (!is_kprobe) {
map = dso__new_map(tp->module);
if (!map)
goto out;
- addr = tp->address;
sym = map__find_symbol(map, addr, NULL);
} else {
- addr = kernel_get_symbol_address_by_name(tp->symbol, true);
+ if (tp->symbol)
+ addr = kernel_get_symbol_address_by_name(tp->symbol, true);
if (addr) {
addr += tp->offset;
sym = __find_kernel_function(addr, &map);
@@ -1760,16 +1900,15 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
out:
if (map && !is_kprobe) {
- dso__delete(map->dso);
- map__delete(map);
+ map__put(map);
}
return ret;
}
static int convert_to_perf_probe_point(struct probe_trace_point *tp,
- struct perf_probe_point *pp,
- bool is_kprobe)
+ struct perf_probe_point *pp,
+ bool is_kprobe)
{
char buf[128];
int ret;
@@ -1786,7 +1925,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp,
if (tp->symbol) {
pp->function = strdup(tp->symbol);
pp->offset = tp->offset;
- } else if (!tp->module && !is_kprobe) {
+ } else {
ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address);
if (ret < 0)
return ret;
@@ -1867,7 +2006,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev)
memset(pev, 0, sizeof(*pev));
}
-static void clear_probe_trace_event(struct probe_trace_event *tev)
+void clear_probe_trace_event(struct probe_trace_event *tev)
{
struct probe_trace_arg_ref *ref, *next;
int i;
@@ -1875,6 +2014,7 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
free(tev->event);
free(tev->group);
free(tev->point.symbol);
+ free(tev->point.realname);
free(tev->point.module);
for (i = 0; i < tev->nargs; i++) {
free(tev->args[i].name);
@@ -1891,119 +2031,6 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
memset(tev, 0, sizeof(*tev));
}
-static void print_open_warning(int err, bool is_kprobe)
-{
- char sbuf[STRERR_BUFSIZE];
-
- if (err == -ENOENT) {
- const char *config;
-
- if (!is_kprobe)
- config = "CONFIG_UPROBE_EVENTS";
- else
- config = "CONFIG_KPROBE_EVENTS";
-
- pr_warning("%cprobe_events file does not exist"
- " - please rebuild kernel with %s.\n",
- is_kprobe ? 'k' : 'u', config);
- } else if (err == -ENOTSUP)
- pr_warning("Tracefs or debugfs is not mounted.\n");
- else
- pr_warning("Failed to open %cprobe_events: %s\n",
- is_kprobe ? 'k' : 'u',
- strerror_r(-err, sbuf, sizeof(sbuf)));
-}
-
-static void print_both_open_warning(int kerr, int uerr)
-{
- /* Both kprobes and uprobes are disabled, warn it. */
- if (kerr == -ENOTSUP && uerr == -ENOTSUP)
- pr_warning("Tracefs or debugfs is not mounted.\n");
- else if (kerr == -ENOENT && uerr == -ENOENT)
- pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS "
- "or/and CONFIG_UPROBE_EVENTS.\n");
- else {
- char sbuf[STRERR_BUFSIZE];
- pr_warning("Failed to open kprobe events: %s.\n",
- strerror_r(-kerr, sbuf, sizeof(sbuf)));
- pr_warning("Failed to open uprobe events: %s.\n",
- strerror_r(-uerr, sbuf, sizeof(sbuf)));
- }
-}
-
-static int open_probe_events(const char *trace_file, bool readwrite)
-{
- char buf[PATH_MAX];
- const char *__debugfs;
- const char *tracing_dir = "";
- int ret;
-
- __debugfs = tracefs_find_mountpoint();
- if (__debugfs == NULL) {
- tracing_dir = "tracing/";
-
- __debugfs = debugfs_find_mountpoint();
- if (__debugfs == NULL)
- return -ENOTSUP;
- }
-
- ret = e_snprintf(buf, PATH_MAX, "%s/%s%s",
- __debugfs, tracing_dir, trace_file);
- if (ret >= 0) {
- pr_debug("Opening %s write=%d\n", buf, readwrite);
- if (readwrite && !probe_event_dry_run)
- ret = open(buf, O_RDWR, O_APPEND);
- else
- ret = open(buf, O_RDONLY, 0);
-
- if (ret < 0)
- ret = -errno;
- }
- return ret;
-}
-
-static int open_kprobe_events(bool readwrite)
-{
- return open_probe_events("kprobe_events", readwrite);
-}
-
-static int open_uprobe_events(bool readwrite)
-{
- return open_probe_events("uprobe_events", readwrite);
-}
-
-/* Get raw string list of current kprobe_events or uprobe_events */
-static struct strlist *get_probe_trace_command_rawlist(int fd)
-{
- int ret, idx;
- FILE *fp;
- char buf[MAX_CMDLEN];
- char *p;
- struct strlist *sl;
-
- sl = strlist__new(true, NULL);
-
- fp = fdopen(dup(fd), "r");
- while (!feof(fp)) {
- p = fgets(buf, MAX_CMDLEN, fp);
- if (!p)
- break;
-
- idx = strlen(p) - 1;
- if (p[idx] == '\n')
- p[idx] = '\0';
- ret = strlist__add(sl, buf);
- if (ret < 0) {
- pr_debug("strlist__add failed (%d)\n", ret);
- strlist__delete(sl);
- return NULL;
- }
- }
- fclose(fp);
-
- return sl;
-}
-
struct kprobe_blacklist_node {
struct list_head list;
unsigned long start;
@@ -2093,9 +2120,31 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist,
return NULL;
}
-/* Show an event */
-static int show_perf_probe_event(struct perf_probe_event *pev,
- const char *module)
+static LIST_HEAD(kprobe_blacklist);
+
+static void kprobe_blacklist__init(void)
+{
+ if (!list_empty(&kprobe_blacklist))
+ return;
+
+ if (kprobe_blacklist__load(&kprobe_blacklist) < 0)
+ pr_debug("No kprobe blacklist support, ignored\n");
+}
+
+static void kprobe_blacklist__release(void)
+{
+ kprobe_blacklist__delete(&kprobe_blacklist);
+}
+
+static bool kprobe_blacklist__listed(unsigned long address)
+{
+ return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
+}
+
+static int perf_probe_event__sprintf(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module,
+ struct strbuf *result)
{
int i, ret;
char buf[128];
@@ -2106,30 +2155,67 @@ static int show_perf_probe_event(struct perf_probe_event *pev,
if (!place)
return -EINVAL;
- ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event);
+ ret = e_snprintf(buf, 128, "%s:%s", group, event);
if (ret < 0)
- return ret;
+ goto out;
- pr_info(" %-20s (on %s", buf, place);
+ strbuf_addf(result, " %-20s (on %s", buf, place);
if (module)
- pr_info(" in %s", module);
+ strbuf_addf(result, " in %s", module);
if (pev->nargs > 0) {
- pr_info(" with");
+ strbuf_addstr(result, " with");
for (i = 0; i < pev->nargs; i++) {
ret = synthesize_perf_probe_arg(&pev->args[i],
buf, 128);
if (ret < 0)
- break;
- pr_info(" %s", buf);
+ goto out;
+ strbuf_addf(result, " %s", buf);
}
}
- pr_info(")\n");
+ strbuf_addch(result, ')');
+out:
free(place);
return ret;
}
-static int __show_perf_probe_events(int fd, bool is_kprobe)
+/* Show an event */
+static int show_perf_probe_event(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module, bool use_stdout)
+{
+ struct strbuf buf = STRBUF_INIT;
+ int ret;
+
+ ret = perf_probe_event__sprintf(group, event, pev, module, &buf);
+ if (ret >= 0) {
+ if (use_stdout)
+ printf("%s\n", buf.buf);
+ else
+ pr_info("%s\n", buf.buf);
+ }
+ strbuf_release(&buf);
+
+ return ret;
+}
+
+static bool filter_probe_trace_event(struct probe_trace_event *tev,
+ struct strfilter *filter)
+{
+ char tmp[128];
+
+ /* At first, check the event name itself */
+ if (strfilter__compare(filter, tev->event))
+ return true;
+
+ /* Next, check the combination of name and group */
+ if (e_snprintf(tmp, 128, "%s:%s", tev->group, tev->event) < 0)
+ return false;
+ return strfilter__compare(filter, tmp);
+}
+
+static int __show_perf_probe_events(int fd, bool is_kprobe,
+ struct strfilter *filter)
{
int ret = 0;
struct probe_trace_event tev;
@@ -2140,31 +2226,38 @@ static int __show_perf_probe_events(int fd, bool is_kprobe)
memset(&tev, 0, sizeof(tev));
memset(&pev, 0, sizeof(pev));
- rawlist = get_probe_trace_command_rawlist(fd);
+ rawlist = probe_file__get_rawlist(fd);
if (!rawlist)
return -ENOMEM;
strlist__for_each(ent, rawlist) {
ret = parse_probe_trace_command(ent->s, &tev);
if (ret >= 0) {
+ if (!filter_probe_trace_event(&tev, filter))
+ goto next;
ret = convert_to_perf_probe_event(&tev, &pev,
is_kprobe);
- if (ret >= 0)
- ret = show_perf_probe_event(&pev,
- tev.point.module);
+ if (ret < 0)
+ goto next;
+ ret = show_perf_probe_event(pev.group, pev.event,
+ &pev, tev.point.module,
+ true);
}
+next:
clear_perf_probe_event(&pev);
clear_probe_trace_event(&tev);
if (ret < 0)
break;
}
strlist__delete(rawlist);
+ /* Cleanup cached debuginfo if needed */
+ debuginfo_cache__exit();
return ret;
}
/* List up current perf-probe events */
-int show_perf_probe_events(void)
+int show_perf_probe_events(struct strfilter *filter)
{
int kp_fd, up_fd, ret;
@@ -2174,89 +2267,20 @@ int show_perf_probe_events(void)
if (ret < 0)
return ret;
- kp_fd = open_kprobe_events(false);
- if (kp_fd >= 0) {
- ret = __show_perf_probe_events(kp_fd, true);
- close(kp_fd);
- if (ret < 0)
- goto out;
- }
-
- up_fd = open_uprobe_events(false);
- if (kp_fd < 0 && up_fd < 0) {
- print_both_open_warning(kp_fd, up_fd);
- ret = kp_fd;
- goto out;
- }
+ ret = probe_file__open_both(&kp_fd, &up_fd, 0);
+ if (ret < 0)
+ return ret;
- if (up_fd >= 0) {
- ret = __show_perf_probe_events(up_fd, false);
+ if (kp_fd >= 0)
+ ret = __show_perf_probe_events(kp_fd, true, filter);
+ if (up_fd >= 0 && ret >= 0)
+ ret = __show_perf_probe_events(up_fd, false, filter);
+ if (kp_fd > 0)
+ close(kp_fd);
+ if (up_fd > 0)
close(up_fd);
- }
-out:
exit_symbol_maps();
- return ret;
-}
-
-/* Get current perf-probe event names */
-static struct strlist *get_probe_trace_event_names(int fd, bool include_group)
-{
- char buf[128];
- struct strlist *sl, *rawlist;
- struct str_node *ent;
- struct probe_trace_event tev;
- int ret = 0;
-
- memset(&tev, 0, sizeof(tev));
- rawlist = get_probe_trace_command_rawlist(fd);
- if (!rawlist)
- return NULL;
- sl = strlist__new(true, NULL);
- strlist__for_each(ent, rawlist) {
- ret = parse_probe_trace_command(ent->s, &tev);
- if (ret < 0)
- break;
- if (include_group) {
- ret = e_snprintf(buf, 128, "%s:%s", tev.group,
- tev.event);
- if (ret >= 0)
- ret = strlist__add(sl, buf);
- } else
- ret = strlist__add(sl, tev.event);
- clear_probe_trace_event(&tev);
- if (ret < 0)
- break;
- }
- strlist__delete(rawlist);
-
- if (ret < 0) {
- strlist__delete(sl);
- return NULL;
- }
- return sl;
-}
-
-static int write_probe_trace_event(int fd, struct probe_trace_event *tev)
-{
- int ret = 0;
- char *buf = synthesize_probe_trace_command(tev);
- char sbuf[STRERR_BUFSIZE];
-
- if (!buf) {
- pr_debug("Failed to synthesize probe trace event.\n");
- return -EINVAL;
- }
- pr_debug("Writing event: %s\n", buf);
- if (!probe_event_dry_run) {
- ret = write(fd, buf, strlen(buf));
- if (ret <= 0) {
- ret = -errno;
- pr_warning("Failed to write event: %s\n",
- strerror_r(errno, sbuf, sizeof(sbuf)));
- }
- }
- free(buf);
return ret;
}
@@ -2264,6 +2288,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
struct strlist *namelist, bool allow_suffix)
{
int i, ret;
+ char *p;
+
+ if (*base == '.')
+ base++;
/* Try no suffix */
ret = e_snprintf(buf, len, "%s", base);
@@ -2271,6 +2299,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
pr_debug("snprintf() failed: %d\n", ret);
return ret;
}
+ /* Cut off the postfixes (e.g. .const, .isra)*/
+ p = strchr(buf, '.');
+ if (p && p != buf)
+ *p = '\0';
if (!strlist__has_entry(namelist, buf))
return 0;
@@ -2319,93 +2351,93 @@ out:
free(buf);
}
+/* Set new name from original perf_probe_event and namelist */
+static int probe_trace_event__set_name(struct probe_trace_event *tev,
+ struct perf_probe_event *pev,
+ struct strlist *namelist,
+ bool allow_suffix)
+{
+ const char *event, *group;
+ char buf[64];
+ int ret;
+
+ if (pev->event)
+ event = pev->event;
+ else
+ if (pev->point.function &&
+ (strncmp(pev->point.function, "0x", 2) != 0) &&
+ !strisglob(pev->point.function))
+ event = pev->point.function;
+ else
+ event = tev->point.realname;
+ if (pev->group)
+ group = pev->group;
+ else
+ group = PERFPROBE_GROUP;
+
+ /* Get an unused new event name */
+ ret = get_new_event_name(buf, 64, event,
+ namelist, allow_suffix);
+ if (ret < 0)
+ return ret;
+
+ event = buf;
+
+ tev->event = strdup(event);
+ tev->group = strdup(group);
+ if (tev->event == NULL || tev->group == NULL)
+ return -ENOMEM;
+
+ /* Add added event name to namelist */
+ strlist__add(namelist, event);
+ return 0;
+}
+
static int __add_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event *tevs,
int ntevs, bool allow_suffix)
{
int i, fd, ret;
struct probe_trace_event *tev = NULL;
- char buf[64];
- const char *event, *group;
+ const char *event = NULL, *group = NULL;
struct strlist *namelist;
- LIST_HEAD(blacklist);
- struct kprobe_blacklist_node *node;
-
- if (pev->uprobes)
- fd = open_uprobe_events(true);
- else
- fd = open_kprobe_events(true);
- if (fd < 0) {
- print_open_warning(fd, !pev->uprobes);
+ fd = probe_file__open(PF_FL_RW | (pev->uprobes ? PF_FL_UPROBE : 0));
+ if (fd < 0)
return fd;
- }
/* Get current event names */
- namelist = get_probe_trace_event_names(fd, false);
+ namelist = probe_file__get_namelist(fd);
if (!namelist) {
pr_debug("Failed to get current event list.\n");
- return -EIO;
- }
- /* Get kprobe blacklist if exists */
- if (!pev->uprobes) {
- ret = kprobe_blacklist__load(&blacklist);
- if (ret < 0)
- pr_debug("No kprobe blacklist support, ignored\n");
+ ret = -ENOMEM;
+ goto close_out;
}
ret = 0;
pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
for (i = 0; i < ntevs; i++) {
tev = &tevs[i];
- /* Ensure that the address is NOT blacklisted */
- node = kprobe_blacklist__find_by_address(&blacklist,
- tev->point.address);
- if (node) {
- pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol);
+ /* Skip if the symbol is out of .text or blacklisted */
+ if (!tev->point.symbol)
continue;
- }
- if (pev->event)
- event = pev->event;
- else
- if (pev->point.function)
- event = pev->point.function;
- else
- event = tev->point.symbol;
- if (pev->group)
- group = pev->group;
- else
- group = PERFPROBE_GROUP;
-
- /* Get an unused new event name */
- ret = get_new_event_name(buf, 64, event,
- namelist, allow_suffix);
+ /* Set new name for tev (and update namelist) */
+ ret = probe_trace_event__set_name(tev, pev, namelist,
+ allow_suffix);
if (ret < 0)
break;
- event = buf;
- tev->event = strdup(event);
- tev->group = strdup(group);
- if (tev->event == NULL || tev->group == NULL) {
- ret = -ENOMEM;
- break;
- }
- ret = write_probe_trace_event(fd, tev);
+ ret = probe_file__add_event(fd, tev);
if (ret < 0)
break;
- /* Add added event name to namelist */
- strlist__add(namelist, event);
- /* Trick here - save current event/group */
- event = pev->event;
- group = pev->group;
- pev->event = tev->event;
- pev->group = tev->group;
- show_perf_probe_event(pev, tev->point.module);
- /* Trick here - restore current event/group */
- pev->event = (char *)event;
- pev->group = (char *)group;
+ /* We use tev's name for showing new events */
+ show_perf_probe_event(tev->group, tev->event, pev,
+ tev->point.module, false);
+ /* Save the last valid name */
+ event = tev->event;
+ group = tev->group;
/*
* Probes after the first probe which comes from same
@@ -2419,26 +2451,34 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
warn_uprobe_event_compat(tev);
/* Note that it is possible to skip all events because of blacklist */
- if (ret >= 0 && tev->event) {
+ if (ret >= 0 && event) {
/* Show how to use the event. */
pr_info("\nYou can now use it in all perf tools, such as:\n\n");
- pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
- tev->event);
+ pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
}
- kprobe_blacklist__delete(&blacklist);
strlist__delete(namelist);
+close_out:
close(fd);
return ret;
}
-static int find_probe_functions(struct map *map, char *name)
+static int find_probe_functions(struct map *map, char *name,
+ struct symbol **syms)
{
int found = 0;
struct symbol *sym;
+ struct rb_node *tmp;
- map__for_each_symbol_by_name(map, name, sym) {
- found++;
+ if (map__load(map, NULL) < 0)
+ return 0;
+
+ map__for_each_symbol(map, sym, tmp) {
+ if (strglobmatch(sym->name, name)) {
+ found++;
+ if (syms && found < probe_conf.max_probes)
+ syms[found - 1] = sym;
+ }
}
return found;
@@ -2447,42 +2487,52 @@ static int find_probe_functions(struct map *map, char *name)
#define strdup_or_goto(str, label) \
({ char *__p = strdup(str); if (!__p) goto label; __p; })
+void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
+ struct probe_trace_event *tev __maybe_unused,
+ struct map *map __maybe_unused) { }
+
/*
* Find probe function addresses from map.
* Return an error or the number of found probe_trace_event
*/
static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
- struct probe_trace_event **tevs,
- int max_tevs, const char *target)
+ struct probe_trace_event **tevs)
{
struct map *map = NULL;
struct ref_reloc_sym *reloc_sym = NULL;
struct symbol *sym;
+ struct symbol **syms = NULL;
struct probe_trace_event *tev;
struct perf_probe_point *pp = &pev->point;
struct probe_trace_point *tp;
int num_matched_functions;
- int ret, i;
+ int ret, i, j, skipped = 0;
- map = get_target_map(target, pev->uprobes);
+ map = get_target_map(pev->target, pev->uprobes);
if (!map) {
ret = -EINVAL;
goto out;
}
+ syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes);
+ if (!syms) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
/*
* Load matched symbols: Since the different local symbols may have
* same name but different addresses, this lists all the symbols.
*/
- num_matched_functions = find_probe_functions(map, pp->function);
+ num_matched_functions = find_probe_functions(map, pp->function, syms);
if (num_matched_functions == 0) {
pr_err("Failed to find symbol %s in %s\n", pp->function,
- target ? : "kernel");
+ pev->target ? : "kernel");
ret = -ENOENT;
goto out;
- } else if (num_matched_functions > max_tevs) {
+ } else if (num_matched_functions > probe_conf.max_probes) {
pr_err("Too many functions matched in %s\n",
- target ? : "kernel");
+ pev->target ? : "kernel");
ret = -E2BIG;
goto out;
}
@@ -2505,7 +2555,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
ret = 0;
- map__for_each_symbol_by_name(map, pp->function, sym) {
+ for (j = 0; j < num_matched_functions; j++) {
+ sym = syms[j];
+
tev = (*tevs) + ret;
tp = &tev->point;
if (ret == num_matched_functions) {
@@ -2522,16 +2574,24 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
}
/* Add one probe point */
tp->address = map->unmap_ip(map, sym->start) + pp->offset;
- if (reloc_sym) {
+ /* If we found a wrong one, mark it by NULL symbol */
+ if (!pev->uprobes &&
+ kprobe_warn_out_range(sym->name, tp->address)) {
+ tp->symbol = NULL; /* Skip it */
+ skipped++;
+ } else if (reloc_sym) {
tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
tp->offset = tp->address - reloc_sym->addr;
} else {
tp->symbol = strdup_or_goto(sym->name, nomem_out);
tp->offset = pp->offset;
}
+ tp->realname = strdup_or_goto(sym->name, nomem_out);
+
tp->retprobe = pp->retprobe;
- if (target)
- tev->point.module = strdup_or_goto(target, nomem_out);
+ if (pev->target)
+ tev->point.module = strdup_or_goto(pev->target,
+ nomem_out);
tev->uprobes = pev->uprobes;
tev->nargs = pev->nargs;
if (tev->nargs) {
@@ -2553,10 +2613,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
strdup_or_goto(pev->args[i].type,
nomem_out);
}
+ arch__fix_tev_from_maps(pev, tev, map);
+ }
+ if (ret == skipped) {
+ ret = -ENOENT;
+ goto err_out;
}
out:
put_target_map(map, pev->uprobes);
+ free(syms);
return ret;
nomem_out:
@@ -2567,27 +2633,130 @@ err_out:
goto out;
}
+static int try_to_find_absolute_address(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct perf_probe_point *pp = &pev->point;
+ struct probe_trace_event *tev;
+ struct probe_trace_point *tp;
+ int i, err;
+
+ if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2)))
+ return -EINVAL;
+ if (perf_probe_event_need_dwarf(pev))
+ return -EINVAL;
+
+ /*
+ * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at
+ * absolute address.
+ *
+ * Only one tev can be generated by this.
+ */
+ *tevs = zalloc(sizeof(*tev));
+ if (!*tevs)
+ return -ENOMEM;
+
+ tev = *tevs;
+ tp = &tev->point;
+
+ /*
+ * Don't use tp->offset, use address directly, because
+ * in synthesize_probe_trace_command() address cannot be
+ * zero.
+ */
+ tp->address = pev->point.abs_address;
+ tp->retprobe = pp->retprobe;
+ tev->uprobes = pev->uprobes;
+
+ err = -ENOMEM;
+ /*
+ * Give it a '0x' leading symbol name.
+ * In __add_probe_trace_events, a NULL symbol is interpreted as
+ * invalud.
+ */
+ if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0)
+ goto errout;
+
+ /* For kprobe, check range */
+ if ((!tev->uprobes) &&
+ (kprobe_warn_out_range(tev->point.symbol,
+ tev->point.address))) {
+ err = -EACCES;
+ goto errout;
+ }
+
+ if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0)
+ goto errout;
+
+ if (pev->target) {
+ tp->module = strdup(pev->target);
+ if (!tp->module)
+ goto errout;
+ }
+
+ if (tev->group) {
+ tev->group = strdup(pev->group);
+ if (!tev->group)
+ goto errout;
+ }
+
+ if (pev->event) {
+ tev->event = strdup(pev->event);
+ if (!tev->event)
+ goto errout;
+ }
+
+ tev->nargs = pev->nargs;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (!tev->args) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ for (i = 0; i < tev->nargs; i++)
+ copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]);
+
+ return 1;
+
+errout:
+ if (*tevs) {
+ clear_probe_trace_events(*tevs, 1);
+ *tevs = NULL;
+ }
+ return err;
+}
+
+bool __weak arch__prefers_symtab(void) { return false; }
+
static int convert_to_probe_trace_events(struct perf_probe_event *pev,
- struct probe_trace_event **tevs,
- int max_tevs, const char *target)
+ struct probe_trace_event **tevs)
{
int ret;
if (pev->uprobes && !pev->group) {
/* Replace group name if not given */
- ret = convert_exec_to_group(target, &pev->group);
+ ret = convert_exec_to_group(pev->target, &pev->group);
if (ret != 0) {
pr_warning("Failed to make a group name.\n");
return ret;
}
}
+ ret = try_to_find_absolute_address(pev, tevs);
+ if (ret > 0)
+ return ret;
+
+ if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
+ ret = find_probe_trace_events_from_map(pev, tevs);
+ if (ret > 0)
+ return ret; /* Found in symbol table */
+ }
+
/* Convert perf_probe_event with debuginfo */
- ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target);
+ ret = try_to_find_probe_trace_events(pev, tevs);
if (ret != 0)
return ret; /* Found in debuginfo or got an error */
- return find_probe_trace_events_from_map(pev, tevs, max_tevs, target);
+ return find_probe_trace_events_from_map(pev, tevs);
}
struct __event_package {
@@ -2596,8 +2765,7 @@ struct __event_package {
int ntevs;
};
-int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
- int max_tevs, bool force_add)
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
{
int i, j, ret;
struct __event_package *pkgs;
@@ -2617,20 +2785,24 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
/* Loop 1: convert all events */
for (i = 0; i < npevs; i++) {
pkgs[i].pev = &pevs[i];
+ /* Init kprobe blacklist if needed */
+ if (!pkgs[i].pev->uprobes)
+ kprobe_blacklist__init();
/* Convert with or without debuginfo */
ret = convert_to_probe_trace_events(pkgs[i].pev,
- &pkgs[i].tevs,
- max_tevs,
- pkgs[i].pev->target);
+ &pkgs[i].tevs);
if (ret < 0)
goto end;
pkgs[i].ntevs = ret;
}
+ /* This just release blacklist only if allocated */
+ kprobe_blacklist__release();
/* Loop 2: add all events */
for (i = 0; i < npevs; i++) {
ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs,
- pkgs[i].ntevs, force_add);
+ pkgs[i].ntevs,
+ probe_conf.force_add);
if (ret < 0)
break;
}
@@ -2647,141 +2819,42 @@ end:
return ret;
}
-static int __del_trace_probe_event(int fd, struct str_node *ent)
-{
- char *p;
- char buf[128];
- int ret;
-
- /* Convert from perf-probe event to trace-probe event */
- ret = e_snprintf(buf, 128, "-:%s", ent->s);
- if (ret < 0)
- goto error;
-
- p = strchr(buf + 2, ':');
- if (!p) {
- pr_debug("Internal error: %s should have ':' but not.\n",
- ent->s);
- ret = -ENOTSUP;
- goto error;
- }
- *p = '/';
-
- pr_debug("Writing event: %s\n", buf);
- ret = write(fd, buf, strlen(buf));
- if (ret < 0) {
- ret = -errno;
- goto error;
- }
-
- pr_info("Removed event: %s\n", ent->s);
- return 0;
-error:
- pr_warning("Failed to delete event: %s\n",
- strerror_r(-ret, buf, sizeof(buf)));
- return ret;
-}
-
-static int del_trace_probe_event(int fd, const char *buf,
- struct strlist *namelist)
+int del_perf_probe_events(struct strfilter *filter)
{
- struct str_node *ent, *n;
- int ret = -1;
+ int ret, ret2, ufd = -1, kfd = -1;
+ char *str = strfilter__string(filter);
- if (strpbrk(buf, "*?")) { /* Glob-exp */
- strlist__for_each_safe(ent, n, namelist)
- if (strglobmatch(ent->s, buf)) {
- ret = __del_trace_probe_event(fd, ent);
- if (ret < 0)
- break;
- strlist__remove(namelist, ent);
- }
- } else {
- ent = strlist__find(namelist, buf);
- if (ent) {
- ret = __del_trace_probe_event(fd, ent);
- if (ret >= 0)
- strlist__remove(namelist, ent);
- }
- }
-
- return ret;
-}
+ if (!str)
+ return -EINVAL;
-int del_perf_probe_events(struct strlist *dellist)
-{
- int ret = -1, ufd = -1, kfd = -1;
- char buf[128];
- const char *group, *event;
- char *p, *str;
- struct str_node *ent;
- struct strlist *namelist = NULL, *unamelist = NULL;
+ pr_debug("Delete filter: \'%s\'\n", str);
/* Get current event names */
- kfd = open_kprobe_events(true);
- if (kfd >= 0)
- namelist = get_probe_trace_event_names(kfd, true);
-
- ufd = open_uprobe_events(true);
- if (ufd >= 0)
- unamelist = get_probe_trace_event_names(ufd, true);
+ ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
+ if (ret < 0)
+ goto out;
- if (kfd < 0 && ufd < 0) {
- print_both_open_warning(kfd, ufd);
+ ret = probe_file__del_events(kfd, filter);
+ if (ret < 0 && ret != -ENOENT)
goto error;
- }
- if (namelist == NULL && unamelist == NULL)
+ ret2 = probe_file__del_events(ufd, filter);
+ if (ret2 < 0 && ret2 != -ENOENT) {
+ ret = ret2;
goto error;
-
- strlist__for_each(ent, dellist) {
- str = strdup(ent->s);
- if (str == NULL) {
- ret = -ENOMEM;
- goto error;
- }
- pr_debug("Parsing: %s\n", str);
- p = strchr(str, ':');
- if (p) {
- group = str;
- *p = '\0';
- event = p + 1;
- } else {
- group = "*";
- event = str;
- }
-
- ret = e_snprintf(buf, 128, "%s:%s", group, event);
- if (ret < 0) {
- pr_err("Failed to copy event.");
- free(str);
- goto error;
- }
-
- pr_debug("Group: %s, Event: %s\n", group, event);
-
- if (namelist)
- ret = del_trace_probe_event(kfd, buf, namelist);
-
- if (unamelist && ret != 0)
- ret = del_trace_probe_event(ufd, buf, unamelist);
-
- if (ret != 0)
- pr_info("Info: Event \"%s\" does not exist.\n", buf);
-
- free(str);
}
+ if (ret == -ENOENT && ret2 == -ENOENT)
+ pr_debug("\"%s\" does not hit any event.\n", str);
+ /* Note that this is silently ignored */
+ ret = 0;
error:
- if (kfd >= 0) {
- strlist__delete(namelist);
+ if (kfd >= 0)
close(kfd);
- }
-
- if (ufd >= 0) {
- strlist__delete(unamelist);
+ if (ufd >= 0)
close(ufd);
- }
+out:
+ free(str);
return ret;
}
@@ -2835,11 +2908,29 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
end:
if (user) {
- dso__delete(map->dso);
- map__delete(map);
+ map__put(map);
}
exit_symbol_maps();
return ret;
}
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+ struct perf_probe_arg *pvar)
+{
+ tvar->value = strdup(pvar->var);
+ if (tvar->value == NULL)
+ return -ENOMEM;
+ if (pvar->type) {
+ tvar->type = strdup(pvar->type);
+ if (tvar->type == NULL)
+ return -ENOMEM;
+ }
+ if (pvar->name) {
+ tvar->name = strdup(pvar->name);
+ if (tvar->name == NULL)
+ return -ENOMEM;
+ } else
+ tvar->name = NULL;
+ return 0;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index d6b7834..6e7ec68 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -6,10 +6,20 @@
#include "strlist.h"
#include "strfilter.h"
+/* Probe related configurations */
+struct probe_conf {
+ bool show_ext_vars;
+ bool show_location_range;
+ bool force_add;
+ bool no_inlines;
+ int max_probes;
+};
+extern struct probe_conf probe_conf;
extern bool probe_event_dry_run;
/* kprobe-tracer and uprobe-tracer tracing point */
struct probe_trace_point {
+ char *realname; /* function real name (if needed) */
char *symbol; /* Base symbol */
char *module; /* Module name */
unsigned long offset; /* Offset from symbol */
@@ -49,6 +59,7 @@ struct perf_probe_point {
bool retprobe; /* Return probe flag */
char *lazy_line; /* Lazy matching pattern */
unsigned long offset; /* Offset from function entry */
+ unsigned long abs_address; /* Absolute address of the point */
};
/* Perf probe probing argument field chain */
@@ -96,9 +107,13 @@ struct variable_list {
struct strlist *vars; /* Available variables */
};
+struct map;
+
/* Command string to events */
extern int parse_perf_probe_command(const char *cmd,
struct perf_probe_event *pev);
+extern int parse_probe_trace_command(const char *cmd,
+ struct probe_trace_event *tev);
/* Events to command string */
extern char *synthesize_perf_probe_command(struct perf_probe_event *pev);
@@ -111,6 +126,7 @@ extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
/* Release event contents */
extern void clear_perf_probe_event(struct perf_probe_event *pev);
+extern void clear_probe_trace_event(struct probe_trace_event *tev);
/* Command string to line-range */
extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
@@ -121,22 +137,27 @@ extern void line_range__clear(struct line_range *lr);
/* Initialize line range */
extern int line_range__init(struct line_range *lr);
-/* Internal use: Return kernel/module path */
-extern const char *kernel_get_module_path(const char *module);
-
-extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
- int max_probe_points, bool force_add);
-extern int del_perf_probe_events(struct strlist *dellist);
-extern int show_perf_probe_events(void);
+extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+extern int del_perf_probe_events(struct strfilter *filter);
+extern int show_perf_probe_events(struct strfilter *filter);
extern int show_line_range(struct line_range *lr, const char *module,
bool user);
extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
- int max_probe_points, const char *module,
- struct strfilter *filter, bool externs);
+ struct strfilter *filter);
extern int show_available_funcs(const char *module, struct strfilter *filter,
bool user);
+bool arch__prefers_symtab(void);
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+ struct probe_trace_event *tev, struct map *map);
+
+/* If there is no space to write, returns -E2BIG. */
+int e_snprintf(char *str, size_t size, const char *format, ...)
+ __attribute__((format(printf, 3, 4)));
/* Maximum index number of event-name postfix */
#define MAX_EVENT_INDEX 1024
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+ struct perf_probe_arg *pvar);
+
#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
new file mode 100644
index 0000000..bbb2437
--- /dev/null
+++ b/tools/perf/util/probe-file.c
@@ -0,0 +1,301 @@
+/*
+ * probe-file.c : operate ftrace k/uprobe events files
+ *
+ * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include "util.h"
+#include "event.h"
+#include "strlist.h"
+#include "debug.h"
+#include "cache.h"
+#include "color.h"
+#include "symbol.h"
+#include "thread.h"
+#include <api/fs/debugfs.h>
+#include <api/fs/tracefs.h>
+#include "probe-event.h"
+#include "probe-file.h"
+#include "session.h"
+
+#define MAX_CMDLEN 256
+
+static void print_open_warning(int err, bool uprobe)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ if (err == -ENOENT) {
+ const char *config;
+
+ if (uprobe)
+ config = "CONFIG_UPROBE_EVENTS";
+ else
+ config = "CONFIG_KPROBE_EVENTS";
+
+ pr_warning("%cprobe_events file does not exist"
+ " - please rebuild kernel with %s.\n",
+ uprobe ? 'u' : 'k', config);
+ } else if (err == -ENOTSUP)
+ pr_warning("Tracefs or debugfs is not mounted.\n");
+ else
+ pr_warning("Failed to open %cprobe_events: %s\n",
+ uprobe ? 'u' : 'k',
+ strerror_r(-err, sbuf, sizeof(sbuf)));
+}
+
+static void print_both_open_warning(int kerr, int uerr)
+{
+ /* Both kprobes and uprobes are disabled, warn it. */
+ if (kerr == -ENOTSUP && uerr == -ENOTSUP)
+ pr_warning("Tracefs or debugfs is not mounted.\n");
+ else if (kerr == -ENOENT && uerr == -ENOENT)
+ pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS "
+ "or/and CONFIG_UPROBE_EVENTS.\n");
+ else {
+ char sbuf[STRERR_BUFSIZE];
+ pr_warning("Failed to open kprobe events: %s.\n",
+ strerror_r(-kerr, sbuf, sizeof(sbuf)));
+ pr_warning("Failed to open uprobe events: %s.\n",
+ strerror_r(-uerr, sbuf, sizeof(sbuf)));
+ }
+}
+
+static int open_probe_events(const char *trace_file, bool readwrite)
+{
+ char buf[PATH_MAX];
+ const char *__debugfs;
+ const char *tracing_dir = "";
+ int ret;
+
+ __debugfs = tracefs_find_mountpoint();
+ if (__debugfs == NULL) {
+ tracing_dir = "tracing/";
+
+ __debugfs = debugfs_find_mountpoint();
+ if (__debugfs == NULL)
+ return -ENOTSUP;
+ }
+
+ ret = e_snprintf(buf, PATH_MAX, "%s/%s%s",
+ __debugfs, tracing_dir, trace_file);
+ if (ret >= 0) {
+ pr_debug("Opening %s write=%d\n", buf, readwrite);
+ if (readwrite && !probe_event_dry_run)
+ ret = open(buf, O_RDWR | O_APPEND, 0);
+ else
+ ret = open(buf, O_RDONLY, 0);
+
+ if (ret < 0)
+ ret = -errno;
+ }
+ return ret;
+}
+
+static int open_kprobe_events(bool readwrite)
+{
+ return open_probe_events("kprobe_events", readwrite);
+}
+
+static int open_uprobe_events(bool readwrite)
+{
+ return open_probe_events("uprobe_events", readwrite);
+}
+
+int probe_file__open(int flag)
+{
+ int fd;
+
+ if (flag & PF_FL_UPROBE)
+ fd = open_uprobe_events(flag & PF_FL_RW);
+ else
+ fd = open_kprobe_events(flag & PF_FL_RW);
+ if (fd < 0)
+ print_open_warning(fd, flag & PF_FL_UPROBE);
+
+ return fd;
+}
+
+int probe_file__open_both(int *kfd, int *ufd, int flag)
+{
+ if (!kfd || !ufd)
+ return -EINVAL;
+
+ *kfd = open_kprobe_events(flag & PF_FL_RW);
+ *ufd = open_uprobe_events(flag & PF_FL_RW);
+ if (*kfd < 0 && *ufd < 0) {
+ print_both_open_warning(*kfd, *ufd);
+ return *kfd;
+ }
+
+ return 0;
+}
+
+/* Get raw string list of current kprobe_events or uprobe_events */
+struct strlist *probe_file__get_rawlist(int fd)
+{
+ int ret, idx;
+ FILE *fp;
+ char buf[MAX_CMDLEN];
+ char *p;
+ struct strlist *sl;
+
+ sl = strlist__new(NULL, NULL);
+
+ fp = fdopen(dup(fd), "r");
+ while (!feof(fp)) {
+ p = fgets(buf, MAX_CMDLEN, fp);
+ if (!p)
+ break;
+
+ idx = strlen(p) - 1;
+ if (p[idx] == '\n')
+ p[idx] = '\0';
+ ret = strlist__add(sl, buf);
+ if (ret < 0) {
+ pr_debug("strlist__add failed (%d)\n", ret);
+ strlist__delete(sl);
+ return NULL;
+ }
+ }
+ fclose(fp);
+
+ return sl;
+}
+
+static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
+{
+ char buf[128];
+ struct strlist *sl, *rawlist;
+ struct str_node *ent;
+ struct probe_trace_event tev;
+ int ret = 0;
+
+ memset(&tev, 0, sizeof(tev));
+ rawlist = probe_file__get_rawlist(fd);
+ if (!rawlist)
+ return NULL;
+ sl = strlist__new(NULL, NULL);
+ strlist__for_each(ent, rawlist) {
+ ret = parse_probe_trace_command(ent->s, &tev);
+ if (ret < 0)
+ break;
+ if (include_group) {
+ ret = e_snprintf(buf, 128, "%s:%s", tev.group,
+ tev.event);
+ if (ret >= 0)
+ ret = strlist__add(sl, buf);
+ } else
+ ret = strlist__add(sl, tev.event);
+ clear_probe_trace_event(&tev);
+ if (ret < 0)
+ break;
+ }
+ strlist__delete(rawlist);
+
+ if (ret < 0) {
+ strlist__delete(sl);
+ return NULL;
+ }
+ return sl;
+}
+
+/* Get current perf-probe event names */
+struct strlist *probe_file__get_namelist(int fd)
+{
+ return __probe_file__get_namelist(fd, false);
+}
+
+int probe_file__add_event(int fd, struct probe_trace_event *tev)
+{
+ int ret = 0;
+ char *buf = synthesize_probe_trace_command(tev);
+ char sbuf[STRERR_BUFSIZE];
+
+ if (!buf) {
+ pr_debug("Failed to synthesize probe trace event.\n");
+ return -EINVAL;
+ }
+
+ pr_debug("Writing event: %s\n", buf);
+ if (!probe_event_dry_run) {
+ ret = write(fd, buf, strlen(buf));
+ if (ret <= 0) {
+ ret = -errno;
+ pr_warning("Failed to write event: %s\n",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
+ }
+ }
+ free(buf);
+
+ return ret;
+}
+
+static int __del_trace_probe_event(int fd, struct str_node *ent)
+{
+ char *p;
+ char buf[128];
+ int ret;
+
+ /* Convert from perf-probe event to trace-probe event */
+ ret = e_snprintf(buf, 128, "-:%s", ent->s);
+ if (ret < 0)
+ goto error;
+
+ p = strchr(buf + 2, ':');
+ if (!p) {
+ pr_debug("Internal error: %s should have ':' but not.\n",
+ ent->s);
+ ret = -ENOTSUP;
+ goto error;
+ }
+ *p = '/';
+
+ pr_debug("Writing event: %s\n", buf);
+ ret = write(fd, buf, strlen(buf));
+ if (ret < 0) {
+ ret = -errno;
+ goto error;
+ }
+
+ pr_info("Removed event: %s\n", ent->s);
+ return 0;
+error:
+ pr_warning("Failed to delete event: %s\n",
+ strerror_r(-ret, buf, sizeof(buf)));
+ return ret;
+}
+
+int probe_file__del_events(int fd, struct strfilter *filter)
+{
+ struct strlist *namelist;
+ struct str_node *ent;
+ const char *p;
+ int ret = -ENOENT;
+
+ namelist = __probe_file__get_namelist(fd, true);
+ if (!namelist)
+ return -ENOENT;
+
+ strlist__for_each(ent, namelist) {
+ p = strchr(ent->s, ':');
+ if ((p && strfilter__compare(filter, p + 1)) ||
+ strfilter__compare(filter, ent->s)) {
+ ret = __del_trace_probe_event(fd, ent);
+ if (ret < 0)
+ break;
+ }
+ }
+ strlist__delete(namelist);
+
+ return ret;
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
new file mode 100644
index 0000000..ada94a2
--- /dev/null
+++ b/tools/perf/util/probe-file.h
@@ -0,0 +1,18 @@
+#ifndef __PROBE_FILE_H
+#define __PROBE_FILE_H
+
+#include "strlist.h"
+#include "strfilter.h"
+#include "probe-event.h"
+
+#define PF_FL_UPROBE 1
+#define PF_FL_RW 2
+
+int probe_file__open(int flag);
+int probe_file__open_both(int *kfd, int *ufd, int flag);
+struct strlist *probe_file__get_namelist(int fd);
+struct strlist *probe_file__get_rawlist(int fd);
+int probe_file__add_event(int fd, struct probe_trace_event *tev);
+int probe_file__del_events(int fd, struct strfilter *filter);
+
+#endif
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index b5bf9d5..29c43c068 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -130,7 +130,7 @@ struct debuginfo *debuginfo__new(const char *path)
continue;
dinfo = __debuginfo__new(buf);
}
- dso__delete(dso);
+ dso__put(dso);
out:
/* if failed to open all distro debuginfo, open given binary */
@@ -177,7 +177,7 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
Dwarf_Word offs = 0;
bool ref = false;
const char *regs;
- int ret;
+ int ret, ret2 = 0;
if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
goto static_var;
@@ -187,9 +187,19 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
return -EINVAL; /* Broken DIE ? */
if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
ret = dwarf_entrypc(sp_die, &tmp);
- if (ret || addr != tmp ||
- dwarf_tag(vr_die) != DW_TAG_formal_parameter ||
- dwarf_highpc(sp_die, &tmp))
+ if (ret)
+ return -ENOENT;
+
+ if (probe_conf.show_location_range &&
+ (dwarf_tag(vr_die) == DW_TAG_variable)) {
+ ret2 = -ERANGE;
+ } else if (addr != tmp ||
+ dwarf_tag(vr_die) != DW_TAG_formal_parameter) {
+ return -ENOENT;
+ }
+
+ ret = dwarf_highpc(sp_die, &tmp);
+ if (ret)
return -ENOENT;
/*
* This is fuzzed by fentry mcount. We try to find the
@@ -210,7 +220,7 @@ found:
if (op->atom == DW_OP_addr) {
static_var:
if (!tvar)
- return 0;
+ return ret2;
/* Static variables on memory (not stack), make @varname */
ret = strlen(dwarf_diename(vr_die));
tvar->value = zalloc(ret + 2);
@@ -220,7 +230,7 @@ static_var:
tvar->ref = alloc_trace_arg_ref((long)offs);
if (tvar->ref == NULL)
return -ENOMEM;
- return 0;
+ return ret2;
}
/* If this is based on frame buffer, set the offset */
@@ -250,14 +260,14 @@ static_var:
}
if (!tvar)
- return 0;
+ return ret2;
regs = get_arch_regstr(regn);
if (!regs) {
/* This should be a bug in DWARF or this tool */
pr_warning("Mapping for the register number %u "
"missing on this architecture.\n", regn);
- return -ERANGE;
+ return -ENOTSUP;
}
tvar->value = strdup(regs);
@@ -269,7 +279,7 @@ static_var:
if (tvar->ref == NULL)
return -ENOMEM;
}
- return 0;
+ return ret2;
}
#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long))
@@ -517,10 +527,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
&pf->sp_die, pf->tvar);
- if (ret == -ENOENT || ret == -EINVAL)
- pr_err("Failed to find the location of %s at this address.\n"
- " Perhaps, it has been optimized out.\n", pf->pvar->var);
- else if (ret == -ENOTSUP)
+ if (ret == -ENOENT || ret == -EINVAL) {
+ pr_err("Failed to find the location of the '%s' variable at this address.\n"
+ " Perhaps it has been optimized out.\n"
+ " Use -V with the --range option to show '%s' location range.\n",
+ pf->pvar->var, pf->pvar->var);
+ } else if (ret == -ENOTSUP)
pr_err("Sorry, we don't support this variable location yet.\n");
else if (ret == 0 && pf->pvar->field) {
ret = convert_variable_fields(vr_die, pf->pvar->var,
@@ -541,24 +553,9 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
char buf[32], *ptr;
int ret = 0;
- if (!is_c_varname(pf->pvar->var)) {
- /* Copy raw parameters */
- pf->tvar->value = strdup(pf->pvar->var);
- if (pf->tvar->value == NULL)
- return -ENOMEM;
- if (pf->pvar->type) {
- pf->tvar->type = strdup(pf->pvar->type);
- if (pf->tvar->type == NULL)
- return -ENOMEM;
- }
- if (pf->pvar->name) {
- pf->tvar->name = strdup(pf->pvar->name);
- if (pf->tvar->name == NULL)
- return -ENOMEM;
- } else
- pf->tvar->name = NULL;
- return 0;
- }
+ /* Copy raw parameters */
+ if (!is_c_varname(pf->pvar->var))
+ return copy_to_probe_trace_arg(pf->tvar, pf->pvar);
if (pf->pvar->name)
pf->tvar->name = strdup(pf->pvar->name);
@@ -578,10 +575,12 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
/* Search child die for local variables and parameters. */
if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
/* Search again in global variables */
- if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die))
+ if (!die_find_variable_at(&pf->cu_die, pf->pvar->var,
+ 0, &vr_die)) {
pr_warning("Failed to find '%s' in this function.\n",
pf->pvar->var);
ret = -ENOENT;
+ }
}
if (ret >= 0)
ret = convert_variable(&vr_die, pf);
@@ -660,9 +659,15 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
/* If not a real subprogram, find a real one */
if (!die_is_func_def(sc_die)) {
if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
- pr_warning("Failed to find probe point in any "
- "functions.\n");
- return -ENOENT;
+ if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+ pr_warning("Ignoring tail call from %s\n",
+ dwarf_diename(&pf->sp_die));
+ return 0;
+ } else {
+ pr_warning("Failed to find probe point in any "
+ "functions.\n");
+ return -ENOENT;
+ }
}
} else
memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
@@ -717,7 +722,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
}
/* If the function name is given, that's what user expects */
if (fsp->function) {
- if (die_compare_name(fn_die, fsp->function)) {
+ if (die_match_name(fn_die, fsp->function)) {
memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
fsp->found = true;
return 1;
@@ -920,13 +925,14 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
/* Check tag and diename */
if (!die_is_func_def(sp_die) ||
- !die_compare_name(sp_die, pp->function))
+ !die_match_name(sp_die, pp->function))
return DWARF_CB_OK;
/* Check declared file */
if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
return DWARF_CB_OK;
+ pr_debug("Matched function: %s\n", dwarf_diename(sp_die));
pf->fname = dwarf_decl_file(sp_die);
if (pp->line) { /* Function relative line */
dwarf_decl_line(sp_die, &pf->lno);
@@ -943,10 +949,20 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
/* TODO: Check the address in this function */
param->retval = call_probe_finder(sp_die, pf);
}
- } else
+ } else if (!probe_conf.no_inlines) {
/* Inlined function: search instances */
param->retval = die_walk_instances(sp_die,
probe_point_inline_cb, (void *)pf);
+ /* This could be a non-existed inline definition */
+ if (param->retval == -ENOENT && strisglob(pp->function))
+ param->retval = 0;
+ }
+
+ /* We need to find other candidates */
+ if (strisglob(pp->function) && param->retval >= 0) {
+ param->retval = 0; /* We have to clear the result */
+ return DWARF_CB_OK;
+ }
return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
}
@@ -975,7 +991,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
return DWARF_CB_OK;
- if (die_compare_name(param->sp_die, param->function)) {
+ if (die_match_name(param->sp_die, param->function)) {
if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
return DWARF_CB_OK;
@@ -1028,7 +1044,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
return -ENOMEM;
/* Fastpath: lookup by function name from .debug_pubnames section */
- if (pp->function) {
+ if (pp->function && !strisglob(pp->function)) {
struct pubname_callback_param pubname_param = {
.function = pp->function,
.file = pp->file,
@@ -1087,6 +1103,7 @@ found:
struct local_vars_finder {
struct probe_finder *pf;
struct perf_probe_arg *args;
+ bool vars;
int max_args;
int nargs;
int ret;
@@ -1101,7 +1118,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
tag = dwarf_tag(die_mem);
if (tag == DW_TAG_formal_parameter ||
- tag == DW_TAG_variable) {
+ (tag == DW_TAG_variable && vf->vars)) {
if (convert_variable_location(die_mem, vf->pf->addr,
vf->pf->fb_ops, &pf->sp_die,
NULL) == 0) {
@@ -1127,26 +1144,28 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
Dwarf_Die die_mem;
int i;
int n = 0;
- struct local_vars_finder vf = {.pf = pf, .args = args,
+ struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false,
.max_args = MAX_PROBE_ARGS, .ret = 0};
for (i = 0; i < pf->pev->nargs; i++) {
/* var never be NULL */
- if (strcmp(pf->pev->args[i].var, "$vars") == 0) {
- pr_debug("Expanding $vars into:");
- vf.nargs = n;
- /* Special local variables */
- die_find_child(sc_die, copy_variables_cb, (void *)&vf,
- &die_mem);
- pr_debug(" (%d)\n", vf.nargs - n);
- if (vf.ret < 0)
- return vf.ret;
- n = vf.nargs;
- } else {
+ if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0)
+ vf.vars = true;
+ else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) {
/* Copy normal argument */
args[n] = pf->pev->args[i];
n++;
+ continue;
}
+ pr_debug("Expanding %s into:", pf->pev->args[i].var);
+ vf.nargs = n;
+ /* Special local variables */
+ die_find_child(sc_die, copy_variables_cb, (void *)&vf,
+ &die_mem);
+ pr_debug(" (%d)\n", vf.nargs - n);
+ if (vf.ret < 0)
+ return vf.ret;
+ n = vf.nargs;
}
return n;
}
@@ -1174,6 +1193,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
if (ret < 0)
return ret;
+ tev->point.realname = strdup(dwarf_diename(sc_die));
+ if (!tev->point.realname)
+ return -ENOMEM;
+
pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
tev->point.offset);
@@ -1211,15 +1234,15 @@ end:
/* Find probe_trace_events specified by perf_probe_event from debuginfo */
int debuginfo__find_trace_events(struct debuginfo *dbg,
struct perf_probe_event *pev,
- struct probe_trace_event **tevs, int max_tevs)
+ struct probe_trace_event **tevs)
{
struct trace_event_finder tf = {
.pf = {.pev = pev, .callback = add_probe_trace_event},
- .mod = dbg->mod, .max_tevs = max_tevs};
+ .max_tevs = probe_conf.max_probes, .mod = dbg->mod};
int ret;
/* Allocate result tevs array */
- *tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
+ *tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
if (*tevs == NULL)
return -ENOMEM;
@@ -1235,14 +1258,11 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
return (ret < 0) ? ret : tf.ntevs;
}
-#define MAX_VAR_LEN 64
-
/* Collect available variables in this scope */
static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
{
struct available_var_finder *af = data;
struct variable_list *vl;
- char buf[MAX_VAR_LEN];
int tag, ret;
vl = &af->vls[af->nvls - 1];
@@ -1253,11 +1273,38 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
ret = convert_variable_location(die_mem, af->pf.addr,
af->pf.fb_ops, &af->pf.sp_die,
NULL);
- if (ret == 0) {
- ret = die_get_varname(die_mem, buf, MAX_VAR_LEN);
- pr_debug2("Add new var: %s\n", buf);
- if (ret > 0)
- strlist__add(vl->vars, buf);
+ if (ret == 0 || ret == -ERANGE) {
+ int ret2;
+ bool externs = !af->child;
+ struct strbuf buf;
+
+ strbuf_init(&buf, 64);
+
+ if (probe_conf.show_location_range) {
+ if (!externs) {
+ if (ret)
+ strbuf_addf(&buf, "[INV]\t");
+ else
+ strbuf_addf(&buf, "[VAL]\t");
+ } else
+ strbuf_addf(&buf, "[EXT]\t");
+ }
+
+ ret2 = die_get_varname(die_mem, &buf);
+
+ if (!ret2 && probe_conf.show_location_range &&
+ !externs) {
+ strbuf_addf(&buf, "\t");
+ ret2 = die_get_var_range(&af->pf.sp_die,
+ die_mem, &buf);
+ }
+
+ pr_debug("Add new var: %s\n", buf.buf);
+ if (ret2 == 0) {
+ strlist__add(vl->vars,
+ strbuf_detach(&buf, NULL));
+ }
+ strbuf_release(&buf);
}
}
@@ -1293,16 +1340,16 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
vl->point.offset);
/* Find local variables */
- vl->vars = strlist__new(true, NULL);
+ vl->vars = strlist__new(NULL, NULL);
if (vl->vars == NULL)
return -ENOMEM;
af->child = true;
die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
/* Find external variables */
- if (!af->externs)
+ if (!probe_conf.show_ext_vars)
goto out;
- /* Don't need to search child DIE for externs. */
+ /* Don't need to search child DIE for external vars. */
af->child = false;
die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
@@ -1322,17 +1369,16 @@ out:
*/
int debuginfo__find_available_vars_at(struct debuginfo *dbg,
struct perf_probe_event *pev,
- struct variable_list **vls,
- int max_vls, bool externs)
+ struct variable_list **vls)
{
struct available_var_finder af = {
.pf = {.pev = pev, .callback = add_available_vars},
.mod = dbg->mod,
- .max_vls = max_vls, .externs = externs};
+ .max_vls = probe_conf.max_probes};
int ret;
/* Allocate result vls array */
- *vls = zalloc(sizeof(struct variable_list) * max_vls);
+ *vls = zalloc(sizeof(struct variable_list) * af.max_vls);
if (*vls == NULL)
return -ENOMEM;
@@ -1533,7 +1579,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
return DWARF_CB_OK;
if (die_is_func_def(sp_die) &&
- die_compare_name(sp_die, lr->function)) {
+ die_match_name(sp_die, lr->function)) {
lf->fname = dwarf_decl_file(sp_die);
dwarf_decl_line(sp_die, &lr->offset);
pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index ebf8c8c..bed8271 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -10,6 +10,9 @@
#define MAX_PROBES 128
#define MAX_PROBE_ARGS 128
+#define PROBE_ARG_VARS "$vars"
+#define PROBE_ARG_PARAMS "$params"
+
static inline int is_c_varname(const char *name)
{
/* TODO */
@@ -37,8 +40,7 @@ extern void debuginfo__delete(struct debuginfo *dbg);
/* Find probe_trace_events specified by perf_probe_event from debuginfo */
extern int debuginfo__find_trace_events(struct debuginfo *dbg,
struct perf_probe_event *pev,
- struct probe_trace_event **tevs,
- int max_tevs);
+ struct probe_trace_event **tevs);
/* Find a perf_probe_point from debuginfo */
extern int debuginfo__find_probe_point(struct debuginfo *dbg,
@@ -52,8 +54,7 @@ extern int debuginfo__find_line_range(struct debuginfo *dbg,
/* Find available variables */
extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
struct perf_probe_event *pev,
- struct variable_list **vls,
- int max_points, bool externs);
+ struct variable_list **vls);
/* Find a src file from a DWARF tag path */
int get_real_path(const char *raw_path, const char *comp_dir,
@@ -96,7 +97,6 @@ struct available_var_finder {
struct variable_list *vls; /* Found variable lists */
int nvls; /* Number of variable lists */
int max_vls; /* Max no. of variable lists */
- bool externs; /* Find external vars too */
bool child; /* Search child scopes */
};
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index a126e6c..b234a6e 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -74,3 +74,10 @@ void *pstack__pop(struct pstack *pstack)
pstack->entries[pstack->top] = NULL;
return ret;
}
+
+void *pstack__peek(struct pstack *pstack)
+{
+ if (pstack->top == 0)
+ return NULL;
+ return pstack->entries[pstack->top - 1];
+}
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
index c3cb658..ded7f2e 100644
--- a/tools/perf/util/pstack.h
+++ b/tools/perf/util/pstack.h
@@ -10,5 +10,6 @@ bool pstack__empty(const struct pstack *pstack);
void pstack__remove(struct pstack *pstack, void *key);
void pstack__push(struct pstack *pstack, void *key);
void *pstack__pop(struct pstack *pstack);
+void *pstack__peek(struct pstack *pstack);
#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 4d28624..51be28b 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -10,12 +10,14 @@ util/ctype.c
util/evlist.c
util/evsel.c
util/cpumap.c
-../../lib/hweight.c
+../lib/hweight.c
util/thread_map.c
util/util.c
util/xyarray.c
util/cgroup.c
util/rblist.c
+util/counts.c
util/strlist.c
util/trace-event.c
-../../lib/rbtree.c
+../lib/rbtree.c
+util/string.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index d906d0ad..6324fe6 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -384,7 +384,7 @@ static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
{
- cpu_map__delete(pcpus->cpus);
+ cpu_map__put(pcpus->cpus);
pcpus->ob_type->tp_free((PyObject*)pcpus);
}
@@ -453,7 +453,7 @@ static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
{
- thread_map__delete(pthreads->threads);
+ thread_map__put(pthreads->threads);
pthreads->ob_type->tp_free((PyObject*)pthreads);
}
@@ -941,76 +941,84 @@ static int pyrf_evlist__setup_types(void)
return PyType_Ready(&pyrf_evlist__type);
}
+#define PERF_CONST(name) { #name, PERF_##name }
+
static struct {
const char *name;
int value;
} perf__constants[] = {
- { "TYPE_HARDWARE", PERF_TYPE_HARDWARE },
- { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE },
- { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT },
- { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE },
- { "TYPE_RAW", PERF_TYPE_RAW },
- { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT },
-
- { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES },
- { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS },
- { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES },
- { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES },
- { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
- { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES },
- { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES },
- { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D },
- { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I },
- { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL },
- { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB },
- { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB },
- { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU },
- { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ },
- { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE },
- { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH },
- { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS },
- { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS },
-
- { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
- { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
-
- { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK },
- { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK },
- { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS },
- { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES },
- { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS },
- { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN },
- { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ },
- { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS },
- { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS },
- { "COUNT_SW_DUMMY", PERF_COUNT_SW_DUMMY },
-
- { "SAMPLE_IP", PERF_SAMPLE_IP },
- { "SAMPLE_TID", PERF_SAMPLE_TID },
- { "SAMPLE_TIME", PERF_SAMPLE_TIME },
- { "SAMPLE_ADDR", PERF_SAMPLE_ADDR },
- { "SAMPLE_READ", PERF_SAMPLE_READ },
- { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN },
- { "SAMPLE_ID", PERF_SAMPLE_ID },
- { "SAMPLE_CPU", PERF_SAMPLE_CPU },
- { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD },
- { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID },
- { "SAMPLE_RAW", PERF_SAMPLE_RAW },
-
- { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED },
- { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING },
- { "FORMAT_ID", PERF_FORMAT_ID },
- { "FORMAT_GROUP", PERF_FORMAT_GROUP },
-
- { "RECORD_MMAP", PERF_RECORD_MMAP },
- { "RECORD_LOST", PERF_RECORD_LOST },
- { "RECORD_COMM", PERF_RECORD_COMM },
- { "RECORD_EXIT", PERF_RECORD_EXIT },
- { "RECORD_THROTTLE", PERF_RECORD_THROTTLE },
- { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE },
- { "RECORD_FORK", PERF_RECORD_FORK },
- { "RECORD_READ", PERF_RECORD_READ },
- { "RECORD_SAMPLE", PERF_RECORD_SAMPLE },
+ PERF_CONST(TYPE_HARDWARE),
+ PERF_CONST(TYPE_SOFTWARE),
+ PERF_CONST(TYPE_TRACEPOINT),
+ PERF_CONST(TYPE_HW_CACHE),
+ PERF_CONST(TYPE_RAW),
+ PERF_CONST(TYPE_BREAKPOINT),
+
+ PERF_CONST(COUNT_HW_CPU_CYCLES),
+ PERF_CONST(COUNT_HW_INSTRUCTIONS),
+ PERF_CONST(COUNT_HW_CACHE_REFERENCES),
+ PERF_CONST(COUNT_HW_CACHE_MISSES),
+ PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS),
+ PERF_CONST(COUNT_HW_BRANCH_MISSES),
+ PERF_CONST(COUNT_HW_BUS_CYCLES),
+ PERF_CONST(COUNT_HW_CACHE_L1D),
+ PERF_CONST(COUNT_HW_CACHE_L1I),
+ PERF_CONST(COUNT_HW_CACHE_LL),
+ PERF_CONST(COUNT_HW_CACHE_DTLB),
+ PERF_CONST(COUNT_HW_CACHE_ITLB),
+ PERF_CONST(COUNT_HW_CACHE_BPU),
+ PERF_CONST(COUNT_HW_CACHE_OP_READ),
+ PERF_CONST(COUNT_HW_CACHE_OP_WRITE),
+ PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH),
+ PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS),
+ PERF_CONST(COUNT_HW_CACHE_RESULT_MISS),
+
+ PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND),
+ PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND),
+
+ PERF_CONST(COUNT_SW_CPU_CLOCK),
+ PERF_CONST(COUNT_SW_TASK_CLOCK),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS),
+ PERF_CONST(COUNT_SW_CONTEXT_SWITCHES),
+ PERF_CONST(COUNT_SW_CPU_MIGRATIONS),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ),
+ PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS),
+ PERF_CONST(COUNT_SW_EMULATION_FAULTS),
+ PERF_CONST(COUNT_SW_DUMMY),
+
+ PERF_CONST(SAMPLE_IP),
+ PERF_CONST(SAMPLE_TID),
+ PERF_CONST(SAMPLE_TIME),
+ PERF_CONST(SAMPLE_ADDR),
+ PERF_CONST(SAMPLE_READ),
+ PERF_CONST(SAMPLE_CALLCHAIN),
+ PERF_CONST(SAMPLE_ID),
+ PERF_CONST(SAMPLE_CPU),
+ PERF_CONST(SAMPLE_PERIOD),
+ PERF_CONST(SAMPLE_STREAM_ID),
+ PERF_CONST(SAMPLE_RAW),
+
+ PERF_CONST(FORMAT_TOTAL_TIME_ENABLED),
+ PERF_CONST(FORMAT_TOTAL_TIME_RUNNING),
+ PERF_CONST(FORMAT_ID),
+ PERF_CONST(FORMAT_GROUP),
+
+ PERF_CONST(RECORD_MMAP),
+ PERF_CONST(RECORD_LOST),
+ PERF_CONST(RECORD_COMM),
+ PERF_CONST(RECORD_EXIT),
+ PERF_CONST(RECORD_THROTTLE),
+ PERF_CONST(RECORD_UNTHROTTLE),
+ PERF_CONST(RECORD_FORK),
+ PERF_CONST(RECORD_READ),
+ PERF_CONST(RECORD_SAMPLE),
+ PERF_CONST(RECORD_MMAP2),
+ PERF_CONST(RECORD_AUX),
+ PERF_CONST(RECORD_ITRACE_START),
+ PERF_CONST(RECORD_LOST_SAMPLES),
+ PERF_CONST(RECORD_SWITCH),
+ PERF_CONST(RECORD_SWITCH_CPU_WIDE),
{ .name = NULL, },
};
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 8acd0df..0467367 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -20,7 +20,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
if (!evlist)
return -ENOMEM;
- if (parse_events(evlist, str))
+ if (parse_events(evlist, str, NULL))
goto out_delete;
evsel = perf_evlist__first(evlist);
@@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn)
if (!cpus)
return false;
cpu = cpus->map[0];
- cpu_map__delete(cpus);
+ cpu_map__put(cpus);
do {
ret = perf_do_probe_api(fn, cpu, try[i++]);
@@ -85,6 +85,11 @@ static void perf_probe_comm_exec(struct perf_evsel *evsel)
evsel->attr.comm_exec = 1;
}
+static void perf_probe_context_switch(struct perf_evsel *evsel)
+{
+ evsel->attr.context_switch = 1;
+}
+
bool perf_can_sample_identifier(void)
{
return perf_probe_api(perf_probe_sample_identifier);
@@ -95,6 +100,35 @@ static bool perf_can_comm_exec(void)
return perf_probe_api(perf_probe_comm_exec);
}
+bool perf_can_record_switch_events(void)
+{
+ return perf_probe_api(perf_probe_context_switch);
+}
+
+bool perf_can_record_cpu_wide(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .exclude_kernel = 1,
+ };
+ struct cpu_map *cpus;
+ int cpu, fd;
+
+ cpus = cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ cpu_map__put(cpus);
+
+ fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+
+ return true;
+}
+
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
{
struct perf_evsel *evsel;
@@ -119,7 +153,16 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
evsel->attr.comm_exec = 1;
}
- if (evlist->nr_entries > 1) {
+ if (opts->full_auxtrace) {
+ /*
+ * Need to be able to synthesize and parse selected events with
+ * arbitrary sample types, which requires always being able to
+ * match the id.
+ */
+ use_sample_identifier = perf_can_sample_identifier();
+ evlist__for_each(evlist, evsel)
+ perf_evsel__set_sample_id(evsel, use_sample_identifier);
+ } else if (evlist->nr_entries > 1) {
struct perf_evsel *first = perf_evlist__first(evlist);
evlist__for_each(evlist, evsel) {
@@ -207,7 +250,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str)
if (!temp_evlist)
return false;
- err = parse_events(temp_evlist, str);
+ err = parse_events(temp_evlist, str, NULL);
if (err)
goto out_delete;
@@ -217,7 +260,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str)
struct cpu_map *cpus = cpu_map__new(NULL);
cpu = cpus ? cpus->map[0] : 0;
- cpu_map__delete(cpus);
+ cpu_map__put(cpus);
} else {
cpu = evlist->cpus->map[0];
}
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 430b5d2..1bd593b 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -55,10 +55,10 @@ void xs_init(pTHX)
INTERP my_perl;
-#define FTRACE_MAX_EVENT \
+#define TRACE_EVENT_TYPE_MAX \
((1 << (sizeof(unsigned short) * 8)) - 1)
-static DECLARE_BITMAP(events_defined, FTRACE_MAX_EVENT);
+static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
extern struct scripting_context *scripting_context;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 5544b8c..ace2484 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -44,10 +44,10 @@
PyMODINIT_FUNC initperf_trace_context(void);
-#define FTRACE_MAX_EVENT \
+#define TRACE_EVENT_TYPE_MAX \
((1 << (sizeof(unsigned short) * 8)) - 1)
-static DECLARE_BITMAP(events_defined, FTRACE_MAX_EVENT);
+static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
#define MAX_FIELDS 64
#define N_COMMON_FIELDS 7
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0c74012..8a4537e 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -15,12 +15,14 @@
#include "cpumap.h"
#include "perf_regs.h"
#include "asm/bug.h"
+#include "auxtrace.h"
+#include "thread-stack.h"
-static int machines__deliver_event(struct machines *machines,
- struct perf_evlist *evlist,
- union perf_event *event,
- struct perf_sample *sample,
- struct perf_tool *tool, u64 file_offset);
+static int perf_session__deliver_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool,
+ u64 file_offset);
static int perf_session__open(struct perf_session *session)
{
@@ -105,8 +107,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
return ret;
}
- return machines__deliver_event(&session->machines, session->evlist, event->event,
- &sample, session->tool, event->file_offset);
+ return perf_session__deliver_event(session, event->event, &sample,
+ session->tool, event->file_offset);
}
struct perf_session *perf_session__new(struct perf_data_file *file,
@@ -119,6 +121,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
session->repipe = repipe;
session->tool = tool;
+ INIT_LIST_HEAD(&session->auxtrace_index);
machines__init(&session->machines);
ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
@@ -167,7 +170,7 @@ static void perf_session__delete_threads(struct perf_session *session)
machine__delete_threads(&session->machines.host);
}
-static void perf_session_env__delete(struct perf_session_env *env)
+static void perf_session_env__exit(struct perf_env *env)
{
zfree(&env->hostname);
zfree(&env->os_release);
@@ -177,6 +180,7 @@ static void perf_session_env__delete(struct perf_session_env *env)
zfree(&env->cpuid);
zfree(&env->cmdline);
+ zfree(&env->cmdline_argv);
zfree(&env->sibling_cores);
zfree(&env->sibling_threads);
zfree(&env->numa_nodes);
@@ -185,9 +189,11 @@ static void perf_session_env__delete(struct perf_session_env *env)
void perf_session__delete(struct perf_session *session)
{
+ auxtrace__free(session);
+ auxtrace_index__free(&session->auxtrace_index);
perf_session__destroy_kernel_maps(session);
perf_session__delete_threads(session);
- perf_session_env__delete(&session->header.env);
+ perf_session_env__exit(&session->header.env);
machines__exit(&session->machines);
if (session->file)
perf_data_file__close(session->file);
@@ -262,6 +268,49 @@ static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
return 0;
}
+static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int skipn(int fd, off_t n)
+{
+ char buf[4096];
+ ssize_t ret;
+
+ while (n > 0) {
+ ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
+ if (ret <= 0)
+ return ret;
+ n -= ret;
+ }
+
+ return 0;
+}
+
+static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session
+ __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ if (perf_data_file__is_pipe(session->file))
+ skipn(perf_data_file__fd(session->file), event->auxtrace.size);
+ return event->auxtrace.size;
+}
+
+static
+int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
void perf_tool__fill_defaults(struct perf_tool *tool)
{
if (tool->sample == NULL)
@@ -278,6 +327,14 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->exit = process_event_stub;
if (tool->lost == NULL)
tool->lost = perf_event__process_lost;
+ if (tool->lost_samples == NULL)
+ tool->lost_samples = perf_event__process_lost_samples;
+ if (tool->aux == NULL)
+ tool->aux = perf_event__process_aux;
+ if (tool->itrace_start == NULL)
+ tool->itrace_start = perf_event__process_itrace_start;
+ if (tool->context_switch == NULL)
+ tool->context_switch = perf_event__process_switch;
if (tool->read == NULL)
tool->read = process_event_sample_stub;
if (tool->throttle == NULL)
@@ -298,6 +355,12 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
}
if (tool->id_index == NULL)
tool->id_index = process_id_index_stub;
+ if (tool->auxtrace_info == NULL)
+ tool->auxtrace_info = process_event_auxtrace_info_stub;
+ if (tool->auxtrace == NULL)
+ tool->auxtrace = process_event_auxtrace_stub;
+ if (tool->auxtrace_error == NULL)
+ tool->auxtrace_error = process_event_auxtrace_error_stub;
}
static void swap_sample_id_all(union perf_event *event, void *data)
@@ -390,6 +453,39 @@ static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
swap_sample_id_all(event, &event->read + 1);
}
+static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
+{
+ event->aux.aux_offset = bswap_64(event->aux.aux_offset);
+ event->aux.aux_size = bswap_64(event->aux.aux_size);
+ event->aux.flags = bswap_64(event->aux.flags);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->aux + 1);
+}
+
+static void perf_event__itrace_start_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->itrace_start.pid = bswap_32(event->itrace_start.pid);
+ event->itrace_start.tid = bswap_32(event->itrace_start.tid);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->itrace_start + 1);
+}
+
+static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
+{
+ if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+ event->context_switch.next_prev_pid =
+ bswap_32(event->context_switch.next_prev_pid);
+ event->context_switch.next_prev_tid =
+ bswap_32(event->context_switch.next_prev_tid);
+ }
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->context_switch + 1);
+}
+
static void perf_event__throttle_swap(union perf_event *event,
bool sample_id_all)
{
@@ -438,19 +534,42 @@ void perf_event__attr_swap(struct perf_event_attr *attr)
{
attr->type = bswap_32(attr->type);
attr->size = bswap_32(attr->size);
- attr->config = bswap_64(attr->config);
- attr->sample_period = bswap_64(attr->sample_period);
- attr->sample_type = bswap_64(attr->sample_type);
- attr->read_format = bswap_64(attr->read_format);
- attr->wakeup_events = bswap_32(attr->wakeup_events);
- attr->bp_type = bswap_32(attr->bp_type);
- attr->bp_addr = bswap_64(attr->bp_addr);
- attr->bp_len = bswap_64(attr->bp_len);
- attr->branch_sample_type = bswap_64(attr->branch_sample_type);
- attr->sample_regs_user = bswap_64(attr->sample_regs_user);
- attr->sample_stack_user = bswap_32(attr->sample_stack_user);
- swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
+#define bswap_safe(f, n) \
+ (attr->size > (offsetof(struct perf_event_attr, f) + \
+ sizeof(attr->f) * (n)))
+#define bswap_field(f, sz) \
+do { \
+ if (bswap_safe(f, 0)) \
+ attr->f = bswap_##sz(attr->f); \
+} while(0)
+#define bswap_field_32(f) bswap_field(f, 32)
+#define bswap_field_64(f) bswap_field(f, 64)
+
+ bswap_field_64(config);
+ bswap_field_64(sample_period);
+ bswap_field_64(sample_type);
+ bswap_field_64(read_format);
+ bswap_field_32(wakeup_events);
+ bswap_field_32(bp_type);
+ bswap_field_64(bp_addr);
+ bswap_field_64(bp_len);
+ bswap_field_64(branch_sample_type);
+ bswap_field_64(sample_regs_user);
+ bswap_field_32(sample_stack_user);
+ bswap_field_32(aux_watermark);
+
+ /*
+ * After read_format are bitfields. Check read_format because
+ * we are unable to use offsetof on bitfield.
+ */
+ if (bswap_safe(read_format, 1))
+ swap_bitfield((u8 *) (&attr->read_format + 1),
+ sizeof(u64));
+#undef bswap_field_64
+#undef bswap_field_32
+#undef bswap_field
+#undef bswap_safe
}
static void perf_event__hdr_attr_swap(union perf_event *event,
@@ -478,6 +597,40 @@ static void perf_event__tracing_data_swap(union perf_event *event,
event->tracing_data.size = bswap_32(event->tracing_data.size);
}
+static void perf_event__auxtrace_info_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ size_t size;
+
+ event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
+
+ size = event->header.size;
+ size -= (void *)&event->auxtrace_info.priv - (void *)event;
+ mem_bswap_64(event->auxtrace_info.priv, size);
+}
+
+static void perf_event__auxtrace_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->auxtrace.size = bswap_64(event->auxtrace.size);
+ event->auxtrace.offset = bswap_64(event->auxtrace.offset);
+ event->auxtrace.reference = bswap_64(event->auxtrace.reference);
+ event->auxtrace.idx = bswap_32(event->auxtrace.idx);
+ event->auxtrace.tid = bswap_32(event->auxtrace.tid);
+ event->auxtrace.cpu = bswap_32(event->auxtrace.cpu);
+}
+
+static void perf_event__auxtrace_error_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
+ event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
+ event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu);
+ event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid);
+ event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid);
+ event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip);
+}
+
typedef void (*perf_event__swap_op)(union perf_event *event,
bool sample_id_all);
@@ -492,11 +645,19 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_THROTTLE] = perf_event__throttle_swap,
[PERF_RECORD_UNTHROTTLE] = perf_event__throttle_swap,
[PERF_RECORD_SAMPLE] = perf_event__all64_swap,
+ [PERF_RECORD_AUX] = perf_event__aux_swap,
+ [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap,
+ [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap,
+ [PERF_RECORD_SWITCH] = perf_event__switch_swap,
+ [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
[PERF_RECORD_HEADER_BUILD_ID] = NULL,
[PERF_RECORD_ID_INDEX] = perf_event__all64_swap,
+ [PERF_RECORD_AUXTRACE_INFO] = perf_event__auxtrace_info_swap,
+ [PERF_RECORD_AUXTRACE] = perf_event__auxtrace_swap,
+ [PERF_RECORD_AUXTRACE_ERROR] = perf_event__auxtrace_error_swap,
[PERF_RECORD_HEADER_MAX] = NULL,
};
@@ -543,6 +704,8 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct ordered_events *oe)
{
+ if (dump_trace)
+ fprintf(stdout, "\n");
return ordered_events__flush(oe, OE_FLUSH__ROUND);
}
@@ -621,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample)
printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
- for (i = 0; i < sample->branch_stack->nr; i++)
- printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
- i, sample->branch_stack->entries[i].from,
- sample->branch_stack->entries[i].to);
+ for (i = 0; i < sample->branch_stack->nr; i++) {
+ struct branch_entry *e = &sample->branch_stack->entries[i];
+
+ printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
+ i, e->from, e->to,
+ e->flags.cycles,
+ e->flags.mispred ? "M" : " ",
+ e->flags.predicted ? "P" : " ",
+ e->flags.abort ? "A" : " ",
+ e->flags.in_tx ? "T" : " ",
+ (unsigned)e->flags.reserved);
+ }
}
static void regs_dump__printf(u64 mask, u64 *regs)
@@ -921,6 +1092,8 @@ static int machines__deliver_event(struct machines *machines,
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);
case PERF_RECORD_MMAP2:
+ if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+ ++evlist->stats.nr_proc_map_timeout;
return tool->mmap2(tool, event, sample, machine);
case PERF_RECORD_COMM:
return tool->comm(tool, event, sample, machine);
@@ -932,18 +1105,47 @@ static int machines__deliver_event(struct machines *machines,
if (tool->lost == perf_event__process_lost)
evlist->stats.total_lost += event->lost.lost;
return tool->lost(tool, event, sample, machine);
+ case PERF_RECORD_LOST_SAMPLES:
+ if (tool->lost_samples == perf_event__process_lost_samples)
+ evlist->stats.total_lost_samples += event->lost_samples.lost;
+ return tool->lost_samples(tool, event, sample, machine);
case PERF_RECORD_READ:
return tool->read(tool, event, sample, evsel, machine);
case PERF_RECORD_THROTTLE:
return tool->throttle(tool, event, sample, machine);
case PERF_RECORD_UNTHROTTLE:
return tool->unthrottle(tool, event, sample, machine);
+ case PERF_RECORD_AUX:
+ return tool->aux(tool, event, sample, machine);
+ case PERF_RECORD_ITRACE_START:
+ return tool->itrace_start(tool, event, sample, machine);
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ return tool->context_switch(tool, event, sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
}
}
+static int perf_session__deliver_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool,
+ u64 file_offset)
+{
+ int ret;
+
+ ret = auxtrace__process_event(session, event, sample, tool);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return 0;
+
+ return machines__deliver_event(&session->machines, session->evlist,
+ event, sample, tool, file_offset);
+}
+
static s64 perf_session__process_user_event(struct perf_session *session,
union perf_event *event,
u64 file_offset)
@@ -980,6 +1182,15 @@ static s64 perf_session__process_user_event(struct perf_session *session,
return tool->finished_round(tool, event, oe);
case PERF_RECORD_ID_INDEX:
return tool->id_index(tool, event, session);
+ case PERF_RECORD_AUXTRACE_INFO:
+ return tool->auxtrace_info(tool, event, session);
+ case PERF_RECORD_AUXTRACE:
+ /* setup for reading amidst mmap */
+ lseek(fd, file_offset + event->header.size, SEEK_SET);
+ return tool->auxtrace(tool, event, session);
+ case PERF_RECORD_AUXTRACE_ERROR:
+ perf_session__auxtrace_error_inc(session, event);
+ return tool->auxtrace_error(tool, event, session);
default:
return -EINVAL;
}
@@ -1034,7 +1245,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
return -1;
if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
- readn(fd, &buf, hdr_sz) != (ssize_t)hdr_sz)
+ readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
return -1;
event = (union perf_event *)buf;
@@ -1042,12 +1253,12 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
if (session->header.needs_swap)
perf_event_header__bswap(&event->header);
- if (event->header.size < hdr_sz)
+ if (event->header.size < hdr_sz || event->header.size > buf_sz)
return -1;
rest = event->header.size - hdr_sz;
- if (readn(fd, &buf, rest) != (ssize_t)rest)
+ if (readn(fd, buf, rest) != (ssize_t)rest)
return -1;
if (session->header.needs_swap)
@@ -1096,8 +1307,8 @@ static s64 perf_session__process_event(struct perf_session *session,
return ret;
}
- return machines__deliver_event(&session->machines, evlist, event,
- &sample, tool, file_offset);
+ return perf_session__deliver_event(session, event, &sample, tool,
+ file_offset);
}
void perf_event_header__bswap(struct perf_event_header *hdr)
@@ -1138,6 +1349,18 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
stats->nr_events[PERF_RECORD_LOST]);
}
+ if (session->tool->lost_samples == perf_event__process_lost_samples) {
+ double drop_rate;
+
+ drop_rate = (double)stats->total_lost_samples /
+ (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
+ if (drop_rate > 0.05) {
+ ui__warning("Processed %" PRIu64 " samples and lost %3.2f%% samples!\n\n",
+ stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
+ drop_rate * 100.0);
+ }
+ }
+
if (stats->nr_unknown_events != 0) {
ui__warning("Found %u unknown events!\n\n"
"Is this an older tool processing a perf.data "
@@ -1168,6 +1391,32 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
if (oe->nr_unordered_events != 0)
ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+
+ events_stats__auxtrace_error_warn(stats);
+
+ if (stats->nr_proc_map_timeout != 0) {
+ ui__warning("%d map information files for pre-existing threads were\n"
+ "not processed, if there are samples for addresses they\n"
+ "will not be resolved, you may find out which are these\n"
+ "threads by running with -v and redirecting the output\n"
+ "to a file.\n"
+ "The time limit to process proc map is too short?\n"
+ "Increase it by --proc-map-timeout\n",
+ stats->nr_proc_map_timeout);
+ }
+}
+
+static int perf_session__flush_thread_stack(struct thread *thread,
+ void *p __maybe_unused)
+{
+ return thread_stack__flush(thread);
+}
+
+static int perf_session__flush_thread_stacks(struct perf_session *session)
+{
+ return machines__for_each_thread(&session->machines,
+ perf_session__flush_thread_stack,
+ NULL);
}
volatile int session_done;
@@ -1256,10 +1505,17 @@ more:
done:
/* do the final flush for ordered samples */
err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+ if (err)
+ goto out_err;
+ err = auxtrace__flush_events(session, tool);
+ if (err)
+ goto out_err;
+ err = perf_session__flush_thread_stacks(session);
out_err:
free(buf);
perf_session__warn_about_errors(session);
ordered_events__free(&session->ordered_events);
+ auxtrace__free_events(session);
return err;
}
@@ -1402,10 +1658,17 @@ more:
out:
/* do the final flush for ordered samples */
err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+ if (err)
+ goto out_err;
+ err = auxtrace__flush_events(session, tool);
+ if (err)
+ goto out_err;
+ err = perf_session__flush_thread_stacks(session);
out_err:
ui_progress__finish();
perf_session__warn_about_errors(session);
ordered_events__free(&session->ordered_events);
+ auxtrace__free_events(session);
session->one_mmap = false;
return err;
}
@@ -1488,7 +1751,13 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp
size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
- size_t ret = fprintf(fp, "Aggregated stats:\n");
+ size_t ret;
+ const char *msg = "";
+
+ if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+ msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
+
+ ret = fprintf(fp, "\nAggregated stats:%s\n", msg);
ret += events_stats__fprintf(&session->evlist->stats, fp);
return ret;
@@ -1655,7 +1924,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
err = 0;
out_delete_map:
- cpu_map__delete(map);
+ cpu_map__put(map);
return err;
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d5fa7b791..b44afc7 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -15,10 +15,16 @@
struct ip_callchain;
struct thread;
+struct auxtrace;
+struct itrace_synth_opts;
+
struct perf_session {
struct perf_header header;
struct machines machines;
struct perf_evlist *evlist;
+ struct auxtrace *auxtrace;
+ struct itrace_synth_opts *itrace_synth_opts;
+ struct list_head auxtrace_index;
struct trace_event tevent;
bool repipe;
bool one_mmap;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 4593f36..7e38716 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -9,7 +9,7 @@ regex_t parent_regex;
const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char default_sort_order[] = "comm,dso,symbol";
-const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to";
+const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
@@ -89,14 +89,14 @@ static int64_t
sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
{
/* Compare the addr that should be unique among comm */
- return comm__str(right->comm) - comm__str(left->comm);
+ return strcmp(comm__str(right->comm), comm__str(left->comm));
}
static int64_t
sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
{
/* Compare the addr that should be unique among comm */
- return comm__str(right->comm) - comm__str(left->comm);
+ return strcmp(comm__str(right->comm), comm__str(left->comm));
}
static int64_t
@@ -182,18 +182,16 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
{
- u64 ip_l, ip_r;
-
if (!sym_l || !sym_r)
return cmp_null(sym_l, sym_r);
if (sym_l == sym_r)
return 0;
- ip_l = sym_l->start;
- ip_r = sym_r->start;
+ if (sym_l->start != sym_r->start)
+ return (int64_t)(sym_r->start - sym_l->start);
- return (int64_t)(ip_r - ip_l);
+ return (int64_t)(sym_r->end - sym_l->end);
}
static int64_t
@@ -321,6 +319,59 @@ struct sort_entry sort_srcline = {
.se_width_idx = HISTC_SRCLINE,
};
+/* --sort srcfile */
+
+static char no_srcfile[1];
+
+static char *get_srcfile(struct hist_entry *e)
+{
+ char *sf, *p;
+ struct map *map = e->ms.map;
+
+ sf = get_srcline(map->dso, map__rip_2objdump(map, e->ip),
+ e->ms.sym, true);
+ if (!strcmp(sf, SRCLINE_UNKNOWN))
+ return no_srcfile;
+ p = strchr(sf, ':');
+ if (p && *sf) {
+ *p = 0;
+ return sf;
+ }
+ free(sf);
+ return no_srcfile;
+}
+
+static int64_t
+sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->srcfile) {
+ if (!left->ms.map)
+ left->srcfile = no_srcfile;
+ else
+ left->srcfile = get_srcfile(left);
+ }
+ if (!right->srcfile) {
+ if (!right->ms.map)
+ right->srcfile = no_srcfile;
+ else
+ right->srcfile = get_srcfile(right);
+ }
+ return strcmp(right->srcfile, left->srcfile);
+}
+
+static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile);
+}
+
+struct sort_entry sort_srcfile = {
+ .se_header = "Source File",
+ .se_cmp = sort__srcfile_cmp,
+ .se_snprintf = hist_entry__srcfile_snprintf,
+ .se_width_idx = HISTC_SRCFILE,
+};
+
/* --sort parent */
static int64_t
@@ -528,6 +579,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
}
+static int64_t
+sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->branch_info->flags.cycles -
+ right->branch_info->flags.cycles;
+}
+
+static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info->flags.cycles == 0)
+ return repsep_snprintf(bf, size, "%-*s", width, "-");
+ return repsep_snprintf(bf, size, "%-*hd", width,
+ he->branch_info->flags.cycles);
+}
+
+struct sort_entry sort_cycles = {
+ .se_header = "Basic Block Cycles",
+ .se_cmp = sort__cycles_cmp,
+ .se_snprintf = hist_entry__cycles_snprintf,
+ .se_width_idx = HISTC_CYCLES,
+};
+
/* --sort daddr_sym */
static int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
@@ -1175,6 +1249,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_PARENT, "parent", sort_parent),
DIM(SORT_CPU, "cpu", sort_cpu),
DIM(SORT_SRCLINE, "srcline", sort_srcline),
+ DIM(SORT_SRCFILE, "srcfile", sort_srcfile),
DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
@@ -1192,6 +1267,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
DIM(SORT_IN_TX, "in_tx", sort_in_tx),
DIM(SORT_ABORT, "abort", sort_abort),
+ DIM(SORT_CYCLES, "cycles", sort_cycles),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 846036a..3c2a399 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -58,15 +58,16 @@ struct he_stat {
struct hist_entry_diff {
bool computed;
+ union {
+ /* PERF_HPP__DELTA */
+ double period_ratio_delta;
- /* PERF_HPP__DELTA */
- double period_ratio_delta;
-
- /* PERF_HPP__RATIO */
- double period_ratio;
+ /* PERF_HPP__RATIO */
+ double period_ratio;
- /* HISTC_WEIGHTED_DIFF */
- s64 wdiff;
+ /* HISTC_WEIGHTED_DIFF */
+ s64 wdiff;
+ };
};
/**
@@ -92,21 +93,29 @@ struct hist_entry {
s32 cpu;
u8 cpumode;
- struct hist_entry_diff diff;
-
/* We are added by hists__add_dummy_entry. */
bool dummy;
- /* XXX These two should move to some tree widget lib */
- u16 row_offset;
- u16 nr_rows;
-
- bool init_have_children;
char level;
u8 filtered;
+ union {
+ /*
+ * Since perf diff only supports the stdio output, TUI
+ * fields are only accessed from perf report (or perf
+ * top). So make it an union to reduce memory usage.
+ */
+ struct hist_entry_diff diff;
+ struct /* for TUI */ {
+ u16 row_offset;
+ u16 nr_rows;
+ bool init_have_children;
+ bool unfolded;
+ bool has_children;
+ };
+ };
char *srcline;
+ char *srcfile;
struct symbol *parent;
- unsigned long position;
struct rb_root sorted_chain;
struct branch_info *branch_info;
struct hists *hists;
@@ -164,6 +173,7 @@ enum sort_type {
SORT_PARENT,
SORT_CPU,
SORT_SRCLINE,
+ SORT_SRCFILE,
SORT_LOCAL_WEIGHT,
SORT_GLOBAL_WEIGHT,
SORT_TRANSACTION,
@@ -177,6 +187,7 @@ enum sort_type {
SORT_MISPREDICT,
SORT_ABORT,
SORT_IN_TX,
+ SORT_CYCLES,
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index c93fb0c..fc08248 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -10,6 +10,8 @@
#include "symbol.h"
+bool srcline_full_filename;
+
#ifdef HAVE_LIBBFD_SUPPORT
/*
@@ -277,7 +279,9 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
if (!addr2line(dso_name, addr, &file, &line, dso))
goto out;
- if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) {
+ if (asprintf(&srcline, "%s:%u",
+ srcline_full_filename ? file : basename(file),
+ line) < 0) {
free(file);
goto out;
}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
new file mode 100644
index 0000000..2a5d8d7
--- /dev/null
+++ b/tools/perf/util/stat-shadow.c
@@ -0,0 +1,432 @@
+#include <stdio.h>
+#include "evsel.h"
+#include "stat.h"
+#include "color.h"
+
+enum {
+ CTX_BIT_USER = 1 << 0,
+ CTX_BIT_KERNEL = 1 << 1,
+ CTX_BIT_HV = 1 << 2,
+ CTX_BIT_HOST = 1 << 3,
+ CTX_BIT_IDLE = 1 << 4,
+ CTX_BIT_MAX = 1 << 5,
+};
+
+#define NUM_CTX CTX_BIT_MAX
+
+static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+
+struct stats walltime_nsecs_stats;
+
+static int evsel_context(struct perf_evsel *evsel)
+{
+ int ctx = 0;
+
+ if (evsel->attr.exclude_kernel)
+ ctx |= CTX_BIT_KERNEL;
+ if (evsel->attr.exclude_user)
+ ctx |= CTX_BIT_USER;
+ if (evsel->attr.exclude_hv)
+ ctx |= CTX_BIT_HV;
+ if (evsel->attr.exclude_host)
+ ctx |= CTX_BIT_HOST;
+ if (evsel->attr.exclude_idle)
+ ctx |= CTX_BIT_IDLE;
+
+ return ctx;
+}
+
+void perf_stat__reset_shadow_stats(void)
+{
+ memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
+ memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
+ memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
+ memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
+ memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
+ memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
+ memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
+ memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
+ memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
+ memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
+ memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+ memset(runtime_cycles_in_tx_stats, 0,
+ sizeof(runtime_cycles_in_tx_stats));
+ memset(runtime_transaction_stats, 0,
+ sizeof(runtime_transaction_stats));
+ memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
+ memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+}
+
+/*
+ * Update various tracking values we maintain to print
+ * more semantic information such as miss/hit ratios,
+ * instruction rates, etc:
+ */
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+ int cpu)
+{
+ int ctx = evsel_context(counter);
+
+ if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+ update_stats(&runtime_nsecs_stats[cpu], count[0]);
+ else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+ update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
+ update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TRANSACTION_START))
+ update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, ELISION_START))
+ update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+ update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+ update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+ update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+ update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+ update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+ update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+ update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+ update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+ update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+}
+
+/* used for get_ratio_color() */
+enum grc_type {
+ GRC_STALLED_CYCLES_FE,
+ GRC_STALLED_CYCLES_BE,
+ GRC_CACHE_MISSES,
+ GRC_MAX_NR
+};
+
+static const char *get_ratio_color(enum grc_type type, double ratio)
+{
+ static const double grc_table[GRC_MAX_NR][3] = {
+ [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
+ [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
+ [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
+ };
+ const char *color = PERF_COLOR_NORMAL;
+
+ if (ratio > grc_table[type][0])
+ color = PERF_COLOR_RED;
+ else if (ratio > grc_table[type][1])
+ color = PERF_COLOR_MAGENTA;
+ else if (ratio > grc_table[type][2])
+ color = PERF_COLOR_YELLOW;
+
+ return color;
+}
+
+static void print_stalled_cycles_frontend(FILE *out, int cpu,
+ struct perf_evsel *evsel
+ __maybe_unused, double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
+
+ fprintf(out, " # ");
+ color_fprintf(out, color, "%6.2f%%", ratio);
+ fprintf(out, " frontend cycles idle ");
+}
+
+static void print_stalled_cycles_backend(FILE *out, int cpu,
+ struct perf_evsel *evsel
+ __maybe_unused, double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
+
+ fprintf(out, " # ");
+ color_fprintf(out, color, "%6.2f%%", ratio);
+ fprintf(out, " backend cycles idle ");
+}
+
+static void print_branch_misses(FILE *out, int cpu,
+ struct perf_evsel *evsel __maybe_unused,
+ double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = avg_stats(&runtime_branches_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+ fprintf(out, " # ");
+ color_fprintf(out, color, "%6.2f%%", ratio);
+ fprintf(out, " of all branches ");
+}
+
+static void print_l1_dcache_misses(FILE *out, int cpu,
+ struct perf_evsel *evsel __maybe_unused,
+ double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+ fprintf(out, " # ");
+ color_fprintf(out, color, "%6.2f%%", ratio);
+ fprintf(out, " of all L1-dcache hits ");
+}
+
+static void print_l1_icache_misses(FILE *out, int cpu,
+ struct perf_evsel *evsel __maybe_unused,
+ double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+ fprintf(out, " # ");
+ color_fprintf(out, color, "%6.2f%%", ratio);
+ fprintf(out, " of all L1-icache hits ");
+}
+
+static void print_dtlb_cache_misses(FILE *out, int cpu,
+ struct perf_evsel *evsel __maybe_unused,
+ double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+ fprintf(out, " # ");
+ color_fprintf(out, color, "%6.2f%%", ratio);
+ fprintf(out, " of all dTLB cache hits ");
+}
+
+static void print_itlb_cache_misses(FILE *out, int cpu,
+ struct perf_evsel *evsel __maybe_unused,
+ double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+ fprintf(out, " # ");
+ color_fprintf(out, color, "%6.2f%%", ratio);
+ fprintf(out, " of all iTLB cache hits ");
+}
+
+static void print_ll_cache_misses(FILE *out, int cpu,
+ struct perf_evsel *evsel __maybe_unused,
+ double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+ fprintf(out, " # ");
+ color_fprintf(out, color, "%6.2f%%", ratio);
+ fprintf(out, " of all LL-cache hits ");
+}
+
+void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+ double avg, int cpu, enum aggr_mode aggr)
+{
+ double total, ratio = 0.0, total2;
+ int ctx = evsel_context(evsel);
+
+ if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+ total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+ if (total) {
+ ratio = avg / total;
+ fprintf(out, " # %5.2f insns per cycle ", ratio);
+ } else {
+ fprintf(out, " ");
+ }
+ total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
+ total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
+
+ if (total && avg) {
+ ratio = total / avg;
+ fprintf(out, "\n");
+ if (aggr == AGGR_NONE)
+ fprintf(out, " ");
+ fprintf(out, " # %5.2f stalled cycles per insn", ratio);
+ }
+
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
+ runtime_branches_stats[ctx][cpu].n != 0) {
+ print_branch_misses(out, cpu, evsel, avg);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+ runtime_l1_dcache_stats[ctx][cpu].n != 0) {
+ print_l1_dcache_misses(out, cpu, evsel, avg);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+ runtime_l1_icache_stats[ctx][cpu].n != 0) {
+ print_l1_icache_misses(out, cpu, evsel, avg);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+ runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
+ print_dtlb_cache_misses(out, cpu, evsel, avg);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+ runtime_itlb_cache_stats[ctx][cpu].n != 0) {
+ print_itlb_cache_misses(out, cpu, evsel, avg);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+ runtime_ll_cache_stats[ctx][cpu].n != 0) {
+ print_ll_cache_misses(out, cpu, evsel, avg);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
+ runtime_cacherefs_stats[ctx][cpu].n != 0) {
+ total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
+
+ if (total)
+ ratio = avg * 100 / total;
+
+ fprintf(out, " # %8.3f %% of all cache refs ", ratio);
+
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+ print_stalled_cycles_frontend(out, cpu, evsel, avg);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+ print_stalled_cycles_backend(out, cpu, evsel, avg);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+ total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+ if (total) {
+ ratio = avg / total;
+ fprintf(out, " # %8.3f GHz ", ratio);
+ } else {
+ fprintf(out, " ");
+ }
+ } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
+ total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+ if (total)
+ fprintf(out,
+ " # %5.2f%% transactional cycles ",
+ 100.0 * (avg / total));
+ } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
+ total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+ total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+ if (total2 < avg)
+ total2 = avg;
+ if (total)
+ fprintf(out,
+ " # %5.2f%% aborted cycles ",
+ 100.0 * ((total2-avg) / total));
+ } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
+ runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+ total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+
+ if (avg)
+ ratio = total / avg;
+
+ fprintf(out, " # %8.0f cycles / transaction ", ratio);
+ } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
+ runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+ total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+
+ if (avg)
+ ratio = total / avg;
+
+ fprintf(out, " # %8.0f cycles / elision ", ratio);
+ } else if (runtime_nsecs_stats[cpu].n != 0) {
+ char unit = 'M';
+
+ total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+ if (total)
+ ratio = 1000.0 * avg / total;
+ if (ratio < 0.001) {
+ ratio *= 1000;
+ unit = 'K';
+ }
+
+ fprintf(out, " # %8.3f %c/sec ", ratio, unit);
+ } else {
+ fprintf(out, " ");
+ }
+}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 6506b3d..415c359 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -1,6 +1,8 @@
#include <math.h>
-
#include "stat.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
void update_stats(struct stats *stats, u64 val)
{
@@ -61,3 +63,268 @@ double rel_stddev_stats(double stddev, double avg)
return pct;
}
+
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+ enum perf_stat_evsel_id id)
+{
+ struct perf_stat *ps = evsel->priv;
+
+ return ps->id == id;
+}
+
+#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
+static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
+ ID(NONE, x),
+ ID(CYCLES_IN_TX, cpu/cycles-t/),
+ ID(TRANSACTION_START, cpu/tx-start/),
+ ID(ELISION_START, cpu/el-start/),
+ ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
+};
+#undef ID
+
+void perf_stat_evsel_id_init(struct perf_evsel *evsel)
+{
+ struct perf_stat *ps = evsel->priv;
+ int i;
+
+ /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
+
+ for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
+ if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+ ps->id = i;
+ break;
+ }
+ }
+}
+
+void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
+{
+ int i;
+ struct perf_stat *ps = evsel->priv;
+
+ for (i = 0; i < 3; i++)
+ init_stats(&ps->res_stats[i]);
+
+ perf_stat_evsel_id_init(evsel);
+}
+
+int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+{
+ evsel->priv = zalloc(sizeof(struct perf_stat));
+ if (evsel->priv == NULL)
+ return -ENOMEM;
+ perf_evsel__reset_stat_priv(evsel);
+ return 0;
+}
+
+void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+{
+ zfree(&evsel->priv);
+}
+
+int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
+ int ncpus, int nthreads)
+{
+ struct perf_counts *counts;
+
+ counts = perf_counts__new(ncpus, nthreads);
+ if (counts)
+ evsel->prev_raw_counts = counts;
+
+ return counts ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+{
+ perf_counts__delete(evsel->prev_raw_counts);
+ evsel->prev_raw_counts = NULL;
+}
+
+int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+{
+ int ncpus = perf_evsel__nr_cpus(evsel);
+ int nthreads = thread_map__nr(evsel->threads);
+
+ if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
+ perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
+ (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+ return -ENOMEM;
+
+ return 0;
+}
+
+int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ if (perf_evsel__alloc_stats(evsel, alloc_raw))
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ perf_evlist__free_stats(evlist);
+ return -1;
+}
+
+void perf_evlist__free_stats(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ perf_evsel__free_stat_priv(evsel);
+ perf_evsel__free_counts(evsel);
+ perf_evsel__free_prev_raw_counts(evsel);
+ }
+}
+
+void perf_evlist__reset_stats(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ perf_evsel__reset_stat_priv(evsel);
+ perf_evsel__reset_counts(evsel);
+ }
+}
+
+static void zero_per_pkg(struct perf_evsel *counter)
+{
+ if (counter->per_pkg_mask)
+ memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
+}
+
+static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
+{
+ unsigned long *mask = counter->per_pkg_mask;
+ struct cpu_map *cpus = perf_evsel__cpus(counter);
+ int s;
+
+ *skip = false;
+
+ if (!counter->per_pkg)
+ return 0;
+
+ if (cpu_map__empty(cpus))
+ return 0;
+
+ if (!mask) {
+ mask = zalloc(MAX_NR_CPUS);
+ if (!mask)
+ return -ENOMEM;
+
+ counter->per_pkg_mask = mask;
+ }
+
+ s = cpu_map__get_socket(cpus, cpu);
+ if (s < 0)
+ return -1;
+
+ *skip = test_and_set_bit(s, mask) == 1;
+ return 0;
+}
+
+static int
+process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
+ int cpu, int thread,
+ struct perf_counts_values *count)
+{
+ struct perf_counts_values *aggr = &evsel->counts->aggr;
+ static struct perf_counts_values zero;
+ bool skip = false;
+
+ if (check_per_pkg(evsel, cpu, &skip)) {
+ pr_err("failed to read per-pkg counter\n");
+ return -1;
+ }
+
+ if (skip)
+ count = &zero;
+
+ switch (config->aggr_mode) {
+ case AGGR_THREAD:
+ case AGGR_CORE:
+ case AGGR_SOCKET:
+ case AGGR_NONE:
+ if (!evsel->snapshot)
+ perf_evsel__compute_deltas(evsel, cpu, thread, count);
+ perf_counts_values__scale(count, config->scale, NULL);
+ if (config->aggr_mode == AGGR_NONE)
+ perf_stat__update_shadow_stats(evsel, count->values, cpu);
+ break;
+ case AGGR_GLOBAL:
+ aggr->val += count->val;
+ if (config->scale) {
+ aggr->ena += count->ena;
+ aggr->run += count->run;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int process_counter_maps(struct perf_stat_config *config,
+ struct perf_evsel *counter)
+{
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = perf_evsel__nr_cpus(counter);
+ int cpu, thread;
+
+ if (counter->system_wide)
+ nthreads = 1;
+
+ for (thread = 0; thread < nthreads; thread++) {
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ if (process_counter_values(config, counter, cpu, thread,
+ perf_counts(counter->counts, cpu, thread)))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+ struct perf_evsel *counter)
+{
+ struct perf_counts_values *aggr = &counter->counts->aggr;
+ struct perf_stat *ps = counter->priv;
+ u64 *count = counter->counts->aggr.values;
+ int i, ret;
+
+ aggr->val = aggr->ena = aggr->run = 0;
+ init_stats(ps->res_stats);
+
+ if (counter->per_pkg)
+ zero_per_pkg(counter);
+
+ ret = process_counter_maps(config, counter);
+ if (ret)
+ return ret;
+
+ if (config->aggr_mode != AGGR_GLOBAL)
+ return 0;
+
+ if (!counter->snapshot)
+ perf_evsel__compute_deltas(counter, -1, -1, aggr);
+ perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
+
+ for (i = 0; i < 3; i++)
+ update_stats(&ps->res_stats[i], count[i]);
+
+ if (verbose) {
+ fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter), count[0], count[1], count[2]);
+ }
+
+ /*
+ * Save the full runtime - to allow normalization during printout:
+ */
+ perf_stat__update_shadow_stats(counter, count, 0);
+
+ return 0;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 5667fc3..62448c8 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -2,6 +2,8 @@
#define __PERF_STATS_H
#include <linux/types.h>
+#include <stdio.h>
+#include "xyarray.h"
struct stats
{
@@ -9,6 +11,35 @@ struct stats
u64 max, min;
};
+enum perf_stat_evsel_id {
+ PERF_STAT_EVSEL_ID__NONE = 0,
+ PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
+ PERF_STAT_EVSEL_ID__TRANSACTION_START,
+ PERF_STAT_EVSEL_ID__ELISION_START,
+ PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+ PERF_STAT_EVSEL_ID__MAX,
+};
+
+struct perf_stat {
+ struct stats res_stats[3];
+ enum perf_stat_evsel_id id;
+};
+
+enum aggr_mode {
+ AGGR_NONE,
+ AGGR_GLOBAL,
+ AGGR_SOCKET,
+ AGGR_CORE,
+ AGGR_THREAD,
+};
+
+struct perf_stat_config {
+ enum aggr_mode aggr_mode;
+ bool scale;
+ FILE *output;
+ unsigned int interval;
+};
+
void update_stats(struct stats *stats, u64 val);
double avg_stats(struct stats *stats);
double stddev_stats(struct stats *stats);
@@ -22,4 +53,40 @@ static inline void init_stats(struct stats *stats)
stats->min = (u64) -1;
stats->max = 0;
}
+
+struct perf_evsel;
+struct perf_evlist;
+
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+ enum perf_stat_evsel_id id);
+
+#define perf_stat_evsel__is(evsel, id) \
+ __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+
+void perf_stat_evsel_id_init(struct perf_evsel *evsel);
+
+extern struct stats walltime_nsecs_stats;
+
+void perf_stat__reset_shadow_stats(void);
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+ int cpu);
+void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+ double avg, int cpu, enum aggr_mode aggr);
+
+void perf_evsel__reset_stat_priv(struct perf_evsel *evsel);
+int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel);
+void perf_evsel__free_stat_priv(struct perf_evsel *evsel);
+
+int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
+ int ncpus, int nthreads);
+void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel);
+
+int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
+
+int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
+void perf_evlist__free_stats(struct perf_evlist *evlist);
+void perf_evlist__reset_stats(struct perf_evlist *evlist);
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+ struct perf_evsel *counter);
#endif
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
index 79a757a..bcae659 100644
--- a/tools/perf/util/strfilter.c
+++ b/tools/perf/util/strfilter.c
@@ -170,6 +170,46 @@ struct strfilter *strfilter__new(const char *rules, const char **err)
return filter;
}
+static int strfilter__append(struct strfilter *filter, bool _or,
+ const char *rules, const char **err)
+{
+ struct strfilter_node *right, *root;
+ const char *ep = NULL;
+
+ if (!filter || !rules)
+ return -EINVAL;
+
+ right = strfilter_node__new(rules, &ep);
+ if (!right || *ep != '\0') {
+ if (err)
+ *err = ep;
+ goto error;
+ }
+ root = strfilter_node__alloc(_or ? OP_or : OP_and, filter->root, right);
+ if (!root) {
+ ep = NULL;
+ goto error;
+ }
+
+ filter->root = root;
+ return 0;
+
+error:
+ strfilter_node__delete(right);
+ return ep ? -EINVAL : -ENOMEM;
+}
+
+int strfilter__or(struct strfilter *filter, const char *rules, const char **err)
+{
+ return strfilter__append(filter, true, rules, err);
+}
+
+int strfilter__and(struct strfilter *filter, const char *rules,
+ const char **err)
+{
+ return strfilter__append(filter, false, rules, err);
+}
+
static bool strfilter_node__compare(struct strfilter_node *node,
const char *str)
{
@@ -197,3 +237,70 @@ bool strfilter__compare(struct strfilter *filter, const char *str)
return false;
return strfilter_node__compare(filter->root, str);
}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf);
+
+/* sprint node in parenthesis if needed */
+static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf)
+{
+ int len;
+ int pt = node->r ? 2 : 0; /* don't need to check node->l */
+
+ if (buf && pt)
+ *buf++ = '(';
+ len = strfilter_node__sprint(node, buf);
+ if (len < 0)
+ return len;
+ if (buf && pt)
+ *(buf + len) = ')';
+ return len + pt;
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf)
+{
+ int len = 0, rlen;
+
+ if (!node || !node->p)
+ return -EINVAL;
+
+ switch (*node->p) {
+ case '|':
+ case '&':
+ len = strfilter_node__sprint_pt(node->l, buf);
+ if (len < 0)
+ return len;
+ case '!':
+ if (buf) {
+ *(buf + len++) = *node->p;
+ buf += len;
+ } else
+ len++;
+ rlen = strfilter_node__sprint_pt(node->r, buf);
+ if (rlen < 0)
+ return rlen;
+ len += rlen;
+ break;
+ default:
+ len = strlen(node->p);
+ if (buf)
+ strcpy(buf, node->p);
+ }
+
+ return len;
+}
+
+char *strfilter__string(struct strfilter *filter)
+{
+ int len;
+ char *ret = NULL;
+
+ len = strfilter_node__sprint(filter->root, NULL);
+ if (len < 0)
+ return NULL;
+
+ ret = malloc(len + 1);
+ if (ret)
+ strfilter_node__sprint(filter->root, ret);
+
+ return ret;
+}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
index fe611f3..cff5eda 100644
--- a/tools/perf/util/strfilter.h
+++ b/tools/perf/util/strfilter.h
@@ -29,6 +29,32 @@ struct strfilter {
struct strfilter *strfilter__new(const char *rules, const char **err);
/**
+ * strfilter__or - Append an additional rule by logical-or
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ * @filter by using logical-or.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-or.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__or(struct strfilter *filter,
+ const char *rules, const char **err);
+
+/**
+ * strfilter__add - Append an additional rule by logical-and
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ * @filter by using logical-and.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-and.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__and(struct strfilter *filter,
+ const char *rules, const char **err);
+
+/**
* strfilter__compare - compare given string and a string filter
* @filter: String filter
* @str: target string
@@ -45,4 +71,13 @@ bool strfilter__compare(struct strfilter *filter, const char *str);
*/
void strfilter__delete(struct strfilter *filter);
+/**
+ * strfilter__string - Reconstruct a rule string from filter
+ * @filter: String filter to reconstruct
+ *
+ * Reconstruct a rule string from @filter. This will be good for
+ * debug messages. Note that returning string must be freed afterward.
+ */
+char *strfilter__string(struct strfilter *filter);
+
#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 6afd610..fc8781d 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -357,3 +357,42 @@ void *memdup(const void *src, size_t len)
return p;
}
+
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
+{
+ /*
+ * FIXME: replace this with an expression using log10() when we
+ * find a suitable implementation, maybe the one in the dvb drivers...
+ *
+ * "%s == %d || " = log10(MAXINT) * 2 + 8 chars for the operators
+ */
+ size_t size = nints * 28 + 1; /* \0 */
+ size_t i, printed = 0;
+ char *expr = malloc(size);
+
+ if (expr) {
+ const char *or_and = "||", *eq_neq = "==";
+ char *e = expr;
+
+ if (!in) {
+ or_and = "&&";
+ eq_neq = "!=";
+ }
+
+ for (i = 0; i < nints; ++i) {
+ if (printed == size)
+ goto out_err_overflow;
+
+ if (i > 0)
+ printed += snprintf(e + printed, size - printed, " %s ", or_and);
+ printed += scnprintf(e + printed, size - printed,
+ "%s %s %d", var, eq_neq, ints[i]);
+ }
+ }
+
+ return expr;
+
+out_err_overflow:
+ free(expr);
+ return NULL;
+}
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
index 71f9d10..bdf98f6 100644
--- a/tools/perf/util/strlist.c
+++ b/tools/perf/util/strlist.c
@@ -72,7 +72,7 @@ int strlist__load(struct strlist *slist, const char *filename)
FILE *fp = fopen(filename, "r");
if (fp == NULL)
- return errno;
+ return -errno;
while (fgets(entry, sizeof(entry), fp) != NULL) {
const size_t len = strlen(entry);
@@ -108,43 +108,70 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry)
return snode;
}
-static int strlist__parse_list_entry(struct strlist *slist, const char *s)
+static int strlist__parse_list_entry(struct strlist *slist, const char *s,
+ const char *subst_dir)
{
+ int err;
+ char *subst = NULL;
+
if (strncmp(s, "file://", 7) == 0)
return strlist__load(slist, s + 7);
- return strlist__add(slist, s);
+ if (subst_dir) {
+ err = -ENOMEM;
+ if (asprintf(&subst, "%s/%s", subst_dir, s) < 0)
+ goto out;
+
+ if (access(subst, F_OK) == 0) {
+ err = strlist__load(slist, subst);
+ goto out;
+ }
+ }
+
+ err = strlist__add(slist, s);
+out:
+ free(subst);
+ return err;
}
-int strlist__parse_list(struct strlist *slist, const char *s)
+static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir)
{
char *sep;
int err;
while ((sep = strchr(s, ',')) != NULL) {
*sep = '\0';
- err = strlist__parse_list_entry(slist, s);
+ err = strlist__parse_list_entry(slist, s, subst_dir);
*sep = ',';
if (err != 0)
return err;
s = sep + 1;
}
- return *s ? strlist__parse_list_entry(slist, s) : 0;
+ return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0;
}
-struct strlist *strlist__new(bool dupstr, const char *list)
+struct strlist *strlist__new(const char *list, const struct strlist_config *config)
{
struct strlist *slist = malloc(sizeof(*slist));
if (slist != NULL) {
+ bool dupstr = true;
+ const char *dirname = NULL;
+
+ if (config) {
+ dupstr = !config->dont_dupstr;
+ dirname = config->dirname;
+ }
+
rblist__init(&slist->rblist);
slist->rblist.node_cmp = strlist__node_cmp;
slist->rblist.node_new = strlist__node_new;
slist->rblist.node_delete = strlist__node_delete;
slist->dupstr = dupstr;
- if (list && strlist__parse_list(slist, list) != 0)
+
+ if (list && strlist__parse_list(slist, list, dirname) != 0)
goto out_error;
}
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index 5c7f870..297565a 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -16,7 +16,12 @@ struct strlist {
bool dupstr;
};
-struct strlist *strlist__new(bool dupstr, const char *slist);
+struct strlist_config {
+ bool dont_dupstr;
+ const char *dirname;
+};
+
+struct strlist *strlist__new(const char *slist, const struct strlist_config *config);
void strlist__delete(struct strlist *slist);
void strlist__remove(struct strlist *slist, struct str_node *sn);
@@ -74,6 +79,4 @@ static inline struct str_node *strlist__next(struct str_node *sn)
#define strlist__for_each_safe(pos, n, slist) \
for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
pos = n, n = strlist__next(n))
-
-int strlist__parse_list(struct strlist *slist, const char *s);
#endif /* __PERF_STRLIST_H */
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 283d3e7..eec6c11 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -748,7 +748,7 @@ static int str_to_bitmap(char *s, cpumask_t *b)
set_bit(c, cpumask_bits(b));
}
- cpu_map__delete(m);
+ cpu_map__put(m);
return ret;
}
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a7ab606..53bb5f5 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
#endif
#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
-static int elf_getphdrnum(Elf *elf, size_t *dst)
+int elf_getphdrnum(Elf *elf, size_t *dst)
{
GElf_Ehdr gehdr;
GElf_Ehdr *ehdr;
@@ -630,6 +630,11 @@ void symsrc__destroy(struct symsrc *ss)
close(ss->fd);
}
+bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+ return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL;
+}
+
int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
enum dso_binary_type type)
{
@@ -678,6 +683,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
}
if (!dso__build_id_equal(dso, build_id)) {
+ pr_debug("%s: build id mismatch for %s.\n", __func__, name);
dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
goto out_elf_end;
}
@@ -711,8 +717,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
".gnu.prelink_undo",
NULL) != NULL);
} else {
- ss->adjust_symbols = ehdr.e_type == ET_EXEC ||
- ehdr.e_type == ET_REL;
+ ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
}
ss->name = strdup(name);
@@ -771,6 +776,8 @@ static bool want_demangle(bool is_kernel_sym)
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
}
+void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { }
+
int dso__load_sym(struct dso *dso, struct map *map,
struct symsrc *syms_ss, struct symsrc *runtime_ss,
symbol_filter_t filter, int kmodule)
@@ -868,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map,
}
}
+ /*
+ * Handle any relocation of vdso necessary because older kernels
+ * attempted to prelink vdso to its virtual address.
+ */
+ if (dso__is_vdso(dso)) {
+ GElf_Shdr tshdr;
+
+ if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+ map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset;
+ }
+
dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
/*
* Initial kernel and module mappings do not map to the dso. For
@@ -935,6 +953,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
(sym.st_value & 1))
--sym.st_value;
+ arch__elf_sym_adjust(&sym);
+
if (dso->kernel || kmodule) {
char dso_name[PATH_MAX];
@@ -963,8 +983,10 @@ int dso__load_sym(struct dso *dso, struct map *map,
map->unmap_ip = map__unmap_ip;
/* Ensure maps are correctly ordered */
if (kmaps) {
+ map__get(map);
map_groups__remove(kmaps, map);
map_groups__insert(kmaps, map);
+ map__put(map);
}
}
@@ -1005,7 +1027,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
curr_map = map__new2(start, curr_dso,
map->type);
if (curr_map == NULL) {
- dso__delete(curr_dso);
+ dso__put(curr_dso);
goto out_elf_end;
}
if (adjust_kernel_syms) {
@@ -1020,11 +1042,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
}
curr_dso->symtab_type = dso->symtab_type;
map_groups__insert(kmaps, curr_map);
- /*
- * The new DSO should go to the kernel DSOS
- */
- dsos__add(&map->groups->machine->kernel_dsos,
- curr_dso);
+ dsos__add(&map->groups->machine->dsos, curr_dso);
dso__set_loaded(curr_dso, map->type);
} else
curr_dso = curr_map->dso;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 201f6c4c..1f97ffb 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -85,8 +85,17 @@ static int prefix_underscores_count(const char *str)
return tail - str;
}
-#define SYMBOL_A 0
-#define SYMBOL_B 1
+int __weak arch__choose_best_symbol(struct symbol *syma,
+ struct symbol *symb __maybe_unused)
+{
+ /* Avoid "SyS" kernel syscall aliases */
+ if (strlen(syma->name) >= 3 && !strncmp(syma->name, "SyS", 3))
+ return SYMBOL_B;
+ if (strlen(syma->name) >= 10 && !strncmp(syma->name, "compat_SyS", 10))
+ return SYMBOL_B;
+
+ return SYMBOL_A;
+}
static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
{
@@ -134,13 +143,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
else if (na < nb)
return SYMBOL_B;
- /* Avoid "SyS" kernel syscall aliases */
- if (na >= 3 && !strncmp(syma->name, "SyS", 3))
- return SYMBOL_B;
- if (na >= 10 && !strncmp(syma->name, "compat_SyS", 10))
- return SYMBOL_B;
-
- return SYMBOL_A;
+ return arch__choose_best_symbol(syma, symb);
}
void symbols__fixup_duplicate(struct rb_root *symbols)
@@ -199,18 +202,18 @@ void symbols__fixup_end(struct rb_root *symbols)
void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
{
- struct map *prev, *curr;
- struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]);
+ struct maps *maps = &mg->maps[type];
+ struct map *next, *curr;
- if (prevnd == NULL)
- return;
+ pthread_rwlock_wrlock(&maps->lock);
- curr = rb_entry(prevnd, struct map, rb_node);
+ curr = maps__first(maps);
+ if (curr == NULL)
+ goto out_unlock;
- for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
- prev = curr;
- curr = rb_entry(nd, struct map, rb_node);
- prev->end = curr->start;
+ for (next = map__next(curr); next; next = map__next(curr)) {
+ curr->end = next->start;
+ curr = next;
}
/*
@@ -218,6 +221,9 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
* last map final address.
*/
curr->end = ~0ULL;
+
+out_unlock:
+ pthread_rwlock_unlock(&maps->lock);
}
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -397,7 +403,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
const char *name)
{
struct rb_node *n;
- struct symbol_name_rb_node *s;
+ struct symbol_name_rb_node *s = NULL;
if (symbols == NULL)
return NULL;
@@ -408,7 +414,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
int cmp;
s = rb_entry(n, struct symbol_name_rb_node, rb_node);
- cmp = strcmp(name, s->sym.name);
+ cmp = arch__compare_symbol_names(name, s->sym.name);
if (cmp < 0)
n = n->rb_left;
@@ -426,7 +432,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
struct symbol_name_rb_node *tmp;
tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
- if (strcmp(tmp->sym.name, s->sym.name))
+ if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
break;
s = tmp;
@@ -435,10 +441,25 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
return &s->sym;
}
+void dso__reset_find_symbol_cache(struct dso *dso)
+{
+ enum map_type type;
+
+ for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) {
+ dso->last_find_result[type].addr = 0;
+ dso->last_find_result[type].symbol = NULL;
+ }
+}
+
struct symbol *dso__find_symbol(struct dso *dso,
enum map_type type, u64 addr)
{
- return symbols__find(&dso->symbols[type], addr);
+ if (dso->last_find_result[type].addr != addr) {
+ dso->last_find_result[type].addr = addr;
+ dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr);
+ }
+
+ return dso->last_find_result[type].symbol;
}
struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
@@ -653,14 +674,14 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
curr_map = map_groups__find(kmaps, map->type, pos->start);
if (!curr_map || (filter && filter(curr_map, pos))) {
- rb_erase(&pos->rb_node, root);
+ rb_erase_init(&pos->rb_node, root);
symbol__delete(pos);
} else {
pos->start -= curr_map->start - curr_map->pgoff;
if (pos->end)
pos->end -= curr_map->start - curr_map->pgoff;
if (curr_map != map) {
- rb_erase(&pos->rb_node, root);
+ rb_erase_init(&pos->rb_node, root);
symbols__insert(
&curr_map->dso->symbols[curr_map->type],
pos);
@@ -780,7 +801,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta,
curr_map = map__new2(pos->start, ndso, map->type);
if (curr_map == NULL) {
- dso__delete(ndso);
+ dso__put(ndso);
return -1;
}
@@ -1126,8 +1147,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
INIT_LIST_HEAD(&md.maps);
fd = open(kcore_filename, O_RDONLY);
- if (fd < 0)
+ if (fd < 0) {
+ pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n",
+ kcore_filename);
return -EINVAL;
+ }
/* Read new maps into temporary lists */
err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md,
@@ -1167,20 +1191,23 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
/* Add new maps */
while (!list_empty(&md.maps)) {
new_map = list_entry(md.maps.next, struct map, node);
- list_del(&new_map->node);
+ list_del_init(&new_map->node);
if (new_map == replacement_map) {
map->start = new_map->start;
map->end = new_map->end;
map->pgoff = new_map->pgoff;
map->map_ip = new_map->map_ip;
map->unmap_ip = new_map->unmap_ip;
- map__delete(new_map);
/* Ensure maps are correctly ordered */
+ map__get(map);
map_groups__remove(kmaps, map);
map_groups__insert(kmaps, map);
+ map__put(map);
} else {
map_groups__insert(kmaps, new_map);
}
+
+ map__put(new_map);
}
/*
@@ -1205,8 +1232,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
out_err:
while (!list_empty(&md.maps)) {
map = list_entry(md.maps.next, struct map, node);
- list_del(&map->node);
- map__delete(map);
+ list_del_init(&map->node);
+ map__put(map);
}
close(fd);
return -EINVAL;
@@ -1355,7 +1382,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
/*
* kernel modules know their symtab type - it's set when
- * creating a module dso in machine__new_module().
+ * creating a module dso in machine__findnew_module_map().
*/
return kmod && dso->symtab_type == type;
@@ -1380,12 +1407,22 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
bool kmod;
- dso__set_loaded(dso, map->type);
+ pthread_mutex_lock(&dso->lock);
+
+ /* check again under the dso->lock */
+ if (dso__loaded(dso, map->type)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (dso->kernel) {
+ if (dso->kernel == DSO_TYPE_KERNEL)
+ ret = dso__load_kernel_sym(dso, map, filter);
+ else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ ret = dso__load_guest_kernel_sym(dso, map, filter);
- if (dso->kernel == DSO_TYPE_KERNEL)
- return dso__load_kernel_sym(dso, map, filter);
- else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
- return dso__load_guest_kernel_sym(dso, map, filter);
+ goto out;
+ }
if (map->groups && map->groups->machine)
machine = map->groups->machine;
@@ -1398,18 +1435,18 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
struct stat st;
if (lstat(dso->name, &st) < 0)
- return -1;
+ goto out;
if (st.st_uid && (st.st_uid != geteuid())) {
pr_warning("File %s not owned by current user or root, "
"ignoring it.\n", dso->name);
- return -1;
+ goto out;
}
ret = dso__load_perf_map(dso, map, filter);
dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
DSO_BINARY_TYPE__NOT_FOUND;
- return ret;
+ goto out;
}
if (machine)
@@ -1417,7 +1454,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
name = malloc(PATH_MAX);
if (!name)
- return -1;
+ goto out;
kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
@@ -1498,23 +1535,32 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
out_free:
free(name);
if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
- return 0;
+ ret = 0;
+out:
+ dso__set_loaded(dso, map->type);
+ pthread_mutex_unlock(&dso->lock);
+
return ret;
}
struct map *map_groups__find_by_name(struct map_groups *mg,
enum map_type type, const char *name)
{
- struct rb_node *nd;
+ struct maps *maps = &mg->maps[type];
+ struct map *map;
- for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
- struct map *map = rb_entry(nd, struct map, rb_node);
+ pthread_rwlock_rdlock(&maps->lock);
+ for (map = maps__first(maps); map; map = map__next(map)) {
if (map->dso && strcmp(map->dso->short_name, name) == 0)
- return map;
+ goto out_unlock;
}
- return NULL;
+ map = NULL;
+
+out_unlock:
+ pthread_rwlock_unlock(&maps->lock);
+ return map;
}
int dso__load_vmlinux(struct dso *dso, struct map *map,
@@ -1802,11 +1848,12 @@ static void vmlinux_path__exit(void)
{
while (--vmlinux_path__nr_entries >= 0)
zfree(&vmlinux_path[vmlinux_path__nr_entries]);
+ vmlinux_path__nr_entries = 0;
zfree(&vmlinux_path);
}
-static int vmlinux_path__init(struct perf_session_env *env)
+static int vmlinux_path__init(struct perf_env *env)
{
struct utsname uts;
char bf[PATH_MAX];
@@ -1874,11 +1921,13 @@ int setup_list(struct strlist **list, const char *list_str,
if (list_str == NULL)
return 0;
- *list = strlist__new(true, list_str);
+ *list = strlist__new(list_str, NULL);
if (!*list) {
pr_err("problems parsing %s list\n", list_name);
return -1;
}
+
+ symbol_conf.has_filter = true;
return 0;
}
@@ -1915,7 +1964,7 @@ static bool symbol__read_kptr_restrict(void)
return value;
}
-int symbol__init(struct perf_session_env *env)
+int symbol__init(struct perf_env *env)
{
const char *symfs;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 0956150..440ba8a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -105,7 +105,9 @@ struct symbol_conf {
demangle_kernel,
filter_relative,
show_hist_headers,
- branch_callstack;
+ branch_callstack,
+ has_filter,
+ show_ref_callgraph;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
@@ -158,8 +160,6 @@ struct ref_reloc_sym {
struct map_symbol {
struct map *map;
struct symbol *sym;
- bool unfolded;
- bool has_children;
};
struct addr_map_symbol {
@@ -252,8 +252,8 @@ int modules__parse(const char *filename, void *arg,
int filename__read_debuglink(const char *filename, char *debuglink,
size_t size);
-struct perf_session_env;
-int symbol__init(struct perf_session_env *env);
+struct perf_env;
+int symbol__init(struct perf_env *env);
void symbol__exit(void);
void symbol__elf_init(void);
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
@@ -303,4 +303,14 @@ int setup_list(struct strlist **list, const char *list_str,
int setup_intlist(struct intlist **list, const char *list_str,
const char *list_name);
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
+void arch__elf_sym_adjust(GElf_Sym *sym);
+#endif
+
+#define SYMBOL_A 0
+#define SYMBOL_B 1
+
+int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
+
#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 9ed59a4..679688e 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -219,7 +219,7 @@ static int thread_stack__call_return(struct thread *thread,
return crp->process(&cr, crp->data);
}
-static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
+static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
struct call_return_processor *crp = ts->crp;
int err;
@@ -242,6 +242,14 @@ static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
return 0;
}
+int thread_stack__flush(struct thread *thread)
+{
+ if (thread->ts)
+ return __thread_stack__flush(thread, thread->ts);
+
+ return 0;
+}
+
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr)
{
@@ -264,7 +272,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
*/
if (trace_nr != thread->ts->trace_nr) {
if (thread->ts->trace_nr)
- thread_stack__flush(thread, thread->ts);
+ __thread_stack__flush(thread, thread->ts);
thread->ts->trace_nr = trace_nr;
}
@@ -297,7 +305,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
if (trace_nr != thread->ts->trace_nr) {
if (thread->ts->trace_nr)
- thread_stack__flush(thread, thread->ts);
+ __thread_stack__flush(thread, thread->ts);
thread->ts->trace_nr = trace_nr;
}
}
@@ -305,7 +313,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
void thread_stack__free(struct thread *thread)
{
if (thread->ts) {
- thread_stack__flush(thread, thread->ts);
+ __thread_stack__flush(thread, thread->ts);
zfree(&thread->ts->stack);
zfree(&thread->ts);
}
@@ -689,7 +697,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
/* Flush stack on exec */
if (ts->comm != comm && thread->pid_ == thread->tid) {
- err = thread_stack__flush(thread, ts);
+ err = __thread_stack__flush(thread, ts);
if (err)
return err;
ts->comm = comm;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index b843bbe..e1528f1 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -96,6 +96,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
size_t sz, u64 ip);
+int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
struct call_return_processor *
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 1c8fbc9..0a9ae80 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -18,7 +18,7 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine)
if (pid == thread->tid || pid == -1) {
thread->mg = map_groups__new(machine);
} else {
- leader = machine__findnew_thread(machine, pid, pid);
+ leader = __machine__findnew_thread(machine, pid, pid);
if (leader)
thread->mg = map_groups__get(leader->mg);
}
@@ -53,7 +53,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
goto err_thread;
list_add(&comm->list, &thread->comm_list);
-
+ atomic_set(&thread->refcnt, 0);
+ RB_CLEAR_NODE(&thread->rb_node);
}
return thread;
@@ -67,6 +68,8 @@ void thread__delete(struct thread *thread)
{
struct comm *comm, *tmp;
+ BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
+
thread_stack__free(thread);
if (thread->mg) {
@@ -84,13 +87,14 @@ void thread__delete(struct thread *thread)
struct thread *thread__get(struct thread *thread)
{
- ++thread->refcnt;
+ if (thread)
+ atomic_inc(&thread->refcnt);
return thread;
}
void thread__put(struct thread *thread)
{
- if (thread && --thread->refcnt == 0) {
+ if (thread && atomic_dec_and_test(&thread->refcnt)) {
list_del_init(&thread->node);
thread__delete(thread);
}
@@ -187,6 +191,12 @@ static int thread__clone_map_groups(struct thread *thread,
if (thread->pid_ == parent->pid_)
return 0;
+ if (thread->mg == parent->mg) {
+ pr_debug("broken map groups on thread %d/%d parent %d/%d\n",
+ thread->pid_, thread->tid, parent->pid_, parent->tid);
+ return 0;
+ }
+
/* But this one is new process, copy maps. */
for (i = 0; i < MAP__NR_TYPES; ++i)
if (map_groups__clone(thread->mg, parent->mg, i) < 0)
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 9b8a54d..a0ac031 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -1,6 +1,7 @@
#ifndef __PERF_THREAD_H
#define __PERF_THREAD_H
+#include <linux/atomic.h>
#include <linux/rbtree.h>
#include <linux/list.h>
#include <unistd.h>
@@ -21,12 +22,12 @@ struct thread {
pid_t tid;
pid_t ppid;
int cpu;
- int refcnt;
+ atomic_t refcnt;
char shortname[3];
bool comm_set;
+ int comm_len;
bool dead; /* if set thread has exited */
struct list_head comm_list;
- int comm_len;
u64 db_id;
void *priv;
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index f93b973..6ec3c5c 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -8,8 +8,11 @@
#include <unistd.h>
#include "strlist.h"
#include <string.h>
+#include <api/fs/fs.h>
+#include "asm/bug.h"
#include "thread_map.h"
#include "util.h"
+#include "debug.h"
/* Skip "." and ".." directories */
static int filter(const struct dirent *dir)
@@ -20,6 +23,30 @@ static int filter(const struct dirent *dir)
return 1;
}
+static void thread_map__reset(struct thread_map *map, int start, int nr)
+{
+ size_t size = (nr - start) * sizeof(map->map[0]);
+
+ memset(&map->map[start], 0, size);
+}
+
+static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
+{
+ size_t size = sizeof(*map) + sizeof(map->map[0]) * nr;
+ int start = map ? map->nr : 0;
+
+ map = realloc(map, size);
+ /*
+ * We only realloc to add more items, let's reset new items.
+ */
+ if (map)
+ thread_map__reset(map, start, nr);
+
+ return map;
+}
+
+#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr)
+
struct thread_map *thread_map__new_by_pid(pid_t pid)
{
struct thread_map *threads;
@@ -33,11 +60,12 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
if (items <= 0)
return NULL;
- threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+ threads = thread_map__alloc(items);
if (threads != NULL) {
for (i = 0; i < items; i++)
- threads->map[i] = atoi(namelist[i]->d_name);
+ thread_map__set_pid(threads, i, atoi(namelist[i]->d_name));
threads->nr = items;
+ atomic_set(&threads->refcnt, 1);
}
for (i=0; i<items; i++)
@@ -49,11 +77,12 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
struct thread_map *thread_map__new_by_tid(pid_t tid)
{
- struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+ struct thread_map *threads = thread_map__alloc(1);
if (threads != NULL) {
- threads->map[0] = tid;
- threads->nr = 1;
+ thread_map__set_pid(threads, 0, tid);
+ threads->nr = 1;
+ atomic_set(&threads->refcnt, 1);
}
return threads;
@@ -65,8 +94,8 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
int max_threads = 32, items, i;
char path[256];
struct dirent dirent, *next, **namelist = NULL;
- struct thread_map *threads = malloc(sizeof(*threads) +
- max_threads * sizeof(pid_t));
+ struct thread_map *threads = thread_map__alloc(max_threads);
+
if (threads == NULL)
goto out;
@@ -75,6 +104,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
goto out_free_threads;
threads->nr = 0;
+ atomic_set(&threads->refcnt, 1);
while (!readdir_r(proc, &dirent, &next) && next) {
char *end;
@@ -106,16 +136,17 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
if (grow) {
struct thread_map *tmp;
- tmp = realloc(threads, (sizeof(*threads) +
- max_threads * sizeof(pid_t)));
+ tmp = thread_map__realloc(threads, max_threads);
if (tmp == NULL)
goto out_free_namelist;
threads = tmp;
}
- for (i = 0; i < items; i++)
- threads->map[threads->nr + i] = atoi(namelist[i]->d_name);
+ for (i = 0; i < items; i++) {
+ thread_map__set_pid(threads, threads->nr + i,
+ atoi(namelist[i]->d_name));
+ }
for (i = 0; i < items; i++)
zfree(&namelist[i]);
@@ -164,7 +195,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
pid_t pid, prev_pid = INT_MAX;
char *end_ptr;
struct str_node *pos;
- struct strlist *slist = strlist__new(false, pid_str);
+ struct strlist_config slist_config = { .dont_dupstr = true, };
+ struct strlist *slist = strlist__new(pid_str, &slist_config);
if (!slist)
return NULL;
@@ -185,15 +217,14 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
goto out_free_threads;
total_tasks += items;
- nt = realloc(threads, (sizeof(*threads) +
- sizeof(pid_t) * total_tasks));
+ nt = thread_map__realloc(threads, total_tasks);
if (nt == NULL)
goto out_free_namelist;
threads = nt;
for (i = 0; i < items; i++) {
- threads->map[j++] = atoi(namelist[i]->d_name);
+ thread_map__set_pid(threads, j++, atoi(namelist[i]->d_name));
zfree(&namelist[i]);
}
threads->nr = total_tasks;
@@ -202,6 +233,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
out:
strlist__delete(slist);
+ if (threads)
+ atomic_set(&threads->refcnt, 1);
return threads;
out_free_namelist:
@@ -216,11 +249,12 @@ out_free_threads:
struct thread_map *thread_map__new_dummy(void)
{
- struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+ struct thread_map *threads = thread_map__alloc(1);
if (threads != NULL) {
- threads->map[0] = -1;
- threads->nr = 1;
+ thread_map__set_pid(threads, 0, -1);
+ threads->nr = 1;
+ atomic_set(&threads->refcnt, 1);
}
return threads;
}
@@ -232,13 +266,14 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
pid_t tid, prev_tid = INT_MAX;
char *end_ptr;
struct str_node *pos;
+ struct strlist_config slist_config = { .dont_dupstr = true, };
struct strlist *slist;
/* perf-stat expects threads to be generated even if tid not given */
if (!tid_str)
return thread_map__new_dummy();
- slist = strlist__new(false, tid_str);
+ slist = strlist__new(tid_str, &slist_config);
if (!slist)
return NULL;
@@ -253,16 +288,18 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
continue;
ntasks++;
- nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
+ nt = thread_map__realloc(threads, ntasks);
if (nt == NULL)
goto out_free_threads;
threads = nt;
- threads->map[ntasks - 1] = tid;
- threads->nr = ntasks;
+ thread_map__set_pid(threads, ntasks - 1, tid);
+ threads->nr = ntasks;
}
out:
+ if (threads)
+ atomic_set(&threads->refcnt, 1);
return threads;
out_free_threads:
@@ -282,9 +319,30 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid,
return thread_map__new_by_tid_str(tid);
}
-void thread_map__delete(struct thread_map *threads)
+static void thread_map__delete(struct thread_map *threads)
{
- free(threads);
+ if (threads) {
+ int i;
+
+ WARN_ONCE(atomic_read(&threads->refcnt) != 0,
+ "thread map refcnt unbalanced\n");
+ for (i = 0; i < threads->nr; i++)
+ free(thread_map__comm(threads, i));
+ free(threads);
+ }
+}
+
+struct thread_map *thread_map__get(struct thread_map *map)
+{
+ if (map)
+ atomic_inc(&map->refcnt);
+ return map;
+}
+
+void thread_map__put(struct thread_map *map)
+{
+ if (map && atomic_dec_and_test(&map->refcnt))
+ thread_map__delete(map);
}
size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
@@ -293,7 +351,60 @@ size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
size_t printed = fprintf(fp, "%d thread%s: ",
threads->nr, threads->nr > 1 ? "s" : "");
for (i = 0; i < threads->nr; ++i)
- printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]);
+ printed += fprintf(fp, "%s%d", i ? ", " : "", thread_map__pid(threads, i));
return printed + fprintf(fp, "\n");
}
+
+static int get_comm(char **comm, pid_t pid)
+{
+ char *path;
+ size_t size;
+ int err;
+
+ if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1)
+ return -ENOMEM;
+
+ err = filename__read_str(path, comm, &size);
+ if (!err) {
+ /*
+ * We're reading 16 bytes, while filename__read_str
+ * allocates data per BUFSIZ bytes, so we can safely
+ * mark the end of the string.
+ */
+ (*comm)[size] = 0;
+ rtrim(*comm);
+ }
+
+ free(path);
+ return err;
+}
+
+static void comm_init(struct thread_map *map, int i)
+{
+ pid_t pid = thread_map__pid(map, i);
+ char *comm = NULL;
+
+ /* dummy pid comm initialization */
+ if (pid == -1) {
+ map->map[i].comm = strdup("dummy");
+ return;
+ }
+
+ /*
+ * The comm name is like extra bonus ;-),
+ * so just warn if we fail for any reason.
+ */
+ if (get_comm(&comm, pid))
+ pr_warning("Couldn't resolve comm name for pid %d\n", pid);
+
+ map->map[i].comm = comm;
+}
+
+void thread_map__read_comms(struct thread_map *threads)
+{
+ int i;
+
+ for (i = 0; i < threads->nr; ++i)
+ comm_init(threads, i);
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 95313f4..af679d8 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -3,10 +3,17 @@
#include <sys/types.h>
#include <stdio.h>
+#include <linux/atomic.h>
+
+struct thread_map_data {
+ pid_t pid;
+ char *comm;
+};
struct thread_map {
+ atomic_t refcnt;
int nr;
- pid_t map[];
+ struct thread_map_data map[];
};
struct thread_map *thread_map__new_dummy(void);
@@ -15,11 +22,12 @@ struct thread_map *thread_map__new_by_tid(pid_t tid);
struct thread_map *thread_map__new_by_uid(uid_t uid);
struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct thread_map *thread_map__get(struct thread_map *map);
+void thread_map__put(struct thread_map *map);
+
struct thread_map *thread_map__new_str(const char *pid,
const char *tid, uid_t uid);
-void thread_map__delete(struct thread_map *threads);
-
size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
static inline int thread_map__nr(struct thread_map *threads)
@@ -27,4 +35,21 @@ static inline int thread_map__nr(struct thread_map *threads)
return threads ? threads->nr : 1;
}
+static inline pid_t thread_map__pid(struct thread_map *map, int thread)
+{
+ return map->map[thread].pid;
+}
+
+static inline void
+thread_map__set_pid(struct thread_map *map, int thread, pid_t pid)
+{
+ map->map[thread].pid = pid;
+}
+
+static inline char *thread_map__comm(struct thread_map *map, int thread)
+{
+ return map->map[thread].comm;
+}
+
+void thread_map__read_comms(struct thread_map *threads);
#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 51d9e56..cab8cc2 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -3,6 +3,8 @@
#include <stdbool.h>
+#include <linux/types.h>
+
struct perf_session;
union perf_event;
struct perf_evlist;
@@ -29,6 +31,9 @@ typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
struct ordered_events *oe);
+typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event,
+ struct perf_session *session);
+
struct perf_tool {
event_sample sample,
read;
@@ -38,13 +43,20 @@ struct perf_tool {
fork,
exit,
lost,
+ lost_samples,
+ aux,
+ itrace_start,
+ context_switch,
throttle,
unthrottle;
event_attr_op attr;
event_op2 tracing_data;
event_oe finished_round;
event_op2 build_id,
- id_index;
+ id_index,
+ auxtrace_info,
+ auxtrace_error;
+ event_op3 auxtrace;
bool ordered_events;
bool ordering_requires_timestamps;
};
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index eb72716..2224598 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -341,20 +341,14 @@ out:
static int record_proc_kallsyms(void)
{
- unsigned int size;
- const char *path = "/proc/kallsyms";
- struct stat st;
- int ret, err = 0;
-
- ret = stat(path, &st);
- if (ret < 0) {
- /* not found */
- size = 0;
- if (write(output_fd, &size, 4) != 4)
- err = -EIO;
- return err;
- }
- return record_file(path, 4);
+ unsigned long long size = 0;
+ /*
+ * Just to keep older perf.data file parsers happy, record a zero
+ * sized kallsyms file, i.e. do the same thing that was done when
+ * /proc/kallsyms (or something specified via --kallsyms, in a
+ * different path) couldn't be read.
+ */
+ return write(output_fd, &size, 4) != 4 ? -EIO : 0;
}
static int record_ftrace_printk(void)
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 25d6c73..8ff7d62 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -135,36 +135,6 @@ void event_format__print(struct event_format *event,
return event_format__fprintf(event, cpu, data, size, stdout);
}
-void parse_proc_kallsyms(struct pevent *pevent,
- char *file, unsigned int size __maybe_unused)
-{
- unsigned long long addr;
- char *func;
- char *line;
- char *next = NULL;
- char *addr_str;
- char *mod;
- char *fmt = NULL;
-
- line = strtok_r(file, "\n", &next);
- while (line) {
- mod = NULL;
- addr_str = strtok_r(line, " ", &fmt);
- addr = strtoull(addr_str, NULL, 16);
- /* skip character */
- strtok_r(NULL, " ", &fmt);
- func = strtok_r(NULL, "\t", &fmt);
- mod = strtok_r(NULL, "]", &fmt);
- /* truncate the extra '[' */
- if (mod)
- mod = mod + 1;
-
- pevent_register_function(pevent, func, addr, mod);
-
- line = strtok_r(NULL, "\n", &next);
- }
-}
-
void parse_ftrace_printk(struct pevent *pevent,
char *file, unsigned int size __maybe_unused)
{
@@ -173,7 +143,7 @@ void parse_ftrace_printk(struct pevent *pevent,
char *line;
char *next = NULL;
char *addr_str;
- char *fmt;
+ char *fmt = NULL;
line = strtok_r(file, "\n", &next);
while (line) {
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 54d9e9b..b67a0cc 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -162,25 +162,23 @@ out:
static int read_proc_kallsyms(struct pevent *pevent)
{
unsigned int size;
- char *buf;
size = read4(pevent);
if (!size)
return 0;
-
- buf = malloc(size + 1);
- if (buf == NULL)
- return -1;
-
- if (do_read(buf, size) < 0) {
- free(buf);
- return -1;
- }
- buf[size] = '\0';
-
- parse_proc_kallsyms(pevent, buf, size);
-
- free(buf);
+ /*
+ * Just skip it, now that we configure libtraceevent to use the
+ * tools/perf/ symbol resolver.
+ *
+ * We need to skip it so that we can continue parsing old perf.data
+ * files, that contains this /proc/kallsyms payload.
+ *
+ * Newer perf.data files will have just the 4-bytes zeros "kallsyms
+ * payload", so that older tools can continue reading it and interpret
+ * it as "no kallsyms payload is present".
+ */
+ lseek(input_fd, size, SEEK_CUR);
+ trace_data_size += size;
return 0;
}
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 6322d37..b90e646 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <traceevent/event-parse.h>
#include "trace-event.h"
+#include "machine.h"
#include "util.h"
/*
@@ -19,6 +20,7 @@
* there.
*/
static struct trace_event tevent;
+static bool tevent_initialized;
int trace_event__init(struct trace_event *t)
{
@@ -32,6 +34,31 @@ int trace_event__init(struct trace_event *t)
return pevent ? 0 : -1;
}
+static int trace_event__init2(void)
+{
+ int be = traceevent_host_bigendian();
+ struct pevent *pevent;
+
+ if (trace_event__init(&tevent))
+ return -1;
+
+ pevent = tevent.pevent;
+ pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
+ pevent_set_file_bigendian(pevent, be);
+ pevent_set_host_bigendian(pevent, be);
+ tevent_initialized = true;
+ return 0;
+}
+
+int trace_event__register_resolver(struct machine *machine,
+ pevent_func_resolver_t *func)
+{
+ if (!tevent_initialized && trace_event__init2())
+ return -1;
+
+ return pevent_set_function_resolver(tevent.pevent, func, machine);
+}
+
void trace_event__cleanup(struct trace_event *t)
{
traceevent_unload_plugins(t->plugin_list, t->pevent);
@@ -62,21 +89,8 @@ tp_format(const char *sys, const char *name)
struct event_format*
trace_event__tp_format(const char *sys, const char *name)
{
- static bool initialized;
-
- if (!initialized) {
- int be = traceevent_host_bigendian();
- struct pevent *pevent;
-
- if (trace_event__init(&tevent))
- return NULL;
-
- pevent = tevent.pevent;
- pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
- pevent_set_file_bigendian(pevent, be);
- pevent_set_host_bigendian(pevent, be);
- initialized = true;
- }
+ if (!tevent_initialized && trace_event__init2())
+ return NULL;
return tp_format(sys, name);
}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index d5168f0..da6cc4c 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -18,6 +18,8 @@ struct trace_event {
int trace_event__init(struct trace_event *t);
void trace_event__cleanup(struct trace_event *t);
+int trace_event__register_resolver(struct machine *machine,
+ pevent_func_resolver_t *func);
struct event_format*
trace_event__tp_format(const char *sys, const char *name);
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 7b09a44..4c00507 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -269,13 +269,14 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
u64 offset = dso->data.eh_frame_hdr_offset;
if (offset == 0) {
- fd = dso__data_fd(dso, machine);
+ fd = dso__data_get_fd(dso, machine);
if (fd < 0)
return -EINVAL;
/* Check the .eh_frame section for unwinding info */
offset = elf_section_offset(fd, ".eh_frame_hdr");
dso->data.eh_frame_hdr_offset = offset;
+ dso__data_put_fd(dso);
}
if (offset)
@@ -294,13 +295,14 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
u64 ofs = dso->data.debug_frame_offset;
if (ofs == 0) {
- fd = dso__data_fd(dso, machine);
+ fd = dso__data_get_fd(dso, machine);
if (fd < 0)
return -EINVAL;
/* Check the .debug_frame section for unwinding info */
ofs = elf_section_offset(fd, ".debug_frame");
dso->data.debug_frame_offset = ofs;
+ dso__data_put_fd(dso);
}
*offset = ofs;
@@ -353,10 +355,13 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
#ifndef NO_LIBUNWIND_DEBUG_FRAME
/* Check the .debug_frame section for unwinding info */
if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
- int fd = dso__data_fd(map->dso, ui->machine);
+ int fd = dso__data_get_fd(map->dso, ui->machine);
int is_exec = elf_is_exec(fd, map->dso->name);
unw_word_t base = is_exec ? 0 : map->start;
+ if (fd >= 0)
+ dso__data_put_fd(map->dso);
+
memset(&di, 0, sizeof(di));
if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name,
map->start, map->end))
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 4ee6d0d..7acafb3 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -34,6 +34,7 @@ bool test_attr__enabled;
bool perf_host = true;
bool perf_guest = false;
+char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing";
char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events";
void event_attr_init(struct perf_event_attr *attr)
@@ -72,20 +73,60 @@ int mkdir_p(char *path, mode_t mode)
return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
}
-static int slow_copyfile(const char *from, const char *to, mode_t mode)
+int rm_rf(char *path)
+{
+ DIR *dir;
+ int ret = 0;
+ struct dirent *d;
+ char namebuf[PATH_MAX];
+
+ dir = opendir(path);
+ if (dir == NULL)
+ return 0;
+
+ while ((d = readdir(dir)) != NULL && !ret) {
+ struct stat statbuf;
+
+ if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+ continue;
+
+ scnprintf(namebuf, sizeof(namebuf), "%s/%s",
+ path, d->d_name);
+
+ ret = stat(namebuf, &statbuf);
+ if (ret < 0) {
+ pr_debug("stat failed: %s\n", namebuf);
+ break;
+ }
+
+ if (S_ISREG(statbuf.st_mode))
+ ret = unlink(namebuf);
+ else if (S_ISDIR(statbuf.st_mode))
+ ret = rm_rf(namebuf);
+ else {
+ pr_debug("unknown file: %s\n", namebuf);
+ ret = -1;
+ }
+ }
+ closedir(dir);
+
+ if (ret < 0)
+ return ret;
+
+ return rmdir(path);
+}
+
+static int slow_copyfile(const char *from, const char *to)
{
int err = -1;
char *line = NULL;
size_t n;
FILE *from_fp = fopen(from, "r"), *to_fp;
- mode_t old_umask;
if (from_fp == NULL)
goto out;
- old_umask = umask(mode ^ 0777);
to_fp = fopen(to, "w");
- umask(old_umask);
if (to_fp == NULL)
goto out_fclose_from;
@@ -102,42 +143,81 @@ out:
return err;
}
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+ void *ptr;
+ loff_t pgoff;
+
+ pgoff = off_in & ~(page_size - 1);
+ off_in -= pgoff;
+
+ ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+ if (ptr == MAP_FAILED)
+ return -1;
+
+ while (size) {
+ ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+ if (ret < 0 && errno == EINTR)
+ continue;
+ if (ret <= 0)
+ break;
+
+ size -= ret;
+ off_in += ret;
+ off_out -= ret;
+ }
+ munmap(ptr, off_in + size);
+
+ return size ? -1 : 0;
+}
+
int copyfile_mode(const char *from, const char *to, mode_t mode)
{
int fromfd, tofd;
struct stat st;
- void *addr;
int err = -1;
+ char *tmp = NULL, *ptr = NULL;
if (stat(from, &st))
goto out;
- if (st.st_size == 0) /* /proc? do it slowly... */
- return slow_copyfile(from, to, mode);
-
- fromfd = open(from, O_RDONLY);
- if (fromfd < 0)
+ /* extra 'x' at the end is to reserve space for '.' */
+ if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+ tmp = NULL;
+ goto out;
+ }
+ ptr = strrchr(tmp, '/');
+ if (!ptr)
goto out;
+ ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+ *ptr = '.';
- tofd = creat(to, mode);
+ tofd = mkstemp(tmp);
if (tofd < 0)
- goto out_close_from;
+ goto out;
+
+ if (fchmod(tofd, mode))
+ goto out_close_to;
- addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0);
- if (addr == MAP_FAILED)
+ if (st.st_size == 0) { /* /proc? do it slowly... */
+ err = slow_copyfile(from, tmp);
goto out_close_to;
+ }
- if (write(tofd, addr, st.st_size) == st.st_size)
- err = 0;
+ fromfd = open(from, O_RDONLY);
+ if (fromfd < 0)
+ goto out_close_to;
- munmap(addr, st.st_size);
+ err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
+
+ close(fromfd);
out_close_to:
close(tofd);
- if (err)
- unlink(to);
-out_close_from:
- close(fromfd);
+ if (!err)
+ err = link(tmp, to);
+ unlink(tmp);
out:
+ free(tmp);
return err;
}
@@ -312,6 +392,8 @@ void set_term_quiet_input(struct termios *old)
static void set_tracing_events_path(const char *tracing, const char *mountpoint)
{
+ snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
+ mountpoint, tracing);
snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
mountpoint, tracing, "events");
}
@@ -357,66 +439,14 @@ const char *perf_debugfs_mount(const char *mountpoint)
void perf_debugfs_set_path(const char *mntpt)
{
- snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt);
set_tracing_events_path("tracing/", mntpt);
}
-static const char *find_tracefs(void)
-{
- const char *path = __perf_tracefs_mount(NULL);
-
- return path;
-}
-
-static const char *find_debugfs(void)
-{
- const char *path = __perf_debugfs_mount(NULL);
-
- if (!path)
- fprintf(stderr, "Your kernel does not support the debugfs filesystem");
-
- return path;
-}
-
-/*
- * Finds the path to the debugfs/tracing
- * Allocates the string and stores it.
- */
-const char *find_tracing_dir(void)
-{
- const char *tracing_dir = "";
- static char *tracing;
- static int tracing_found;
- const char *debugfs;
-
- if (tracing_found)
- return tracing;
-
- debugfs = find_tracefs();
- if (!debugfs) {
- tracing_dir = "/tracing";
- debugfs = find_debugfs();
- if (!debugfs)
- return NULL;
- }
-
- if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0)
- return NULL;
-
- tracing_found = 1;
- return tracing;
-}
-
char *get_tracing_file(const char *name)
{
- const char *tracing;
char *file;
- tracing = find_tracing_dir();
- if (!tracing)
- return NULL;
-
- if (asprintf(&file, "%s/%s", tracing, name) < 0)
+ if (asprintf(&file, "%s/%s", tracing_path, name) < 0)
return NULL;
return file;
@@ -487,6 +517,96 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
return (unsigned long) -1;
}
+int get_stack_size(const char *str, unsigned long *_size)
+{
+ char *endptr;
+ unsigned long size;
+ unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
+
+ size = strtoul(str, &endptr, 0);
+
+ do {
+ if (*endptr)
+ break;
+
+ size = round_up(size, sizeof(u64));
+ if (!size || size > max_size)
+ break;
+
+ *_size = size;
+ return 0;
+
+ } while (0);
+
+ pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
+ max_size, str);
+ return -1;
+}
+
+int parse_callchain_record(const char *arg, struct callchain_param *param)
+{
+ char *tok, *name, *saveptr = NULL;
+ char *buf;
+ int ret = -1;
+
+ /* We need buffer that we know we can write to. */
+ buf = malloc(strlen(arg) + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ strcpy(buf, arg);
+
+ tok = strtok_r((char *)buf, ",", &saveptr);
+ name = tok ? : (char *)buf;
+
+ do {
+ /* Framepointer style */
+ if (!strncmp(name, "fp", sizeof("fp"))) {
+ if (!strtok_r(NULL, ",", &saveptr)) {
+ param->record_mode = CALLCHAIN_FP;
+ ret = 0;
+ } else
+ pr_err("callchain: No more arguments "
+ "needed for --call-graph fp\n");
+ break;
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ /* Dwarf style */
+ } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
+ const unsigned long default_stack_dump_size = 8192;
+
+ ret = 0;
+ param->record_mode = CALLCHAIN_DWARF;
+ param->dump_size = default_stack_dump_size;
+
+ tok = strtok_r(NULL, ",", &saveptr);
+ if (tok) {
+ unsigned long size = 0;
+
+ ret = get_stack_size(tok, &size);
+ param->dump_size = size;
+ }
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+ } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+ if (!strtok_r(NULL, ",", &saveptr)) {
+ param->record_mode = CALLCHAIN_LBR;
+ ret = 0;
+ } else
+ pr_err("callchain: No more arguments "
+ "needed for --call-graph lbr\n");
+ break;
+ } else {
+ pr_err("callchain: Unknown --call-graph option "
+ "value: %s\n", arg);
+ break;
+ }
+
+ } while (0);
+
+ free(buf);
+ return ret;
+}
+
int filename__read_str(const char *filename, char **buf, size_t *sizep)
{
size_t size = 0, alloc_size = 0;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 1ff23e0..291be1d 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -83,10 +83,10 @@
extern const char *graph_line;
extern const char *graph_dotted_line;
extern char buildid_dir[];
+extern char tracing_path[];
extern char tracing_events_path[];
extern void perf_debugfs_set_path(const char *mountpoint);
const char *perf_debugfs_mount(const char *mountpoint);
-const char *find_tracing_dir(void);
char *get_tracing_file(const char *name);
void put_tracing_file(char *file);
@@ -249,14 +249,20 @@ static inline int sane_case(int x, int high)
}
int mkdir_p(char *path, mode_t mode);
+int rm_rf(char *path);
int copyfile(const char *from, const char *to);
int copyfile_mode(const char *from, const char *to, mode_t mode);
+int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
s64 perf_atoll(const char *str);
char **argv_split(const char *str, int *argcp);
void argv_free(char **argv);
bool strglobmatch(const char *str, const char *pat);
bool strlazymatch(const char *str, const char *pat);
+static inline bool strisglob(const char *str)
+{
+ return strpbrk(str, "*?[") != NULL;
+}
int strtailcmp(const char *s1, const char *s2);
char *strxfrchar(char *s, char from, char to);
unsigned long convert_unit(unsigned long value, char *unit);
@@ -312,6 +318,7 @@ static inline int path__join3(char *bf, size_t size,
struct dso;
struct symbol;
+extern bool srcline_full_filename;
char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
bool show_sym);
void free_srcline(char *srcline);
@@ -333,4 +340,18 @@ int gzip_decompress_to_file(const char *input, int output_fd);
int lzma_decompress_to_file(const char *input, int output_fd);
#endif
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints);
+
+static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints)
+{
+ return asprintf_expr_inout_ints(var, true, nints, ints);
+}
+
+static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints)
+{
+ return asprintf_expr_inout_ints(var, false, nints, ints);
+}
+
+int get_stack_size(const char *str, unsigned long *_size);
+
#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 5c7dd79..44d440d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -101,7 +101,7 @@ static char *get_file(struct vdso_file *vdso_file)
return vdso;
}
-void vdso__exit(struct machine *machine)
+void machine__exit_vdso(struct machine *machine)
{
struct vdso_info *vdso_info = machine->vdso_info;
@@ -120,14 +120,14 @@ void vdso__exit(struct machine *machine)
zfree(&machine->vdso_info);
}
-static struct dso *vdso__new(struct machine *machine, const char *short_name,
- const char *long_name)
+static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
+ const char *long_name)
{
struct dso *dso;
dso = dso__new(short_name);
if (dso != NULL) {
- dsos__add(&machine->user_dsos, dso);
+ __dsos__add(&machine->dsos, dso);
dso__set_long_name(dso, long_name, false);
}
@@ -230,27 +230,29 @@ static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
return vdso_file->temp_file_name;
}
-static struct dso *vdso__findnew_compat(struct machine *machine,
- struct vdso_file *vdso_file)
+static struct dso *__machine__findnew_compat(struct machine *machine,
+ struct vdso_file *vdso_file)
{
const char *file_name;
struct dso *dso;
- dso = dsos__find(&machine->user_dsos, vdso_file->dso_name, true);
+ dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
if (dso)
- return dso;
+ goto out;
file_name = vdso__get_compat_file(vdso_file);
if (!file_name)
- return NULL;
+ goto out;
- return vdso__new(machine, vdso_file->dso_name, file_name);
+ dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+out:
+ return dso;
}
-static int vdso__dso_findnew_compat(struct machine *machine,
- struct thread *thread,
- struct vdso_info *vdso_info,
- struct dso **dso)
+static int __machine__findnew_vdso_compat(struct machine *machine,
+ struct thread *thread,
+ struct vdso_info *vdso_info,
+ struct dso **dso)
{
enum dso_type dso_type;
@@ -267,10 +269,10 @@ static int vdso__dso_findnew_compat(struct machine *machine,
switch (dso_type) {
case DSO__TYPE_32BIT:
- *dso = vdso__findnew_compat(machine, &vdso_info->vdso32);
+ *dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
return 1;
case DSO__TYPE_X32BIT:
- *dso = vdso__findnew_compat(machine, &vdso_info->vdsox32);
+ *dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
return 1;
case DSO__TYPE_UNKNOWN:
case DSO__TYPE_64BIT:
@@ -281,35 +283,37 @@ static int vdso__dso_findnew_compat(struct machine *machine,
#endif
-struct dso *vdso__dso_findnew(struct machine *machine,
- struct thread *thread __maybe_unused)
+struct dso *machine__findnew_vdso(struct machine *machine,
+ struct thread *thread __maybe_unused)
{
struct vdso_info *vdso_info;
- struct dso *dso;
+ struct dso *dso = NULL;
+ pthread_rwlock_wrlock(&machine->dsos.lock);
if (!machine->vdso_info)
machine->vdso_info = vdso_info__new();
vdso_info = machine->vdso_info;
if (!vdso_info)
- return NULL;
+ goto out_unlock;
#if BITS_PER_LONG == 64
- if (vdso__dso_findnew_compat(machine, thread, vdso_info, &dso))
- return dso;
+ if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
+ goto out_unlock;
#endif
- dso = dsos__find(&machine->user_dsos, DSO__NAME_VDSO, true);
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
if (!dso) {
char *file;
file = get_file(&vdso_info->vdso);
- if (!file)
- return NULL;
-
- dso = vdso__new(machine, DSO__NAME_VDSO, file);
+ if (file)
+ dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
}
+out_unlock:
+ dso__get(dso);
+ pthread_rwlock_unlock(&machine->dsos.lock);
return dso;
}
diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h
index d97da16..cdc4fab 100644
--- a/tools/perf/util/vdso.h
+++ b/tools/perf/util/vdso.h
@@ -23,7 +23,7 @@ bool dso__is_vdso(struct dso *dso);
struct machine;
struct thread;
-struct dso *vdso__dso_findnew(struct machine *machine, struct thread *thread);
-void vdso__exit(struct machine *machine);
+struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread);
+void machine__exit_vdso(struct machine *machine);
#endif /* __PERF_VDSO__ */
diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
index 22afbf6..c10ba41 100644
--- a/tools/perf/util/xyarray.c
+++ b/tools/perf/util/xyarray.c
@@ -9,11 +9,19 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
if (xy != NULL) {
xy->entry_size = entry_size;
xy->row_size = row_size;
+ xy->entries = xlen * ylen;
}
return xy;
}
+void xyarray__reset(struct xyarray *xy)
+{
+ size_t n = xy->entries * xy->entry_size;
+
+ memset(xy->contents, 0, n);
+}
+
void xyarray__delete(struct xyarray *xy)
{
free(xy);
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
index c488a07..7f30af3 100644
--- a/tools/perf/util/xyarray.h
+++ b/tools/perf/util/xyarray.h
@@ -6,11 +6,13 @@
struct xyarray {
size_t row_size;
size_t entry_size;
+ size_t entries;
char contents[];
};
struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
void xyarray__delete(struct xyarray *xy);
+void xyarray__reset(struct xyarray *xy);
static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
{
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
index 3d1537b..e882c83 100644
--- a/tools/power/acpi/Makefile
+++ b/tools/power/acpi/Makefile
@@ -8,154 +8,20 @@
# as published by the Free Software Foundation; version 2
# of the License.
-OUTPUT=./
-ifeq ("$(origin O)", "command line")
- OUTPUT := $(O)/
-endif
-
-ifneq ($(OUTPUT),)
-# check that the output directory actually exists
-OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
-$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
-endif
-
-SUBDIRS = tools/ec
-
-# --- CONFIGURATION BEGIN ---
-
-# Set the following to `true' to make a unstripped, unoptimized
-# binary. Leave this set to `false' for production use.
-DEBUG ?= true
-
-# make the build silent. Set this to something else to make it noisy again.
-V ?= false
-
-# Prefix to the directories we're installing to
-DESTDIR ?=
-
-# --- CONFIGURATION END ---
-
-# Directory definitions. These are default and most probably
-# do not need to be changed. Please note that DESTDIR is
-# added in front of any of them
-
-bindir ?= /usr/bin
-sbindir ?= /usr/sbin
-mandir ?= /usr/man
-
-# Toolchain: what tools do we use, and what options do they need:
-
-INSTALL = /usr/bin/install -c
-INSTALL_PROGRAM = ${INSTALL}
-INSTALL_DATA = ${INSTALL} -m 644
-INSTALL_SCRIPT = ${INSTALL_PROGRAM}
-
-# If you are running a cross compiler, you may want to set this
-# to something more interesting, like "arm-linux-". If you want
-# to compile vs uClibc, that can be done here as well.
-CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
-CC = $(CROSS)gcc
-LD = $(CROSS)gcc
-STRIP = $(CROSS)strip
-HOSTCC = gcc
-
-# check if compiler option is supported
-cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
-
-# use '-Os' optimization if available, else use -O2
-OPTIMIZATION := $(call cc-supports,-Os,-O2)
-
-WARNINGS := -Wall
-WARNINGS += $(call cc-supports,-Wstrict-prototypes)
-WARNINGS += $(call cc-supports,-Wdeclaration-after-statement)
-
-KERNEL_INCLUDE := ../../../include
-ACPICA_INCLUDE := ../../../drivers/acpi/acpica
-CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE)
-CFLAGS += $(WARNINGS)
-
-ifeq ($(strip $(V)),false)
- QUIET=@
- ECHO=@echo
-else
- QUIET=
- ECHO=@\#
-endif
-export QUIET ECHO
-
-# if DEBUG is enabled, then we do not strip or optimize
-ifeq ($(strip $(DEBUG)),true)
- CFLAGS += -O1 -g -DDEBUG
- STRIPCMD = /bin/true -Since_we_are_debugging
-else
- CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer
- STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment
-endif
-
-# --- ACPIDUMP BEGIN ---
-
-vpath %.c \
- ../../../drivers/acpi/acpica\
- tools/acpidump\
- common\
- os_specific/service_layers
-
-CFLAGS += -DACPI_DUMP_APP -Itools/acpidump
-
-DUMP_OBJS = \
- apdump.o\
- apfiles.o\
- apmain.o\
- osunixdir.o\
- osunixmap.o\
- osunixxf.o\
- tbprint.o\
- tbxfroot.o\
- utbuffer.o\
- utdebug.o\
- utexcep.o\
- utglobal.o\
- utmath.o\
- utprint.o\
- utstring.o\
- utxferror.o\
- oslibcfs.o\
- oslinuxtbl.o\
- cmfsize.o\
- getopt.o
-
-DUMP_OBJS := $(addprefix $(OUTPUT)tools/acpidump/,$(DUMP_OBJS))
-
-$(OUTPUT)acpidump: $(DUMP_OBJS)
- $(ECHO) " LD " $@
- $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(DUMP_OBJS) -L$(OUTPUT) -o $@
- $(QUIET) $(STRIPCMD) $@
-
-$(OUTPUT)tools/acpidump/%.o: %.c
- $(ECHO) " CC " $@
- $(QUIET) $(CC) -c $(CFLAGS) -o $@ $<
-
-# --- ACPIDUMP END ---
-
-all: $(OUTPUT)acpidump
- echo $(OUTPUT)
-
-clean:
- -find $(OUTPUT) \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \
- | xargs rm -f
- -rm -f $(OUTPUT)acpidump
-
-install-tools:
- $(INSTALL) -d $(DESTDIR)${sbindir}
- $(INSTALL_PROGRAM) $(OUTPUT)acpidump $(DESTDIR)${sbindir}
-
-install-man:
- $(INSTALL_DATA) -D man/acpidump.8 $(DESTDIR)${mandir}/man8/acpidump.8
-
-install: all install-tools install-man
-
-uninstall:
- - rm -f $(DESTDIR)${sbindir}/acpidump
- - rm -f $(DESTDIR)${mandir}/man8/acpidump.8
-
-.PHONY: all utils install-tools install-man install uninstall clean
+include ../../scripts/Makefile.include
+
+all: acpidump ec
+clean: acpidump_clean ec_clean
+install: acpidump_install ec_install
+uninstall: acpidump_uninstall ec_uninstall
+
+acpidump ec: FORCE
+ $(call descend,tools/$@,all)
+acpidump_clean ec_clean:
+ $(call descend,tools/$(@:_clean=),clean)
+acpidump_install ec_install:
+ $(call descend,tools/$(@:_install=),install)
+acpidump_uninstall ec_uninstall:
+ $(call descend,tools/$(@:_uninstall=),uninstall)
+
+.PHONY: FORCE
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
new file mode 100644
index 0000000..552af68
--- /dev/null
+++ b/tools/power/acpi/Makefile.config
@@ -0,0 +1,92 @@
+# tools/power/acpi/Makefile.config - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+include ../../../../scripts/Makefile.include
+
+OUTPUT=./
+ifeq ("$(origin O)", "command line")
+ OUTPUT := $(O)/
+endif
+
+ifneq ($(OUTPUT),)
+# check that the output directory actually exists
+OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
+$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+endif
+
+# --- CONFIGURATION BEGIN ---
+
+# Set the following to `true' to make a unstripped, unoptimized
+# binary. Leave this set to `false' for production use.
+DEBUG ?= true
+
+# make the build silent. Set this to something else to make it noisy again.
+V ?= false
+
+# Prefix to the directories we're installing to
+DESTDIR ?=
+
+# --- CONFIGURATION END ---
+
+# Directory definitions. These are default and most probably
+# do not need to be changed. Please note that DESTDIR is
+# added in front of any of them
+
+bindir ?= /usr/bin
+sbindir ?= /usr/sbin
+mandir ?= /usr/man
+
+# Toolchain: what tools do we use, and what options do they need:
+
+INSTALL = /usr/bin/install -c
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_SCRIPT = ${INSTALL_PROGRAM}
+
+# If you are running a cross compiler, you may want to set this
+# to something more interesting, like "arm-linux-". If you want
+# to compile vs uClibc, that can be done here as well.
+CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
+CC = $(CROSS)gcc
+LD = $(CROSS)gcc
+STRIP = $(CROSS)strip
+HOSTCC = gcc
+
+# check if compiler option is supported
+cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
+
+# use '-Os' optimization if available, else use -O2
+OPTIMIZATION := $(call cc-supports,-Os,-O2)
+
+WARNINGS := -Wall
+WARNINGS += $(call cc-supports,-Wstrict-prototypes)
+WARNINGS += $(call cc-supports,-Wdeclaration-after-statement)
+
+KERNEL_INCLUDE := ../../../include
+ACPICA_INCLUDE := ../../../drivers/acpi/acpica
+CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE)
+CFLAGS += $(WARNINGS)
+
+ifeq ($(strip $(V)),false)
+ QUIET=@
+ ECHO=@echo
+else
+ QUIET=
+ ECHO=@\#
+endif
+
+# if DEBUG is enabled, then we do not strip or optimize
+ifeq ($(strip $(DEBUG)),true)
+ CFLAGS += -O1 -g -DDEBUG
+ STRIPCMD = /bin/true -Since_we_are_debugging
+else
+ CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer
+ STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment
+endif
diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules
new file mode 100644
index 0000000..ec87a9e
--- /dev/null
+++ b/tools/power/acpi/Makefile.rules
@@ -0,0 +1,37 @@
+# tools/power/acpi/Makefile.rules - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+$(OUTPUT)$(TOOL): $(TOOL_OBJS) FORCE
+ $(ECHO) " LD " $@
+ $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(TOOL_OBJS) -L$(OUTPUT) -o $@
+ $(QUIET) $(STRIPCMD) $@
+
+$(OUTPUT)%.o: %.c
+ $(ECHO) " CC " $@
+ $(QUIET) $(CC) -c $(CFLAGS) -o $@ $<
+
+all: $(OUTPUT)$(TOOL)
+clean:
+ -find $(OUTPUT) \( -not -type d \) \
+ -and \( -name '*~' -o -name '*.[oas]' \) \
+ -type f -print \
+ | xargs rm -f
+ -rm -f $(OUTPUT)$(TOOL)
+
+install-tools:
+ $(INSTALL) -d $(DESTDIR)${sbindir}
+ $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)${sbindir}
+uninstall-tools:
+ - rm -f $(DESTDIR)${sbindir}/$(TOOL)
+
+install: all install-tools $(EXTRA_INSTALL)
+uninstall: uninstall-tools $(EXTRA_UNINSTALL)
+
+.PHONY: FORCE
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 5da129e..326e826 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -127,7 +127,7 @@ int acpi_getopt(int argc, char **argv, char *opts)
argv[acpi_gbl_optind][0] != '-' ||
argv[acpi_gbl_optind][1] == '\0') {
return (ACPI_OPT_END);
- } else if (ACPI_STRCMP(argv[acpi_gbl_optind], "--") == 0) {
+ } else if (strcmp(argv[acpi_gbl_optind], "--") == 0) {
acpi_gbl_optind++;
return (ACPI_OPT_END);
}
@@ -140,7 +140,7 @@ int acpi_getopt(int argc, char **argv, char *opts)
/* Make sure that the option is legal */
if (current_char == ':' ||
- (opts_ptr = ACPI_STRCHR(opts, current_char)) == NULL) {
+ (opts_ptr = strchr(opts, current_char)) == NULL) {
ACPI_OPTION_ERROR("Illegal option: -", current_char);
if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') {
diff --git a/tools/power/acpi/man/acpidump.8 b/tools/power/acpi/man/acpidump.8
index 38f095d..79e2d1d 100644
--- a/tools/power/acpi/man/acpidump.8
+++ b/tools/power/acpi/man/acpidump.8
@@ -22,9 +22,6 @@ acpidump options are as follow:
.B \-b
Dump tables to binary files
.TP
-.B \-c
-Dump customized tables
-.TP
.B \-h \-?
This help message
.TP
@@ -48,15 +45,25 @@ Verbose mode
.B \-a <Address>
Get table via a physical address
.TP
+.B \-c <on|off>
+Turning on/off customized table dumping
+.TP
.B \-f <BinaryFile>
Get table via a binary file
.TP
.B \-n <Signature>
Get table via a name/signature
.TP
-Invocation without parameters dumps all available tables
+.B \-x
+Do not use but dump XSDT
+.TP
+.B \-x \-x
+Do not use or dump XSDT
+.TP
+.fi
+Invocation without parameters dumps all available tables.
.TP
-Multiple mixed instances of -a, -f, and -n are supported
+Multiple mixed instances of -a, -f, and -n are supported.
.SH EXAMPLES
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index db15c9d..dd5008b 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -222,7 +222,7 @@ acpi_os_get_table_by_address(acpi_physical_address address,
goto exit;
}
- ACPI_MEMCPY(local_table, mapped_table, table_length);
+ memcpy(local_table, mapped_table, table_length);
exit:
osl_unmap_table(mapped_table);
@@ -531,7 +531,7 @@ static acpi_status osl_load_rsdp(void)
gbl_rsdp_address =
rsdp_base + (ACPI_CAST8(mapped_table) - rsdp_address);
- ACPI_MEMCPY(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp));
+ memcpy(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp));
acpi_os_unmap_memory(rsdp_address, rsdp_size);
return (AE_OK);
@@ -582,64 +582,67 @@ static acpi_status osl_table_initialize(void)
return (AE_OK);
}
- /* Get RSDP from memory */
+ if (!gbl_dump_customized_tables) {
- status = osl_load_rsdp();
- if (ACPI_FAILURE(status)) {
- return (status);
- }
+ /* Get RSDP from memory */
+
+ status = osl_load_rsdp();
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
- /* Get XSDT from memory */
+ /* Get XSDT from memory */
- if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) {
- if (gbl_xsdt) {
- free(gbl_xsdt);
- gbl_xsdt = NULL;
+ if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) {
+ if (gbl_xsdt) {
+ free(gbl_xsdt);
+ gbl_xsdt = NULL;
+ }
+
+ gbl_revision = 2;
+ status = osl_get_bios_table(ACPI_SIG_XSDT, 0,
+ ACPI_CAST_PTR(struct
+ acpi_table_header
+ *, &gbl_xsdt),
+ &address);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
}
- gbl_revision = 2;
- status = osl_get_bios_table(ACPI_SIG_XSDT, 0,
- ACPI_CAST_PTR(struct
- acpi_table_header *,
- &gbl_xsdt), &address);
- if (ACPI_FAILURE(status)) {
- return (status);
+ /* Get RSDT from memory */
+
+ if (gbl_rsdp.rsdt_physical_address) {
+ if (gbl_rsdt) {
+ free(gbl_rsdt);
+ gbl_rsdt = NULL;
+ }
+
+ status = osl_get_bios_table(ACPI_SIG_RSDT, 0,
+ ACPI_CAST_PTR(struct
+ acpi_table_header
+ *, &gbl_rsdt),
+ &address);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
}
- }
- /* Get RSDT from memory */
+ /* Get FADT from memory */
- if (gbl_rsdp.rsdt_physical_address) {
- if (gbl_rsdt) {
- free(gbl_rsdt);
- gbl_rsdt = NULL;
+ if (gbl_fadt) {
+ free(gbl_fadt);
+ gbl_fadt = NULL;
}
- status = osl_get_bios_table(ACPI_SIG_RSDT, 0,
+ status = osl_get_bios_table(ACPI_SIG_FADT, 0,
ACPI_CAST_PTR(struct
acpi_table_header *,
- &gbl_rsdt), &address);
+ &gbl_fadt),
+ &gbl_fadt_address);
if (ACPI_FAILURE(status)) {
return (status);
}
- }
-
- /* Get FADT from memory */
-
- if (gbl_fadt) {
- free(gbl_fadt);
- gbl_fadt = NULL;
- }
-
- status = osl_get_bios_table(ACPI_SIG_FADT, 0,
- ACPI_CAST_PTR(struct acpi_table_header *,
- &gbl_fadt),
- &gbl_fadt_address);
- if (ACPI_FAILURE(status)) {
- return (status);
- }
-
- if (!gbl_dump_customized_tables) {
/* Add mandatory tables to global table list first */
@@ -961,7 +964,7 @@ osl_get_bios_table(char *signature,
goto exit;
}
- ACPI_MEMCPY(local_table, mapped_table, table_length);
+ memcpy(local_table, mapped_table, table_length);
*address = table_address;
*table = local_table;
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index 0b1fa29..44ad488 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -54,7 +54,7 @@ ACPI_MODULE_NAME("osunixmap")
#ifndef O_BINARY
#define O_BINARY 0
#endif
-#ifdef _free_BSD
+#if defined(_dragon_fly) || defined(_free_BSD)
#define MMAP_FLAGS MAP_SHARED
#else
#define MMAP_FLAGS MAP_PRIVATE
diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile
new file mode 100644
index 0000000..8d76157
--- /dev/null
+++ b/tools/power/acpi/tools/acpidump/Makefile
@@ -0,0 +1,53 @@
+# tools/power/acpi/tools/acpidump/Makefile - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+include ../../Makefile.config
+
+TOOL = acpidump
+EXTRA_INSTALL = install-man
+EXTRA_UNINSTALL = uninstall-man
+
+vpath %.c \
+ ../../../../../drivers/acpi/acpica\
+ ./\
+ ../../common\
+ ../../os_specific/service_layers
+CFLAGS += -DACPI_DUMP_APP -I.\
+ -I../../../../../drivers/acpi/acpica\
+ -I../../../../../include
+TOOL_OBJS = \
+ apdump.o\
+ apfiles.o\
+ apmain.o\
+ osunixdir.o\
+ osunixmap.o\
+ osunixxf.o\
+ tbprint.o\
+ tbxfroot.o\
+ utbuffer.o\
+ utdebug.o\
+ utexcep.o\
+ utglobal.o\
+ utmath.o\
+ utnonansi.o\
+ utprint.o\
+ utstring.o\
+ utxferror.o\
+ oslibcfs.o\
+ oslinuxtbl.o\
+ cmfsize.o\
+ getopt.o
+
+include ../../Makefile.rules
+
+install-man: ../../man/acpidump.8
+ $(INSTALL_DATA) -D $< $(DESTDIR)${mandir}/man8/acpidump.8
+uninstall-man:
+ - rm -f $(DESTDIR)${mandir}/man8/acpidump.8
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index 84bdef0..eed5344 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -66,7 +66,7 @@
EXTERN u8 INIT_GLOBAL(gbl_summary_mode, FALSE);
EXTERN u8 INIT_GLOBAL(gbl_verbose_mode, FALSE);
EXTERN u8 INIT_GLOBAL(gbl_binary_mode, FALSE);
-EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, FALSE);
+EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, TRUE);
EXTERN u8 INIT_GLOBAL(gbl_do_not_dump_xsdt, FALSE);
EXTERN ACPI_FILE INIT_GLOBAL(gbl_output_file, NULL);
EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL);
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index c736adf..61d0de8 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -329,7 +329,7 @@ int ap_dump_table_by_name(char *signature)
acpi_status status;
int table_status;
- if (ACPI_STRLEN(signature) != ACPI_NAME_SIZE) {
+ if (strlen(signature) != ACPI_NAME_SIZE) {
acpi_log_error
("Invalid table signature [%s]: must be exactly 4 characters\n",
signature);
@@ -338,15 +338,15 @@ int ap_dump_table_by_name(char *signature)
/* Table signatures are expected to be uppercase */
- ACPI_STRCPY(local_signature, signature);
+ strcpy(local_signature, signature);
acpi_ut_strupr(local_signature);
/* To be friendly, handle tables whose signatures do not match the name */
if (ACPI_COMPARE_NAME(local_signature, "FADT")) {
- ACPI_STRCPY(local_signature, ACPI_SIG_FADT);
+ strcpy(local_signature, ACPI_SIG_FADT);
} else if (ACPI_COMPARE_NAME(local_signature, "MADT")) {
- ACPI_STRCPY(local_signature, ACPI_SIG_MADT);
+ strcpy(local_signature, ACPI_SIG_MADT);
}
/* Dump all instances of this signature (to handle multiple SSDTs) */
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index 8f2fe16..a37f970 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -136,10 +136,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
} else {
ACPI_MOVE_NAME(filename, table->signature);
}
- filename[0] = (char)ACPI_TOLOWER(filename[0]);
- filename[1] = (char)ACPI_TOLOWER(filename[1]);
- filename[2] = (char)ACPI_TOLOWER(filename[2]);
- filename[3] = (char)ACPI_TOLOWER(filename[3]);
+ filename[0] = (char)tolower((int)filename[0]);
+ filename[1] = (char)tolower((int)filename[1]);
+ filename[2] = (char)tolower((int)filename[2]);
+ filename[3] = (char)tolower((int)filename[3]);
filename[ACPI_NAME_SIZE] = 0;
/* Handle multiple SSDts - create different filenames for each */
@@ -147,10 +147,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
if (instance > 0) {
acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u",
instance);
- ACPI_STRCAT(filename, instance_str);
+ strcat(filename, instance_str);
}
- ACPI_STRCAT(filename, ACPI_TABLE_FILE_SUFFIX);
+ strcat(filename, ACPI_TABLE_FILE_SUFFIX);
if (gbl_verbose_mode) {
acpi_log_error
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index d0ba653..57620f6 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -80,7 +80,7 @@ struct ap_dump_action action_table[AP_MAX_ACTIONS];
u32 current_action = 0;
#define AP_UTILITY_NAME "ACPI Binary Table Dump Utility"
-#define AP_SUPPORTED_OPTIONS "?a:bcf:hn:o:r:svxz"
+#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:svxz"
/******************************************************************************
*
@@ -96,7 +96,6 @@ static void ap_display_usage(void)
ACPI_USAGE_HEADER("acpidump [options]");
ACPI_OPTION("-b", "Dump tables to binary files");
- ACPI_OPTION("-c", "Dump customized tables");
ACPI_OPTION("-h -?", "This help message");
ACPI_OPTION("-o <File>", "Redirect output to file");
ACPI_OPTION("-r <Address>", "Dump tables from specified RSDP");
@@ -107,6 +106,7 @@ static void ap_display_usage(void)
ACPI_USAGE_TEXT("\nTable Options:\n");
ACPI_OPTION("-a <Address>", "Get table via a physical address");
+ ACPI_OPTION("-c <on|off>", "Turning on/off customized table dumping");
ACPI_OPTION("-f <BinaryFile>", "Get table via a binary file");
ACPI_OPTION("-n <Signature>", "Get table via a name/signature");
ACPI_OPTION("-x", "Do not use but dump XSDT");
@@ -181,7 +181,16 @@ static int ap_do_options(int argc, char **argv)
case 'c': /* Dump customized tables */
- gbl_dump_customized_tables = TRUE;
+ if (!strcmp(acpi_gbl_optarg, "on")) {
+ gbl_dump_customized_tables = TRUE;
+ } else if (!strcmp(acpi_gbl_optarg, "off")) {
+ gbl_dump_customized_tables = FALSE;
+ } else {
+ acpi_log_error
+ ("%s: Cannot handle this switch, please use on|off\n",
+ acpi_gbl_optarg);
+ return (-1);
+ }
continue;
case 'h':
diff --git a/tools/power/acpi/tools/ec/Makefile b/tools/power/acpi/tools/ec/Makefile
index b7b0b92..75d8a12 100644
--- a/tools/power/acpi/tools/ec/Makefile
+++ b/tools/power/acpi/tools/ec/Makefile
@@ -1,22 +1,17 @@
-ec_access: ec_access.o
- $(ECHO) " LD " $@
- $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $< -o $@
- $(QUIET) $(STRIPCMD) $@
+# tools/power/acpi/tools/acpidump/Makefile - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
-%.o: %.c
- $(ECHO) " CC " $@
- $(QUIET) $(CC) -c $(CFLAGS) -o $@ $<
+include ../../Makefile.config
-all: ec_access
+TOOL = ec
+TOOL_OBJS = \
+ ec_access.o
-install:
- $(INSTALL) -d $(DESTDIR)${sbindir}
- $(INSTALL_PROGRAM) ec_access $(DESTDIR)${sbindir}
-
-uninstall:
- - rm -f $(DESTDIR)${sbindir}/ec_access
-
-clean:
- -rm -f $(OUTPUT)ec_access
-
-.PHONY: all install uninstall
+include ../../Makefile.rules
diff --git a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c
index 5224ee5..6ff8383 100644
--- a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c
+++ b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c
@@ -81,11 +81,11 @@ static int __init cpufreq_test_tsc(void)
printk(KERN_DEBUG "start--> \n");
then = read_pmtmr();
- rdtscll(then_tsc);
+ then_tsc = rdtsc();
for (i=0;i<20;i++) {
mdelay(100);
now = read_pmtmr();
- rdtscll(now_tsc);
+ now_tsc = rdtsc();
diff = (now - then) & 0xFFFFFF;
diff_tsc = now_tsc - then_tsc;
printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc);
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index f656e58..4e21357 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -17,6 +17,7 @@
#include "cpufreq.h"
#include "helpers/helpers.h"
+#include "helpers/sysfs.h"
#define NORM_FREQ_LEN 32
@@ -318,6 +319,9 @@ int cmd_freq_set(int argc, char **argv)
cpufreq_cpu_exists(cpu))
continue;
+ if (sysfs_is_cpu_online(cpu) != 1)
+ continue;
+
printf(_("Setting cpu: %d\n"), cpu);
ret = do_one_cpu(cpu, &new_pol, freq, policychange);
if (ret) {
diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c
index c13120a..cea398c 100644
--- a/tools/power/cpupower/utils/helpers/topology.c
+++ b/tools/power/cpupower/utils/helpers/topology.c
@@ -73,6 +73,8 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
for (cpu = 0; cpu < cpus; cpu++) {
cpu_top->core_info[cpu].cpu = cpu;
cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
+ if (!cpu_top->core_info[cpu].is_online)
+ continue;
if(sysfs_topology_read_file(
cpu,
"physical_package_id",
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index 90a8c4f..c83f160 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -135,7 +135,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
mperf_cstates[id].name, mperf_diff, tsc_diff);
} else if (max_freq_mode == MAX_FREQ_SYSFS) {
- timediff = timespec_diff_us(time_start, time_end);
+ timediff = max_frequency * timespec_diff_us(time_start, time_end);
*percent = 100.0 * mperf_diff / timediff;
dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
mperf_cstates[id].name, mperf_diff, timediff);
@@ -176,7 +176,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
dprint("%s: Average freq based on %s maximum frequency:\n",
mperf_cstates[id].name,
(max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read");
- dprint("%max_frequency: %lu", max_frequency);
+ dprint("max_frequency: %lu\n", max_frequency);
dprint("aperf_diff: %llu\n", aperf_diff);
dprint("mperf_diff: %llu\n", mperf_diff);
dprint("avg freq: %llu\n", *count);
@@ -279,6 +279,7 @@ use_sysfs:
return -1;
}
max_freq_mode = MAX_FREQ_SYSFS;
+ max_frequency /= 1000; /* Default automatically to MHz value */
return 0;
}
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 4039854..e367b1a8 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -9,7 +9,7 @@ endif
turbostat : turbostat.c
CFLAGS += -Wall
-CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"'
+CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
%: %.c
@mkdir -p $(BUILD_OUTPUT)
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 05b8fc3..622db68 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -251,11 +251,6 @@ Although it is not guaranteed by the architecture, turbostat assumes
that they count at TSC rate, which is true on all processors tested to date.
.SH REFERENCES
-"Intel® Turbo Boost Technology
-in Intel® Core™ Microarchitecture (Nehalem) Based Processors"
-http://download.intel.com/design/processor/applnots/320354.pdf
-
-"Intel® 64 and IA-32 Architectures Software Developer's Manual
Volume 3B: System Programming Guide"
http://www.intel.com/products/processor/manuals/
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index bac98ca..9655cb4 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -52,6 +52,7 @@ unsigned int skip_c0;
unsigned int skip_c1;
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
+unsigned int do_knl_cstates;
unsigned int do_pc2;
unsigned int do_pc3;
unsigned int do_pc6;
@@ -91,6 +92,7 @@ unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
+int base_cpu;
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -316,7 +318,7 @@ void print_header(void)
if (do_nhm_cstates)
outp += sprintf(outp, " CPU%%c1");
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
outp += sprintf(outp, " CPU%%c3");
if (do_nhm_cstates)
outp += sprintf(outp, " CPU%%c6");
@@ -370,7 +372,7 @@ void print_header(void)
if (do_rapl & RAPL_GFX)
outp += sprintf(outp, " GFX_J");
if (do_rapl & RAPL_DRAM)
- outp += sprintf(outp, " RAM_W");
+ outp += sprintf(outp, " RAM_J");
if (do_rapl & RAPL_PKG_PERF_STATUS)
outp += sprintf(outp, " PKG_%%");
if (do_rapl & RAPL_DRAM_PERF_STATUS)
@@ -546,7 +548,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (do_nhm_cstates && !do_slm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
if (do_nhm_cstates)
outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
@@ -1018,14 +1020,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
- if (do_nhm_cstates && !do_slm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
}
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
+ } else if (do_knl_cstates) {
+ if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+ return -7;
}
if (do_snb_cstates)
@@ -1150,9 +1155,9 @@ dump_nhm_platform_info(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
+ get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
- fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
+ fprintf(stderr, "cpu%d: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 40) & 0xFF;
fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
@@ -1162,9 +1167,9 @@ dump_nhm_platform_info(void)
fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
ratio, bclk, ratio * bclk);
- get_msr(0, MSR_IA32_POWER_CTL, &msr);
- fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
- msr, msr & 0x2 ? "EN" : "DIS");
+ get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
+ fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
+ base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
return;
}
@@ -1175,9 +1180,9 @@ dump_hsw_turbo_ratio_limits(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr);
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
- fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
+ fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 8) & 0xFF;
if (ratio)
@@ -1197,9 +1202,9 @@ dump_ivt_turbo_ratio_limits(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr);
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
- fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
+ fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
@@ -1249,9 +1254,9 @@ dump_nhm_turbo_ratio_limits(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr);
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
- fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+ fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
@@ -1296,16 +1301,77 @@ dump_nhm_turbo_ratio_limits(void)
}
static void
+dump_knl_turbo_ratio_limits(void)
+{
+ int cores;
+ unsigned int ratio;
+ unsigned long long msr;
+ int delta_cores;
+ int delta_ratio;
+ int i;
+
+ get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+
+ fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n",
+ base_cpu, msr);
+
+ /**
+ * Turbo encoding in KNL is as follows:
+ * [7:0] -- Base value of number of active cores of bucket 1.
+ * [15:8] -- Base value of freq ratio of bucket 1.
+ * [20:16] -- +ve delta of number of active cores of bucket 2.
+ * i.e. active cores of bucket 2 =
+ * active cores of bucket 1 + delta
+ * [23:21] -- Negative delta of freq ratio of bucket 2.
+ * i.e. freq ratio of bucket 2 =
+ * freq ratio of bucket 1 - delta
+ * [28:24]-- +ve delta of number of active cores of bucket 3.
+ * [31:29]-- -ve delta of freq ratio of bucket 3.
+ * [36:32]-- +ve delta of number of active cores of bucket 4.
+ * [39:37]-- -ve delta of freq ratio of bucket 4.
+ * [44:40]-- +ve delta of number of active cores of bucket 5.
+ * [47:45]-- -ve delta of freq ratio of bucket 5.
+ * [52:48]-- +ve delta of number of active cores of bucket 6.
+ * [55:53]-- -ve delta of freq ratio of bucket 6.
+ * [60:56]-- +ve delta of number of active cores of bucket 7.
+ * [63:61]-- -ve delta of freq ratio of bucket 7.
+ */
+ cores = msr & 0xFF;
+ ratio = (msr >> 8) && 0xFF;
+ if (ratio > 0)
+ fprintf(stderr,
+ "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, cores);
+
+ for (i = 16; i < 64; i = i + 8) {
+ delta_cores = (msr >> i) & 0x1F;
+ delta_ratio = (msr >> (i + 5)) && 0x7;
+ if (!delta_cores || !delta_ratio)
+ return;
+ cores = cores + delta_cores;
+ ratio = ratio - delta_ratio;
+
+ /** -ve ratios will make successive ratio calculations
+ * negative. Hence return instead of carrying on.
+ */
+ if (ratio > 0)
+ fprintf(stderr,
+ "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, cores);
+ }
+}
+
+static void
dump_nhm_cst_cfg(void)
{
unsigned long long msr;
- get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+ get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
- fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
+ fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
@@ -1318,6 +1384,49 @@ dump_nhm_cst_cfg(void)
return;
}
+static void
+dump_config_tdp(void)
+{
+ unsigned long long msr;
+
+ get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
+ fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
+ fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF);
+
+ get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
+ fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
+ if (msr) {
+ fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF);
+ fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF);
+ fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF);
+ fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF);
+ }
+ fprintf(stderr, ")\n");
+
+ get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
+ fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
+ if (msr) {
+ fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF);
+ fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF);
+ fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF);
+ fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF);
+ }
+ fprintf(stderr, ")\n");
+
+ get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
+ fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
+ if ((msr) & 0x3)
+ fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
+ fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1);
+ fprintf(stderr, ")\n");
+
+ get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
+ fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
+ fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xEF);
+ fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1);
+ fprintf(stderr, ")\n");
+}
+
void free_all_buffers(void)
{
CPU_FREE(cpu_present_set);
@@ -1381,12 +1490,41 @@ int parse_int_file(const char *fmt, ...)
}
/*
- * cpu_is_first_sibling_in_core(cpu)
- * return 1 if given CPU is 1st HT sibling in the core
+ * get_cpu_position_in_core(cpu)
+ * return the position of the CPU among its HT siblings in the core
+ * return -1 if the sibling is not in list
*/
-int cpu_is_first_sibling_in_core(int cpu)
+int get_cpu_position_in_core(int cpu)
{
- return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
+ char path[64];
+ FILE *filep;
+ int this_cpu;
+ char character;
+ int i;
+
+ sprintf(path,
+ "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
+ cpu);
+ filep = fopen(path, "r");
+ if (filep == NULL) {
+ perror(path);
+ exit(1);
+ }
+
+ for (i = 0; i < topo.num_threads_per_core; i++) {
+ fscanf(filep, "%d", &this_cpu);
+ if (this_cpu == cpu) {
+ fclose(filep);
+ return i;
+ }
+
+ /* Account for no separator after last thread*/
+ if (i != (topo.num_threads_per_core - 1))
+ fscanf(filep, "%c", &character);
+ }
+
+ fclose(filep);
+ return -1;
}
/*
@@ -1412,25 +1550,31 @@ int get_num_ht_siblings(int cpu)
{
char path[80];
FILE *filep;
- int sib1, sib2;
- int matches;
+ int sib1;
+ int matches = 0;
char character;
+ char str[100];
+ char *ch;
sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
filep = fopen_or_die(path, "r");
+
/*
* file format:
- * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4)
- * otherwinse 1 sibling (self).
+ * A ',' separated or '-' separated set of numbers
+ * (eg 1-2 or 1,3,4,5)
*/
- matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2);
+ fscanf(filep, "%d%c\n", &sib1, &character);
+ fseek(filep, 0, SEEK_SET);
+ fgets(str, 100, filep);
+ ch = strchr(str, character);
+ while (ch != NULL) {
+ matches++;
+ ch = strchr(ch+1, character);
+ }
fclose(filep);
-
- if (matches == 3)
- return 2;
- else
- return 1;
+ return matches+1;
}
/*
@@ -1594,8 +1738,10 @@ restart:
void check_dev_msr()
{
struct stat sb;
+ char pathname[32];
- if (stat("/dev/cpu/0/msr", &sb))
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
@@ -1608,6 +1754,7 @@ void check_permissions()
cap_user_data_t cap_data = &cap_data_data;
extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
int do_exit = 0;
+ char pathname[32];
/* check for CAP_SYS_RAWIO */
cap_header->pid = getpid();
@@ -1622,7 +1769,8 @@ void check_permissions()
}
/* test file permissions */
- if (euidaccess("/dev/cpu/0/msr", R_OK)) {
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (euidaccess(pathname, R_OK)) {
do_exit++;
warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
}
@@ -1704,7 +1852,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
default:
return 0;
}
- get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+ get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
@@ -1753,6 +1901,51 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
}
}
+int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case 0x57: /* Knights Landing */
+ return 1;
+ default:
+ return 0;
+ }
+}
+int has_config_tdp(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case 0x3A: /* IVB */
+ case 0x3E: /* IVB Xeon */
+
+ case 0x3C: /* HSW */
+ case 0x3F: /* HSX */
+ case 0x45: /* HSW */
+ case 0x46: /* HSW */
+ case 0x3D: /* BDW */
+ case 0x47: /* BDW */
+ case 0x4F: /* BDX */
+ case 0x56: /* BDX-DE */
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+
+ case 0x57: /* Knights Landing */
+ return 1;
+ default:
+ return 0;
+ }
+}
+
static void
dump_cstate_pstate_config_info(family, model)
{
@@ -1770,6 +1963,12 @@ dump_cstate_pstate_config_info(family, model)
if (has_nhm_turbo_ratio_limit(family, model))
dump_nhm_turbo_ratio_limits();
+ if (has_knl_turbo_ratio_limit(family, model))
+ dump_knl_turbo_ratio_limits();
+
+ if (has_config_tdp(family, model))
+ dump_config_tdp();
+
dump_nhm_cst_cfg();
}
@@ -1801,7 +2000,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
return 0;
- switch (msr & 0x7) {
+ switch (msr & 0xF) {
case ENERGY_PERF_BIAS_PERFORMANCE:
epb_string = "performance";
break;
@@ -1925,7 +2124,7 @@ double get_tdp(model)
unsigned long long msr;
if (do_rapl & RAPL_PKG_POWER_INFO)
- if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+ if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
switch (model) {
@@ -1950,6 +2149,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units)
case 0x3F: /* HSX */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
+ case 0x57: /* KNL */
return (rapl_dram_energy_units = 15.3 / 1000000);
default:
return (rapl_energy_units);
@@ -1991,6 +2191,7 @@ void rapl_probe(unsigned int family, unsigned int model)
case 0x3F: /* HSX */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
+ case 0x57: /* KNL */
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
break;
case 0x2D:
@@ -2006,7 +2207,7 @@ void rapl_probe(unsigned int family, unsigned int model)
}
/* units on package 0, verify later other packages match */
- if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
+ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
return;
rapl_power_units = 1.0 / (1 << (msr & 0xF));
@@ -2331,6 +2532,17 @@ int is_slm(unsigned int family, unsigned int model)
return 0;
}
+int is_knl(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+ switch (model) {
+ case 0x57: /* KNL */
+ return 1;
+ }
+ return 0;
+}
+
#define SLM_BCLK_FREQS 5
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
@@ -2340,7 +2552,7 @@ double slm_bclk(void)
unsigned int i;
double freq;
- if (get_msr(0, MSR_FSB_FREQ, &msr))
+ if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
fprintf(stderr, "SLM BCLK: unknown\n");
i = msr & 0xf;
@@ -2408,7 +2620,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
if (!do_nhm_platform_info)
goto guess;
- if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
+ if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
goto guess;
target_c_local = (msr >> 16) & 0xFF;
@@ -2541,6 +2753,7 @@ void process_cpuid()
do_c8_c9_c10 = has_hsw_msrs(family, model);
do_skl_residency = has_skl_msrs(family, model);
do_slm_cstates = is_slm(family, model);
+ do_knl_cstates = is_knl(family, model);
bclk = discover_bclk(family, model);
rapl_probe(family, model);
@@ -2755,13 +2968,9 @@ int initialize_counters(int cpu_id)
my_package_id = get_physical_package_id(cpu_id);
my_core_id = get_core_id(cpu_id);
-
- if (cpu_is_first_sibling_in_core(cpu_id)) {
- my_thread_id = 0;
+ my_thread_id = get_cpu_position_in_core(cpu_id);
+ if (!my_thread_id)
topo.num_cores++;
- } else {
- my_thread_id = 1;
- }
init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
@@ -2785,13 +2994,24 @@ void setup_all_buffers(void)
for_all_proc_cpus(initialize_counters);
}
+void set_base_cpu(void)
+{
+ base_cpu = sched_getcpu();
+ if (base_cpu < 0)
+ err(-ENODEV, "No valid cpus found");
+
+ if (debug > 1)
+ fprintf(stderr, "base_cpu = %d\n", base_cpu);
+}
+
void turbostat_init()
{
+ setup_all_buffers();
+ set_base_cpu();
check_dev_msr();
check_permissions();
process_cpuid();
- setup_all_buffers();
if (debug)
for_all_cpus(print_epb, ODD_COUNTERS);
@@ -2870,7 +3090,7 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(stderr, "turbostat version 4.5 2 Apr, 2015"
+ fprintf(stderr, "turbostat version 4.7 17-June, 2015"
" - Len Brown <lenb@kernel.org>\n");
}
@@ -2898,7 +3118,7 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "C:c:Ddhi:JM:m:PpST:v",
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v",
long_options, &option_index)) != -1) {
switch (opt) {
case 'C':
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
new file mode 100644
index 0000000..f56914c
--- /dev/null
+++ b/tools/testing/nvdimm/Kbuild
@@ -0,0 +1,43 @@
+ldflags-y += --wrap=ioremap_wt
+ldflags-y += --wrap=ioremap_wc
+ldflags-y += --wrap=devm_ioremap_nocache
+ldflags-y += --wrap=ioremap_cache
+ldflags-y += --wrap=ioremap_nocache
+ldflags-y += --wrap=iounmap
+ldflags-y += --wrap=__request_region
+ldflags-y += --wrap=__release_region
+
+DRIVERS := ../../../drivers
+NVDIMM_SRC := $(DRIVERS)/nvdimm
+ACPI_SRC := $(DRIVERS)/acpi
+
+obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
+obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
+obj-$(CONFIG_ND_BTT) += nd_btt.o
+obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_ACPI_NFIT) += nfit.o
+
+nfit-y := $(ACPI_SRC)/nfit.o
+nfit-y += config_check.o
+
+nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += config_check.o
+
+nd_btt-y := $(NVDIMM_SRC)/btt.o
+nd_btt-y += config_check.o
+
+nd_blk-y := $(NVDIMM_SRC)/blk.o
+nd_blk-y += config_check.o
+
+libnvdimm-y := $(NVDIMM_SRC)/core.o
+libnvdimm-y += $(NVDIMM_SRC)/bus.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm.o
+libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/region.o
+libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/label.o
+libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
+libnvdimm-y += config_check.o
+
+obj-m += test/
diff --git a/tools/testing/nvdimm/Makefile b/tools/testing/nvdimm/Makefile
new file mode 100644
index 0000000..3dfe024
--- /dev/null
+++ b/tools/testing/nvdimm/Makefile
@@ -0,0 +1,7 @@
+KDIR ?= ../../../
+
+default:
+ $(MAKE) -C $(KDIR) M=$$PWD
+
+install: default
+ $(MAKE) -C $(KDIR) M=$$PWD modules_install
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
new file mode 100644
index 0000000..f2c7615
--- /dev/null
+++ b/tools/testing/nvdimm/config_check.c
@@ -0,0 +1,15 @@
+#include <linux/kconfig.h>
+#include <linux/bug.h>
+
+void check(void)
+{
+ /*
+ * These kconfig symbols must be set to "m" for nfit_test to
+ * load and operate.
+ */
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
+}
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild
new file mode 100644
index 0000000..9241064
--- /dev/null
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -0,0 +1,8 @@
+ccflags-y := -I$(src)/../../../../drivers/nvdimm/
+ccflags-y += -I$(src)/../../../../drivers/acpi/
+
+obj-m += nfit_test.o
+obj-m += nfit_test_iomap.o
+
+nfit_test-y := nfit.o
+nfit_test_iomap-y := iomap.o
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
new file mode 100644
index 0000000..64bfaa5
--- /dev/null
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/rculist.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include "nfit_test.h"
+
+static LIST_HEAD(iomap_head);
+
+static struct iomap_ops {
+ nfit_test_lookup_fn nfit_test_lookup;
+ struct list_head list;
+} iomap_ops = {
+ .list = LIST_HEAD_INIT(iomap_ops.list),
+};
+
+void nfit_test_setup(nfit_test_lookup_fn lookup)
+{
+ iomap_ops.nfit_test_lookup = lookup;
+ list_add_rcu(&iomap_ops.list, &iomap_head);
+}
+EXPORT_SYMBOL(nfit_test_setup);
+
+void nfit_test_teardown(void)
+{
+ list_del_rcu(&iomap_ops.list);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(nfit_test_teardown);
+
+static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+{
+ struct iomap_ops *ops;
+
+ ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
+ if (ops)
+ return ops->nfit_test_lookup(resource);
+ return NULL;
+}
+
+void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
+ void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
+{
+ struct nfit_test_resource *nfit_res;
+
+ rcu_read_lock();
+ nfit_res = get_nfit_res(offset);
+ rcu_read_unlock();
+ if (nfit_res)
+ return (void __iomem *) nfit_res->buf + offset
+ - nfit_res->res->start;
+ return fallback_fn(offset, size);
+}
+
+void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
+ resource_size_t offset, unsigned long size)
+{
+ struct nfit_test_resource *nfit_res;
+
+ rcu_read_lock();
+ nfit_res = get_nfit_res(offset);
+ rcu_read_unlock();
+ if (nfit_res)
+ return (void __iomem *) nfit_res->buf + offset
+ - nfit_res->res->start;
+ return devm_ioremap_nocache(dev, offset, size);
+}
+EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
+
+void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap_cache);
+}
+EXPORT_SYMBOL(__wrap_ioremap_cache);
+
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap_nocache);
+}
+EXPORT_SYMBOL(__wrap_ioremap_nocache);
+
+void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap_wt);
+}
+EXPORT_SYMBOL(__wrap_ioremap_wt);
+
+void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap_wc);
+}
+EXPORT_SYMBOL(__wrap_ioremap_wc);
+
+void __wrap_iounmap(volatile void __iomem *addr)
+{
+ struct nfit_test_resource *nfit_res;
+
+ rcu_read_lock();
+ nfit_res = get_nfit_res((unsigned long) addr);
+ rcu_read_unlock();
+ if (nfit_res)
+ return;
+ return iounmap(addr);
+}
+EXPORT_SYMBOL(__wrap_iounmap);
+
+struct resource *__wrap___request_region(struct resource *parent,
+ resource_size_t start, resource_size_t n, const char *name,
+ int flags)
+{
+ struct nfit_test_resource *nfit_res;
+
+ if (parent == &iomem_resource) {
+ rcu_read_lock();
+ nfit_res = get_nfit_res(start);
+ rcu_read_unlock();
+ if (nfit_res) {
+ struct resource *res = nfit_res->res + 1;
+
+ if (start + n > nfit_res->res->start
+ + resource_size(nfit_res->res)) {
+ pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
+ __func__, start, n,
+ nfit_res->res);
+ return NULL;
+ }
+
+ res->start = start;
+ res->end = start + n - 1;
+ res->name = name;
+ res->flags = resource_type(parent);
+ res->flags |= IORESOURCE_BUSY | flags;
+ pr_debug("%s: %pr\n", __func__, res);
+ return res;
+ }
+ }
+ return __request_region(parent, start, n, name, flags);
+}
+EXPORT_SYMBOL(__wrap___request_region);
+
+void __wrap___release_region(struct resource *parent, resource_size_t start,
+ resource_size_t n)
+{
+ struct nfit_test_resource *nfit_res;
+
+ if (parent == &iomem_resource) {
+ rcu_read_lock();
+ nfit_res = get_nfit_res(start);
+ rcu_read_unlock();
+ if (nfit_res) {
+ struct resource *res = nfit_res->res + 1;
+
+ if (start != res->start || resource_size(res) != n)
+ pr_info("%s: start: %llx n: %llx mismatch: %pr\n",
+ __func__, start, n, res);
+ else
+ memset(res, 0, sizeof(*res));
+ return;
+ }
+ }
+ __release_region(parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___release_region);
+
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
new file mode 100644
index 0000000..d0bdae4
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -0,0 +1,1162 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/libnvdimm.h>
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/ndctl.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <nfit.h>
+#include <nd.h>
+#include "nfit_test.h"
+
+/*
+ * Generate an NFIT table to describe the following topology:
+ *
+ * BUS0: Interleaved PMEM regions, and aliasing with BLK regions
+ *
+ * (a) (b) DIMM BLK-REGION
+ * +----------+--------------+----------+---------+
+ * +------+ | blk2.0 | pm0.0 | blk2.1 | pm1.0 | 0 region2
+ * | imc0 +--+- - - - - region0 - - - -+----------+ +
+ * +--+---+ | blk3.0 | pm0.0 | blk3.1 | pm1.0 | 1 region3
+ * | +----------+--------------v----------v v
+ * +--+---+ | |
+ * | cpu0 | region1
+ * +--+---+ | |
+ * | +-------------------------^----------^ ^
+ * +--+---+ | blk4.0 | pm1.0 | 2 region4
+ * | imc1 +--+-------------------------+----------+ +
+ * +------+ | blk5.0 | pm1.0 | 3 region5
+ * +-------------------------+----------+-+-------+
+ *
+ * *) In this layout we have four dimms and two memory controllers in one
+ * socket. Each unique interface (BLK or PMEM) to DPA space
+ * is identified by a region device with a dynamically assigned id.
+ *
+ * *) The first portion of dimm0 and dimm1 are interleaved as REGION0.
+ * A single PMEM namespace "pm0.0" is created using half of the
+ * REGION0 SPA-range. REGION0 spans dimm0 and dimm1. PMEM namespace
+ * allocate from from the bottom of a region. The unallocated
+ * portion of REGION0 aliases with REGION2 and REGION3. That
+ * unallacted capacity is reclaimed as BLK namespaces ("blk2.0" and
+ * "blk3.0") starting at the base of each DIMM to offset (a) in those
+ * DIMMs. "pm0.0", "blk2.0" and "blk3.0" are free-form readable
+ * names that can be assigned to a namespace.
+ *
+ * *) In the last portion of dimm0 and dimm1 we have an interleaved
+ * SPA range, REGION1, that spans those two dimms as well as dimm2
+ * and dimm3. Some of REGION1 allocated to a PMEM namespace named
+ * "pm1.0" the rest is reclaimed in 4 BLK namespaces (for each
+ * dimm in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+ * "blk5.0".
+ *
+ * *) The portion of dimm2 and dimm3 that do not participate in the
+ * REGION1 interleaved SPA range (i.e. the DPA address below offset
+ * (b) are also included in the "blk4.0" and "blk5.0" namespaces.
+ * Note, that BLK namespaces need not be contiguous in DPA-space, and
+ * can consume aliased capacity from multiple interleave sets.
+ *
+ * BUS1: Legacy NVDIMM (single contiguous range)
+ *
+ * region2
+ * +---------------------+
+ * |---------------------|
+ * || pm2.0 ||
+ * |---------------------|
+ * +---------------------+
+ *
+ * *) A NFIT-table may describe a simple system-physical-address range
+ * with no BLK aliasing. This type of region may optionally
+ * reference an NVDIMM.
+ */
+enum {
+ NUM_PM = 2,
+ NUM_DCR = 4,
+ NUM_BDW = NUM_DCR,
+ NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
+ NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
+ DIMM_SIZE = SZ_32M,
+ LABEL_SIZE = SZ_128K,
+ SPA0_SIZE = DIMM_SIZE,
+ SPA1_SIZE = DIMM_SIZE*2,
+ SPA2_SIZE = DIMM_SIZE,
+ BDW_SIZE = 64 << 8,
+ DCR_SIZE = 12,
+ NUM_NFITS = 2, /* permit testing multiple NFITs per system */
+};
+
+struct nfit_test_dcr {
+ __le64 bdw_addr;
+ __le32 bdw_status;
+ __u8 aperature[BDW_SIZE];
+};
+
+#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \
+ (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \
+ | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf))
+
+static u32 handle[NUM_DCR] = {
+ [0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0),
+ [1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1),
+ [2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0),
+ [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1),
+};
+
+struct nfit_test {
+ struct acpi_nfit_desc acpi_desc;
+ struct platform_device pdev;
+ struct list_head resources;
+ void *nfit_buf;
+ dma_addr_t nfit_dma;
+ size_t nfit_size;
+ int num_dcr;
+ int num_pm;
+ void **dimm;
+ dma_addr_t *dimm_dma;
+ void **flush;
+ dma_addr_t *flush_dma;
+ void **label;
+ dma_addr_t *label_dma;
+ void **spa_set;
+ dma_addr_t *spa_set_dma;
+ struct nfit_test_dcr **dcr;
+ dma_addr_t *dcr_dma;
+ int (*alloc)(struct nfit_test *t);
+ void (*setup)(struct nfit_test *t);
+};
+
+static struct nfit_test *to_nfit_test(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+
+ return container_of(pdev, struct nfit_test, pdev);
+}
+
+static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
+ struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+ unsigned int buf_len)
+{
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+ struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ int i, rc;
+
+ if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ /* lookup label space for the given dimm */
+ for (i = 0; i < ARRAY_SIZE(handle); i++)
+ if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+ break;
+ if (i >= ARRAY_SIZE(handle))
+ return -ENXIO;
+
+ switch (cmd) {
+ case ND_CMD_GET_CONFIG_SIZE: {
+ struct nd_cmd_get_config_size *nd_cmd = buf;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+ nd_cmd->status = 0;
+ nd_cmd->config_size = LABEL_SIZE;
+ nd_cmd->max_xfer = SZ_4K;
+ rc = 0;
+ break;
+ }
+ case ND_CMD_GET_CONFIG_DATA: {
+ struct nd_cmd_get_config_data_hdr *nd_cmd = buf;
+ unsigned int len, offset = nd_cmd->in_offset;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+ if (offset >= LABEL_SIZE)
+ return -EINVAL;
+ if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
+ return -EINVAL;
+
+ nd_cmd->status = 0;
+ len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+ memcpy(nd_cmd->out_buf, t->label[i] + offset, len);
+ rc = buf_len - sizeof(*nd_cmd) - len;
+ break;
+ }
+ case ND_CMD_SET_CONFIG_DATA: {
+ struct nd_cmd_set_config_hdr *nd_cmd = buf;
+ unsigned int len, offset = nd_cmd->in_offset;
+ u32 *status;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+ if (offset >= LABEL_SIZE)
+ return -EINVAL;
+ if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
+ return -EINVAL;
+
+ status = buf + nd_cmd->in_length + sizeof(*nd_cmd);
+ *status = 0;
+ len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+ memcpy(t->label[i] + offset, nd_cmd->in_buf, len);
+ rc = buf_len - sizeof(*nd_cmd) - (len + 4);
+ break;
+ }
+ default:
+ return -ENOTTY;
+ }
+
+ return rc;
+}
+
+static DEFINE_SPINLOCK(nfit_test_lock);
+static struct nfit_test *instances[NUM_NFITS];
+
+static void release_nfit_res(void *data)
+{
+ struct nfit_test_resource *nfit_res = data;
+ struct resource *res = nfit_res->res;
+
+ spin_lock(&nfit_test_lock);
+ list_del(&nfit_res->list);
+ spin_unlock(&nfit_test_lock);
+
+ if (is_vmalloc_addr(nfit_res->buf))
+ vfree(nfit_res->buf);
+ else
+ dma_free_coherent(nfit_res->dev, resource_size(res),
+ nfit_res->buf, res->start);
+ kfree(res);
+ kfree(nfit_res);
+}
+
+static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
+ void *buf)
+{
+ struct device *dev = &t->pdev.dev;
+ struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL);
+ struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
+ GFP_KERNEL);
+ int rc;
+
+ if (!res || !buf || !nfit_res)
+ goto err;
+ rc = devm_add_action(dev, release_nfit_res, nfit_res);
+ if (rc)
+ goto err;
+ INIT_LIST_HEAD(&nfit_res->list);
+ memset(buf, 0, size);
+ nfit_res->dev = dev;
+ nfit_res->buf = buf;
+ nfit_res->res = res;
+ res->start = *dma;
+ res->end = *dma + size - 1;
+ res->name = "NFIT";
+ spin_lock(&nfit_test_lock);
+ list_add(&nfit_res->list, &t->resources);
+ spin_unlock(&nfit_test_lock);
+
+ return nfit_res->buf;
+ err:
+ if (buf && !is_vmalloc_addr(buf))
+ dma_free_coherent(dev, size, buf, *dma);
+ else if (buf)
+ vfree(buf);
+ kfree(res);
+ kfree(nfit_res);
+ return NULL;
+}
+
+static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
+{
+ void *buf = vmalloc(size);
+
+ *dma = (unsigned long) buf;
+ return __test_alloc(t, size, dma, buf);
+}
+
+static void *test_alloc_coherent(struct nfit_test *t, size_t size,
+ dma_addr_t *dma)
+{
+ struct device *dev = &t->pdev.dev;
+ void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
+
+ return __test_alloc(t, size, dma, buf);
+}
+
+static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(instances); i++) {
+ struct nfit_test_resource *n, *nfit_res = NULL;
+ struct nfit_test *t = instances[i];
+
+ if (!t)
+ continue;
+ spin_lock(&nfit_test_lock);
+ list_for_each_entry(n, &t->resources, list) {
+ if (addr >= n->res->start && (addr < n->res->start
+ + resource_size(n->res))) {
+ nfit_res = n;
+ break;
+ } else if (addr >= (unsigned long) n->buf
+ && (addr < (unsigned long) n->buf
+ + resource_size(n->res))) {
+ nfit_res = n;
+ break;
+ }
+ }
+ spin_unlock(&nfit_test_lock);
+ if (nfit_res)
+ return nfit_res;
+ }
+
+ return NULL;
+}
+
+static int nfit_test0_alloc(struct nfit_test *t)
+{
+ size_t nfit_size = sizeof(struct acpi_table_nfit)
+ + sizeof(struct acpi_nfit_system_address) * NUM_SPA
+ + sizeof(struct acpi_nfit_memory_map) * NUM_MEM
+ + sizeof(struct acpi_nfit_control_region) * NUM_DCR
+ + sizeof(struct acpi_nfit_data_region) * NUM_BDW
+ + sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
+ int i;
+
+ t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+ if (!t->nfit_buf)
+ return -ENOMEM;
+ t->nfit_size = nfit_size;
+
+ t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]);
+ if (!t->spa_set[0])
+ return -ENOMEM;
+
+ t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]);
+ if (!t->spa_set[1])
+ return -ENOMEM;
+
+ for (i = 0; i < NUM_DCR; i++) {
+ t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]);
+ if (!t->dimm[i])
+ return -ENOMEM;
+
+ t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
+ if (!t->label[i])
+ return -ENOMEM;
+ sprintf(t->label[i], "label%d", i);
+
+ t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]);
+ if (!t->flush[i])
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < NUM_DCR; i++) {
+ t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]);
+ if (!t->dcr[i])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int nfit_test1_alloc(struct nfit_test *t)
+{
+ size_t nfit_size = sizeof(struct acpi_table_nfit)
+ + sizeof(struct acpi_nfit_system_address)
+ + sizeof(struct acpi_nfit_memory_map)
+ + sizeof(struct acpi_nfit_control_region);
+
+ t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+ if (!t->nfit_buf)
+ return -ENOMEM;
+ t->nfit_size = nfit_size;
+
+ t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]);
+ if (!t->spa_set[0])
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void nfit_test_init_header(struct acpi_table_nfit *nfit, size_t size)
+{
+ memcpy(nfit->header.signature, ACPI_SIG_NFIT, 4);
+ nfit->header.length = size;
+ nfit->header.revision = 1;
+ memcpy(nfit->header.oem_id, "LIBND", 6);
+ memcpy(nfit->header.oem_table_id, "TEST", 5);
+ nfit->header.oem_revision = 1;
+ memcpy(nfit->header.asl_compiler_id, "TST", 4);
+ nfit->header.asl_compiler_revision = 1;
+}
+
+static void nfit_test0_setup(struct nfit_test *t)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ struct acpi_nfit_desc *acpi_desc;
+ struct acpi_nfit_memory_map *memdev;
+ void *nfit_buf = t->nfit_buf;
+ size_t size = t->nfit_size;
+ struct acpi_nfit_system_address *spa;
+ struct acpi_nfit_control_region *dcr;
+ struct acpi_nfit_data_region *bdw;
+ struct acpi_nfit_flush_address *flush;
+ unsigned int offset;
+
+ nfit_test_init_header(nfit_buf, size);
+
+ /*
+ * spa0 (interleave first half of dimm0 and dimm1, note storage
+ * does not actually alias the related block-data-window
+ * regions)
+ */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit);
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 0+1;
+ spa->address = t->spa_set_dma[0];
+ spa->length = SPA0_SIZE;
+
+ /*
+ * spa1 (interleave last half of the 4 DIMMS, note storage
+ * does not actually alias the related block-data-window
+ * regions)
+ */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa);
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 1+1;
+ spa->address = t->spa_set_dma[1];
+ spa->length = SPA1_SIZE;
+
+ /* spa2 (dcr0) dimm0 */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 2;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 2+1;
+ spa->address = t->dcr_dma[0];
+ spa->length = DCR_SIZE;
+
+ /* spa3 (dcr1) dimm1 */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 3;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 3+1;
+ spa->address = t->dcr_dma[1];
+ spa->length = DCR_SIZE;
+
+ /* spa4 (dcr2) dimm2 */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 4;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 4+1;
+ spa->address = t->dcr_dma[2];
+ spa->length = DCR_SIZE;
+
+ /* spa5 (dcr3) dimm3 */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 5;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 5+1;
+ spa->address = t->dcr_dma[3];
+ spa->length = DCR_SIZE;
+
+ /* spa6 (bdw for dcr0) dimm0 */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 6;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 6+1;
+ spa->address = t->dimm_dma[0];
+ spa->length = DIMM_SIZE;
+
+ /* spa7 (bdw for dcr1) dimm1 */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 7;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 7+1;
+ spa->address = t->dimm_dma[1];
+ spa->length = DIMM_SIZE;
+
+ /* spa8 (bdw for dcr2) dimm2 */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 8;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 8+1;
+ spa->address = t->dimm_dma[2];
+ spa->length = DIMM_SIZE;
+
+ /* spa9 (bdw for dcr3) dimm3 */
+ spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 9;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 9+1;
+ spa->address = t->dimm_dma[3];
+ spa->length = DIMM_SIZE;
+
+ offset = sizeof(struct acpi_table_nfit) + sizeof(*spa) * 10;
+ /* mem-region0 (spa0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = SPA0_SIZE/2;
+ memdev->region_offset = t->spa_set_dma[0];
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 2;
+
+ /* mem-region1 (spa0, dimm1) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map);
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 1+1;
+ memdev->region_size = SPA0_SIZE/2;
+ memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 2;
+
+ /* mem-region2 (spa1, dimm0) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 1;
+ memdev->range_index = 1+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = t->spa_set_dma[1];
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+
+ /* mem-region3 (spa1, dimm1) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 1;
+ memdev->range_index = 1+1;
+ memdev->region_index = 1+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4;
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+
+ /* mem-region4 (spa1, dimm2) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 1+1;
+ memdev->region_index = 2+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4;
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+
+ /* mem-region5 (spa1, dimm3) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 1+1;
+ memdev->region_index = 3+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4;
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+
+ /* mem-region6 (spa/dcr0, dimm0) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 2+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+
+ /* mem-region7 (spa/dcr1, dimm1) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 3+1;
+ memdev->region_index = 1+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+
+ /* mem-region8 (spa/dcr2, dimm2) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 4+1;
+ memdev->region_index = 2+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+
+ /* mem-region9 (spa/dcr3, dimm3) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 5+1;
+ memdev->region_index = 3+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+
+ /* mem-region10 (spa/bdw0, dimm0) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 6+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+
+ /* mem-region11 (spa/bdw1, dimm1) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 7+1;
+ memdev->region_index = 1+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+
+ /* mem-region12 (spa/bdw2, dimm2) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 8+1;
+ memdev->region_index = 2+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+
+ /* mem-region13 (spa/dcr3, dimm3) */
+ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 9+1;
+ memdev->region_index = 3+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+
+ offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
+ /* dcr-descriptor0 */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->region_index = 0+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[0];
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+
+ /* dcr-descriptor1 */
+ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region);
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->region_index = 1+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[1];
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+
+ /* dcr-descriptor2 */
+ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->region_index = 2+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[2];
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+
+ /* dcr-descriptor3 */
+ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->region_index = 3+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[3];
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+
+ offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
+ /* bdw0 (spa/dcr0, dimm0) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->region_index = 0+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+
+ /* bdw1 (spa/dcr1, dimm1) */
+ bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region);
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->region_index = 1+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+
+ /* bdw2 (spa/dcr2, dimm2) */
+ bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->region_index = 2+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+
+ /* bdw3 (spa/dcr3, dimm3) */
+ bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->region_index = 3+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+
+ offset = offset + sizeof(struct acpi_nfit_data_region) * 4;
+ /* flush0 (dimm0) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->device_handle = handle[0];
+ flush->hint_count = 1;
+ flush->hint_address[0] = t->flush_dma[0];
+
+ /* flush1 (dimm1) */
+ flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->device_handle = handle[1];
+ flush->hint_count = 1;
+ flush->hint_address[0] = t->flush_dma[1];
+
+ /* flush2 (dimm2) */
+ flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->device_handle = handle[2];
+ flush->hint_count = 1;
+ flush->hint_address[0] = t->flush_dma[2];
+
+ /* flush3 (dimm3) */
+ flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = sizeof(struct acpi_nfit_flush_address);
+ flush->device_handle = handle[3];
+ flush->hint_count = 1;
+ flush->hint_address[0] = t->flush_dma[3];
+
+ acpi_desc = &t->acpi_desc;
+ set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en);
+ set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
+ set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
+ nd_desc = &acpi_desc->nd_desc;
+ nd_desc->ndctl = nfit_test_ctl;
+}
+
+static void nfit_test1_setup(struct nfit_test *t)
+{
+ size_t size = t->nfit_size, offset;
+ void *nfit_buf = t->nfit_buf;
+ struct acpi_nfit_memory_map *memdev;
+ struct acpi_nfit_control_region *dcr;
+ struct acpi_nfit_system_address *spa;
+
+ nfit_test_init_header(nfit_buf, size);
+
+ offset = sizeof(struct acpi_table_nfit);
+ /* spa0 (flat range with no bdw aliasing) */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 0+1;
+ spa->address = t->spa_set_dma[0];
+ spa->length = SPA2_SIZE;
+
+ offset += sizeof(*spa);
+ /* mem-region0 (spa0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = 0;
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = SPA2_SIZE;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
+ | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
+ | ACPI_NFIT_MEM_ARMED;
+
+ offset += sizeof(*memdev);
+ /* dcr-descriptor0 */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->region_index = 0+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~0;
+ dcr->code = 0x201;
+ dcr->windows = 0;
+ dcr->window_size = 0;
+ dcr->command_offset = 0;
+ dcr->command_size = 0;
+ dcr->status_offset = 0;
+ dcr->status_size = 0;
+}
+
+static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
+ void *iobuf, u64 len, int rw)
+{
+ struct nfit_blk *nfit_blk = ndbr->blk_provider_data;
+ struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+ struct nd_region *nd_region = &ndbr->nd_region;
+ unsigned int lane;
+
+ lane = nd_region_acquire_lane(nd_region);
+ if (rw)
+ memcpy(mmio->base + dpa, iobuf, len);
+ else
+ memcpy(iobuf, mmio->base + dpa, len);
+ nd_region_release_lane(nd_region, lane);
+
+ return 0;
+}
+
+static int nfit_test_probe(struct platform_device *pdev)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ struct acpi_nfit_desc *acpi_desc;
+ struct device *dev = &pdev->dev;
+ struct nfit_test *nfit_test;
+ int rc;
+
+ nfit_test = to_nfit_test(&pdev->dev);
+
+ /* common alloc */
+ if (nfit_test->num_dcr) {
+ int num = nfit_test->num_dcr;
+
+ nfit_test->dimm = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ nfit_test->label = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->label_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ nfit_test->dcr = devm_kcalloc(dev, num,
+ sizeof(struct nfit_test_dcr *), GFP_KERNEL);
+ nfit_test->dcr_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
+ && nfit_test->label_dma && nfit_test->dcr
+ && nfit_test->dcr_dma && nfit_test->flush
+ && nfit_test->flush_dma)
+ /* pass */;
+ else
+ return -ENOMEM;
+ }
+
+ if (nfit_test->num_pm) {
+ int num = nfit_test->num_pm;
+
+ nfit_test->spa_set = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->spa_set_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ if (nfit_test->spa_set && nfit_test->spa_set_dma)
+ /* pass */;
+ else
+ return -ENOMEM;
+ }
+
+ /* per-nfit specific alloc */
+ if (nfit_test->alloc(nfit_test))
+ return -ENOMEM;
+
+ nfit_test->setup(nfit_test);
+ acpi_desc = &nfit_test->acpi_desc;
+ acpi_desc->dev = &pdev->dev;
+ acpi_desc->nfit = nfit_test->nfit_buf;
+ acpi_desc->blk_do_io = nfit_test_blk_do_io;
+ nd_desc = &acpi_desc->nd_desc;
+ nd_desc->attr_groups = acpi_nfit_attribute_groups;
+ acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
+ if (!acpi_desc->nvdimm_bus)
+ return -ENXIO;
+
+ rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
+ if (rc) {
+ nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int nfit_test_remove(struct platform_device *pdev)
+{
+ struct nfit_test *nfit_test = to_nfit_test(&pdev->dev);
+ struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc;
+
+ nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+
+ return 0;
+}
+
+static void nfit_test_release(struct device *dev)
+{
+ struct nfit_test *nfit_test = to_nfit_test(dev);
+
+ kfree(nfit_test);
+}
+
+static const struct platform_device_id nfit_test_id[] = {
+ { KBUILD_MODNAME },
+ { },
+};
+
+static struct platform_driver nfit_test_driver = {
+ .probe = nfit_test_probe,
+ .remove = nfit_test_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ },
+ .id_table = nfit_test_id,
+};
+
+#ifdef CONFIG_CMA_SIZE_MBYTES
+#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
+#else
+#define CMA_SIZE_MBYTES 0
+#endif
+
+static __init int nfit_test_init(void)
+{
+ int rc, i;
+
+ nfit_test_setup(nfit_test_lookup);
+
+ for (i = 0; i < NUM_NFITS; i++) {
+ struct nfit_test *nfit_test;
+ struct platform_device *pdev;
+ static int once;
+
+ nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
+ if (!nfit_test) {
+ rc = -ENOMEM;
+ goto err_register;
+ }
+ INIT_LIST_HEAD(&nfit_test->resources);
+ switch (i) {
+ case 0:
+ nfit_test->num_pm = NUM_PM;
+ nfit_test->num_dcr = NUM_DCR;
+ nfit_test->alloc = nfit_test0_alloc;
+ nfit_test->setup = nfit_test0_setup;
+ break;
+ case 1:
+ nfit_test->num_pm = 1;
+ nfit_test->alloc = nfit_test1_alloc;
+ nfit_test->setup = nfit_test1_setup;
+ break;
+ default:
+ rc = -EINVAL;
+ goto err_register;
+ }
+ pdev = &nfit_test->pdev;
+ pdev->name = KBUILD_MODNAME;
+ pdev->id = i;
+ pdev->dev.release = nfit_test_release;
+ rc = platform_device_register(pdev);
+ if (rc) {
+ put_device(&pdev->dev);
+ goto err_register;
+ }
+
+ rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (rc)
+ goto err_register;
+
+ instances[i] = nfit_test;
+
+ if (!once++) {
+ dma_addr_t dma;
+ void *buf;
+
+ buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma,
+ GFP_KERNEL);
+ if (!buf) {
+ rc = -ENOMEM;
+ dev_warn(&pdev->dev, "need 128M of free cma\n");
+ goto err_register;
+ }
+ dma_free_coherent(&pdev->dev, SZ_128M, buf, dma);
+ }
+ }
+
+ rc = platform_driver_register(&nfit_test_driver);
+ if (rc)
+ goto err_register;
+ return 0;
+
+ err_register:
+ for (i = 0; i < NUM_NFITS; i++)
+ if (instances[i])
+ platform_device_unregister(&instances[i]->pdev);
+ nfit_test_teardown();
+ return rc;
+}
+
+static __exit void nfit_test_exit(void)
+{
+ int i;
+
+ platform_driver_unregister(&nfit_test_driver);
+ for (i = 0; i < NUM_NFITS; i++)
+ platform_device_unregister(&instances[i]->pdev);
+ nfit_test_teardown();
+}
+
+module_init(nfit_test_init);
+module_exit(nfit_test_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
new file mode 100644
index 0000000..96c5e16
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_TEST_H__
+#define __NFIT_TEST_H__
+
+struct nfit_test_resource {
+ struct list_head list;
+ struct resource *res;
+ struct device *dev;
+ void *buf;
+};
+
+typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
+ unsigned long size);
+void __wrap_iounmap(volatile void __iomem *addr);
+void nfit_test_setup(nfit_test_lookup_fn lookup);
+void nfit_test_teardown(void);
+#endif
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 95abddc..b8f12e0 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -4,6 +4,7 @@ TARGETS += efivarfs
TARGETS += exec
TARGETS += firmware
TARGETS += ftrace
+TARGETS += futex
TARGETS += kcmp
TARGETS += memfd
TARGETS += memory-hotplug
@@ -12,13 +13,19 @@ TARGETS += mqueue
TARGETS += net
TARGETS += powerpc
TARGETS += ptrace
+TARGETS += seccomp
TARGETS += size
TARGETS += sysctl
+ifneq (1, $(quicktest))
TARGETS += timers
+endif
TARGETS += user
+TARGETS += jumplabel
TARGETS += vm
TARGETS += x86
#Please keep the TARGETS list alphabetically sorted
+# Run "make quicktest=1 run_tests" or
+# "make quicktest=1 kselftest from top level Makefile
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
@@ -27,7 +34,7 @@ TARGETS_HOTPLUG += memory-hotplug
# Makefile to avoid test build failures when test
# Makefile doesn't have explicit build rules.
ifeq (1,$(MAKELEVEL))
-undefine LDFLAGS
+override LDFLAGS =
override MAKEFLAGS =
endif
diff --git a/tools/testing/selftests/capabilities/.gitignore b/tools/testing/selftests/capabilities/.gitignore
new file mode 100644
index 0000000..b732dd0
--- /dev/null
+++ b/tools/testing/selftests/capabilities/.gitignore
@@ -0,0 +1,2 @@
+test_execve
+validate_cap
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
new file mode 100644
index 0000000..8c8f0c1
--- /dev/null
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -0,0 +1,18 @@
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+TARGETS := validate_cap test_execve
+TEST_PROGS := test_execve
+
+CFLAGS := -O2 -g -std=gnu99 -Wall -lcap-ng
+
+all: $(TARGETS)
+
+clean:
+ $(RM) $(TARGETS)
+
+$(TARGETS): %: %.c
+ $(CC) -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
new file mode 100644
index 0000000..10a21a9
--- /dev/null
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -0,0 +1,427 @@
+#define _GNU_SOURCE
+
+#include <cap-ng.h>
+#include <err.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <limits.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
+
+static int nerrs;
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+ char buf[4096];
+ int fd;
+ ssize_t written;
+ int buf_len;
+
+ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (buf_len < 0) {
+ err(1, "vsnprintf failed");
+ }
+ if (buf_len >= sizeof(buf)) {
+ errx(1, "vsnprintf output truncated");
+ }
+
+ fd = open(filename, O_WRONLY);
+ if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
+ err(1, "open of %s failed", filename);
+ }
+ written = write(fd, buf, buf_len);
+ if (written != buf_len) {
+ if (written >= 0) {
+ errx(1, "short write to %s", filename);
+ } else {
+ err(1, "write to %s failed", filename);
+ }
+ }
+ if (close(fd) != 0) {
+ err(1, "close of %s failed", filename);
+ }
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+}
+
+static bool create_and_enter_ns(uid_t inner_uid)
+{
+ uid_t outer_uid;
+ gid_t outer_gid;
+ int i;
+ bool have_outer_privilege;
+
+ outer_uid = getuid();
+ outer_gid = getgid();
+
+ /*
+ * TODO: If we're already root, we could skip creating the userns.
+ */
+
+ if (unshare(CLONE_NEWNS) == 0) {
+ printf("[NOTE]\tUsing global UIDs for tests\n");
+ if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
+ err(1, "PR_SET_KEEPCAPS");
+ if (setresuid(inner_uid, inner_uid, -1) != 0)
+ err(1, "setresuid");
+
+ // Re-enable effective caps
+ capng_get_caps_process();
+ for (i = 0; i < CAP_LAST_CAP; i++)
+ if (capng_have_capability(CAPNG_PERMITTED, i))
+ capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+
+ have_outer_privilege = true;
+ } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) {
+ printf("[NOTE]\tUsing a user namespace for tests\n");
+ maybe_write_file("/proc/self/setgroups", "deny");
+ write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid);
+ write_file("/proc/self/gid_map", "0 %d 1", outer_gid);
+
+ have_outer_privilege = false;
+ } else {
+ errx(1, "must be root or be able to create a userns");
+ }
+
+ if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
+ err(1, "remount everything private");
+
+ return have_outer_privilege;
+}
+
+static void chdir_to_tmpfs(void)
+{
+ char cwd[PATH_MAX];
+ if (getcwd(cwd, sizeof(cwd)) != cwd)
+ err(1, "getcwd");
+
+ if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0)
+ err(1, "mount private tmpfs");
+
+ if (chdir(cwd) != 0)
+ err(1, "chdir to private tmpfs");
+
+ if (umount2(".", MNT_DETACH) != 0)
+ err(1, "detach private tmpfs");
+}
+
+static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
+{
+ int from = openat(fromfd, fromname, O_RDONLY);
+ if (from == -1)
+ err(1, "open copy source");
+
+ int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700);
+
+ while (true) {
+ char buf[4096];
+ ssize_t sz = read(from, buf, sizeof(buf));
+ if (sz == 0)
+ break;
+ if (sz < 0)
+ err(1, "read");
+
+ if (write(to, buf, sz) != sz)
+ err(1, "write"); /* no short writes on tmpfs */
+ }
+
+ close(from);
+ close(to);
+}
+
+static bool fork_wait(void)
+{
+ pid_t child = fork();
+ if (child == 0) {
+ nerrs = 0;
+ return true;
+ } else if (child > 0) {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+
+ return false;
+ } else {
+ err(1, "fork");
+ }
+}
+
+static void exec_other_validate_cap(const char *name,
+ bool eff, bool perm, bool inh, bool ambient)
+{
+ execl(name, name, (eff ? "1" : "0"),
+ (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"),
+ NULL);
+ err(1, "execl");
+}
+
+static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
+{
+ exec_other_validate_cap("./validate_cap", eff, perm, inh, ambient);
+}
+
+static int do_tests(int uid, const char *our_path)
+{
+ bool have_outer_privilege = create_and_enter_ns(uid);
+
+ int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
+ if (ourpath_fd == -1)
+ err(1, "open '%s'", our_path);
+
+ chdir_to_tmpfs();
+
+ copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap");
+
+ if (have_outer_privilege) {
+ uid_t gid = getegid();
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_suidroot");
+ if (chown("validate_cap_suidroot", 0, -1) != 0)
+ err(1, "chown");
+ if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0)
+ err(1, "chmod");
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_suidnonroot");
+ if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0)
+ err(1, "chown");
+ if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0)
+ err(1, "chmod");
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_sgidroot");
+ if (chown("validate_cap_sgidroot", -1, 0) != 0)
+ err(1, "chown");
+ if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0)
+ err(1, "chmod");
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_sgidnonroot");
+ if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0)
+ err(1, "chown");
+ if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0)
+ err(1, "chmod");
+}
+
+ capng_get_caps_process();
+
+ /* Make sure that i starts out clear */
+ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+
+ if (uid == 0) {
+ printf("[RUN]\tRoot => ep\n");
+ if (fork_wait())
+ exec_validate_cap(true, true, false, false);
+ } else {
+ printf("[RUN]\tNon-root => no caps\n");
+ if (fork_wait())
+ exec_validate_cap(false, false, false, false);
+ }
+
+ printf("[OK]\tCheck cap_ambient manipulation rules\n");
+
+ /* We should not be able to add ambient caps yet. */
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) {
+ if (errno == EINVAL)
+ printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n");
+ else
+ printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n");
+ return 1;
+ }
+ printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW);
+ capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW);
+ capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) {
+ printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
+ return 1;
+ }
+ printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n");
+ return 1;
+ }
+ printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n");
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) {
+ printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n");
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0)
+ err(1, "PR_CAP_AMBIENT_CLEAR_ALL");
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+ err(1, "PR_CAP_AMBIENT_RAISE");
+
+ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ printf("[FAIL]\tDropping I should have dropped A\n");
+ return 1;
+ }
+
+ printf("[OK]\tBasic manipulation appears to work\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ err(1, "capng_apply");
+ if (uid == 0) {
+ printf("[RUN]\tRoot +i => eip\n");
+ if (fork_wait())
+ exec_validate_cap(true, true, true, false);
+ } else {
+ printf("[RUN]\tNon-root +i => i\n");
+ if (fork_wait())
+ exec_validate_cap(false, false, true, false);
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+ err(1, "PR_CAP_AMBIENT_RAISE");
+
+ printf("[RUN]\tUID %d +ia => eipa\n", uid);
+ if (fork_wait())
+ exec_validate_cap(true, true, true, true);
+
+ /* The remaining tests need real privilege */
+
+ if (!have_outer_privilege) {
+ printf("[SKIP]\tSUID/SGID tests (needs privilege)\n");
+ goto done;
+ }
+
+ if (uid == 0) {
+ printf("[RUN]\tRoot +ia, suidroot => eipa\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_suidroot",
+ true, true, true, true);
+
+ printf("[RUN]\tRoot +ia, suidnonroot => ip\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_suidnonroot",
+ false, true, true, false);
+
+ printf("[RUN]\tRoot +ia, sgidroot => eipa\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ true, true, true, true);
+
+ if (fork_wait()) {
+ printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
+ if (setresgid(1, 1, 1) != 0)
+ err(1, "setresgid");
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ true, true, true, false);
+ }
+
+ printf("[RUN]\tRoot +ia, sgidnonroot => eip\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidnonroot",
+ true, true, true, false);
+ } else {
+ printf("[RUN]\tNon-root +ia, sgidnonroot => i\n");
+ exec_other_validate_cap("./validate_cap_sgidnonroot",
+ false, false, true, false);
+
+ if (fork_wait()) {
+ printf("[RUN]\tNon-root +ia, sgidroot => i\n");
+ if (setresgid(1, 1, 1) != 0)
+ err(1, "setresgid");
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ false, false, true, false);
+ }
+ }
+
+done:
+ return nerrs ? 1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ char *tmp1, *tmp2, *our_path;
+
+ /* Find our path */
+ tmp1 = strdup(argv[0]);
+ if (!tmp1)
+ err(1, "strdup");
+ tmp2 = dirname(tmp1);
+ our_path = strdup(tmp2);
+ if (!our_path)
+ err(1, "strdup");
+ free(tmp1);
+
+ if (fork_wait()) {
+ printf("[RUN]\t+++ Tests with uid == 0 +++\n");
+ return do_tests(0, our_path);
+ }
+
+ if (fork_wait()) {
+ printf("[RUN]\t+++ Tests with uid != 0 +++\n");
+ return do_tests(1, our_path);
+ }
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
new file mode 100644
index 0000000..dd3c45f
--- /dev/null
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -0,0 +1,73 @@
+#include <cap-ng.h>
+#include <err.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/auxv.h>
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
+
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
+# define HAVE_GETAUXVAL
+#endif
+
+static bool bool_arg(char **argv, int i)
+{
+ if (!strcmp(argv[i], "0"))
+ return false;
+ else if (!strcmp(argv[i], "1"))
+ return true;
+ else
+ errx(1, "wrong argv[%d]", i);
+}
+
+int main(int argc, char **argv)
+{
+ const char *atsec = "";
+
+ /*
+ * Be careful just in case a setgid or setcapped copy of this
+ * helper gets out.
+ */
+
+ if (argc != 5)
+ errx(1, "wrong argc");
+
+#ifdef HAVE_GETAUXVAL
+ if (getauxval(AT_SECURE))
+ atsec = " (AT_SECURE is set)";
+ else
+ atsec = " (AT_SECURE is not set)";
+#endif
+
+ capng_get_caps_process();
+
+ if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
+ printf("[FAIL]\tWrong effective state%s\n", atsec);
+ return 1;
+ }
+ if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) {
+ printf("[FAIL]\tWrong permitted state%s\n", atsec);
+ return 1;
+ }
+ if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) {
+ printf("[FAIL]\tWrong inheritable state%s\n", atsec);
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) {
+ printf("[FAIL]\tWrong ambient state%s\n", atsec);
+ return 1;
+ }
+
+ printf("[OK]\tCapabilities after execve were correct\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 4edb7d0..6b76bfd 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,6 +1,6 @@
CFLAGS = -Wall
BINARIES = execveat
-DEPS = execveat.symlink execveat.denatured script subdir
+DEPS = execveat.symlink execveat.denatured script
all: $(BINARIES) $(DEPS)
subdir:
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 3fc6c10..c4366dc 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -9,7 +9,15 @@ modprobe test_firmware
DIR=/sys/devices/virtual/misc/test_firmware
-OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/
+# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that
+# as an indicator for CONFIG_FW_LOADER_USER_HELPER.
+HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi)
+
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+fi
+
OLD_FWPATH=$(cat /sys/module/firmware_class/parameters/path)
FWPATH=$(mktemp -d)
@@ -17,7 +25,9 @@ FW="$FWPATH/test-firmware.bin"
test_finish()
{
- echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+ fi
echo -n "$OLD_PATH" >/sys/module/firmware_class/parameters/path
rm -f "$FW"
rmdir "$FWPATH"
@@ -25,8 +35,11 @@ test_finish()
trap "test_finish" EXIT
-# Turn down the timeout so failures don't take so long.
-echo 1 >/sys/class/firmware/timeout
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ # Turn down the timeout so failures don't take so long.
+ echo 1 >/sys/class/firmware/timeout
+fi
+
# Set the kernel search path.
echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
@@ -41,7 +54,9 @@ if diff -q "$FW" /dev/test_firmware >/dev/null ; then
echo "$0: firmware was not expected to match" >&2
exit 1
else
- echo "$0: timeout works"
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "$0: timeout works"
+ fi
fi
# This should succeed via kernel load or will fail after 1 second after
diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_userhelper.sh
index 6efbade..b9983f8 100755
--- a/tools/testing/selftests/firmware/fw_userhelper.sh
+++ b/tools/testing/selftests/firmware/fw_userhelper.sh
@@ -9,7 +9,17 @@ modprobe test_firmware
DIR=/sys/devices/virtual/misc/test_firmware
-OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/
+# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that
+# as an indicator for CONFIG_FW_LOADER_USER_HELPER.
+HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi)
+
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+else
+ echo "usermode helper disabled so ignoring test"
+ exit 0
+fi
FWPATH=$(mktemp -d)
FW="$FWPATH/test-firmware.bin"
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index 3467206..0acbeca 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -1,6 +1,7 @@
all:
TEST_PROGS := ftracetest
+TEST_DIRS := test.d/
include ../lib.mk
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
new file mode 100644
index 0000000..6a17529
--- /dev/null
+++ b/tools/testing/selftests/futex/Makefile
@@ -0,0 +1,29 @@
+SUBDIRS := functional
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+all:
+ for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
+
+include ../lib.mk
+
+override define RUN_TESTS
+ ./run.sh
+endef
+
+override define INSTALL_RULE
+ mkdir -p $(INSTALL_PATH)
+ install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+ @for SUBDIR in $(SUBDIRS); do \
+ $(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+ done;
+endef
+
+override define EMIT_TESTS
+ echo "./run.sh"
+endef
+
+clean:
+ for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README
new file mode 100644
index 0000000..3224a04
--- /dev/null
+++ b/tools/testing/selftests/futex/README
@@ -0,0 +1,62 @@
+Futex Test
+==========
+Futex Test is intended to thoroughly test the Linux kernel futex system call
+API.
+
+Functional tests shall test the documented behavior of the futex operation
+code under test. This includes checking for proper behavior under normal use,
+odd corner cases, regression tests, and abject abuse and misuse.
+
+Futextest will also provide example implementation of mutual exclusion
+primitives. These can be used as is in user applications or can serve as
+examples for system libraries. These will likely be added to either a new lib/
+directory or purely as header files under include/, I'm leaning toward the
+latter.
+
+Quick Start
+-----------
+# make
+# ./run.sh
+
+Design and Implementation Goals
+-------------------------------
+o Tests should be as self contained as is practical so as to facilitate sharing
+ the individual tests on mailing list discussions and bug reports.
+o The build system shall remain as simple as possible, avoiding any archive or
+ shared object building and linking.
+o Where possible, any helper functions or other package-wide code shall be
+ implemented in header files, avoiding the need to compile intermediate object
+ files.
+o External dependendencies shall remain as minimal as possible. Currently gcc
+ and glibc are the only dependencies.
+o Tests return 0 for success and < 0 for failure.
+
+Output Formatting
+-----------------
+Test output shall be easily parsable by both human and machine. Title and
+results are printed to stdout, while intermediate ERROR or FAIL messages are
+sent to stderr. Tests shall support the -c option to print PASS, FAIL, and
+ERROR strings in color for easy visual parsing. Output shall conform to the
+following format:
+
+test_name: Description of the test
+ Arguments: arg1=val1 #units specified for clarity where appropriate
+ ERROR: Description of unexpected error
+ FAIL: Reason for test failure
+ # FIXME: Perhaps an " INFO: informational message" option would be
+ # useful here. Using -v to toggle it them on and off, as with -c.
+ # there may be multiple ERROR or FAIL messages
+Result: (PASS|FAIL|ERROR)
+
+Naming
+------
+o FIXME: decide on a sane test naming scheme. Currently the tests are named
+ based on the primary futex operation they test. Eventually this will become a
+ problem as we intend to write multiple tests which collide in this namespace.
+ Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the
+ detailed description in the test source and the output.
+
+Coding Style
+------------
+o The Futex Test project adheres to the coding standards set forth by Linux
+ kernel as defined in the Linux source Documentation/CodingStyle.
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
new file mode 100644
index 0000000..a09f570
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -0,0 +1,7 @@
+futex_requeue_pi
+futex_requeue_pi_mismatched_ops
+futex_requeue_pi_signal_restart
+futex_wait_private_mapped_file
+futex_wait_timeout
+futex_wait_uninitialized_heap
+futex_wait_wouldblock
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
new file mode 100644
index 0000000..9d6b75e
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -0,0 +1,25 @@
+INCLUDES := -I../include -I../../
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+LDFLAGS := $(LDFLAGS) -pthread -lrt
+
+HEADERS := ../include/futextest.h
+TARGETS := \
+ futex_wait_timeout \
+ futex_wait_wouldblock \
+ futex_requeue_pi \
+ futex_requeue_pi_signal_restart \
+ futex_requeue_pi_mismatched_ops \
+ futex_wait_uninitialized_heap \
+ futex_wait_private_mapped_file
+
+TEST_PROGS := $(TARGETS) run.sh
+
+.PHONY: all clean
+all: $(TARGETS)
+
+$(TARGETS): $(HEADERS)
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TARGETS)
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
new file mode 100644
index 0000000..3da06ad
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -0,0 +1,409 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2006-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * This test excercises the futex syscall op codes needed for requeuing
+ * priority inheritance aware POSIX condition variables and mutexes.
+ *
+ * AUTHORS
+ * Sripathi Kodi <sripathik@in.ibm.com>
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2008-Jan-13: Initial version by Sripathi Kodi <sripathik@in.ibm.com>
+ * 2009-Nov-6: futex test adaptation by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define MAX_WAKE_ITERS 1000
+#define THREAD_MAX 10
+#define SIGNAL_PERIOD_US 100
+
+atomic_t waiters_blocked = ATOMIC_INITIALIZER;
+atomic_t waiters_woken = ATOMIC_INITIALIZER;
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+futex_t wake_complete = FUTEX_INITIALIZER;
+
+/* Test option defaults */
+static long timeout_ns;
+static int broadcast;
+static int owner;
+static int locked;
+
+struct thread_arg {
+ long id;
+ struct timespec *timeout;
+ int lock;
+ int ret;
+};
+#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -b Broadcast wakeup (all waiters)\n");
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -l Lock the pi futex across requeue\n");
+ printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n");
+ printf(" -t N Timeout in nanoseconds (default: 0)\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+ int policy, int prio)
+{
+ int ret;
+ struct sched_param schedp;
+ pthread_attr_t attr;
+
+ pthread_attr_init(&attr);
+ memset(&schedp, 0, sizeof(schedp));
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ if (ret) {
+ error("pthread_attr_setinheritsched\n", ret);
+ return -1;
+ }
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ if (ret) {
+ error("pthread_attr_setschedpolicy\n", ret);
+ return -1;
+ }
+
+ schedp.sched_priority = prio;
+ ret = pthread_attr_setschedparam(&attr, &schedp);
+ if (ret) {
+ error("pthread_attr_setschedparam\n", ret);
+ return -1;
+ }
+
+ ret = pthread_create(pth, &attr, func, arg);
+ if (ret) {
+ error("pthread_create\n", ret);
+ return -1;
+ }
+ return 0;
+}
+
+
+void *waiterfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ futex_t old_val;
+
+ info("Waiter %ld: running\n", args->id);
+ /* Each thread sleeps for a different amount of time
+ * This is to avoid races, because we don't lock the
+ * external mutex here */
+ usleep(1000 * (long)args->id);
+
+ old_val = f1;
+ atomic_inc(&waiters_blocked);
+ info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+ &f1, f1, &f2);
+ args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
+ FUTEX_PRIVATE_FLAG);
+
+ info("waiter %ld woke with %d %s\n", args->id, args->ret,
+ args->ret < 0 ? strerror(errno) : "");
+ atomic_inc(&waiters_woken);
+ if (args->ret < 0) {
+ if (args->timeout && errno == ETIMEDOUT)
+ args->ret = 0;
+ else {
+ args->ret = RET_ERROR;
+ error("futex_wait_requeue_pi\n", errno);
+ }
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+ pthread_exit((void *)&args->ret);
+}
+
+void *broadcast_wakerfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ int nr_requeue = INT_MAX;
+ int task_count = 0;
+ futex_t old_val;
+ int nr_wake = 1;
+ int i = 0;
+
+ info("Waker: waiting for waiters to block\n");
+ while (waiters_blocked.val < THREAD_MAX)
+ usleep(1000);
+ usleep(1000);
+
+ info("Waker: Calling broadcast\n");
+ if (args->lock) {
+ info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ continue_requeue:
+ old_val = f1;
+ args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
+ FUTEX_PRIVATE_FLAG);
+ if (args->ret < 0) {
+ args->ret = RET_ERROR;
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ } else if (++i < MAX_WAKE_ITERS) {
+ task_count += args->ret;
+ if (task_count < THREAD_MAX - waiters_woken.val)
+ goto continue_requeue;
+ } else {
+ error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+ 0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
+ args->ret = RET_ERROR;
+ }
+
+ futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+ if (args->lock)
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ if (args->ret > 0)
+ args->ret = task_count;
+
+ info("Waker: exiting with %d\n", args->ret);
+ pthread_exit((void *)&args->ret);
+}
+
+void *signal_wakerfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ unsigned int old_val;
+ int nr_requeue = 0;
+ int task_count = 0;
+ int nr_wake = 1;
+ int i = 0;
+
+ info("Waker: waiting for waiters to block\n");
+ while (waiters_blocked.val < THREAD_MAX)
+ usleep(1000);
+ usleep(1000);
+
+ while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
+ info("task_count: %d, waiters_woken: %d\n",
+ task_count, waiters_woken.val);
+ if (args->lock) {
+ info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ info("Waker: Calling signal\n");
+ /* cond_signal */
+ old_val = f1;
+ args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
+ nr_wake, nr_requeue,
+ FUTEX_PRIVATE_FLAG);
+ if (args->ret < 0)
+ args->ret = -errno;
+ info("futex: %x\n", f2);
+ if (args->lock) {
+ info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ }
+ info("futex: %x\n", f2);
+ if (args->ret < 0) {
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ args->ret = RET_ERROR;
+ break;
+ }
+
+ task_count += args->ret;
+ usleep(SIGNAL_PERIOD_US);
+ i++;
+ /* we have to loop at least THREAD_MAX times */
+ if (i > MAX_WAKE_ITERS + THREAD_MAX) {
+ error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+ 0, MAX_WAKE_ITERS + THREAD_MAX);
+ args->ret = RET_ERROR;
+ break;
+ }
+ }
+
+ futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+ if (args->ret >= 0)
+ args->ret = task_count;
+
+ info("Waker: exiting with %d\n", args->ret);
+ info("Waker: waiters_woken: %d\n", waiters_woken.val);
+ pthread_exit((void *)&args->ret);
+}
+
+void *third_party_blocker(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ int ret2 = 0;
+
+ args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ if (args->ret)
+ goto out;
+ args->ret = futex_wait(&wake_complete, wake_complete, NULL,
+ FUTEX_PRIVATE_FLAG);
+ ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ out:
+ if (args->ret || ret2) {
+ error("third_party_blocker() futex error", 0);
+ args->ret = RET_ERROR;
+ }
+
+ pthread_exit((void *)&args->ret);
+}
+
+int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+{
+ void *(*wakerfn)(void *) = signal_wakerfn;
+ struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
+ struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
+ pthread_t waiter[THREAD_MAX], waker, blocker;
+ struct timespec ts, *tsp = NULL;
+ struct thread_arg args[THREAD_MAX];
+ int *waiter_ret;
+ int i, ret = RET_PASS;
+
+ if (timeout_ns) {
+ time_t secs;
+
+ info("timeout_ns = %ld\n", timeout_ns);
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+ secs = (ts.tv_nsec + timeout_ns) / 1000000000;
+ ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
+ ts.tv_sec += secs;
+ info("ts.tv_sec = %ld\n", ts.tv_sec);
+ info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+ tsp = &ts;
+ }
+
+ if (broadcast)
+ wakerfn = broadcast_wakerfn;
+
+ if (third_party_owner) {
+ if (create_rt_thread(&blocker, third_party_blocker,
+ (void *)&blocker_arg, SCHED_FIFO, 1)) {
+ error("Creating third party blocker thread failed\n",
+ errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ }
+
+ atomic_set(&waiters_woken, 0);
+ for (i = 0; i < THREAD_MAX; i++) {
+ args[i].id = i;
+ args[i].timeout = tsp;
+ info("Starting thread %d\n", i);
+ if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
+ SCHED_FIFO, 1)) {
+ error("Creating waiting thread failed\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ }
+ waker_arg.lock = lock;
+ if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
+ SCHED_FIFO, 1)) {
+ error("Creating waker thread failed\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ /* Wait for threads to finish */
+ /* Store the first error or failure encountered in waiter_ret */
+ waiter_ret = &args[0].ret;
+ for (i = 0; i < THREAD_MAX; i++)
+ pthread_join(waiter[i],
+ *waiter_ret ? NULL : (void **)&waiter_ret);
+
+ if (third_party_owner)
+ pthread_join(blocker, NULL);
+ pthread_join(waker, NULL);
+
+out:
+ if (!ret) {
+ if (*waiter_ret)
+ ret = *waiter_ret;
+ else if (waker_arg.ret < 0)
+ ret = waker_arg.ret;
+ else if (blocker_arg.ret)
+ ret = blocker_arg.ret;
+ }
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int c, ret;
+
+ while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
+ switch (c) {
+ case 'b':
+ broadcast = 1;
+ break;
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'l':
+ locked = 1;
+ break;
+ case 'o':
+ owner = 1;
+ locked = 0;
+ break;
+ case 't':
+ timeout_ns = atoi(optarg);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Test requeue functionality\n", basename(argv[0]));
+ printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+ broadcast, locked, owner, timeout_ns);
+
+ /*
+ * FIXME: unit_test is obsolete now that we parse options and the
+ * various style of runs are done by run.sh - simplify the code and move
+ * unit_test into main()
+ */
+ ret = unit_test(broadcast, locked, owner, timeout_ns);
+
+ print_result(ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
new file mode 100644
index 0000000..d5e4f2c
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -0,0 +1,135 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * 1. Block a thread using FUTEX_WAIT
+ * 2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1.
+ * 3. The kernel must detect the mismatch and return -EINVAL.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+int child_ret = 0;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *blocking_child(void *arg)
+{
+ child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
+ if (child_ret < 0) {
+ child_ret = -errno;
+ error("futex_wait\n", errno);
+ }
+ return (void *)&child_ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = RET_PASS;
+ pthread_t child;
+ int c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Detect mismatched requeue_pi operations\n",
+ basename(argv[0]));
+
+ if (pthread_create(&child, NULL, blocking_child, NULL)) {
+ error("pthread_create\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ /* Allow the child to block in the kernel. */
+ sleep(1);
+
+ /*
+ * The kernel should detect the waiter did not setup the
+ * q->requeue_pi_key and return -EINVAL. If it does not,
+ * it likely gave the lock to the child, which is now hung
+ * in the kernel.
+ */
+ ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG);
+ if (ret < 0) {
+ if (errno == EINVAL) {
+ /*
+ * The kernel correctly detected the mismatched
+ * requeue_pi target and aborted. Wake the child with
+ * FUTEX_WAKE.
+ */
+ ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
+ if (ret == 1) {
+ ret = RET_PASS;
+ } else if (ret < 0) {
+ error("futex_wake\n", errno);
+ ret = RET_ERROR;
+ } else {
+ error("futex_wake did not wake the child\n", 0);
+ ret = RET_ERROR;
+ }
+ } else {
+ error("futex_cmp_requeue_pi\n", errno);
+ ret = RET_ERROR;
+ }
+ } else if (ret > 0) {
+ fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
+ ret = RET_FAIL;
+ } else {
+ error("futex_cmp_requeue_pi found no waiters\n", 0);
+ ret = RET_ERROR;
+ }
+
+ pthread_join(child, NULL);
+
+ if (!ret)
+ ret = child_ret;
+
+ out:
+ /* If the kernel crashes, we shouldn't return at all. */
+ print_result(ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
new file mode 100644
index 0000000..3d7dc6a
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -0,0 +1,223 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2006-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * This test exercises the futex_wait_requeue_pi() signal handling both
+ * before and after the requeue. The first should be restarted by the
+ * kernel. The latter should return EWOULDBLOCK to the waiter.
+ *
+ * AUTHORS
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2008-May-5: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define DELAY_US 100
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+atomic_t requeued = ATOMIC_INITIALIZER;
+
+int waiter_ret = 0;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+ int policy, int prio)
+{
+ struct sched_param schedp;
+ pthread_attr_t attr;
+ int ret;
+
+ pthread_attr_init(&attr);
+ memset(&schedp, 0, sizeof(schedp));
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ if (ret) {
+ error("pthread_attr_setinheritsched\n", ret);
+ return -1;
+ }
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ if (ret) {
+ error("pthread_attr_setschedpolicy\n", ret);
+ return -1;
+ }
+
+ schedp.sched_priority = prio;
+ ret = pthread_attr_setschedparam(&attr, &schedp);
+ if (ret) {
+ error("pthread_attr_setschedparam\n", ret);
+ return -1;
+ }
+
+ ret = pthread_create(pth, &attr, func, arg);
+ if (ret) {
+ error("pthread_create\n", ret);
+ return -1;
+ }
+ return 0;
+}
+
+void handle_signal(int signo)
+{
+ info("signal received %s requeue\n",
+ requeued.val ? "after" : "prior to");
+}
+
+void *waiterfn(void *arg)
+{
+ unsigned int old_val;
+ int res;
+
+ waiter_ret = RET_PASS;
+
+ info("Waiter running\n");
+ info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ old_val = f1;
+ res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
+ FUTEX_PRIVATE_FLAG);
+ if (!requeued.val || errno != EWOULDBLOCK) {
+ fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+ res, strerror(errno));
+ info("w2:futex: %x\n", f2);
+ if (!res)
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ waiter_ret = RET_FAIL;
+ }
+
+ info("Waiter exiting with %d\n", waiter_ret);
+ pthread_exit(NULL);
+}
+
+
+int main(int argc, char *argv[])
+{
+ unsigned int old_val;
+ struct sigaction sa;
+ pthread_t waiter;
+ int c, res, ret = RET_PASS;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Test signal handling during requeue_pi\n",
+ basename(argv[0]));
+ printf("\tArguments: <none>\n");
+
+ sa.sa_handler = handle_signal;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ if (sigaction(SIGUSR1, &sa, NULL)) {
+ error("sigaction\n", errno);
+ exit(1);
+ }
+
+ info("m1:f2: %x\n", f2);
+ info("Creating waiter\n");
+ res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
+ if (res) {
+ error("Creating waiting thread failed", res);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ info("m2:f2: %x\n", f2);
+ futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
+ info("m3:f2: %x\n", f2);
+
+ while (1) {
+ /*
+ * signal the waiter before requeue, waiter should automatically
+ * restart futex_wait_requeue_pi() in the kernel. Wait for the
+ * waiter to block on f1 again.
+ */
+ info("Issuing SIGUSR1 to waiter\n");
+ pthread_kill(waiter, SIGUSR1);
+ usleep(DELAY_US);
+
+ info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+ old_val = f1;
+ res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
+ FUTEX_PRIVATE_FLAG);
+ /*
+ * If res is non-zero, we either requeued the waiter or hit an
+ * error, break out and handle it. If it is zero, then the
+ * signal may have hit before the the waiter was blocked on f1.
+ * Try again.
+ */
+ if (res > 0) {
+ atomic_set(&requeued, 1);
+ break;
+ } else if (res < 0) {
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ ret = RET_ERROR;
+ break;
+ }
+ }
+ info("m4:f2: %x\n", f2);
+
+ /*
+ * Signal the waiter after requeue, waiter should return from
+ * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the
+ * futex_unlock_pi() can't happen before the signal wakeup is detected
+ * in the kernel.
+ */
+ info("Issuing SIGUSR1 to waiter\n");
+ pthread_kill(waiter, SIGUSR1);
+ info("Waiting for waiter to return\n");
+ pthread_join(waiter, NULL);
+
+ info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ info("m5:f2: %x\n", f2);
+
+ out:
+ if (ret == RET_PASS && waiter_ret)
+ ret = waiter_ret;
+
+ print_result(ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
new file mode 100644
index 0000000..5f687f2
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -0,0 +1,125 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Internally, Futex has two handling mode, anon and file. The private file
+ * mapping is special. At first it behave as file, but after write anything
+ * it behave as anon. This test is intent to test such case.
+ *
+ * AUTHOR
+ * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <libgen.h>
+#include <signal.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define PAGE_SZ 4096
+
+char pad[PAGE_SZ] = {1};
+futex_t val = 1;
+char pad2[PAGE_SZ] = {1};
+
+#define WAKE_WAIT_US 3000000
+struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *thr_futex_wait(void *arg)
+{
+ int ret;
+
+ info("futex wait\n");
+ ret = futex_wait(&val, 1, &wait_timeout, 0);
+ if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
+ error("futex error.\n", errno);
+ print_result(RET_ERROR);
+ exit(RET_ERROR);
+ }
+
+ if (ret && errno == ETIMEDOUT)
+ fail("waiter timedout\n");
+
+ info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+
+ return NULL;
+}
+
+int main(int argc, char **argv)
+{
+ pthread_t thr;
+ int ret = RET_PASS;
+ int res;
+ int c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+ ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pthread_create error\n");
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("wait a while\n");
+ usleep(WAKE_WAIT_US);
+ val = 2;
+ res = futex_wake(&val, 1, 0);
+ info("futex_wake %d\n", res);
+ if (res != 1) {
+ fail("FUTEX_WAKE didn't find the waiting thread.\n");
+ ret = RET_FAIL;
+ }
+
+ info("join\n");
+ pthread_join(thr, NULL);
+
+ out:
+ print_result(ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
new file mode 100644
index 0000000..ab428ca
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -0,0 +1,86 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Block on a futex and wait for timeout.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+static long timeout_ns = 100000; /* 100us default timeout */
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -t N Timeout in nanoseconds (default: 100,000)\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res, ret = RET_PASS;
+ int c;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 't':
+ timeout_ns = atoi(optarg);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Block on a futex and wait for timeout\n",
+ basename(argv[0]));
+ printf("\tArguments: timeout=%ldns\n", timeout_ns);
+
+ /* initialize timeout */
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
+ res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != ETIMEDOUT) {
+ fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
+ ret = RET_FAIL;
+ }
+
+ print_result(ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
new file mode 100644
index 0000000..fe7aee9
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should
+ * return immediately. This test is intent to test zero page handling in
+ * futex.
+ *
+ * AUTHOR
+ * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <libgen.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define WAIT_US 5000000
+
+static int child_blocked = 1;
+static int child_ret;
+void *buf;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *wait_thread(void *arg)
+{
+ int res;
+
+ child_ret = RET_PASS;
+ res = futex_wait(buf, 1, NULL, 0);
+ child_blocked = 0;
+
+ if (res != 0 && errno != EWOULDBLOCK) {
+ error("futex failure\n", errno);
+ child_ret = RET_ERROR;
+ }
+ pthread_exit(NULL);
+}
+
+int main(int argc, char **argv)
+{
+ int c, ret = RET_PASS;
+ long page_size;
+ pthread_t thr;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (buf == (void *)-1) {
+ error("mmap\n", errno);
+ exit(1);
+ }
+
+ printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+
+ ret = pthread_create(&thr, NULL, wait_thread, NULL);
+ if (ret) {
+ error("pthread_create\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("waiting %dus for child to return\n", WAIT_US);
+ usleep(WAIT_US);
+
+ ret = child_ret;
+ if (child_blocked) {
+ fail("child blocked in kernel\n");
+ ret = RET_FAIL;
+ }
+
+ out:
+ print_result(ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
new file mode 100644
index 0000000..b6b0274
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -0,0 +1,79 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs
+ * from the expected one.
+ *
+ * AUTHOR
+ * Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ * HISTORY
+ * 2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define timeout_ns 100000
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+ futex_t f1 = FUTEX_INITIALIZER;
+ int res, ret = RET_PASS;
+ int c;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ printf("%s: Test the unexpected futex value in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+ info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != EWOULDBLOCK) {
+ fail("futex_wait returned: %d %s\n",
+ res ? errno : res, res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+ print_result(ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
new file mode 100755
index 0000000..e87dbe2
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Copyright © International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# DESCRIPTION
+# Run tests in the current directory.
+#
+# AUTHOR
+# Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+# 2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file
+# by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+#
+###############################################################################
+
+# Test for a color capable console
+if [ -z "$USE_COLOR" ]; then
+ tput setf 7
+ if [ $? -eq 0 ]; then
+ USE_COLOR=1
+ tput sgr0
+ fi
+fi
+if [ "$USE_COLOR" -eq 1 ]; then
+ COLOR="-c"
+fi
+
+
+echo
+# requeue pi testing
+# without timeouts
+./futex_requeue_pi $COLOR
+./futex_requeue_pi $COLOR -b
+./futex_requeue_pi $COLOR -b -l
+./futex_requeue_pi $COLOR -b -o
+./futex_requeue_pi $COLOR -l
+./futex_requeue_pi $COLOR -o
+# with timeouts
+./futex_requeue_pi $COLOR -b -l -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -l -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+./futex_requeue_pi $COLOR -b -t 5000
+./futex_requeue_pi $COLOR -t 5000
+./futex_requeue_pi $COLOR -b -t 500000
+./futex_requeue_pi $COLOR -t 500000
+./futex_requeue_pi $COLOR -b -o -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -o -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+# with long timeout
+./futex_requeue_pi $COLOR -b -l -t 2000000000
+./futex_requeue_pi $COLOR -l -t 2000000000
+
+
+echo
+./futex_requeue_pi_mismatched_ops $COLOR
+
+echo
+./futex_requeue_pi_signal_restart $COLOR
+
+echo
+./futex_wait_timeout $COLOR
+
+echo
+./futex_wait_wouldblock $COLOR
+
+echo
+./futex_wait_uninitialized_heap $COLOR
+./futex_wait_private_mapped_file $COLOR
diff --git a/tools/testing/selftests/futex/include/atomic.h b/tools/testing/selftests/futex/include/atomic.h
new file mode 100644
index 0000000..f861da3
--- /dev/null
+++ b/tools/testing/selftests/futex/include/atomic.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * GCC atomic builtin wrappers
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-17: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+typedef struct {
+ volatile int val;
+} atomic_t;
+
+#define ATOMIC_INITIALIZER { 0 }
+
+/**
+ * atomic_cmpxchg() - Atomic compare and exchange
+ * @uaddr: The address of the futex to be modified
+ * @oldval: The expected value of the futex
+ * @newval: The new value to try and assign the futex
+ *
+ * Return the old value of addr->val.
+ */
+static inline int
+atomic_cmpxchg(atomic_t *addr, int oldval, int newval)
+{
+ return __sync_val_compare_and_swap(&addr->val, oldval, newval);
+}
+
+/**
+ * atomic_inc() - Atomic incrememnt
+ * @addr: Address of the variable to increment
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_inc(atomic_t *addr)
+{
+ return __sync_add_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_dec() - Atomic decrement
+ * @addr: Address of the variable to decrement
+ *
+ * Return the new value of addr-val.
+ */
+static inline int
+atomic_dec(atomic_t *addr)
+{
+ return __sync_sub_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_set() - Atomic set
+ * @addr: Address of the variable to set
+ * @newval: New value for the atomic_t
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_set(atomic_t *addr, int newval)
+{
+ addr->val = newval;
+ return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
new file mode 100644
index 0000000..b98c3ab
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -0,0 +1,266 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _FUTEXTEST_H
+#define _FUTEXTEST_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+typedef volatile u_int32_t futex_t;
+#define FUTEX_INITIALIZER 0
+
+/* Define the newer op codes if the system header file is not up to date. */
+#ifndef FUTEX_WAIT_BITSET
+#define FUTEX_WAIT_BITSET 9
+#endif
+#ifndef FUTEX_WAKE_BITSET
+#define FUTEX_WAKE_BITSET 10
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI
+#define FUTEX_WAIT_REQUEUE_PI 11
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI
+#define FUTEX_CMP_REQUEUE_PI 12
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \
+ FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_REQUEUE_PI_PRIVATE
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
+ FUTEX_PRIVATE_FLAG)
+#endif
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: typically an absolute struct timespec (except where noted
+ * otherwise). Overloaded by some ops
+ * @uaddr2: address of second futex for some ops\
+ * @val3: varies by op
+ * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of an static inline function as
+ * some of the types over overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+ syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout: relative timeout
+ */
+static inline int
+futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake: wake up to this many tasks
+ */
+static inline int
+futex_wake(futex_t *uaddr, int nr_wake, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wait_bitset() - block on uaddr with bitset
+ * @bitset: bitset to be used with futex_wake_bitset
+ */
+static inline int
+futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout,
+ u_int32_t bitset, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset,
+ opflags);
+}
+
+/**
+ * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset
+ * @bitset: bitset to compare with that used in futex_wait_bitset
+ */
+static inline int
+futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset,
+ opflags);
+}
+
+/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect: whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(futex_t *uaddr, int opflags)
+{
+ return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION
+ */
+static inline int
+futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2,
+ int wake_op, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op,
+ opflags);
+}
+
+/**
+ * futex_requeue() - requeue without expected value comparison, deprecated
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ *
+ * Due to its inherently racy implementation, futex_requeue() is deprecated in
+ * favor of futex_cmp_requeue().
+ */
+static inline int
+futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0,
+ opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr: non-PI futex source
+ * @uaddr2: PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2,
+ struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+ opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware)
+ * @uaddr: non-PI futex source
+ * @uaddr2: PI futex target
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+/**
+ * futex_cmpxchg() - atomic compare and exchange
+ * @uaddr: The address of the futex to be modified
+ * @oldval: The expected value of the futex
+ * @newval: The new value to try and assign the futex
+ *
+ * Implement cmpxchg using gcc atomic builtins.
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * Return the old futex value.
+ */
+static inline u_int32_t
+futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval)
+{
+ return __sync_val_compare_and_swap(uaddr, oldval, newval);
+}
+
+/**
+ * futex_dec() - atomic decrement of the futex value
+ * @uaddr: The address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_dec(futex_t *uaddr)
+{
+ return __sync_sub_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_inc() - atomic increment of the futex value
+ * @uaddr: the address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_inc(futex_t *uaddr)
+{
+ return __sync_add_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_set() - atomic decrement of the futex value
+ * @uaddr: the address of the futex to be modified
+ * @newval: New value for the atomic_t
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_set(futex_t *uaddr, u_int32_t newval)
+{
+ *uaddr = newval;
+ return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
new file mode 100644
index 0000000..014aa01
--- /dev/null
+++ b/tools/testing/selftests/futex/include/logging.h
@@ -0,0 +1,153 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _LOGGING_H
+#define _LOGGING_H
+
+#include <string.h>
+#include <unistd.h>
+#include <linux/futex.h>
+#include "kselftest.h"
+
+/*
+ * Define PASS, ERROR, and FAIL strings with and without color escape
+ * sequences, default to no color.
+ */
+#define ESC 0x1B, '['
+#define BRIGHT '1'
+#define GREEN '3', '2'
+#define YELLOW '3', '3'
+#define RED '3', '1'
+#define ESCEND 'm'
+#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
+#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
+#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
+#define RESET_COLOR ESC, '0', 'm'
+static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
+ RESET_COLOR, 0};
+static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
+ RESET_COLOR, 0};
+static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
+ RESET_COLOR, 0};
+static const char INFO_NORMAL[] = " INFO";
+static const char PASS_NORMAL[] = " PASS";
+static const char ERROR_NORMAL[] = "ERROR";
+static const char FAIL_NORMAL[] = " FAIL";
+const char *INFO = INFO_NORMAL;
+const char *PASS = PASS_NORMAL;
+const char *ERROR = ERROR_NORMAL;
+const char *FAIL = FAIL_NORMAL;
+
+/* Verbosity setting for INFO messages */
+#define VQUIET 0
+#define VCRITICAL 1
+#define VINFO 2
+#define VMAX VINFO
+int _verbose = VCRITICAL;
+
+/* Functional test return codes */
+#define RET_PASS 0
+#define RET_ERROR -1
+#define RET_FAIL -2
+
+/**
+ * log_color() - Use colored output for PASS, ERROR, and FAIL strings
+ * @use_color: use color (1) or not (0)
+ */
+void log_color(int use_color)
+{
+ if (use_color) {
+ PASS = PASS_COLOR;
+ ERROR = ERROR_COLOR;
+ FAIL = FAIL_COLOR;
+ } else {
+ PASS = PASS_NORMAL;
+ ERROR = ERROR_NORMAL;
+ FAIL = FAIL_NORMAL;
+ }
+}
+
+/**
+ * log_verbosity() - Set verbosity of test output
+ * @verbose: Enable (1) verbose output or not (0)
+ *
+ * Currently setting verbose=1 will enable INFO messages and 0 will disable
+ * them. FAIL and ERROR messages are always displayed.
+ */
+void log_verbosity(int level)
+{
+ if (level > VMAX)
+ level = VMAX;
+ else if (level < 0)
+ level = 0;
+ _verbose = level;
+}
+
+/**
+ * print_result() - Print standard PASS | ERROR | FAIL results
+ * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL
+ *
+ * print_result() is primarily intended for functional tests.
+ */
+void print_result(int ret)
+{
+ const char *result = "Unknown return code";
+
+ switch (ret) {
+ case RET_PASS:
+ ksft_inc_pass_cnt();
+ result = PASS;
+ break;
+ case RET_ERROR:
+ result = ERROR;
+ break;
+ case RET_FAIL:
+ ksft_inc_fail_cnt();
+ result = FAIL;
+ break;
+ }
+ printf("Result: %s\n", result);
+}
+
+/* log level macros */
+#define info(message, vargs...) \
+do { \
+ if (_verbose >= VINFO) \
+ fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
+} while (0)
+
+#define error(message, err, args...) \
+do { \
+ if (_verbose >= VCRITICAL) {\
+ if (err) \
+ fprintf(stderr, "\t%s: %s: "message, \
+ ERROR, strerror(err), ##args); \
+ else \
+ fprintf(stderr, "\t%s: "message, ERROR, ##args); \
+ } \
+} while (0)
+
+#define fail(message, args...) \
+do { \
+ if (_verbose >= VCRITICAL) \
+ fprintf(stderr, "\t%s: "message, FAIL, ##args); \
+} while (0)
+
+#endif
diff --git a/tools/testing/selftests/futex/run.sh b/tools/testing/selftests/futex/run.sh
new file mode 100755
index 0000000..4126312
--- /dev/null
+++ b/tools/testing/selftests/futex/run.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Copyright © International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# DESCRIPTION
+# Run all tests under the functional, performance, and stress directories.
+# Format and summarize the results.
+#
+# AUTHOR
+# Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+#
+###############################################################################
+
+# Test for a color capable shell and pass the result to the subdir scripts
+USE_COLOR=0
+tput setf 7
+if [ $? -eq 0 ]; then
+ USE_COLOR=1
+ tput sgr0
+fi
+export USE_COLOR
+
+(cd functional; ./run.sh)
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 572c888..ef1c80d 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -13,6 +13,13 @@
#include <stdlib.h>
#include <unistd.h>
+/* define kselftest exit codes */
+#define KSFT_PASS 0
+#define KSFT_FAIL 1
+#define KSFT_XFAIL 2
+#define KSFT_XPASS 3
+#define KSFT_SKIP 4
+
/* counters */
struct ksft_count {
unsigned int ksft_pass;
@@ -40,23 +47,23 @@ static inline void ksft_print_cnts(void)
static inline int ksft_exit_pass(void)
{
- exit(0);
+ exit(KSFT_PASS);
}
static inline int ksft_exit_fail(void)
{
- exit(1);
+ exit(KSFT_FAIL);
}
static inline int ksft_exit_xfail(void)
{
- exit(2);
+ exit(KSFT_XFAIL);
}
static inline int ksft_exit_xpass(void)
{
- exit(3);
+ exit(KSFT_XPASS);
}
static inline int ksft_exit_skip(void)
{
- exit(4);
+ exit(KSFT_SKIP);
}
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 2194155..ee412ba 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -13,6 +13,9 @@ run_tests: all
define INSTALL_RULE
mkdir -p $(INSTALL_PATH)
+ @for TEST_DIR in $(TEST_DIRS); do\
+ cp -r $$TEST_DIR $(INSTALL_PATH); \
+ done;
install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
endef
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index 95580a9..5e35c9c 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -9,7 +9,12 @@ unprivileged-remount-test: unprivileged-remount-test.c
include ../lib.mk
TEST_PROGS := unprivileged-remount-test
-override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
+ then \
+ ./unprivileged-remount-test ; \
+ else \
+ echo "WARN: No /proc/self/uid_map exist, test skipped." ; \
+ fi
override EMIT_TESTS := echo "$(RUN_TESTS)"
clean:
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 6f67333..4124593 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -19,6 +19,8 @@
* - PACKET_FANOUT_LB
* - PACKET_FANOUT_CPU
* - PACKET_FANOUT_ROLLOVER
+ * - PACKET_FANOUT_CBPF
+ * - PACKET_FANOUT_EBPF
*
* Todo:
* - functionality: PACKET_FANOUT_FLAG_DEFRAG
@@ -44,7 +46,9 @@
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
+#include <linux/unistd.h> /* for __NR_bpf */
#include <linux/filter.h>
+#include <linux/bpf.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
@@ -91,6 +95,51 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
return fd;
}
+static void sock_fanout_set_ebpf(int fd)
+{
+ const int len_off = __builtin_offsetof(struct __sk_buff, len);
+ struct bpf_insn prog[] = {
+ { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },
+ { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 },
+ { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN },
+ { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 },
+ { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 },
+ { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR },
+ { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 },
+ { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ char log_buf[512];
+ union bpf_attr attr;
+ int pfd;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = (unsigned long) prog;
+ attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+ attr.license = (unsigned long) "GPL";
+ attr.log_buf = (unsigned long) log_buf,
+ attr.log_size = sizeof(log_buf),
+ attr.log_level = 1,
+
+ pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (pfd < 0) {
+ perror("bpf");
+ fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
+ exit(1);
+ }
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
+ perror("fanout data ebpf");
+ exit(1);
+ }
+
+ if (close(pfd)) {
+ perror("close ebpf");
+ exit(1);
+ }
+}
+
static char *sock_fanout_open_ring(int fd)
{
struct tpacket_req req = {
@@ -115,8 +164,8 @@ static char *sock_fanout_open_ring(int fd)
ring = mmap(0, req.tp_block_size * req.tp_block_nr,
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (!ring) {
- fprintf(stderr, "packetsock ring mmap\n");
+ if (ring == MAP_FAILED) {
+ perror("packetsock ring mmap");
exit(1);
}
@@ -209,6 +258,7 @@ static int test_datapath(uint16_t typeflags, int port_off,
{
const int expect0[] = { 0, 0 };
char *rings[2];
+ uint8_t type = typeflags & 0xFF;
int fds[2], fds_udp[2][2], ret;
fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
@@ -219,6 +269,11 @@ static int test_datapath(uint16_t typeflags, int port_off,
fprintf(stderr, "ERROR: failed open\n");
exit(1);
}
+ if (type == PACKET_FANOUT_CBPF)
+ sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA);
+ else if (type == PACKET_FANOUT_EBPF)
+ sock_fanout_set_ebpf(fds[0]);
+
rings[0] = sock_fanout_open_ring(fds[0]);
rings[1] = sock_fanout_open_ring(fds[1]);
pair_udp_open(fds_udp[0], PORT_BASE);
@@ -227,11 +282,11 @@ static int test_datapath(uint16_t typeflags, int port_off,
/* Send data, but not enough to overflow a queue */
pair_udp_send(fds_udp[0], 15);
- pair_udp_send(fds_udp[1], 5);
+ pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
ret = sock_fanout_read(fds, rings, expect1);
/* Send more data, overflow the queue */
- pair_udp_send(fds_udp[0], 15);
+ pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
/* TODO: ensure consistent order between expect1 and expect2 */
ret |= sock_fanout_read(fds, rings, expect2);
@@ -272,9 +327,10 @@ int main(int argc, char **argv)
const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } };
const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } };
const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } };
- const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } };
+ const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } };
const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } };
const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
+ const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
int port_off = 2, tries = 5, ret;
test_control_single();
@@ -296,6 +352,11 @@ int main(int argc, char **argv)
ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
port_off, expect_rb[0], expect_rb[1]);
+ ret |= test_datapath(PACKET_FANOUT_CBPF,
+ port_off, expect_bpf[0], expect_bpf[1]);
+ ret |= test_datapath(PACKET_FANOUT_EBPF,
+ port_off, expect_bpf[0], expect_bpf[1]);
+
set_cpuaffinity(0);
ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
expect_cpu0[0], expect_cpu0[1]);
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index 37da54a..24bc7ec 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -30,6 +30,7 @@
#define DATA_LEN 100
#define DATA_CHAR 'a'
+#define DATA_CHAR_1 'b'
#define PORT_BASE 8000
@@ -37,29 +38,36 @@
# define __maybe_unused __attribute__ ((__unused__))
#endif
-static __maybe_unused void pair_udp_setfilter(int fd)
+static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum)
{
struct sock_filter bpf_filter[] = {
{ 0x80, 0, 0, 0x00000000 }, /* LD pktlen */
- { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/
+ { 0x35, 0, 4, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/
{ 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */
- { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
- { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */
- { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/
+ { 0x15, 1, 0, DATA_CHAR }, /* JEQ DATA_CHAR [t goto match]*/
+ { 0x15, 0, 1, DATA_CHAR_1}, /* JEQ DATA_CHAR_1 [t goto match]*/
{ 0x06, 0, 0, 0x00000060 }, /* RET match */
{ 0x06, 0, 0, 0x00000000 }, /* RET no match */
};
struct sock_fprog bpf_prog;
+ if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA)
+ bpf_filter[5].code = 0x16; /* RET A */
+
bpf_prog.filter = bpf_filter;
bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
- if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
+ if (setsockopt(fd, lvl, optnum, &bpf_prog,
sizeof(bpf_prog))) {
perror("setsockopt SO_ATTACH_FILTER");
exit(1);
}
}
+static __maybe_unused void pair_udp_setfilter(int fd)
+{
+ sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER);
+}
+
static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
{
struct sockaddr_in saddr, daddr;
@@ -96,11 +104,11 @@ static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
}
}
-static __maybe_unused void pair_udp_send(int fds[], int num)
+static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload)
{
char buf[DATA_LEN], rbuf[DATA_LEN];
- memset(buf, DATA_CHAR, sizeof(buf));
+ memset(buf, payload, sizeof(buf));
while (num--) {
/* Should really handle EINTR and EAGAIN */
if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
@@ -118,6 +126,11 @@ static __maybe_unused void pair_udp_send(int fds[], int num)
}
}
+static __maybe_unused void pair_udp_send(int fds[], int num)
+{
+ return pair_udp_send_char(fds, num, DATA_CHAR);
+}
+
static __maybe_unused void pair_udp_close(int fds[])
{
close(fds[0]);
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 5ad0423..03ca2e6 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -12,7 +12,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR
export CFLAGS
-SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian
+SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian dscr
endif
diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore
new file mode 100644
index 0000000..b585c6c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/.gitignore
@@ -0,0 +1,7 @@
+dscr_default_test
+dscr_explicit_test
+dscr_inherit_exec_test
+dscr_inherit_test
+dscr_sysfs_test
+dscr_sysfs_thread_test
+dscr_user_test
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
new file mode 100644
index 0000000..49327ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -0,0 +1,14 @@
+TEST_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
+ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \
+ dscr_sysfs_thread_test
+
+dscr_default_test: LDLIBS += -lpthread
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h
new file mode 100644
index 0000000..a36af1b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr.h
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DSCR related test cases.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H
+#define _SELFTESTS_POWERPC_DSCR_DSCR_H
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+
+#define SPRN_DSCR 0x11 /* Privilege state SPR */
+#define SPRN_DSCR_USR 0x03 /* Problem state SPR */
+#define THREADS 100 /* Max threads */
+#define COUNT 100 /* Max iterations */
+#define DSCR_MAX 16 /* Max DSCR value */
+#define LEN_MAX 100 /* Max name length */
+
+#define DSCR_DEFAULT "/sys/devices/system/cpu/dscr_default"
+#define CPU_PATH "/sys/devices/system/cpu/"
+
+#define rmb() asm volatile("lwsync":::"memory")
+#define wmb() asm volatile("lwsync":::"memory")
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+/* Prilvilege state DSCR access */
+inline unsigned long get_dscr(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR));
+
+ return ret;
+}
+
+inline void set_dscr(unsigned long val)
+{
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
+/* Problem state DSCR access */
+inline unsigned long get_dscr_usr(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR_USR));
+
+ return ret;
+}
+
+inline void set_dscr_usr(unsigned long val)
+{
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_USR));
+}
+
+/* Default DSCR access */
+unsigned long get_default_dscr(void)
+{
+ int fd = -1, ret;
+ char buf[16];
+ unsigned long val;
+
+ if (fd == -1) {
+ fd = open(DSCR_DEFAULT, O_RDONLY);
+ if (fd == -1) {
+ perror("open() failed");
+ exit(1);
+ }
+ }
+ memset(buf, 0, sizeof(buf));
+ lseek(fd, 0, SEEK_SET);
+ ret = read(fd, buf, sizeof(buf));
+ if (ret == -1) {
+ perror("read() failed");
+ exit(1);
+ }
+ sscanf(buf, "%lx", &val);
+ close(fd);
+ return val;
+}
+
+void set_default_dscr(unsigned long val)
+{
+ int fd = -1, ret;
+ char buf[16];
+
+ if (fd == -1) {
+ fd = open(DSCR_DEFAULT, O_RDWR);
+ if (fd == -1) {
+ perror("open() failed");
+ exit(1);
+ }
+ }
+ sprintf(buf, "%lx\n", val);
+ ret = write(fd, buf, strlen(buf));
+ if (ret == -1) {
+ perror("write() failed");
+ exit(1);
+ }
+ close(fd);
+}
+
+double uniform_deviate(int seed)
+{
+ return seed * (1.0 / (RAND_MAX + 1.0));
+}
+#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
new file mode 100644
index 0000000..df17c3b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR) default test
+ *
+ * This test modifies the system wide default DSCR through
+ * it's sysfs interface and then verifies that all threads
+ * see the correct changed DSCR value immediately.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static unsigned long dscr; /* System DSCR default */
+static unsigned long sequence;
+static unsigned long result[THREADS];
+
+static void *do_test(void *in)
+{
+ unsigned long thread = (unsigned long)in;
+ unsigned long i;
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long d, cur_dscr, cur_dscr_usr;
+ unsigned long s1, s2;
+
+ s1 = ACCESS_ONCE(sequence);
+ if (s1 & 1)
+ continue;
+ rmb();
+
+ d = dscr;
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ rmb();
+ s2 = sequence;
+
+ if (s1 != s2)
+ continue;
+
+ if (cur_dscr != d) {
+ fprintf(stderr, "thread %ld kernel DSCR should be %ld "
+ "but is %ld\n", thread, d, cur_dscr);
+ result[thread] = 1;
+ pthread_exit(&result[thread]);
+ }
+
+ if (cur_dscr_usr != d) {
+ fprintf(stderr, "thread %ld user DSCR should be %ld "
+ "but is %ld\n", thread, d, cur_dscr_usr);
+ result[thread] = 1;
+ pthread_exit(&result[thread]);
+ }
+ }
+ result[thread] = 0;
+ pthread_exit(&result[thread]);
+}
+
+int dscr_default(void)
+{
+ pthread_t threads[THREADS];
+ unsigned long i, *status[THREADS];
+ unsigned long orig_dscr_default;
+
+ orig_dscr_default = get_default_dscr();
+
+ /* Initial DSCR default */
+ dscr = 1;
+ set_default_dscr(dscr);
+
+ /* Spawn all testing threads */
+ for (i = 0; i < THREADS; i++) {
+ if (pthread_create(&threads[i], NULL, do_test, (void *)i)) {
+ perror("pthread_create() failed");
+ goto fail;
+ }
+ }
+
+ srand(getpid());
+
+ /* Keep changing the DSCR default */
+ for (i = 0; i < COUNT; i++) {
+ double ret = uniform_deviate(rand());
+
+ if (ret < 0.0001) {
+ sequence++;
+ wmb();
+
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_default_dscr(dscr);
+
+ wmb();
+ sequence++;
+ }
+ }
+
+ /* Individual testing thread exit status */
+ for (i = 0; i < THREADS; i++) {
+ if (pthread_join(threads[i], (void **)&(status[i]))) {
+ perror("pthread_join() failed");
+ goto fail;
+ }
+
+ if (*status[i]) {
+ printf("%ldth thread failed to join with %ld status\n",
+ i, *status[i]);
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_default, "dscr_default_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
new file mode 100644
index 0000000..ad9c3ec
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -0,0 +1,71 @@
+/*
+ * POWER Data Stream Control Register (DSCR) explicit test
+ *
+ * This test modifies the DSCR value using mtspr instruction and
+ * verifies the change with mfspr instruction. It uses both the
+ * privilege state SPR and the problem state SPR for this purpose.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_explicit(void)
+{
+ unsigned long i, dscr = 0;
+
+ srand(getpid());
+ set_dscr(dscr);
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long cur_dscr, cur_dscr_usr;
+ double ret = uniform_deviate(rand());
+
+ if (ret < 0.001) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_dscr(dscr);
+ }
+
+ cur_dscr = get_dscr();
+ if (cur_dscr != dscr) {
+ fprintf(stderr, "Kernel DSCR should be %ld but "
+ "is %ld\n", dscr, cur_dscr);
+ return 1;
+ }
+
+ ret = uniform_deviate(rand());
+ if (ret < 0.001) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_dscr_usr(dscr);
+ }
+
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr_usr != dscr) {
+ fprintf(stderr, "User DSCR should be %ld but "
+ "is %ld\n", dscr, cur_dscr_usr);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_explicit, "dscr_explicit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
new file mode 100644
index 0000000..8265504
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -0,0 +1,117 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork exec test
+ *
+ * This testcase modifies the DSCR using mtspr, forks & execs and
+ * verifies that the child is using the changed DSCR using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static char prog[LEN_MAX];
+
+static void do_exec(unsigned long parent_dscr)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ if (cur_dscr != parent_dscr) {
+ fprintf(stderr, "Parent DSCR %ld was not inherited "
+ "over exec (kernel value)\n", parent_dscr);
+ exit(1);
+ }
+
+ if (cur_dscr_usr != parent_dscr) {
+ fprintf(stderr, "Parent DSCR %ld was not inherited "
+ "over exec (user value)\n", parent_dscr);
+ exit(1);
+ }
+ exit(0);
+}
+
+int dscr_inherit_exec(void)
+{
+ unsigned long i, dscr = 0;
+ pid_t pid;
+
+ for (i = 0; i < COUNT; i++) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ if (dscr == get_default_dscr())
+ continue;
+
+ if (i % 2 == 0)
+ set_dscr_usr(dscr);
+ else
+ set_dscr(dscr);
+
+ /*
+ * XXX: Force a context switch out so that DSCR
+ * current value is copied into the thread struct
+ * which is required for the child to inherit the
+ * changed value.
+ */
+ sleep(1);
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork() failed");
+ exit(1);
+ } else if (pid) {
+ int status;
+
+ if (waitpid(pid, &status, 0) == -1) {
+ perror("waitpid() failed");
+ exit(1);
+ }
+
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ return 1;
+ }
+ } else {
+ char dscr_str[16];
+
+ sprintf(dscr_str, "%ld", dscr);
+ execlp(prog, prog, "exec", dscr_str, NULL);
+ exit(1);
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc == 3 && !strcmp(argv[1], "exec")) {
+ unsigned long parent_dscr;
+
+ parent_dscr = atoi(argv[2]);
+ do_exec(parent_dscr);
+ } else if (argc != 1) {
+ fprintf(stderr, "Usage: %s\n", argv[0]);
+ exit(1);
+ }
+
+ strncpy(prog, argv[0], strlen(argv[0]));
+ return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
new file mode 100644
index 0000000..4e414ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -0,0 +1,95 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork test
+ *
+ * This testcase modifies the DSCR using mtspr, forks and then
+ * verifies that the child process has the correct changed DSCR
+ * value using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_inherit(void)
+{
+ unsigned long i, dscr = 0;
+ pid_t pid;
+
+ srand(getpid());
+ set_dscr(dscr);
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ if (i % 2 == 0)
+ set_dscr_usr(dscr);
+ else
+ set_dscr(dscr);
+
+ /*
+ * XXX: Force a context switch out so that DSCR
+ * current value is copied into the thread struct
+ * which is required for the child to inherit the
+ * changed value.
+ */
+ sleep(1);
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork() failed");
+ exit(1);
+ } else if (pid) {
+ int status;
+
+ if (waitpid(pid, &status, 0) == -1) {
+ perror("waitpid() failed");
+ exit(1);
+ }
+
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ return 1;
+ }
+ } else {
+ cur_dscr = get_dscr();
+ if (cur_dscr != dscr) {
+ fprintf(stderr, "Kernel DSCR should be %ld "
+ "but is %ld\n", dscr, cur_dscr);
+ exit(1);
+ }
+
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr_usr != dscr) {
+ fprintf(stderr, "User DSCR should be %ld "
+ "but is %ld\n", dscr, cur_dscr_usr);
+ exit(1);
+ }
+ exit(0);
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_inherit, "dscr_inherit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
new file mode 100644
index 0000000..17fb1b4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -0,0 +1,97 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs interface test
+ *
+ * This test updates to system wide DSCR default through the sysfs interface
+ * and then verifies that all the CPU specific DSCR defaults are updated as
+ * well verified from their sysfs interfaces.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_cpu_dscr_default(char *file, unsigned long val)
+{
+ char buf[10];
+ int fd, rc;
+
+ fd = open(file, O_RDWR);
+ if (fd == -1) {
+ perror("open() failed");
+ return 1;
+ }
+
+ rc = read(fd, buf, sizeof(buf));
+ if (rc == -1) {
+ perror("read() failed");
+ return 1;
+ }
+ close(fd);
+
+ buf[rc] = '\0';
+ if (strtol(buf, NULL, 16) != val) {
+ printf("DSCR match failed: %ld (system) %ld (cpu)\n",
+ val, strtol(buf, NULL, 16));
+ return 1;
+ }
+ return 0;
+}
+
+static int check_all_cpu_dscr_defaults(unsigned long val)
+{
+ DIR *sysfs;
+ struct dirent *dp;
+ char file[LEN_MAX];
+
+ sysfs = opendir(CPU_PATH);
+ if (!sysfs) {
+ perror("opendir() failed");
+ return 1;
+ }
+
+ while ((dp = readdir(sysfs))) {
+ if (!(dp->d_type & DT_DIR))
+ continue;
+ if (!strcmp(dp->d_name, "cpuidle"))
+ continue;
+ if (!strstr(dp->d_name, "cpu"))
+ continue;
+
+ sprintf(file, "%s%s/dscr", CPU_PATH, dp->d_name);
+ if (access(file, F_OK))
+ continue;
+
+ if (check_cpu_dscr_default(file, val))
+ return 1;
+ }
+ closedir(sysfs);
+ return 0;
+}
+
+int dscr_sysfs(void)
+{
+ unsigned long orig_dscr_default;
+ int i, j;
+
+ orig_dscr_default = get_default_dscr();
+ for (i = 0; i < COUNT; i++) {
+ for (j = 0; j < DSCR_MAX; j++) {
+ set_default_dscr(j);
+ if (check_all_cpu_dscr_defaults(j))
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_sysfs, "dscr_sysfs_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
new file mode 100644
index 0000000..ad97b59
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -0,0 +1,80 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs thread test
+ *
+ * This test updates the system wide DSCR default value through
+ * sysfs interface which should then update all the CPU specific
+ * DSCR default values which must also be then visible to threads
+ * executing on individual CPUs on the system.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include "dscr.h"
+
+static int test_thread_dscr(unsigned long val)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ if (val != cur_dscr) {
+ printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n",
+ sched_getcpu(), val, cur_dscr);
+ return 1;
+ }
+
+ if (val != cur_dscr_usr) {
+ printf("[cpu %d] User DSCR should be %ld but is %ld\n",
+ sched_getcpu(), val, cur_dscr_usr);
+ return 1;
+ }
+ return 0;
+}
+
+static int check_cpu_dscr_thread(unsigned long val)
+{
+ cpu_set_t mask;
+ int cpu;
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ continue;
+
+ if (test_thread_dscr(val))
+ return 1;
+ }
+ return 0;
+
+}
+
+int dscr_sysfs_thread(void)
+{
+ unsigned long orig_dscr_default;
+ int i, j;
+
+ orig_dscr_default = get_default_dscr();
+ for (i = 0; i < COUNT; i++) {
+ for (j = 0; j < DSCR_MAX; j++) {
+ set_default_dscr(j);
+ if (check_cpu_dscr_thread(j))
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
new file mode 100644
index 0000000..77d16b5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -0,0 +1,61 @@
+/*
+ * POWER Data Stream Control Register (DSCR) SPR test
+ *
+ * This test modifies the DSCR value through both the SPR number
+ * based mtspr instruction and then makes sure that the same is
+ * reflected through mfspr instruction using either of the SPR
+ * numbers.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2013, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_dscr(char *str)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr != cur_dscr_usr) {
+ printf("%s set, kernel get %lx != user get %lx\n",
+ str, cur_dscr, cur_dscr_usr);
+ return 1;
+ }
+ return 0;
+}
+
+int dscr_user(void)
+{
+ int i;
+
+ check_dscr("");
+
+ for (i = 0; i < COUNT; i++) {
+ set_dscr(i);
+ if (check_dscr("kernel"))
+ return 1;
+ }
+
+ for (i = 0; i < COUNT; i++) {
+ set_dscr_usr(i);
+ if (check_dscr("user"))
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_user, "dscr_user_test");
+}
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 41cc3ed..ee179e2 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,8 +2,9 @@ noarg:
$(MAKE) -C ../
TEST_PROGS := hugetlb_vs_thp_test subpage_prot
+TEST_FILES := tempfile
-all: $(TEST_PROGS) tempfile
+all: $(TEST_PROGS) $(TEST_FILES)
$(TEST_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 5a16117..a9099d9 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -26,7 +26,7 @@ override define EMIT_TESTS
$(MAKE) -s -C ebb emit_tests
endef
-DEFAULT_INSTALL := $(INSTALL_RULE)
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
override define INSTALL_RULE
$(DEFAULT_INSTALL_RULE)
$(MAKE) -C ebb install
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index 081473db..e21d106 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -1,9 +1,8 @@
-CC := $(CROSS_COMPILE)gcc
-PROGS := switch_endian_test
+TEST_PROGS := switch_endian_test
ASFLAGS += -O2 -Wall -g -nostdlib -m64
-all: $(PROGS)
+all: $(TEST_PROGS)
switch_endian_test: check-reversed.S
@@ -13,12 +12,7 @@ check-reversed.o: check.o
check-reversed.S: check-reversed.o
hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
-run_tests: all
- @-for PROG in $(PROGS); do \
- ./$$PROG; \
- done;
+include ../../lib.mk
clean:
- rm -f $(PROGS) *.o check-reversed.S
-
-.PHONY: all run_tests clean
+ rm -f $(TEST_PROGS) *.o check-reversed.S
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 1b616fa..4bea62a 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -5,7 +5,7 @@ all: $(TEST_PROGS)
$(TEST_PROGS): ../harness.c
tm-syscall: tm-syscall-asm.S
-tm-syscall: CFLAGS += -mhtm
+tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index 3ed8d4b..1276e23 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -82,7 +82,8 @@ int tm_syscall(void)
unsigned count = 0;
struct timeval end, now;
- SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM));
+ SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2)
+ & PPC_FEATURE2_HTM_NOSC));
setbuf(stdout, NULL);
printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION);
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
index e539f77..a485f2e 100644
--- a/tools/testing/selftests/powerpc/vphn/Makefile
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -1,15 +1,12 @@
-PROG := test-vphn
+TEST_PROGS := test-vphn
CFLAGS += -m64
-all: $(PROG)
+all: $(TEST_PROGS)
-$(PROG): ../harness.c
+$(TEST_PROGS): ../harness.c
-run_tests: all
- ./$(PROG)
+include ../../lib.mk
clean:
- rm -f $(PROG)
-
-.PHONY: all run_tests clean
+ rm -f $(TEST_PROGS)
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 15f1a17..3f81a10 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -66,7 +66,7 @@ make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1
mv $builddir/.config $builddir/.config.sav
sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
cp $builddir/.config $builddir/.config.new
-yes '' | make $buildloc oldconfig > $builddir/Make.modconfig.out 2>&1
+yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err
# verify new config matches specification.
configcheck.sh $builddir/.config $c
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 4f5b20f..d86bdd6 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -43,6 +43,10 @@ do
if test -f "$i/console.log"
then
configcheck.sh $i/.config $i/ConfigFragment
+ if test -r $i/Make.oldconfig.err
+ then
+ cat $i/Make.oldconfig.err
+ fi
parse-build.sh $i/Make.out $configfile
parse-torture.sh $i/console.log $configfile
parse-console.sh $i/console.log $configfile
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index dd2812c..fbe2dbf 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -55,7 +55,7 @@ usage () {
echo " --bootargs kernel-boot-arguments"
echo " --bootimage relative-path-to-kernel-boot-image"
echo " --buildonly"
- echo " --configs \"config-file list\""
+ echo " --configs \"config-file list w/ repeat factor (3*TINY01)\""
echo " --cpus N"
echo " --datestamp string"
echo " --defconfig string"
@@ -178,13 +178,26 @@ fi
touch $T/cfgcpu
for CF in $configs
do
- if test -f "$CONFIGFRAG/$CF"
+ case $CF in
+ [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+ config_reps=`echo $CF | sed -e 's/\*.*$//'`
+ CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
+ ;;
+ *)
+ config_reps=1
+ CF1=$CF
+ ;;
+ esac
+ if test -f "$CONFIGFRAG/$CF1"
then
- cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF`
- cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF" "$cpu_count"`
- echo $CF $cpu_count >> $T/cfgcpu
+ cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
+ cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+ for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
+ do
+ echo $CF1 $cpu_count >> $T/cfgcpu
+ done
else
- echo "The --configs file $CF does not exist, terminating."
+ echo "The --configs file $CF1 does not exist, terminating."
exit 1
fi
done
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index 4970121..f824b4c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -1,3 +1,5 @@
CONFIG_RCU_TORTURE_TEST=y
CONFIG_PRINTK_TIME=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 9fbb41b..1a087c3 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
index 4b6f272..4837430 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
@@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index 238bfe3..84a7d51 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1 +1 @@
-rcutorture.torture_type=srcu
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index 97f0a0b..bafe94c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -5,5 +5,6 @@ CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_RCU=y
-CONFIG_TASKS_RCU=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
index 696d2ea7..ad2be91 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -2,4 +2,3 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
-CONFIG_TASKS_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index 9c60da5..c70c51d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -6,8 +6,8 @@ CONFIG_HIBERNATION=n
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
-CONFIG_TASKS_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_NO_HZ_FULL_ALL=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index 36e41df..f1892e0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -8,7 +8,7 @@ CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=y
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
+#CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
index 0f08027..6c1a292 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
@@ -1,2 +1,3 @@
rcupdate.rcu_self_test=1
rcupdate.rcu_self_test_bh=1
+rcutorture.torture_type=rcu_bh
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index f8a10a7..f572b87 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -13,6 +13,6 @@ CONFIG_MAXSMP=y
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_ZERO=y
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index 629122f..ef6a22c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -14,10 +14,9 @@ CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_FANOUT_EXACT=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
index a25de47..917d251 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
@@ -14,10 +14,8 @@ CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_FANOUT_EXACT=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
index 53f24e0..7a17c50 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=8
+CONFIG_NR_CPUS=16
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
@@ -9,12 +9,11 @@ CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
-CONFIG_RCU_FANOUT=4
-CONFIG_RCU_FANOUT_LEAF=4
-CONFIG_RCU_FANOUT_EXACT=n
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=y
CONFIG_RCU_KTHREAD_PRIO=2
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
new file mode 100644
index 0000000..120c0c8
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -0,0 +1 @@
+rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 0f84db3..39a2c6d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -13,10 +13,9 @@ CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
-CONFIG_RCU_FANOUT=2
-CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_FANOUT_EXACT=n
+CONFIG_RCU_FANOUT=4
+CONFIG_RCU_FANOUT_LEAF=4
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index 212e3bf..1257d32 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -12,11 +12,10 @@ CONFIG_RCU_TRACE=n
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=6
CONFIG_RCU_FANOUT_LEAF=6
-CONFIG_RCU_FANOUT_EXACT=n
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_NONE=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
-CONFIG_RCU_CPU_STALL_INFO=n
+#CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index 7eee63b..d3e456b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -14,10 +14,9 @@ CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=6
CONFIG_RCU_FANOUT_LEAF=6
-CONFIG_RCU_FANOUT_EXACT=y
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
-CONFIG_RCU_CPU_STALL_INFO=n
+#CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
index da9a03a..dd90f28 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -1,3 +1,4 @@
rcupdate.rcu_self_test=1
rcupdate.rcu_self_test_bh=1
rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index 92a97fa..3956b41 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -15,8 +15,7 @@ CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=2
CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_FANOUT_EXACT=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index 5812027..bb9b0c1 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=16
+CONFIG_NR_CPUS=8
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
@@ -13,13 +13,12 @@ CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_EXACT=y
CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_ALL=y
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
-CONFIG_RCU_CPU_STALL_INFO=n
+#CHECK#CONFIG_PROVE_RCU=y
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
index 3eaecca..2ad13f0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
@@ -13,11 +13,9 @@ CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_EXACT=y
CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_ALL=y
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
new file mode 100644
index 0000000..883149b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
@@ -0,0 +1 @@
+rcutree.rcu_fanout_exact=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index 2561daf..fb066dc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -1,3 +1,4 @@
rcutorture.torture_type=sched
rcupdate.rcu_self_test=1
rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index 6076b36..6710e74 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -13,6 +13,6 @@ CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index ec03c88..657f3a0 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -12,13 +12,11 @@ CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE.
CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
CONFIG_PREEMPT -- Do half. (First three and #8.)
-CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not.
-CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING.
+CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
+CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
-CONFIG_RCU_CPU_STALL_INFO -- Do one.
-CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others.
-CONFIG_RCU_FANOUT_EXACT -- Do one.
+CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL.
CONFIG_RCU_NOCB_CPU -- Do three, see below.
@@ -27,28 +25,19 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one.
CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
CONFIG_RCU_TRACE -- Do half.
CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
+!RCU_EXPERT -- Do a few, but these have to be vanilla configurations.
RCU-bh: Do one with PREEMPT and one with !PREEMPT.
RCU-sched: Do one with PREEMPT but not BOOST.
-Hierarchy:
-
-TREE01. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n.
-TREE02. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n,
- CONFIG_RCU_FANOUT_LEAF=3.
-TREE03. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n,
- CONFIG_RCU_FANOUT_LEAF=4.
-TREE04. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n,
- CONFIG_RCU_FANOUT_LEAF=2.
-TREE05. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n
- CONFIG_RCU_FANOUT_LEAF=6.
-TREE06. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y
- CONFIG_RCU_FANOUT_LEAF=6.
-TREE07. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n,
- CONFIG_RCU_FANOUT_LEAF=2.
-TREE08. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y,
- CONFIG_RCU_FANOUT_LEAF=2.
-TREE09. CONFIG_NR_CPUS=1.
+Boot parameters:
+
+nohz_full - do at least one.
+maxcpu -- do at least one.
+rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test -- Do at least one each, offloaded and not.
+rcutree.rcu_fanout_exact -- Do at least one.
Kconfig Parameters Ignored:
diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore
new file mode 100644
index 0000000..346d83c
--- /dev/null
+++ b/tools/testing/selftests/seccomp/.gitignore
@@ -0,0 +1 @@
+seccomp_bpf
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
new file mode 100644
index 0000000..8401e87
--- /dev/null
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -0,0 +1,10 @@
+TEST_PROGS := seccomp_bpf
+CFLAGS += -Wl,-no-as-needed -Wall
+LDFLAGS += -lpthread
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ $(RM) $(TEST_PROGS)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
new file mode 100644
index 0000000..a004b4c
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -0,0 +1,2122 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * Test code for seccomp bpf.
+ */
+
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <linux/prctl.h>
+#include <linux/ptrace.h>
+#include <linux/seccomp.h>
+#include <poll.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/elf.h>
+#include <sys/uio.h>
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "test_harness.h"
+
+#ifndef PR_SET_PTRACER
+# define PR_SET_PTRACER 0x59616d61
+#endif
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#ifndef PR_SECCOMP_EXT
+#define PR_SECCOMP_EXT 43
+#endif
+
+#ifndef SECCOMP_EXT_ACT
+#define SECCOMP_EXT_ACT 1
+#endif
+
+#ifndef SECCOMP_EXT_ACT_TSYNC
+#define SECCOMP_EXT_ACT_TSYNC 1
+#endif
+
+#ifndef SECCOMP_MODE_STRICT
+#define SECCOMP_MODE_STRICT 1
+#endif
+
+#ifndef SECCOMP_MODE_FILTER
+#define SECCOMP_MODE_FILTER 2
+#endif
+
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+
+/* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION 0x7fff0000U
+#define SECCOMP_RET_DATA 0x0000ffffU
+
+struct seccomp_data {
+ int nr;
+ __u32 arch;
+ __u64 instruction_pointer;
+ __u64 args[6];
+};
+#endif
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
+#else
+#error "wut? Unknown __BYTE_ORDER?!"
+#endif
+
+#define SIBLING_EXIT_UNKILLED 0xbadbeef
+#define SIBLING_EXIT_FAILURE 0xbadface
+#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
+
+TEST(mode_strict_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP");
+ }
+ syscall(__NR_exit, 1);
+}
+
+TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
+{
+ long ret;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP");
+ }
+ syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+ NULL, NULL, NULL);
+ EXPECT_FALSE(true) {
+ TH_LOG("Unreachable!");
+ }
+}
+
+/* Note! This doesn't test no new privs behavior */
+TEST(no_new_privs_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+}
+
+/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
+TEST(mode_filter_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
+ }
+}
+
+TEST(mode_filter_without_nnp)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
+ ASSERT_LE(0, ret) {
+ TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
+ }
+ errno = 0;
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ /* Succeeds with CAP_SYS_ADMIN, fails without */
+ /* TODO(wad) check caps not euid */
+ if (geteuid()) {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EACCES, errno);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+}
+
+#define MAX_INSNS_PER_PATH 32768
+
+TEST(filter_size_limits)
+{
+ int i;
+ int count = BPF_MAXINSNS + 1;
+ struct sock_filter allow[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter *filter;
+ struct sock_fprog prog = { };
+ long ret;
+
+ filter = calloc(count, sizeof(*filter));
+ ASSERT_NE(NULL, filter);
+
+ for (i = 0; i < count; i++)
+ filter[i] = allow[0];
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.filter = filter;
+ prog.len = count;
+
+ /* Too many filter instructions in a single filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_NE(0, ret) {
+ TH_LOG("Installing %d insn filter was allowed", prog.len);
+ }
+
+ /* One less is okay, though. */
+ prog.len -= 1;
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
+ }
+}
+
+TEST(filter_chain_limits)
+{
+ int i;
+ int count = BPF_MAXINSNS;
+ struct sock_filter allow[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter *filter;
+ struct sock_fprog prog = { };
+ long ret;
+
+ filter = calloc(count, sizeof(*filter));
+ ASSERT_NE(NULL, filter);
+
+ for (i = 0; i < count; i++)
+ filter[i] = allow[0];
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.filter = filter;
+ prog.len = 1;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.len = count;
+
+ /* Too many total filter instructions. */
+ for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ if (ret != 0)
+ break;
+ }
+ ASSERT_NE(0, ret) {
+ TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
+ i, count, i * (count + 4));
+ }
+}
+
+TEST(mode_filter_cannot_move_to_strict)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+
+TEST(mode_filter_get_seccomp)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ EXPECT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ EXPECT_EQ(2, ret);
+}
+
+
+TEST(ALLOW_all)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+}
+
+TEST(empty_prog)
+{
+ struct sock_filter filter[] = {
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+ EXPECT_EQ(0, syscall(__NR_getpid)) {
+ TH_LOG("getpid() shouldn't ever return");
+ }
+}
+
+/* return code >= 0x80000000 is unused. */
+TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+ EXPECT_EQ(0, syscall(__NR_getpid)) {
+ TH_LOG("getpid() shouldn't ever return");
+ }
+}
+
+TEST_SIGNAL(KILL_all, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+}
+
+TEST_SIGNAL(KILL_one, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ /* Only both with lower 32-bit for now. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+ pid_t pid = getpid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(pid, syscall(__NR_getpid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE));
+}
+
+TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ /* Only both with lower 32-bit for now. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+ pid_t pid = getpid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(pid, syscall(__NR_getpid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE));
+}
+
+/* TODO(wad) add 64-bit versus 32-bit arg tests. */
+TEST(arg_out_of_range)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST(ERRNO_valid)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(E2BIG, errno);
+}
+
+TEST(ERRNO_zero)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* "errno" of 0 is ok. */
+ EXPECT_EQ(0, read(0, NULL, 0));
+}
+
+TEST(ERRNO_capped)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(4095, errno);
+}
+
+FIXTURE_DATA(TRAP) {
+ struct sock_fprog prog;
+};
+
+FIXTURE_SETUP(TRAP)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+}
+
+FIXTURE_TEARDOWN(TRAP)
+{
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ syscall(__NR_getpid);
+}
+
+/* Ensure that SIGSYS overrides SIG_IGN */
+TEST_F_SIGNAL(TRAP, ign, SIGSYS)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ signal(SIGSYS, SIG_IGN);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ syscall(__NR_getpid);
+}
+
+static struct siginfo TRAP_info;
+static volatile int TRAP_nr;
+static void TRAP_action(int nr, siginfo_t *info, void *void_context)
+{
+ memcpy(&TRAP_info, info, sizeof(TRAP_info));
+ TRAP_nr = nr;
+}
+
+TEST_F(TRAP, handler)
+{
+ int ret, test;
+ struct sigaction act;
+ sigset_t mask;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGSYS);
+
+ act.sa_sigaction = &TRAP_action;
+ act.sa_flags = SA_SIGINFO;
+ ret = sigaction(SIGSYS, &act, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("sigaction failed");
+ }
+ ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("sigprocmask failed");
+ }
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ TRAP_nr = 0;
+ memset(&TRAP_info, 0, sizeof(TRAP_info));
+ /* Expect the registers to be rolled back. (nr = error) may vary
+ * based on arch. */
+ ret = syscall(__NR_getpid);
+ /* Silence gcc warning about volatile. */
+ test = TRAP_nr;
+ EXPECT_EQ(SIGSYS, test);
+ struct local_sigsys {
+ void *_call_addr; /* calling user insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } *sigsys = (struct local_sigsys *)
+#ifdef si_syscall
+ &(TRAP_info.si_call_addr);
+#else
+ &TRAP_info.si_pid;
+#endif
+ EXPECT_EQ(__NR_getpid, sigsys->_syscall);
+ /* Make sure arch is non-zero. */
+ EXPECT_NE(0, sigsys->_arch);
+ EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
+}
+
+FIXTURE_DATA(precedence) {
+ struct sock_fprog allow;
+ struct sock_fprog trace;
+ struct sock_fprog error;
+ struct sock_fprog trap;
+ struct sock_fprog kill;
+};
+
+FIXTURE_SETUP(precedence)
+{
+ struct sock_filter allow_insns[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter trace_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
+ };
+ struct sock_filter error_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
+ };
+ struct sock_filter trap_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+ };
+ struct sock_filter kill_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ };
+
+ memset(self, 0, sizeof(*self));
+#define FILTER_ALLOC(_x) \
+ self->_x.filter = malloc(sizeof(_x##_insns)); \
+ ASSERT_NE(NULL, self->_x.filter); \
+ memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
+ self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
+ FILTER_ALLOC(allow);
+ FILTER_ALLOC(trace);
+ FILTER_ALLOC(error);
+ FILTER_ALLOC(trap);
+ FILTER_ALLOC(kill);
+}
+
+FIXTURE_TEARDOWN(precedence)
+{
+#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
+ FILTER_FREE(allow);
+ FILTER_FREE(trace);
+ FILTER_FREE(error);
+ FILTER_FREE(trap);
+ FILTER_FREE(kill);
+}
+
+TEST_F(precedence, allow_ok)
+{
+ pid_t parent, res = 0;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ res = syscall(__NR_getppid);
+ EXPECT_EQ(parent, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
+{
+ pid_t parent, res = 0;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ res = syscall(__NR_getppid);
+ EXPECT_EQ(parent, res);
+ /* getpid() should never return. */
+ res = syscall(__NR_getpid);
+ EXPECT_EQ(0, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third_in_any_order)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* No ptracer */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth_in_any_order)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* No ptracer */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+#ifndef PTRACE_O_TRACESECCOMP
+#define PTRACE_O_TRACESECCOMP 0x00000080
+#endif
+
+/* Catch the Ubuntu 12.04 value error. */
+#if PTRACE_EVENT_SECCOMP != 7
+#undef PTRACE_EVENT_SECCOMP
+#endif
+
+#ifndef PTRACE_EVENT_SECCOMP
+#define PTRACE_EVENT_SECCOMP 7
+#endif
+
+#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
+bool tracer_running;
+void tracer_stop(int sig)
+{
+ tracer_running = false;
+}
+
+typedef void tracer_func_t(struct __test_metadata *_metadata,
+ pid_t tracee, int status, void *args);
+
+void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
+ tracer_func_t tracer_func, void *args)
+{
+ int ret = -1;
+ struct sigaction action = {
+ .sa_handler = tracer_stop,
+ };
+
+ /* Allow external shutdown. */
+ tracer_running = true;
+ ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
+
+ errno = 0;
+ while (ret == -1 && errno != EINVAL)
+ ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
+ ASSERT_EQ(0, ret) {
+ kill(tracee, SIGKILL);
+ }
+ /* Wait for attach stop */
+ wait(NULL);
+
+ ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
+ kill(tracee, SIGKILL);
+ }
+ ptrace(PTRACE_CONT, tracee, NULL, 0);
+
+ /* Unblock the tracee */
+ ASSERT_EQ(1, write(fd, "A", 1));
+ ASSERT_EQ(0, close(fd));
+
+ /* Run until we're shut down. Must assert to stop execution. */
+ while (tracer_running) {
+ int status;
+
+ if (wait(&status) != tracee)
+ continue;
+ if (WIFSIGNALED(status) || WIFEXITED(status))
+ /* Child is dead. Time to go. */
+ return;
+
+ /* Make sure this is a seccomp event. */
+ ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
+
+ tracer_func(_metadata, tracee, status, args);
+
+ ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
+ ASSERT_EQ(0, ret);
+ }
+ /* Directly report the status of our test harness results. */
+ syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/* Common tracer setup/teardown functions. */
+void cont_handler(int num)
+{ }
+pid_t setup_trace_fixture(struct __test_metadata *_metadata,
+ tracer_func_t func, void *args)
+{
+ char sync;
+ int pipefd[2];
+ pid_t tracer_pid;
+ pid_t tracee = getpid();
+
+ /* Setup a pipe for clean synchronization. */
+ ASSERT_EQ(0, pipe(pipefd));
+
+ /* Fork a child which we'll promote to tracer */
+ tracer_pid = fork();
+ ASSERT_LE(0, tracer_pid);
+ signal(SIGALRM, cont_handler);
+ if (tracer_pid == 0) {
+ close(pipefd[0]);
+ tracer(_metadata, pipefd[1], tracee, func, args);
+ syscall(__NR_exit, 0);
+ }
+ close(pipefd[1]);
+ prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
+ read(pipefd[0], &sync, 1);
+ close(pipefd[0]);
+
+ return tracer_pid;
+}
+void teardown_trace_fixture(struct __test_metadata *_metadata,
+ pid_t tracer)
+{
+ if (tracer) {
+ int status;
+ /*
+ * Extract the exit code from the other process and
+ * adopt it for ourselves in case its asserts failed.
+ */
+ ASSERT_EQ(0, kill(tracer, SIGUSR1));
+ ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
+ if (WEXITSTATUS(status))
+ _metadata->passed = 0;
+ }
+}
+
+/* "poke" tracer arguments and function. */
+struct tracer_args_poke_t {
+ unsigned long poke_addr;
+};
+
+void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
+ void *args)
+{
+ int ret;
+ unsigned long msg;
+ struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
+
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+ /* If this fails, don't try to recover. */
+ ASSERT_EQ(0x1001, msg) {
+ kill(tracee, SIGKILL);
+ }
+ /*
+ * Poke in the message.
+ * Registers are not touched to try to keep this relatively arch
+ * agnostic.
+ */
+ ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
+ EXPECT_EQ(0, ret);
+}
+
+FIXTURE_DATA(TRACE_poke) {
+ struct sock_fprog prog;
+ pid_t tracer;
+ long poked;
+ struct tracer_args_poke_t tracer_args;
+};
+
+FIXTURE_SETUP(TRACE_poke)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ self->poked = 0;
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+ /* Set up tracer args. */
+ self->tracer_args.poke_addr = (unsigned long)&self->poked;
+
+ /* Launch tracer. */
+ self->tracer = setup_trace_fixture(_metadata, tracer_poke,
+ &self->tracer_args);
+}
+
+FIXTURE_TEARDOWN(TRACE_poke)
+{
+ teardown_trace_fixture(_metadata, self->tracer);
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F(TRACE_poke, read_has_side_effects)
+{
+ ssize_t ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(0, self->poked);
+ ret = read(-1, NULL, 0);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(0x1001, self->poked);
+}
+
+TEST_F(TRACE_poke, getpid_runs_normally)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(0, self->poked);
+ EXPECT_NE(0, syscall(__NR_getpid));
+ EXPECT_EQ(0, self->poked);
+}
+
+#if defined(__x86_64__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM orig_rax
+# define SYSCALL_RET rax
+#elif defined(__i386__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM orig_eax
+# define SYSCALL_RET eax
+#elif defined(__arm__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM ARM_r7
+# define SYSCALL_RET ARM_r0
+#elif defined(__aarch64__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM regs[8]
+# define SYSCALL_RET regs[0]
+#elif defined(__powerpc__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM gpr[0]
+# define SYSCALL_RET gpr[3]
+#else
+# error "Do not know how to find your architecture's registers and syscalls"
+#endif
+
+/* Architecture-specific syscall fetching routine. */
+int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
+{
+ struct iovec iov;
+ ARCH_REGS regs;
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
+ TH_LOG("PTRACE_GETREGSET failed");
+ return -1;
+ }
+
+ return regs.SYSCALL_NUM;
+}
+
+/* Architecture-specific syscall changing routine. */
+void change_syscall(struct __test_metadata *_metadata,
+ pid_t tracee, int syscall)
+{
+ struct iovec iov;
+ int ret;
+ ARCH_REGS regs;
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
+ EXPECT_EQ(0, ret);
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__powerpc__)
+ {
+ regs.SYSCALL_NUM = syscall;
+ }
+
+#elif defined(__arm__)
+# ifndef PTRACE_SET_SYSCALL
+# define PTRACE_SET_SYSCALL 23
+# endif
+ {
+ ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
+ EXPECT_EQ(0, ret);
+ }
+
+#else
+ ASSERT_EQ(1, 0) {
+ TH_LOG("How is the syscall changed on this architecture?");
+ }
+#endif
+
+ /* If syscall is skipped, change return value. */
+ if (syscall == -1)
+ regs.SYSCALL_RET = 1;
+
+ ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
+ EXPECT_EQ(0, ret);
+}
+
+void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
+ int status, void *args)
+{
+ int ret;
+ unsigned long msg;
+
+ /* Make sure we got the right message. */
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+
+ switch (msg) {
+ case 0x1002:
+ /* change getpid to getppid. */
+ change_syscall(_metadata, tracee, __NR_getppid);
+ break;
+ case 0x1003:
+ /* skip gettid. */
+ change_syscall(_metadata, tracee, -1);
+ break;
+ case 0x1004:
+ /* do nothing (allow getppid) */
+ break;
+ default:
+ EXPECT_EQ(0, msg) {
+ TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
+ kill(tracee, SIGKILL);
+ }
+ }
+
+}
+
+FIXTURE_DATA(TRACE_syscall) {
+ struct sock_fprog prog;
+ pid_t tracer, mytid, mypid, parent;
+};
+
+FIXTURE_SETUP(TRACE_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+ /* Prepare some testable syscall results. */
+ self->mytid = syscall(__NR_gettid);
+ ASSERT_GT(self->mytid, 0);
+ ASSERT_NE(self->mytid, 1) {
+ TH_LOG("Running this test as init is not supported. :)");
+ }
+
+ self->mypid = getpid();
+ ASSERT_GT(self->mypid, 0);
+ ASSERT_EQ(self->mytid, self->mypid);
+
+ self->parent = getppid();
+ ASSERT_GT(self->parent, 0);
+ ASSERT_NE(self->parent, self->mypid);
+
+ /* Launch tracer. */
+ self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
+}
+
+FIXTURE_TEARDOWN(TRACE_syscall)
+{
+ teardown_trace_fixture(_metadata, self->tracer);
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F(TRACE_syscall, syscall_allowed)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* getppid works as expected (no changes). */
+ EXPECT_EQ(self->parent, syscall(__NR_getppid));
+ EXPECT_NE(self->mypid, syscall(__NR_getppid));
+}
+
+TEST_F(TRACE_syscall, syscall_redirected)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* getpid has been redirected to getppid as expected. */
+ EXPECT_EQ(self->parent, syscall(__NR_getpid));
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, syscall_dropped)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* gettid has been skipped and an altered return value stored. */
+ EXPECT_EQ(1, syscall(__NR_gettid));
+ EXPECT_NE(self->mytid, syscall(__NR_gettid));
+}
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+# define __NR_seccomp 354
+# elif defined(__x86_64__)
+# define __NR_seccomp 317
+# elif defined(__arm__)
+# define __NR_seccomp 383
+# elif defined(__aarch64__)
+# define __NR_seccomp 277
+# elif defined(__powerpc__)
+# define __NR_seccomp 358
+# else
+# warning "seccomp syscall number unknown for this architecture"
+# define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_FLAG_FILTER_TSYNC
+#define SECCOMP_FLAG_FILTER_TSYNC 1
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
+{
+ errno = 0;
+ return syscall(__NR_seccomp, op, flags, filter);
+}
+#endif
+
+TEST(seccomp_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Reject insane operation. */
+ ret = seccomp(-1, 0, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject crazy op value!");
+ }
+
+ /* Reject strict with flags or pointer. */
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject mode strict with flags!");
+ }
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject mode strict with uargs!");
+ }
+
+ /* Reject insane args for filter. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject crazy filter flags!");
+ }
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Did not reject NULL filter!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ EXPECT_EQ(0, errno) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
+ strerror(errno));
+ }
+}
+
+TEST(seccomp_syscall_mode_lock)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Could not install filter!");
+ }
+
+ /* Make sure neither entry point will switch to strict. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Switched to mode strict!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Switched to mode strict!");
+ }
+}
+
+TEST(TSYNC_first)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &prog);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Could not install initial filter with TSYNC!");
+ }
+}
+
+#define TSYNC_SIBLINGS 2
+struct tsync_sibling {
+ pthread_t tid;
+ pid_t system_tid;
+ sem_t *started;
+ pthread_cond_t *cond;
+ pthread_mutex_t *mutex;
+ int diverge;
+ int num_waits;
+ struct sock_fprog *prog;
+ struct __test_metadata *metadata;
+};
+
+FIXTURE_DATA(TSYNC) {
+ struct sock_fprog root_prog, apply_prog;
+ struct tsync_sibling sibling[TSYNC_SIBLINGS];
+ sem_t started;
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ int sibling_count;
+};
+
+FIXTURE_SETUP(TSYNC)
+{
+ struct sock_filter root_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter apply_filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->root_prog, 0, sizeof(self->root_prog));
+ memset(&self->apply_prog, 0, sizeof(self->apply_prog));
+ memset(&self->sibling, 0, sizeof(self->sibling));
+ self->root_prog.filter = malloc(sizeof(root_filter));
+ ASSERT_NE(NULL, self->root_prog.filter);
+ memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
+ self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
+
+ self->apply_prog.filter = malloc(sizeof(apply_filter));
+ ASSERT_NE(NULL, self->apply_prog.filter);
+ memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
+ self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
+
+ self->sibling_count = 0;
+ pthread_mutex_init(&self->mutex, NULL);
+ pthread_cond_init(&self->cond, NULL);
+ sem_init(&self->started, 0, 0);
+ self->sibling[0].tid = 0;
+ self->sibling[0].cond = &self->cond;
+ self->sibling[0].started = &self->started;
+ self->sibling[0].mutex = &self->mutex;
+ self->sibling[0].diverge = 0;
+ self->sibling[0].num_waits = 1;
+ self->sibling[0].prog = &self->root_prog;
+ self->sibling[0].metadata = _metadata;
+ self->sibling[1].tid = 0;
+ self->sibling[1].cond = &self->cond;
+ self->sibling[1].started = &self->started;
+ self->sibling[1].mutex = &self->mutex;
+ self->sibling[1].diverge = 0;
+ self->sibling[1].prog = &self->root_prog;
+ self->sibling[1].num_waits = 1;
+ self->sibling[1].metadata = _metadata;
+}
+
+FIXTURE_TEARDOWN(TSYNC)
+{
+ int sib = 0;
+
+ if (self->root_prog.filter)
+ free(self->root_prog.filter);
+ if (self->apply_prog.filter)
+ free(self->apply_prog.filter);
+
+ for ( ; sib < self->sibling_count; ++sib) {
+ struct tsync_sibling *s = &self->sibling[sib];
+ void *status;
+
+ if (!s->tid)
+ continue;
+ if (pthread_kill(s->tid, 0)) {
+ pthread_cancel(s->tid);
+ pthread_join(s->tid, &status);
+ }
+ }
+ pthread_mutex_destroy(&self->mutex);
+ pthread_cond_destroy(&self->cond);
+ sem_destroy(&self->started);
+}
+
+void *tsync_sibling(void *data)
+{
+ long ret = 0;
+ struct tsync_sibling *me = data;
+
+ me->system_tid = syscall(__NR_gettid);
+
+ pthread_mutex_lock(me->mutex);
+ if (me->diverge) {
+ /* Just re-apply the root prog to fork the tree */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+ me->prog, 0, 0);
+ }
+ sem_post(me->started);
+ /* Return outside of started so parent notices failures. */
+ if (ret) {
+ pthread_mutex_unlock(me->mutex);
+ return (void *)SIBLING_EXIT_FAILURE;
+ }
+ do {
+ pthread_cond_wait(me->cond, me->mutex);
+ me->num_waits = me->num_waits - 1;
+ } while (me->num_waits);
+ pthread_mutex_unlock(me->mutex);
+
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ if (!ret)
+ return (void *)SIBLING_EXIT_NEWPRIVS;
+ read(0, NULL, 0);
+ return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+void tsync_start_sibling(struct tsync_sibling *sibling)
+{
+ pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
+}
+
+TEST_F(TSYNC, siblings_fail_prctl)
+{
+ long ret;
+ void *status;
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check prctl failure detection by requesting sib 0 diverge. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("setting filter failed");
+ }
+
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ /* Signal the threads to clean up*/
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure diverging sibling failed to call prctl. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_ancestor)
+{
+ long ret;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+ }
+ /* Tell the siblings to test the policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ /* Ensure they are both killed and don't exit cleanly. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_sibling_want_nnp)
+{
+ void *status;
+
+ /* start siblings before any prctl() operations */
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ /* Tell the siblings to test no policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both upset about lacking nnp. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_no_filter)
+{
+ long ret;
+ void *status;
+
+ /* start siblings before any prctl() operations */
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+ }
+
+ /* Tell the siblings to test the policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both killed and don't exit cleanly. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_one_divergence)
+{
+ long ret;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(self->sibling[0].system_tid, ret) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+
+ /* Wake the threads */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both unkilled. */
+ pthread_join(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ pthread_join(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_not_under_filter)
+{
+ long ret, sib;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /*
+ * Sibling 0 will have its own seccomp policy
+ * and Sibling 1 will not be under seccomp at
+ * all. Sibling 1 will enter seccomp and 0
+ * will cause failure.
+ */
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(ret, self->sibling[0].system_tid) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+ sib = 1;
+ if (ret == self->sibling[0].system_tid)
+ sib = 0;
+
+ pthread_mutex_lock(&self->mutex);
+
+ /* Increment the other siblings num_waits so we can clean up
+ * the one we just saw.
+ */
+ self->sibling[!sib].num_waits += 1;
+
+ /* Signal the thread to clean up*/
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ pthread_join(self->sibling[sib].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ /* Poll for actual task death. pthread_join doesn't guarantee it. */
+ while (!kill(self->sibling[sib].system_tid, 0))
+ sleep(0.1);
+ /* Switch to the remaining sibling */
+ sib = !sib;
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Expected the remaining sibling to sync");
+ };
+
+ pthread_mutex_lock(&self->mutex);
+
+ /* If remaining sibling didn't have a chance to wake up during
+ * the first broadcast, manually reduce the num_waits now.
+ */
+ if (self->sibling[sib].num_waits > 1)
+ self->sibling[sib].num_waits = 1;
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ pthread_join(self->sibling[sib].tid, &status);
+ EXPECT_EQ(0, (long)status);
+ /* Poll for actual task death. pthread_join doesn't guarantee it. */
+ while (!kill(self->sibling[sib].system_tid, 0))
+ sleep(0.1);
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret); /* just us chickens */
+}
+
+/* Make sure restarted syscalls are seen directly as "restart_syscall". */
+TEST(syscall_restart)
+{
+ long ret;
+ unsigned long msg;
+ pid_t child_pid;
+ int pipefd[2];
+ int status;
+ siginfo_t info = { };
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+
+#ifdef __NR_sigreturn
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
+#endif
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
+
+ /* Allow __NR_write for easy logging. */
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, pipe(pipefd));
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ /* Child uses EXPECT not ASSERT to deliver status correctly. */
+ char buf = ' ';
+ struct pollfd fds = {
+ .fd = pipefd[0],
+ .events = POLLIN,
+ };
+
+ /* Attach parent as tracer and stop. */
+ EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
+ EXPECT_EQ(0, raise(SIGSTOP));
+
+ EXPECT_EQ(0, close(pipefd[1]));
+
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Failed to install filter!");
+ }
+
+ EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+ TH_LOG("Failed to read() sync from parent");
+ }
+ EXPECT_EQ('.', buf) {
+ TH_LOG("Failed to get sync data from read()");
+ }
+
+ /* Start poll to be interrupted. */
+ errno = 0;
+ EXPECT_EQ(1, poll(&fds, 1, -1)) {
+ TH_LOG("Call to poll() failed (errno %d)", errno);
+ }
+
+ /* Read final sync from parent. */
+ EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+ TH_LOG("Failed final read() from parent");
+ }
+ EXPECT_EQ('!', buf) {
+ TH_LOG("Failed to get final data from read()");
+ }
+
+ /* Directly report the status of our test harness results. */
+ syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
+ : EXIT_FAILURE);
+ }
+ EXPECT_EQ(0, close(pipefd[0]));
+
+ /* Attach to child, setup options, and release. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
+ PTRACE_O_TRACESECCOMP));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(1, write(pipefd[1], ".", 1));
+
+ /* Wait for poll() to start. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+ ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+ ASSERT_EQ(0x100, msg);
+ EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid));
+
+ /* Might as well check siginfo for sanity while we're here. */
+ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+ ASSERT_EQ(SIGTRAP, info.si_signo);
+ ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
+ EXPECT_EQ(0, info.si_errno);
+ EXPECT_EQ(getuid(), info.si_uid);
+ /* Verify signal delivery came from child (seccomp-triggered). */
+ EXPECT_EQ(child_pid, info.si_pid);
+
+ /* Interrupt poll with SIGSTOP (which we'll need to handle). */
+ ASSERT_EQ(0, kill(child_pid, SIGSTOP));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
+ /* Verify signal delivery came from parent now. */
+ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+ EXPECT_EQ(getpid(), info.si_pid);
+
+ /* Restart poll with SIGCONT, which triggers restart_syscall. */
+ ASSERT_EQ(0, kill(child_pid, SIGCONT));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGCONT, WSTOPSIG(status));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+
+ /* Wait for restart_syscall() to start. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+ ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+ ASSERT_EQ(0x200, msg);
+ ret = get_syscall(_metadata, child_pid);
+#if defined(__arm__)
+ /* FIXME: ARM does not expose true syscall in registers. */
+ EXPECT_EQ(__NR_poll, ret);
+#else
+ EXPECT_EQ(__NR_restart_syscall, ret);
+#endif
+
+ /* Write again to end poll. */
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(1, write(pipefd[1], "!", 1));
+ EXPECT_EQ(0, close(pipefd[1]));
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ if (WIFSIGNALED(status) || WEXITSTATUS(status))
+ _metadata->passed = 0;
+}
+
+/*
+ * TODO:
+ * - add microbenchmarks
+ * - expand NNP testing
+ * - better arch-specific TRACE and TRAP handlers.
+ * - endianness checking when appropriate
+ * - 64-bit arg prodding
+ * - arch value testing (x86 modes especially)
+ * - ...
+ */
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/seccomp/test_harness.h b/tools/testing/selftests/seccomp/test_harness.h
new file mode 100644
index 0000000..977a6af
--- /dev/null
+++ b/tools/testing/selftests/seccomp/test_harness.h
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * test_harness.h: simple C unit test helper.
+ *
+ * Usage:
+ * #include "test_harness.h"
+ * TEST(standalone_test) {
+ * do_some_stuff;
+ * EXPECT_GT(10, stuff) {
+ * stuff_state_t state;
+ * enumerate_stuff_state(&state);
+ * TH_LOG("expectation failed with state: %s", state.msg);
+ * }
+ * more_stuff;
+ * ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!");
+ * last_stuff;
+ * EXPECT_EQ(0, last_stuff);
+ * }
+ *
+ * FIXTURE(my_fixture) {
+ * mytype_t *data;
+ * int awesomeness_level;
+ * };
+ * FIXTURE_SETUP(my_fixture) {
+ * self->data = mytype_new();
+ * ASSERT_NE(NULL, self->data);
+ * }
+ * FIXTURE_TEARDOWN(my_fixture) {
+ * mytype_free(self->data);
+ * }
+ * TEST_F(my_fixture, data_is_good) {
+ * EXPECT_EQ(1, is_my_data_good(self->data));
+ * }
+ *
+ * TEST_HARNESS_MAIN
+ *
+ * API inspired by code.google.com/p/googletest
+ */
+#ifndef TEST_HARNESS_H_
+#define TEST_HARNESS_H_
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* All exported functionality should be declared through this macro. */
+#define TEST_API(x) _##x
+
+/*
+ * Exported APIs
+ */
+
+/* TEST(name) { implementation }
+ * Defines a test by name.
+ * Names must be unique and tests must not be run in parallel. The
+ * implementation containing block is a function and scoping should be treated
+ * as such. Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST TEST_API(TEST)
+
+/* TEST_SIGNAL(name, signal) { implementation }
+ * Defines a test by name and the expected term signal.
+ * Names must be unique and tests must not be run in parallel. The
+ * implementation containing block is a function and scoping should be treated
+ * as such. Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST_SIGNAL TEST_API(TEST_SIGNAL)
+
+/* FIXTURE(datatype name) {
+ * type property1;
+ * ...
+ * };
+ * Defines the data provided to TEST_F()-defined tests as |self|. It should be
+ * populated and cleaned up using FIXTURE_SETUP and FIXTURE_TEARDOWN.
+ */
+#define FIXTURE TEST_API(FIXTURE)
+
+/* FIXTURE_DATA(datatype name)
+ * This call may be used when the type of the fixture data
+ * is needed. In general, this should not be needed unless
+ * the |self| is being passed to a helper directly.
+ */
+#define FIXTURE_DATA TEST_API(FIXTURE_DATA)
+
+/* FIXTURE_SETUP(fixture name) { implementation }
+ * Populates the required "setup" function for a fixture. An instance of the
+ * datatype defined with _FIXTURE_DATA will be exposed as |self| for the
+ * implementation.
+ *
+ * ASSERT_* are valid for use in this context and will prempt the execution
+ * of any dependent fixture tests.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_SETUP TEST_API(FIXTURE_SETUP)
+
+/* FIXTURE_TEARDOWN(fixture name) { implementation }
+ * Populates the required "teardown" function for a fixture. An instance of the
+ * datatype defined with _FIXTURE_DATA will be exposed as |self| for the
+ * implementation to clean up.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_TEARDOWN TEST_API(FIXTURE_TEARDOWN)
+
+/* TEST_F(fixture, name) { implementation }
+ * Defines a test that depends on a fixture (e.g., is part of a test case).
+ * Very similar to TEST() except that |self| is the setup instance of fixture's
+ * datatype exposed for use by the implementation.
+ */
+#define TEST_F TEST_API(TEST_F)
+
+#define TEST_F_SIGNAL TEST_API(TEST_F_SIGNAL)
+
+/* Use once to append a main() to the test file. E.g.,
+ * TEST_HARNESS_MAIN
+ */
+#define TEST_HARNESS_MAIN TEST_API(TEST_HARNESS_MAIN)
+
+/*
+ * Operators for use in TEST and TEST_F.
+ * ASSERT_* calls will stop test execution immediately.
+ * EXPECT_* calls will emit a failure warning, note it, and continue.
+ */
+
+/* ASSERT_EQ(expected, measured): expected == measured */
+#define ASSERT_EQ TEST_API(ASSERT_EQ)
+/* ASSERT_NE(expected, measured): expected != measured */
+#define ASSERT_NE TEST_API(ASSERT_NE)
+/* ASSERT_LT(expected, measured): expected < measured */
+#define ASSERT_LT TEST_API(ASSERT_LT)
+/* ASSERT_LE(expected, measured): expected <= measured */
+#define ASSERT_LE TEST_API(ASSERT_LE)
+/* ASSERT_GT(expected, measured): expected > measured */
+#define ASSERT_GT TEST_API(ASSERT_GT)
+/* ASSERT_GE(expected, measured): expected >= measured */
+#define ASSERT_GE TEST_API(ASSERT_GE)
+/* ASSERT_NULL(measured): NULL == measured */
+#define ASSERT_NULL TEST_API(ASSERT_NULL)
+/* ASSERT_TRUE(measured): measured != 0 */
+#define ASSERT_TRUE TEST_API(ASSERT_TRUE)
+/* ASSERT_FALSE(measured): measured == 0 */
+#define ASSERT_FALSE TEST_API(ASSERT_FALSE)
+/* ASSERT_STREQ(expected, measured): !strcmp(expected, measured) */
+#define ASSERT_STREQ TEST_API(ASSERT_STREQ)
+/* ASSERT_STRNE(expected, measured): strcmp(expected, measured) */
+#define ASSERT_STRNE TEST_API(ASSERT_STRNE)
+/* EXPECT_EQ(expected, measured): expected == measured */
+#define EXPECT_EQ TEST_API(EXPECT_EQ)
+/* EXPECT_NE(expected, measured): expected != measured */
+#define EXPECT_NE TEST_API(EXPECT_NE)
+/* EXPECT_LT(expected, measured): expected < measured */
+#define EXPECT_LT TEST_API(EXPECT_LT)
+/* EXPECT_LE(expected, measured): expected <= measured */
+#define EXPECT_LE TEST_API(EXPECT_LE)
+/* EXPECT_GT(expected, measured): expected > measured */
+#define EXPECT_GT TEST_API(EXPECT_GT)
+/* EXPECT_GE(expected, measured): expected >= measured */
+#define EXPECT_GE TEST_API(EXPECT_GE)
+/* EXPECT_NULL(measured): NULL == measured */
+#define EXPECT_NULL TEST_API(EXPECT_NULL)
+/* EXPECT_TRUE(measured): 0 != measured */
+#define EXPECT_TRUE TEST_API(EXPECT_TRUE)
+/* EXPECT_FALSE(measured): 0 == measured */
+#define EXPECT_FALSE TEST_API(EXPECT_FALSE)
+/* EXPECT_STREQ(expected, measured): !strcmp(expected, measured) */
+#define EXPECT_STREQ TEST_API(EXPECT_STREQ)
+/* EXPECT_STRNE(expected, measured): strcmp(expected, measured) */
+#define EXPECT_STRNE TEST_API(EXPECT_STRNE)
+
+/* TH_LOG(format, ...)
+ * Optional debug logging function available for use in tests.
+ * Logging may be enabled or disabled by defining TH_LOG_ENABLED.
+ * E.g., #define TH_LOG_ENABLED 1
+ * If no definition is provided, logging is enabled by default.
+ */
+#define TH_LOG TEST_API(TH_LOG)
+
+/*
+ * Internal implementation.
+ *
+ */
+
+/* Utilities exposed to the test definitions */
+#ifndef TH_LOG_STREAM
+# define TH_LOG_STREAM stderr
+#endif
+
+#ifndef TH_LOG_ENABLED
+# define TH_LOG_ENABLED 1
+#endif
+
+#define _TH_LOG(fmt, ...) do { \
+ if (TH_LOG_ENABLED) \
+ __TH_LOG(fmt, ##__VA_ARGS__); \
+} while (0)
+
+/* Unconditional logger for internal use. */
+#define __TH_LOG(fmt, ...) \
+ fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
+
+/* Defines the test function and creates the registration stub. */
+#define _TEST(test_name) __TEST_IMPL(test_name, -1)
+
+#define _TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal)
+
+#define __TEST_IMPL(test_name, _signal) \
+ static void test_name(struct __test_metadata *_metadata); \
+ static struct __test_metadata _##test_name##_object = \
+ { name: "global." #test_name, \
+ fn: &test_name, termsig: _signal }; \
+ static void __attribute__((constructor)) _register_##test_name(void) \
+ { \
+ __register_test(&_##test_name##_object); \
+ } \
+ static void test_name( \
+ struct __test_metadata __attribute__((unused)) *_metadata)
+
+/* Wraps the struct name so we have one less argument to pass around. */
+#define _FIXTURE_DATA(fixture_name) struct _test_data_##fixture_name
+
+/* Called once per fixture to setup the data and register. */
+#define _FIXTURE(fixture_name) \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_data(void) \
+ { \
+ __fixture_count++; \
+ } \
+ _FIXTURE_DATA(fixture_name)
+
+/* Prepares the setup function for the fixture. |_metadata| is included
+ * so that ASSERT_* work as a convenience.
+ */
+#define _FIXTURE_SETUP(fixture_name) \
+ void fixture_name##_setup( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+#define _FIXTURE_TEARDOWN(fixture_name) \
+ void fixture_name##_teardown( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/* Emits test registration and helpers for fixture-based test
+ * cases.
+ * TODO(wad) register fixtures on dedicated test lists.
+ */
+#define _TEST_F(fixture_name, test_name) \
+ __TEST_F_IMPL(fixture_name, test_name, -1)
+
+#define _TEST_F_SIGNAL(fixture_name, test_name, signal) \
+ __TEST_F_IMPL(fixture_name, test_name, signal)
+
+#define __TEST_F_IMPL(fixture_name, test_name, signal) \
+ static void fixture_name##_##test_name( \
+ struct __test_metadata *_metadata, \
+ _FIXTURE_DATA(fixture_name) *self); \
+ static inline void wrapper_##fixture_name##_##test_name( \
+ struct __test_metadata *_metadata) \
+ { \
+ /* fixture data is alloced, setup, and torn down per call. */ \
+ _FIXTURE_DATA(fixture_name) self; \
+ memset(&self, 0, sizeof(_FIXTURE_DATA(fixture_name))); \
+ fixture_name##_setup(_metadata, &self); \
+ /* Let setup failure terminate early. */ \
+ if (!_metadata->passed) \
+ return; \
+ fixture_name##_##test_name(_metadata, &self); \
+ fixture_name##_teardown(_metadata, &self); \
+ } \
+ static struct __test_metadata \
+ _##fixture_name##_##test_name##_object = { \
+ name: #fixture_name "." #test_name, \
+ fn: &wrapper_##fixture_name##_##test_name, \
+ termsig: signal, \
+ }; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##test_name(void) \
+ { \
+ __register_test(&_##fixture_name##_##test_name##_object); \
+ } \
+ static void fixture_name##_##test_name( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/* Exports a simple wrapper to run the test harness. */
+#define _TEST_HARNESS_MAIN \
+ static void __attribute__((constructor)) \
+ __constructor_order_last(void) \
+ { \
+ if (!__constructor_order) \
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \
+ } \
+ int main(int argc, char **argv) { \
+ return test_harness_run(argc, argv); \
+ }
+
+#define _ASSERT_EQ(_expected, _seen) \
+ __EXPECT(_expected, _seen, ==, 1)
+#define _ASSERT_NE(_expected, _seen) \
+ __EXPECT(_expected, _seen, !=, 1)
+#define _ASSERT_LT(_expected, _seen) \
+ __EXPECT(_expected, _seen, <, 1)
+#define _ASSERT_LE(_expected, _seen) \
+ __EXPECT(_expected, _seen, <=, 1)
+#define _ASSERT_GT(_expected, _seen) \
+ __EXPECT(_expected, _seen, >, 1)
+#define _ASSERT_GE(_expected, _seen) \
+ __EXPECT(_expected, _seen, >=, 1)
+#define _ASSERT_NULL(_seen) \
+ __EXPECT(NULL, _seen, ==, 1)
+
+#define _ASSERT_TRUE(_seen) \
+ _ASSERT_NE(0, _seen)
+#define _ASSERT_FALSE(_seen) \
+ _ASSERT_EQ(0, _seen)
+#define _ASSERT_STREQ(_expected, _seen) \
+ __EXPECT_STR(_expected, _seen, ==, 1)
+#define _ASSERT_STRNE(_expected, _seen) \
+ __EXPECT_STR(_expected, _seen, !=, 1)
+
+#define _EXPECT_EQ(_expected, _seen) \
+ __EXPECT(_expected, _seen, ==, 0)
+#define _EXPECT_NE(_expected, _seen) \
+ __EXPECT(_expected, _seen, !=, 0)
+#define _EXPECT_LT(_expected, _seen) \
+ __EXPECT(_expected, _seen, <, 0)
+#define _EXPECT_LE(_expected, _seen) \
+ __EXPECT(_expected, _seen, <=, 0)
+#define _EXPECT_GT(_expected, _seen) \
+ __EXPECT(_expected, _seen, >, 0)
+#define _EXPECT_GE(_expected, _seen) \
+ __EXPECT(_expected, _seen, >=, 0)
+
+#define _EXPECT_NULL(_seen) \
+ __EXPECT(NULL, _seen, ==, 0)
+#define _EXPECT_TRUE(_seen) \
+ _EXPECT_NE(0, _seen)
+#define _EXPECT_FALSE(_seen) \
+ _EXPECT_EQ(0, _seen)
+
+#define _EXPECT_STREQ(_expected, _seen) \
+ __EXPECT_STR(_expected, _seen, ==, 0)
+#define _EXPECT_STRNE(_expected, _seen) \
+ __EXPECT_STR(_expected, _seen, !=, 0)
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is
+ * not thread-safe, but it should be fine in most sane test scenarios.
+ *
+ * Using __bail(), which optionally abort()s, is the easiest way to early
+ * return while still providing an optional block to the API consumer.
+ */
+#define OPTIONAL_HANDLER(_assert) \
+ for (; _metadata->trigger; _metadata->trigger = __bail(_assert))
+
+#define __EXPECT(_expected, _seen, _t, _assert) do { \
+ /* Avoid multiple evaluation of the cases */ \
+ __typeof__(_expected) __exp = (_expected); \
+ __typeof__(_seen) __seen = (_seen); \
+ if (!(__exp _t __seen)) { \
+ unsigned long long __exp_print = 0; \
+ unsigned long long __seen_print = 0; \
+ /* Avoid casting complaints the scariest way we can. */ \
+ memcpy(&__exp_print, &__exp, sizeof(__exp)); \
+ memcpy(&__seen_print, &__seen, sizeof(__seen)); \
+ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+ #_expected, __exp_print, #_t, \
+ #_seen, __seen_print); \
+ _metadata->passed = 0; \
+ /* Ensure the optional handler is triggered */ \
+ _metadata->trigger = 1; \
+ } \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
+ const char *__exp = (_expected); \
+ const char *__seen = (_seen); \
+ if (!(strcmp(__exp, __seen) _t 0)) { \
+ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
+ _metadata->passed = 0; \
+ _metadata->trigger = 1; \
+ } \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+/* Contains all the information for test execution and status checking. */
+struct __test_metadata {
+ const char *name;
+ void (*fn)(struct __test_metadata *);
+ int termsig;
+ int passed;
+ int trigger; /* extra handler after the evaluation */
+ struct __test_metadata *prev, *next;
+};
+
+/* Storage for the (global) tests to be run. */
+static struct __test_metadata *__test_list;
+static unsigned int __test_count;
+static unsigned int __fixture_count;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD 1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+/*
+ * Since constructors are called in reverse order, reverse the test
+ * list so tests are run in source declaration order.
+ * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html
+ * However, it seems not all toolchains do this correctly, so use
+ * __constructor_order to detect which direction is called first
+ * and adjust list building logic to get things running in the right
+ * direction.
+ */
+static inline void __register_test(struct __test_metadata *t)
+{
+ __test_count++;
+ /* Circular linked list where only prev is circular. */
+ if (__test_list == NULL) {
+ __test_list = t;
+ t->next = NULL;
+ t->prev = t;
+ return;
+ }
+ if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
+ t->next = NULL;
+ t->prev = __test_list->prev;
+ t->prev->next = t;
+ __test_list->prev = t;
+ } else {
+ t->next = __test_list;
+ t->next->prev = t;
+ t->prev = t;
+ __test_list = t;
+ }
+}
+
+static inline int __bail(int for_realz)
+{
+ if (for_realz)
+ abort();
+ return 0;
+}
+
+void __run_test(struct __test_metadata *t)
+{
+ pid_t child_pid;
+ int status;
+
+ t->passed = 1;
+ t->trigger = 0;
+ printf("[ RUN ] %s\n", t->name);
+ child_pid = fork();
+ if (child_pid < 0) {
+ printf("ERROR SPAWNING TEST CHILD\n");
+ t->passed = 0;
+ } else if (child_pid == 0) {
+ t->fn(t);
+ _exit(t->passed);
+ } else {
+ /* TODO(wad) add timeout support. */
+ waitpid(child_pid, &status, 0);
+ if (WIFEXITED(status)) {
+ t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0;
+ if (t->termsig != -1) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test exited normally "
+ "instead of by signal (code: %d)\n",
+ t->name,
+ WEXITSTATUS(status));
+ }
+ } else if (WIFSIGNALED(status)) {
+ t->passed = 0;
+ if (WTERMSIG(status) == SIGABRT) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated by assertion\n",
+ t->name);
+ } else if (WTERMSIG(status) == t->termsig) {
+ t->passed = 1;
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated unexpectedly "
+ "by signal %d\n",
+ t->name,
+ WTERMSIG(status));
+ }
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test ended in some other way [%u]\n",
+ t->name,
+ status);
+ }
+ }
+ printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+}
+
+static int test_harness_run(int __attribute__((unused)) argc,
+ char __attribute__((unused)) **argv)
+{
+ struct __test_metadata *t;
+ int ret = 0;
+ unsigned int count = 0;
+ unsigned int pass_count = 0;
+
+ /* TODO(wad) add optional arguments similar to gtest. */
+ printf("[==========] Running %u tests from %u test cases.\n",
+ __test_count, __fixture_count + 1);
+ for (t = __test_list; t; t = t->next) {
+ count++;
+ __run_test(t);
+ if (t->passed)
+ pass_count++;
+ else
+ ret = 1;
+ }
+ printf("[==========] %u / %u tests passed.\n", pass_count, count);
+ printf("[ %s ]\n", (ret ? "FAILED" : "PASSED"));
+ return ret;
+}
+
+static void __attribute__((constructor)) __constructor_order_first(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_FORWARD;
+}
+
+#endif /* TEST_HARNESS_H_ */
diff --git a/tools/testing/selftests/static_keys/Makefile b/tools/testing/selftests/static_keys/Makefile
new file mode 100644
index 0000000..9cdadf3
--- /dev/null
+++ b/tools/testing/selftests/static_keys/Makefile
@@ -0,0 +1,8 @@
+# Makefile for static keys selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_static_keys.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
new file mode 100644
index 0000000..1261e3f
--- /dev/null
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Runs static keys kernel module tests
+
+if /sbin/modprobe -q test_static_key_base; then
+ if /sbin/modprobe -q test_static_keys; then
+ echo "static_key: ok"
+ /sbin/modprobe -q -r test_static_keys
+ /sbin/modprobe -q -r test_static_key_base
+ else
+ echo "static_keys: [FAIL]"
+ /sbin/modprobe -q -r test_static_key_base
+ fi
+else
+ echo "static_key: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
new file mode 100644
index 0000000..ced9981
--- /dev/null
+++ b/tools/testing/selftests/timers/.gitignore
@@ -0,0 +1,18 @@
+alarmtimer-suspend
+change_skew
+clocksource-switch
+inconsistency-check
+leap-a-day
+leapcrash
+mqueue-lat
+nanosleep
+nsleep-lat
+posix_timers
+raw_skew
+rtctest
+set-2038
+set-tai
+set-timer-lat
+skew_consistency
+threadtest
+valid-adjtimex
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index aaffbde..72cacf5 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -57,7 +57,7 @@ static inline int ksft_exit_fail(void)
#define NSEC_PER_SEC 1000000000ULL
-#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4secs */
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */
#define SUSPEND_SECS 15
int alarmcount;
@@ -152,7 +152,11 @@ int main(void)
alarm_clock_id++) {
alarmcount = 0;
- timer_create(alarm_clock_id, &se, &tm1);
+ if (timer_create(alarm_clock_id, &se, &tm1) == -1) {
+ printf("timer_create failled, %s unspported?\n",
+ clockstring(alarm_clock_id));
+ break;
+ }
clock_gettime(alarm_clock_id, &start_time);
printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
@@ -172,7 +176,7 @@ int main(void)
while (alarmcount < 10) {
int ret;
- sleep(1);
+ sleep(3);
ret = system("echo mem > /sys/power/state");
if (ret)
break;
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index b8272e6..fb46ad6 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -44,6 +44,7 @@
#include <time.h>
#include <sys/time.h>
#include <sys/timex.h>
+#include <sys/errno.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
@@ -63,6 +64,9 @@ static inline int ksft_exit_fail(void)
#define NSEC_PER_SEC 1000000000ULL
#define CLOCK_TAI 11
+time_t next_leap;
+int error_found;
+
/* returns 1 if a <= b, 0 otherwise */
static inline int in_order(struct timespec a, struct timespec b)
{
@@ -134,6 +138,35 @@ void handler(int unused)
exit(0);
}
+void sigalarm(int signo)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+
+ if (tx.time.tv_sec < next_leap) {
+ printf("Error: Early timer expiration! (Should be %ld)\n", next_leap);
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+ if (ret != TIME_WAIT) {
+ printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n");
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+}
+
+
/* Test for known hrtimer failure */
void test_hrtimer_failure(void)
{
@@ -144,12 +177,19 @@ void test_hrtimer_failure(void)
clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
clock_gettime(CLOCK_REALTIME, &now);
- if (!in_order(target, now))
+ if (!in_order(target, now)) {
printf("ERROR: hrtimer early expiration failure observed.\n");
+ error_found = 1;
+ }
}
int main(int argc, char **argv)
{
+ timer_t tm1;
+ struct itimerspec its1;
+ struct sigevent se;
+ struct sigaction act;
+ int signum = SIGRTMAX;
int settime = 0;
int tai_time = 0;
int insert = 1;
@@ -191,6 +231,12 @@ int main(int argc, char **argv)
signal(SIGINT, handler);
signal(SIGKILL, handler);
+ /* Set up timer signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
if (iterations < 0)
printf("This runs continuously. Press ctrl-c to stop\n");
else
@@ -201,7 +247,7 @@ int main(int argc, char **argv)
int ret;
struct timespec ts;
struct timex tx;
- time_t now, next_leap;
+ time_t now;
/* Get the current time */
clock_gettime(CLOCK_REALTIME, &ts);
@@ -251,10 +297,27 @@ int main(int argc, char **argv)
printf("Scheduling leap second for %s", ctime(&next_leap));
+ /* Set up timer */
+ printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap));
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = signum;
+ se.sigev_value.sival_int = 0;
+ if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
+ printf("Error: timer_create failed\n");
+ return ksft_exit_fail();
+ }
+ its1.it_value.tv_sec = next_leap;
+ its1.it_value.tv_nsec = 0;
+ its1.it_interval.tv_sec = 0;
+ its1.it_interval.tv_nsec = 0;
+ timer_settime(tm1, TIMER_ABSTIME, &its1, NULL);
+
/* Wake up 3 seconds before leap */
ts.tv_sec = next_leap - 3;
ts.tv_nsec = 0;
+
while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
printf("Something woke us up, returning to sleep\n");
@@ -276,6 +339,7 @@ int main(int argc, char **argv)
while (now < next_leap + 2) {
char buf[26];
struct timespec tai;
+ int ret;
tx.modes = 0;
ret = adjtimex(&tx);
@@ -308,8 +372,13 @@ int main(int argc, char **argv)
/* Note if kernel has known hrtimer failure */
test_hrtimer_failure();
- printf("Leap complete\n\n");
-
+ printf("Leap complete\n");
+ if (error_found) {
+ printf("Errors observed\n");
+ clear_time_state();
+ return ksft_exit_fail();
+ }
+ printf("\n");
if ((iterations != -1) && !(--iterations))
break;
}
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index a5ce953..0d68547 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,12 +1,20 @@
# Makefile for vm selftests
CFLAGS = -Wall
-BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest
+BINARIES = compaction_test
+BINARIES += hugepage-mmap
+BINARIES += hugepage-shm
+BINARIES += hugetlbfstest
+BINARIES += map_hugetlb
+BINARIES += thuge-gen
BINARIES += transhuge-stress
+BINARIES += userfaultfd
all: $(BINARIES)
%: %.c
$(CC) $(CFLAGS) -o $@ $^ -lrt
+userfaultfd: userfaultfd.c
+ $(CC) $(CFLAGS) -O2 -o $@ $^ -lpthread
TEST_PROGS := run_vmtests
TEST_FILES := $(BINARIES)
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
new file mode 100644
index 0000000..932ff57
--- /dev/null
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -0,0 +1,225 @@
+/*
+ *
+ * A test for the patch "Allow compaction of unevictable pages".
+ * With this patch we should be able to allocate at least 1/4
+ * of RAM in huge pages. Without the patch much less is
+ * allocated.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#define MAP_SIZE 1048576
+
+struct map_list {
+ void *map;
+ struct map_list *next;
+};
+
+int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
+{
+ char buffer[256] = {0};
+ char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'";
+ FILE *cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ perror("Failed to read meminfo\n");
+ return -1;
+ }
+
+ pclose(cmdfile);
+
+ *memfree = atoll(buffer);
+ cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'";
+ cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ perror("Failed to read meminfo\n");
+ return -1;
+ }
+
+ pclose(cmdfile);
+ *hugepagesize = atoll(buffer);
+
+ return 0;
+}
+
+int prereq(void)
+{
+ char allowed;
+ int fd;
+
+ fd = open("/proc/sys/vm/compact_unevictable_allowed",
+ O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open\n"
+ "/proc/sys/vm/compact_unevictable_allowed\n");
+ return -1;
+ }
+
+ if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
+ perror("Failed to read from\n"
+ "/proc/sys/vm/compact_unevictable_allowed\n");
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+ if (allowed == '1')
+ return 0;
+
+ return -1;
+}
+
+int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
+{
+ int fd;
+ int compaction_index = 0;
+ char initial_nr_hugepages[10] = {0};
+ char nr_hugepages[10] = {0};
+
+ /* We want to test with 80% of available memory. Else, OOM killer comes
+ in to play */
+ mem_free = mem_free * 0.8;
+
+ fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open /proc/sys/vm/nr_hugepages");
+ return -1;
+ }
+
+ if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
+ perror("Failed to read from /proc/sys/vm/nr_hugepages");
+ goto close_fd;
+ }
+
+ /* Start with the initial condition of 0 huge pages*/
+ if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+ perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ /* Request a large number of huge pages. The Kernel will allocate
+ as much as it can */
+ if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
+ perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+ perror("Failed to read from /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ /* We should have been able to request at least 1/3 rd of the memory in
+ huge pages */
+ compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
+
+ if (compaction_index > 3) {
+ printf("No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+ fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n"
+ "as huge pages\n", compaction_index);
+ goto close_fd;
+ }
+
+ printf("No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+
+ if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages))
+ != strlen(initial_nr_hugepages)) {
+ perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ close(fd);
+ return 0;
+
+ close_fd:
+ close(fd);
+ printf("Not OK. Compaction test failed.");
+ return -1;
+}
+
+
+int main(int argc, char **argv)
+{
+ struct rlimit lim;
+ struct map_list *list, *entry;
+ size_t page_size, i;
+ void *map = NULL;
+ unsigned long mem_free = 0;
+ unsigned long hugepage_size = 0;
+ unsigned long mem_fragmentable = 0;
+
+ if (prereq() != 0) {
+ printf("Either the sysctl compact_unevictable_allowed is not\n"
+ "set to 1 or couldn't read the proc file.\n"
+ "Skipping the test\n");
+ return 0;
+ }
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
+ perror("Failed to set rlimit:\n");
+ return -1;
+ }
+
+ page_size = getpagesize();
+
+ list = NULL;
+
+ if (read_memory_info(&mem_free, &hugepage_size) != 0) {
+ printf("ERROR: Cannot read meminfo\n");
+ return -1;
+ }
+
+ mem_fragmentable = mem_free * 0.8 / 1024;
+
+ while (mem_fragmentable > 0) {
+ map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
+ if (map == MAP_FAILED)
+ break;
+
+ entry = malloc(sizeof(struct map_list));
+ if (!entry) {
+ munmap(map, MAP_SIZE);
+ break;
+ }
+ entry->map = map;
+ entry->next = list;
+ list = entry;
+
+ /* Write something (in this case the address of the map) to
+ * ensure that KSM can't merge the mapped pages
+ */
+ for (i = 0; i < MAP_SIZE; i += page_size)
+ *(unsigned long *)(map + i) = (unsigned long)map + i;
+
+ mem_fragmentable--;
+ }
+
+ for (entry = list; entry != NULL; entry = entry->next) {
+ munmap(entry->map, MAP_SIZE);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+
+ if (check_compaction(mem_free, hugepage_size) == 0)
+ return 0;
+
+ return -1;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index c87b681..831adeb 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -86,8 +86,31 @@ else
echo "[PASS]"
fi
+echo "--------------------"
+echo "running userfaultfd"
+echo "--------------------"
+./userfaultfd 128 32
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
#cleanup
umount $mnt
rm -rf $mnt
echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+
+echo "-----------------------"
+echo "running compaction_test"
+echo "-----------------------"
+./compaction_test
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
exit $exitcode
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
new file mode 100644
index 0000000..76071b1
--- /dev/null
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -0,0 +1,636 @@
+/*
+ * Stress userfaultfd syscall.
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * This test allocates two virtual areas and bounces the physical
+ * memory across the two virtual areas (from area_src to area_dst)
+ * using userfaultfd.
+ *
+ * There are three threads running per CPU:
+ *
+ * 1) one per-CPU thread takes a per-page pthread_mutex in a random
+ * page of the area_dst (while the physical page may still be in
+ * area_src), and increments a per-page counter in the same page,
+ * and checks its value against a verification region.
+ *
+ * 2) another per-CPU thread handles the userfaults generated by
+ * thread 1 above. userfaultfd blocking reads or poll() modes are
+ * exercised interleaved.
+ *
+ * 3) one last per-CPU thread transfers the memory in the background
+ * at maximum bandwidth (if not already transferred by thread
+ * 2). Each cpu thread takes cares of transferring a portion of the
+ * area.
+ *
+ * When all threads of type 3 completed the transfer, one bounce is
+ * complete. area_src and area_dst are then swapped. All threads are
+ * respawned and so the bounce is immediately restarted in the
+ * opposite direction.
+ *
+ * per-CPU threads 1 by triggering userfaults inside
+ * pthread_mutex_lock will also verify the atomicity of the memory
+ * transfer (UFFDIO_COPY).
+ *
+ * The program takes two parameters: the amounts of physical memory in
+ * megabytes (MiB) of the area and the number of bounces to execute.
+ *
+ * # 100MiB 99999 bounces
+ * ./userfaultfd 100 99999
+ *
+ * # 1GiB 99 bounces
+ * ./userfaultfd 1000 99
+ *
+ * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
+ * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <signal.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include "../../../../include/uapi/linux/userfaultfd.h"
+
+#ifdef __x86_64__
+#define __NR_userfaultfd 323
+#elif defined(__i386__)
+#define __NR_userfaultfd 374
+#elif defined(__powewrpc__)
+#define __NR_userfaultfd 364
+#else
+#error "missing __NR_userfaultfd definition"
+#endif
+
+static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+
+#define BOUNCE_RANDOM (1<<0)
+#define BOUNCE_RACINGFAULTS (1<<1)
+#define BOUNCE_VERIFY (1<<2)
+#define BOUNCE_POLL (1<<3)
+static int bounces;
+
+static unsigned long long *count_verify;
+static int uffd, finished, *pipefd;
+static char *area_src, *area_dst;
+static char *zeropage;
+pthread_attr_t attr;
+
+/* pthread_mutex_t starts at page offset 0 */
+#define area_mutex(___area, ___nr) \
+ ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
+/*
+ * count is placed in the page after pthread_mutex_t naturally aligned
+ * to avoid non alignment faults on non-x86 archs.
+ */
+#define area_count(___area, ___nr) \
+ ((volatile unsigned long long *) ((unsigned long) \
+ ((___area) + (___nr)*page_size + \
+ sizeof(pthread_mutex_t) + \
+ sizeof(unsigned long long) - 1) & \
+ ~(unsigned long)(sizeof(unsigned long long) \
+ - 1)))
+
+static int my_bcmp(char *str1, char *str2, size_t n)
+{
+ unsigned long i;
+ for (i = 0; i < n; i++)
+ if (str1[i] != str2[i])
+ return 1;
+ return 0;
+}
+
+static void *locking_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ struct random_data rand;
+ unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
+ int32_t rand_nr;
+ unsigned long long count;
+ char randstate[64];
+ unsigned int seed;
+ time_t start;
+
+ if (bounces & BOUNCE_RANDOM) {
+ seed = (unsigned int) time(NULL) - bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ seed += cpu;
+ bzero(&rand, sizeof(rand));
+ bzero(&randstate, sizeof(randstate));
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand))
+ fprintf(stderr, "srandom_r error\n"), exit(1);
+ } else {
+ page_nr = -bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ page_nr += cpu * nr_pages_per_cpu;
+ }
+
+ while (!finished) {
+ if (bounces & BOUNCE_RANDOM) {
+ if (random_r(&rand, &rand_nr))
+ fprintf(stderr, "random_r 1 error\n"), exit(1);
+ page_nr = rand_nr;
+ if (sizeof(page_nr) > sizeof(rand_nr)) {
+ if (random_r(&rand, &rand_nr))
+ fprintf(stderr, "random_r 2 error\n"), exit(1);
+ page_nr |= ((unsigned long) rand_nr) << 32;
+ }
+ } else
+ page_nr += 1;
+ page_nr %= nr_pages;
+
+ start = time(NULL);
+ if (bounces & BOUNCE_VERIFY) {
+ count = *area_count(area_dst, page_nr);
+ if (!count)
+ fprintf(stderr,
+ "page_nr %lu wrong count %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+
+
+ /*
+ * We can't use bcmp (or memcmp) because that
+ * returns 0 erroneously if the memory is
+ * changing under it (even if the end of the
+ * page is never changing and always
+ * different).
+ */
+#if 1
+ if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
+ page_size))
+ fprintf(stderr,
+ "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+#else
+ unsigned long loops;
+
+ loops = 0;
+ /* uncomment the below line to test with mutex */
+ /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
+ while (!bcmp(area_dst + page_nr * page_size, zeropage,
+ page_size)) {
+ loops += 1;
+ if (loops > 10)
+ break;
+ }
+ /* uncomment below line to test with mutex */
+ /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
+ if (loops) {
+ fprintf(stderr,
+ "page_nr %lu all zero thread %lu %p %lu\n",
+ page_nr, cpu, area_dst + page_nr * page_size,
+ loops);
+ if (loops > 10)
+ exit(1);
+ }
+#endif
+ }
+
+ pthread_mutex_lock(area_mutex(area_dst, page_nr));
+ count = *area_count(area_dst, page_nr);
+ if (count != count_verify[page_nr]) {
+ fprintf(stderr,
+ "page_nr %lu memory corruption %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+ }
+ count++;
+ *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
+ pthread_mutex_unlock(area_mutex(area_dst, page_nr));
+
+ if (time(NULL) - start > 1)
+ fprintf(stderr,
+ "userfault too slow %ld "
+ "possible false positive with overcommit\n",
+ time(NULL) - start);
+ }
+
+ return NULL;
+}
+
+static int copy_page(unsigned long offset)
+{
+ struct uffdio_copy uffdio_copy;
+
+ if (offset >= nr_pages * page_size)
+ fprintf(stderr, "unexpected offset %lu\n",
+ offset), exit(1);
+ uffdio_copy.dst = (unsigned long) area_dst + offset;
+ uffdio_copy.src = (unsigned long) area_src + offset;
+ uffdio_copy.len = page_size;
+ uffdio_copy.mode = 0;
+ uffdio_copy.copy = 0;
+ if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy.copy != -EEXIST)
+ fprintf(stderr, "UFFDIO_COPY error %Ld\n",
+ uffdio_copy.copy), exit(1);
+ } else if (uffdio_copy.copy != page_size) {
+ fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
+ uffdio_copy.copy), exit(1);
+ } else
+ return 1;
+ return 0;
+}
+
+static void *uffd_poll_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ struct pollfd pollfd[2];
+ struct uffd_msg msg;
+ int ret;
+ unsigned long offset;
+ char tmp_chr;
+ unsigned long userfaults = 0;
+
+ pollfd[0].fd = uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = pipefd[cpu*2];
+ pollfd[1].events = POLLIN;
+
+ for (;;) {
+ ret = poll(pollfd, 2, -1);
+ if (!ret)
+ fprintf(stderr, "poll error %d\n", ret), exit(1);
+ if (ret < 0)
+ perror("poll"), exit(1);
+ if (pollfd[1].revents & POLLIN) {
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+ fprintf(stderr, "read pipefd error\n"),
+ exit(1);
+ break;
+ }
+ if (!(pollfd[0].revents & POLLIN))
+ fprintf(stderr, "pollfd[0].revents %d\n",
+ pollfd[0].revents), exit(1);
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ perror("nonblocking read error"), exit(1);
+ }
+ if (msg.event != UFFD_EVENT_PAGEFAULT)
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg.event), exit(1);
+ if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)msg.arg.pagefault.address - area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(offset))
+ userfaults++;
+ }
+ return (void *)userfaults;
+}
+
+pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *uffd_read_thread(void *arg)
+{
+ unsigned long *this_cpu_userfaults;
+ struct uffd_msg msg;
+ unsigned long offset;
+ int ret;
+
+ this_cpu_userfaults = (unsigned long *) arg;
+ *this_cpu_userfaults = 0;
+
+ pthread_mutex_unlock(&uffd_read_mutex);
+ /* from here cancellation is ok */
+
+ for (;;) {
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret != sizeof(msg)) {
+ if (ret < 0)
+ perror("blocking read error"), exit(1);
+ else
+ fprintf(stderr, "short read\n"), exit(1);
+ }
+ if (msg.event != UFFD_EVENT_PAGEFAULT)
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg.event), exit(1);
+ if (bounces & BOUNCE_VERIFY &&
+ msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)msg.arg.pagefault.address - area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(offset))
+ (*this_cpu_userfaults)++;
+ }
+ return (void *)NULL;
+}
+
+static void *background_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ unsigned long page_nr;
+
+ for (page_nr = cpu * nr_pages_per_cpu;
+ page_nr < (cpu+1) * nr_pages_per_cpu;
+ page_nr++)
+ copy_page(page_nr * page_size);
+
+ return NULL;
+}
+
+static int stress(unsigned long *userfaults)
+{
+ unsigned long cpu;
+ pthread_t locking_threads[nr_cpus];
+ pthread_t uffd_threads[nr_cpus];
+ pthread_t background_threads[nr_cpus];
+ void **_userfaults = (void **) userfaults;
+
+ finished = 0;
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pthread_create(&locking_threads[cpu], &attr,
+ locking_thread, (void *)cpu))
+ return 1;
+ if (bounces & BOUNCE_POLL) {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_poll_thread, (void *)cpu))
+ return 1;
+ } else {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_read_thread,
+ &_userfaults[cpu]))
+ return 1;
+ pthread_mutex_lock(&uffd_read_mutex);
+ }
+ if (pthread_create(&background_threads[cpu], &attr,
+ background_thread, (void *)cpu))
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(background_threads[cpu], NULL))
+ return 1;
+
+ /*
+ * Be strict and immediately zap area_src, the whole area has
+ * been transferred already by the background treads. The
+ * area_src could then be faulted in in a racy way by still
+ * running uffdio_threads reading zeropages after we zapped
+ * area_src (but they're guaranteed to get -EEXIST from
+ * UFFDIO_COPY without writing zero pages into area_dst
+ * because the background threads already completed).
+ */
+ if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ return 1;
+ }
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ char c;
+ if (bounces & BOUNCE_POLL) {
+ if (write(pipefd[cpu*2+1], &c, 1) != 1) {
+ fprintf(stderr, "pipefd write error\n");
+ return 1;
+ }
+ if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
+ return 1;
+ } else {
+ if (pthread_cancel(uffd_threads[cpu]))
+ return 1;
+ if (pthread_join(uffd_threads[cpu], NULL))
+ return 1;
+ }
+ }
+
+ finished = 1;
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(locking_threads[cpu], NULL))
+ return 1;
+
+ return 0;
+}
+
+static int userfaultfd_stress(void)
+{
+ void *area;
+ char *tmp_area;
+ unsigned long nr;
+ struct uffdio_register uffdio_register;
+ struct uffdio_api uffdio_api;
+ unsigned long cpu;
+ int uffd_flags;
+ unsigned long userfaults[nr_cpus];
+
+ if (posix_memalign(&area, page_size, nr_pages * page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ area_src = area;
+ if (posix_memalign(&area, page_size, nr_pages * page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ area_dst = area;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ fprintf(stderr,
+ "userfaultfd syscall not available in this kernel\n");
+ return 1;
+ }
+ uffd_flags = fcntl(uffd, F_GETFD, NULL);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
+ fprintf(stderr, "UFFDIO_API\n");
+ return 1;
+ }
+ if (uffdio_api.api != UFFD_API) {
+ fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
+ return 1;
+ }
+
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
+ if (!count_verify) {
+ perror("count_verify");
+ return 1;
+ }
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ *area_mutex(area_src, nr) = (pthread_mutex_t)
+ PTHREAD_MUTEX_INITIALIZER;
+ count_verify[nr] = *area_count(area_src, nr) = 1;
+ }
+
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ if (!pipefd) {
+ perror("pipefd");
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
+ perror("pipe");
+ return 1;
+ }
+ }
+
+ if (posix_memalign(&area, page_size, page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ zeropage = area;
+ bzero(zeropage, page_size);
+
+ pthread_mutex_lock(&uffd_read_mutex);
+
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 16*1024*1024);
+
+ while (bounces--) {
+ unsigned long expected_ioctls;
+
+ printf("bounces: %d, mode:", bounces);
+ if (bounces & BOUNCE_RANDOM)
+ printf(" rnd");
+ if (bounces & BOUNCE_RACINGFAULTS)
+ printf(" racing");
+ if (bounces & BOUNCE_VERIFY)
+ printf(" ver");
+ if (bounces & BOUNCE_POLL)
+ printf(" poll");
+ printf(", ");
+ fflush(stdout);
+
+ if (bounces & BOUNCE_POLL)
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+ else
+ fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
+
+ /* register */
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ return 1;
+ }
+ expected_ioctls = (1 << _UFFDIO_WAKE) |
+ (1 << _UFFDIO_COPY) |
+ (1 << _UFFDIO_ZEROPAGE);
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls) {
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n");
+ return 1;
+ }
+
+ /*
+ * The madvise done previously isn't enough: some
+ * uffd_thread could have read userfaults (one of
+ * those already resolved by the background thread)
+ * and it may be in the process of calling
+ * UFFDIO_COPY. UFFDIO_COPY will read the zapped
+ * area_src and it would map a zero page in it (of
+ * course such a UFFDIO_COPY is perfectly safe as it'd
+ * return -EEXIST). The problem comes at the next
+ * bounce though: that racing UFFDIO_COPY would
+ * generate zeropages in the area_src, so invalidating
+ * the previous MADV_DONTNEED. Without this additional
+ * MADV_DONTNEED those zeropages leftovers in the
+ * area_src would lead to -EEXIST failure during the
+ * next bounce, effectively leaving a zeropage in the
+ * area_dst.
+ *
+ * Try to comment this out madvise to see the memory
+ * corruption being caught pretty quick.
+ *
+ * khugepaged is also inhibited to collapse THP after
+ * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
+ * required to MADV_DONTNEED here.
+ */
+ if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) {
+ perror("madvise 2");
+ return 1;
+ }
+
+ /* bounce pass */
+ if (stress(userfaults))
+ return 1;
+
+ /* unregister */
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
+ fprintf(stderr, "register failure\n");
+ return 1;
+ }
+
+ /* verification */
+ if (bounces & BOUNCE_VERIFY) {
+ for (nr = 0; nr < nr_pages; nr++) {
+ if (my_bcmp(area_dst,
+ area_dst + nr * page_size,
+ sizeof(pthread_mutex_t))) {
+ fprintf(stderr,
+ "error mutex 2 %lu\n",
+ nr);
+ bounces = 0;
+ }
+ if (*area_count(area_dst, nr) != count_verify[nr]) {
+ fprintf(stderr,
+ "error area_count %Lu %Lu %lu\n",
+ *area_count(area_src, nr),
+ count_verify[nr],
+ nr);
+ bounces = 0;
+ }
+ }
+ }
+
+ /* prepare next bounce */
+ tmp_area = area_src;
+ area_src = area_dst;
+ area_dst = tmp_area;
+
+ printf("userfaults:");
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ printf(" %lu", userfaults[cpu]);
+ printf("\n");
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc < 3)
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ page_size = sysconf(_SC_PAGE_SIZE);
+ if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) >
+ page_size)
+ fprintf(stderr, "Impossible to run this test\n"), exit(2);
+ nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size /
+ nr_cpus;
+ if (!nr_pages_per_cpu) {
+ fprintf(stderr, "invalid MiB\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+ bounces = atoi(argv[2]);
+ if (bounces <= 0) {
+ fprintf(stderr, "invalid bounces\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+ nr_pages = nr_pages_per_cpu * nr_cpus;
+ printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+ nr_pages, nr_pages_per_cpu);
+ return userfaultfd_stress();
+}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index ddf6356..29089b2 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -1,48 +1,62 @@
-.PHONY: all all_32 all_64 check_build32 clean run_tests
+all:
-TARGETS_C_BOTHBITS := sigreturn single_step_syscall
+include ../lib.mk
-BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32)
+.PHONY: all all_32 all_64 warn_32bit_failure clean
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn
+
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
-UNAME_P := $(shell uname -p)
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-# Always build 32-bit tests
+ifeq ($(CAN_BUILD_I386),1)
all: all_32
+TEST_PROGS += $(BINARIES_32)
+endif
-# If we're on a 64-bit host, build 64-bit tests as well
-ifeq ($(shell uname -p),x86_64)
+ifeq ($(CAN_BUILD_X86_64),1)
all: all_64
+TEST_PROGS += $(BINARIES_64)
endif
-all_32: check_build32 $(BINARIES_32)
+all_32: $(BINARIES_32)
all_64: $(BINARIES_64)
clean:
$(RM) $(BINARIES_32) $(BINARIES_64)
-run_tests:
- ./run_x86_tests.sh
-
-$(TARGETS_C_BOTHBITS:%=%_32): %_32: %.c
+$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
-check_build32:
- @if ! $(CC) -m32 -o /dev/null trivial_32bit_program.c; then \
- echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
- echo "environment. If you are using a Debian-like"; \
- echo " distribution, try:"; \
- echo ""; \
- echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
- echo ""; \
- echo "If you are using a Fedora-like distribution, try:"; \
- echo ""; \
- echo " yum install glibc-devel.*i686"; \
- exit 1; \
- fi
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+ @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
+ echo "environment. This will reduce test coverage of 64-bit" 2>&1; \
+ echo "kernels. If you are using a Debian-like distribution," 2>&1; \
+ echo "try:"; 2>&1; \
+ echo ""; \
+ echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+ echo ""; \
+ echo "If you are using a Fedora-like distribution, try:"; \
+ echo ""; \
+ echo " yum install glibc-devel.*i686"; \
+ exit 0;
+endif
+
+# Some tests have additional dependencies.
+sysret_ss_attrs_64: thunks.S
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
new file mode 100755
index 0000000..172d329
--- /dev/null
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# check_cc.sh - Helper to test userspace compilation support
+# Copyright (c) 2015 Andrew Lutomirski
+# GPL v2
+
+CC="$1"
+TESTPROG="$2"
+shift 2
+
+if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+ echo 1
+else
+ echo 0
+fi
+
+exit 0
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
new file mode 100644
index 0000000..9a43a59
--- /dev/null
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -0,0 +1,233 @@
+/*
+ * entry_from_vm86.c - tests kernel entries from vm86 mode
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This exercises a few paths that need to special-case vm86 mode.
+ *
+ * GPL v2.
+ */
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/vm86.h>
+
+static unsigned long load_addr = 0x10000;
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+ (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+ printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+ nerrs++;
+ }
+
+ const char *signame;
+ if (sig == SIGSEGV)
+ signame = "SIGSEGV";
+ else if (sig == SIGILL)
+ signame = "SIGILL";
+ else
+ signame = "unexpected signal";
+
+ printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+ (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+ got_signal = 1;
+}
+
+asm (
+ ".pushsection .rodata\n\t"
+ ".type vmcode_bound, @object\n\t"
+ "vmcode:\n\t"
+ "vmcode_bound:\n\t"
+ ".code16\n\t"
+ "bound %ax, (2048)\n\t"
+ "int3\n\t"
+ "vmcode_sysenter:\n\t"
+ "sysenter\n\t"
+ "vmcode_syscall:\n\t"
+ "syscall\n\t"
+ "vmcode_sti:\n\t"
+ "sti\n\t"
+ "vmcode_int3:\n\t"
+ "int3\n\t"
+ "vmcode_int80:\n\t"
+ "int $0x80\n\t"
+ ".size vmcode, . - vmcode\n\t"
+ "end_vmcode:\n\t"
+ ".code32\n\t"
+ ".popsection"
+ );
+
+extern unsigned char vmcode[], end_vmcode[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+ vmcode_sti[], vmcode_int3[], vmcode_int80[];
+
+/* Returns false if the test was skipped. */
+static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
+ unsigned int rettype, unsigned int retarg,
+ const char *text)
+{
+ long ret;
+
+ printf("[RUN]\t%s from vm86 mode\n", text);
+ v86->regs.eip = eip;
+ ret = vm86(VM86_ENTER, v86);
+
+ if (ret == -1 && errno == ENOSYS) {
+ printf("[SKIP]\tvm86 not supported\n");
+ return false;
+ }
+
+ if (VM86_TYPE(ret) == VM86_INTx) {
+ char trapname[32];
+ int trapno = VM86_ARG(ret);
+ if (trapno == 13)
+ strcpy(trapname, "GP");
+ else if (trapno == 5)
+ strcpy(trapname, "BR");
+ else if (trapno == 14)
+ strcpy(trapname, "PF");
+ else
+ sprintf(trapname, "%d", trapno);
+
+ printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
+ } else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
+ printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+ } else if (VM86_TYPE(ret) == VM86_TRAP) {
+ printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+ VM86_ARG(ret));
+ } else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+ printf("[INFO]\tExited vm86 mode due to a signal\n");
+ } else if (VM86_TYPE(ret) == VM86_STI) {
+ printf("[INFO]\tExited vm86 mode due to STI\n");
+ } else {
+ printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
+ VM86_TYPE(ret), VM86_ARG(ret));
+ }
+
+ if (rettype == -1 ||
+ (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+ printf("[OK]\tReturned correctly\n");
+ } else {
+ printf("[FAIL]\tIncorrect return reason\n");
+ nerrs++;
+ }
+
+ return true;
+}
+
+int main(void)
+{
+ struct vm86plus_struct v86;
+ unsigned char *addr = mmap((void *)load_addr, 4096,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1,0);
+ if (addr != (unsigned char *)load_addr)
+ err(1, "mmap");
+
+ memcpy(addr, vmcode, end_vmcode - vmcode);
+ addr[2048] = 2;
+ addr[2050] = 3;
+
+ memset(&v86, 0, sizeof(v86));
+
+ v86.regs.cs = load_addr / 16;
+ v86.regs.ss = load_addr / 16;
+ v86.regs.ds = load_addr / 16;
+ v86.regs.es = load_addr / 16;
+
+ assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
+
+ /* #BR -- should deliver SIG??? */
+ do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+ /*
+ * SYSENTER -- should cause #GP or #UD depending on CPU.
+ * Expected return type -1 means that we shouldn't validate
+ * the vm86 return value. This will avoid problems on non-SEP
+ * CPUs.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+ clearhandler(SIGILL);
+
+ /*
+ * SYSCALL would be a disaster in VM86 mode. Fortunately,
+ * there is no kernel that both enables SYSCALL and sets
+ * EFER.SCE, so it's #UD on all systems. But vm86 is
+ * buggy (or has a "feature"), so the SIGILL will actually
+ * be delivered.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+ clearhandler(SIGILL);
+
+ /* STI with VIP set */
+ v86.regs.eflags |= X86_EFLAGS_VIP;
+ v86.regs.eflags &= ~X86_EFLAGS_IF;
+ do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+ /* INT3 -- should cause #BP */
+ do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+ /* INT80 -- should exit with "INTx 0x80" */
+ v86.regs.eax = (unsigned int)-1;
+ do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+ /* Execute a null pointer */
+ v86.regs.cs = 0;
+ v86.regs.ss = 0;
+ sethandler(SIGSEGV, sighandler, 0);
+ got_signal = 0;
+ if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") &&
+ !got_signal) {
+ printf("[FAIL]\tDid not receive SIGSEGV\n");
+ nerrs++;
+ }
+ clearhandler(SIGSEGV);
+
+ return (nerrs == 0 ? 0 : 1);
+}
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
new file mode 100644
index 0000000..31a3035
--- /dev/null
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -0,0 +1,576 @@
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/ldt.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+#include <linux/futex.h>
+
+#define AR_ACCESSED (1<<8)
+
+#define AR_TYPE_RODATA (0 * (1<<9))
+#define AR_TYPE_RWDATA (1 * (1<<9))
+#define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9))
+#define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9))
+#define AR_TYPE_XOCODE (4 * (1<<9))
+#define AR_TYPE_XRCODE (5 * (1<<9))
+#define AR_TYPE_XOCODE_CONF (6 * (1<<9))
+#define AR_TYPE_XRCODE_CONF (7 * (1<<9))
+
+#define AR_DPL3 (3 * (1<<13))
+
+#define AR_S (1 << 12)
+#define AR_P (1 << 15)
+#define AR_AVL (1 << 20)
+#define AR_L (1 << 21)
+#define AR_DB (1 << 22)
+#define AR_G (1 << 23)
+
+static int nerrs;
+
+static void check_invalid_segment(uint16_t index, int ldt)
+{
+ uint32_t has_limit = 0, has_ar = 0, limit, ar;
+ uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+ asm ("lsl %[selector], %[limit]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_limit]\n\t"
+ "1:"
+ : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+ : [selector] "r" (selector));
+ asm ("larl %[selector], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_ar]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+ : [selector] "r" (selector));
+
+ if (has_limit || has_ar) {
+ printf("[FAIL]\t%s entry %hu is valid but should be invalid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s entry %hu is invalid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ }
+}
+
+static void check_valid_segment(uint16_t index, int ldt,
+ uint32_t expected_ar, uint32_t expected_limit,
+ bool verbose)
+{
+ uint32_t has_limit = 0, has_ar = 0, limit, ar;
+ uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+ asm ("lsl %[selector], %[limit]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_limit]\n\t"
+ "1:"
+ : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+ : [selector] "r" (selector));
+ asm ("larl %[selector], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_ar]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+ : [selector] "r" (selector));
+
+ if (!has_limit || !has_ar) {
+ printf("[FAIL]\t%s entry %hu is invalid but should be valid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ nerrs++;
+ return;
+ }
+
+ if (ar != expected_ar) {
+ printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
+ nerrs++;
+ } else if (limit != expected_limit) {
+ printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, limit, expected_limit);
+ nerrs++;
+ } else if (verbose) {
+ printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, ar, limit);
+ }
+}
+
+static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
+ bool oldmode)
+{
+ int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ desc, sizeof(*desc));
+ if (ret < -1)
+ errno = -ret;
+ if (ret == 0) {
+ uint32_t limit = desc->limit;
+ if (desc->limit_in_pages)
+ limit = (limit << 12) + 4095;
+ check_valid_segment(desc->entry_number, 1, ar, limit, true);
+ return true;
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ return false;
+ } else {
+ if (desc->seg_32bit) {
+ printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+ errno);
+ nerrs++;
+ return false;
+ } else {
+ printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+ return false;
+ }
+ }
+}
+
+static bool install_valid(const struct user_desc *desc, uint32_t ar)
+{
+ return install_valid_mode(desc, ar, false);
+}
+
+static void install_invalid(const struct user_desc *desc, bool oldmode)
+{
+ int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ desc, sizeof(*desc));
+ if (ret < -1)
+ errno = -ret;
+ if (ret == 0) {
+ check_invalid_segment(desc->entry_number, 1);
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ } else {
+ if (desc->seg_32bit) {
+ printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+ errno);
+ nerrs++;
+ } else {
+ printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+ }
+ }
+}
+
+static int safe_modify_ldt(int func, struct user_desc *ptr,
+ unsigned long bytecount)
+{
+ int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount);
+ if (ret < -1)
+ errno = -ret;
+ return ret;
+}
+
+static void fail_install(struct user_desc *desc)
+{
+ if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) {
+ printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n");
+ nerrs++;
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ } else {
+ printf("[OK]\tmodify_ldt failure %d\n", errno);
+ }
+}
+
+static void do_simple_tests(void)
+{
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 10,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+ desc.limit_in_pages = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ check_invalid_segment(1, 1);
+
+ desc.entry_number = 2;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ check_invalid_segment(1, 1);
+
+ desc.base_addr = 0xf0000000;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ desc.useable = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G | AR_AVL);
+
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.seg_32bit = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_G | AR_AVL);
+
+ desc.seg_32bit = 1;
+ desc.contents = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.contents = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.read_exec_only = 0;
+ desc.limit_in_pages = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.contents = 3;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 0;
+ desc.contents = 2;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 1;
+
+#ifdef __x86_64__
+ desc.lm = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL);
+ desc.lm = 0;
+#endif
+
+ bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL);
+
+ if (entry1_okay) {
+ printf("[RUN]\tTest fork\n");
+ pid_t child = fork();
+ if (child == 0) {
+ nerrs = 0;
+ check_valid_segment(desc.entry_number, 1,
+ AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL, desc.limit,
+ true);
+ check_invalid_segment(1, 1);
+ exit(nerrs ? 1 : 0);
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ printf("[RUN]\tTest size\n");
+ int i;
+ for (i = 0; i < 8192; i++) {
+ desc.entry_number = i;
+ desc.limit = i;
+ if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+ printf("[FAIL]\tFailed to install entry %d\n", i);
+ nerrs++;
+ break;
+ }
+ }
+ for (int j = 0; j < i; j++) {
+ check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL, j, false);
+ }
+ printf("[DONE]\tSize test\n");
+ } else {
+ printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n");
+ }
+
+ /* Test entry_number too high. */
+ desc.entry_number = 8192;
+ fail_install(&desc);
+
+ /* Test deletion and actions mistakeable for deletion. */
+ memset(&desc, 0, sizeof(desc));
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P);
+
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+ desc.seg_not_present = 0;
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P);
+
+ desc.read_exec_only = 0;
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+ desc.read_exec_only = 1;
+ desc.limit = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+ desc.limit = 0;
+ desc.base_addr = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+ desc.base_addr = 0;
+ install_invalid(&desc, false);
+
+ desc.seg_not_present = 0;
+ desc.read_exec_only = 0;
+ desc.seg_32bit = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+ install_invalid(&desc, true);
+}
+
+/*
+ * 0: thread is idle
+ * 1: thread armed
+ * 2: thread should clear LDT entry 0
+ * 3: thread should exit
+ */
+static volatile unsigned int ftx;
+
+static void *threadproc(void *ctx)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 1"); /* should never fail */
+
+ while (1) {
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+ while (ftx != 2) {
+ if (ftx >= 3)
+ return NULL;
+ }
+
+ /* clear LDT entry 0 */
+ const struct user_desc desc = {};
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
+ err(1, "modify_ldt");
+
+ /* If ftx == 2, set it to zero. If ftx == 100, quit. */
+ unsigned int x = -2;
+ asm volatile ("lock xaddl %[x], %[ftx]" :
+ [x] "+r" (x), [ftx] "+m" (ftx));
+ if (x != 2)
+ return NULL;
+ }
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static void do_multicpu_tests(void)
+{
+ cpu_set_t cpuset;
+ pthread_t thread;
+ int failures = 0, iters = 5, i;
+ unsigned short orig_ss;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tCannot set affinity to CPU 1\n");
+ return;
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tCannot set affinity to CPU 0\n");
+ return;
+ }
+
+ sethandler(SIGSEGV, sigsegv, 0);
+#ifdef __i386__
+ /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
+ sethandler(SIGILL, sigsegv, 0);
+#endif
+
+ printf("[RUN]\tCross-CPU LDT invalidation\n");
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+ asm volatile ("mov %%ss, %0" : "=rm" (orig_ss));
+
+ for (i = 0; i < 5; i++) {
+ if (sigsetjmp(jmpbuf, 1) != 0)
+ continue;
+
+ /* Make sure the thread is ready after the last test. */
+ while (ftx != 0)
+ ;
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 0xfffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data */
+ .read_exec_only = 0,
+ .limit_in_pages = 1,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+
+ if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+ if (errno != ENOSYS)
+ err(1, "modify_ldt");
+ printf("[SKIP]\tmodify_ldt unavailable\n");
+ break;
+ }
+
+ /* Arm the thread. */
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ asm volatile ("mov %0, %%ss" : : "r" (0x7));
+
+ /* Go! */
+ ftx = 2;
+
+ while (ftx != 0)
+ ;
+
+ /*
+ * On success, modify_ldt will segfault us synchronously,
+ * and we'll escape via siglongjmp.
+ */
+
+ failures++;
+ asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
+ };
+
+ ftx = 100; /* Kill the thread. */
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ if (pthread_join(thread, NULL) != 0)
+ err(1, "pthread_join");
+
+ if (failures) {
+ printf("[FAIL]\t%d of %d iterations failed\n", failures, iters);
+ nerrs++;
+ } else {
+ printf("[OK]\tAll %d iterations succeeded\n", iters);
+ }
+}
+
+static int finish_exec_test(void)
+{
+ /*
+ * In a sensible world, this would be check_invalid_segment(0, 1);
+ * For better or for worse, though, the LDT is inherited across exec.
+ * We can probably change this safely, but for now we test it.
+ */
+ check_valid_segment(0, 1,
+ AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
+ 42, true);
+
+ return nerrs ? 1 : 0;
+}
+
+static void do_exec_test(void)
+{
+ printf("[RUN]\tTest exec\n");
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 42,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+ pid_t child = fork();
+ if (child == 0) {
+ execl("/proc/self/exe", "ldt_gdt_test_exec", NULL);
+ printf("[FAIL]\tCould not exec self\n");
+ exit(1); /* exec failed */
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
+ return finish_exec_test();
+
+ do_simple_tests();
+
+ do_multicpu_tests();
+
+ do_exec_test();
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/run_x86_tests.sh b/tools/testing/selftests/x86/run_x86_tests.sh
deleted file mode 100644
index 3fc19b3..0000000
--- a/tools/testing/selftests/x86/run_x86_tests.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-# This is deliberately minimal. IMO kselftests should provide a standard
-# script here.
-./sigreturn_32 || exit 1
-./single_step_syscall_32 || exit 1
-
-if [[ "$uname -p" -eq "x86_64" ]]; then
- ./sigreturn_64 || exit 1
- ./single_step_syscall_64 || exit 1
-fi
-
-exit 0
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
new file mode 100644
index 0000000..7db4fc9
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -0,0 +1,130 @@
+/*
+ * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <setjmp.h>
+#include <errno.h>
+
+/* Our sigaltstack scratch space. */
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+static sigjmp_buf jmpbuf;
+
+static volatile sig_atomic_t n_errs;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
+ printf("[FAIL]\tAX had the wrong value: 0x%x\n",
+ ctx->uc_mcontext.gregs[REG_EAX]);
+ n_errs++;
+ } else {
+ printf("[OK]\tSeems okay\n");
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void sigill(int sig, siginfo_t *info, void *ctx_void)
+{
+ printf("[SKIP]\tIllegal instruction\n");
+ siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+
+ sethandler(SIGSEGV, sigsegv, SA_ONSTACK);
+ sethandler(SIGILL, sigill, SA_ONSTACK);
+
+ /*
+ * Exercise another nasty special case. The 32-bit SYSCALL
+ * and SYSENTER instructions (even in compat mode) each
+ * clobber one register. A Linux system call has a syscall
+ * number and six arguments, and the user stack pointer
+ * needs to live in some register on return. That means
+ * that we need eight registers, but SYSCALL and SYSENTER
+ * only preserve seven registers. As a result, one argument
+ * ends up on the stack. The stack is user memory, which
+ * means that the kernel can fail to read it.
+ *
+ * The 32-bit fast system calls don't have a defined ABI:
+ * we're supposed to invoke them through the vDSO. So we'll
+ * fudge it: we set all regs to invalid pointer values and
+ * invoke the entry instruction. The return will fail no
+ * matter what, and we completely lose our program state,
+ * but we can fix it up with a signal handler.
+ */
+
+ printf("[RUN]\tSYSENTER with invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+
+ printf("[RUN]\tSYSCALL with invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "syscall\n\t"
+ "pushl $0" /* make sure we segfault cleanly */
+ : : : "memory", "flags");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
new file mode 100644
index 0000000..60c06af4
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -0,0 +1,54 @@
+/*
+ * syscall_nt.c - checks syscalls with NT set
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Some obscure user-space code requires the ability to make system calls
+ * with FLAGS.NT set. Make sure it works.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/processor-flags.h>
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+int main()
+{
+ printf("[RUN]\tSet NT and issue a syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_NT);
+ syscall(SYS_getpid);
+ if (get_eflags() & X86_EFLAGS_NT) {
+ printf("[OK]\tThe syscall worked and NT is still set\n");
+ return 0;
+ } else {
+ printf("[FAIL]\tThe syscall worked but NT was cleared\n");
+ return 1;
+ }
+}
diff --git a/tools/testing/selftests/x86/sysret_ss_attrs.c b/tools/testing/selftests/x86/sysret_ss_attrs.c
new file mode 100644
index 0000000..ce42d5a
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_ss_attrs.c
@@ -0,0 +1,112 @@
+/*
+ * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * On AMD CPUs, SYSRET can return with a valid SS descriptor with with
+ * the hidden attributes set to an unusable state. Make sure the kernel
+ * doesn't let this happen.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <pthread.h>
+
+static void *threadproc(void *ctx)
+{
+ /*
+ * Do our best to cause sleeps on this CPU to exit the kernel and
+ * re-enter with SS = 0.
+ */
+ while (true)
+ ;
+
+ return NULL;
+}
+
+#ifdef __x86_64__
+extern unsigned long call32_from_64(void *stack, void (*function)(void));
+
+asm (".pushsection .text\n\t"
+ ".code32\n\t"
+ "test_ss:\n\t"
+ "pushl $0\n\t"
+ "popl %eax\n\t"
+ "ret\n\t"
+ ".code64");
+extern void test_ss(void);
+#endif
+
+int main()
+{
+ /*
+ * Start a busy-looping thread on the same CPU we're on.
+ * For simplicity, just stick everything to CPU 0. This will
+ * fail in some containers, but that's probably okay.
+ */
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ printf("[WARN]\tsched_setaffinity failed\n");
+
+ pthread_t thread;
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+#ifdef __x86_64__
+ unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE,
+ -1, 0);
+ if (stack32 == MAP_FAILED)
+ err(1, "mmap");
+#endif
+
+ printf("[RUN]\tSyscalls followed by SS validation\n");
+
+ for (int i = 0; i < 1000; i++) {
+ /*
+ * Go to sleep and return using sysret (if we're 64-bit
+ * or we're 32-bit on AMD on a 64-bit kernel). On AMD CPUs,
+ * SYSRET doesn't fix up the cached SS descriptor, so the
+ * kernel needs some kind of workaround to make sure that we
+ * end the system call with a valid stack segment. This
+ * can be a confusing failure because the SS *selector*
+ * is the same regardless.
+ */
+ usleep(2);
+
+#ifdef __x86_64__
+ /*
+ * On 32-bit, just doing a syscall through glibc is enough
+ * to cause a crash if our cached SS descriptor is invalid.
+ * On 64-bit, it's not, so try extra hard.
+ */
+ call32_from_64(stack32 + 4088, test_ss);
+#endif
+ }
+
+ printf("[OK]\tWe survived\n");
+
+#ifdef __x86_64__
+ munmap(stack32, 4096);
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
new file mode 100644
index 0000000..ce8a995
--- /dev/null
+++ b/tools/testing/selftests/x86/thunks.S
@@ -0,0 +1,67 @@
+/*
+ * thunks.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+ .text
+
+ .global call32_from_64
+ .type call32_from_64, @function
+call32_from_64:
+ // rdi: stack to use
+ // esi: function to call
+
+ // Save registers
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+
+ // Switch stacks
+ mov %rsp,(%rdi)
+ mov %rdi,%rsp
+
+ // Switch to compatibility mode
+ pushq $0x23 /* USER32_CS */
+ pushq $1f
+ lretq
+
+1:
+ .code32
+ // Call the function
+ call *%esi
+ // Switch back to long mode
+ jmp $0x33,$1f
+ .code64
+
+1:
+ // Restore the stack
+ mov (%rsp),%rsp
+
+ // Restore registers
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+
+ ret
+
+.size call32_from_64, .-call32_from_64
diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c
index 2e231be..fabdf0f 100644
--- a/tools/testing/selftests/x86/trivial_32bit_program.c
+++ b/tools/testing/selftests/x86/trivial_32bit_program.c
@@ -4,6 +4,10 @@
* GPL v2
*/
+#ifndef __i386__
+# error wrong architecture
+#endif
+
#include <stdio.h>
int main()
diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c
new file mode 100644
index 0000000..05c6a41
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_64bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 64-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __x86_64__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+ printf("\n");
+
+ return 0;
+}
diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile
index 0788621..2e83dd3 100644
--- a/tools/thermal/tmon/Makefile
+++ b/tools/thermal/tmon/Makefile
@@ -12,10 +12,6 @@ TARGET=tmon
INSTALL_PROGRAM=install -m 755 -p
DEL_FILE=rm -f
-INSTALL_CONFIGFILE=install -m 644 -p
-CONFIG_FILE=
-CONFIG_PATH=
-
# Static builds might require -ltinfo, for instance
ifneq ($(findstring -static, $(LDFLAGS)),)
STATIC := --static
@@ -38,13 +34,9 @@ valgrind: tmon
install:
- mkdir -p $(INSTALL_ROOT)/$(BINDIR)
- $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
- - mkdir -p $(INSTALL_ROOT)/$(CONFIG_PATH)
- - $(INSTALL_CONFIGFILE) "$(CONFIG_FILE)" "$(INSTALL_ROOT)/$(CONFIG_PATH)"
uninstall:
$(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
- $(CONFIG_FILE) "$(CONFIG_PATH)"
-
clean:
find . -name "*.o" | xargs $(DEL_FILE)
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
index ac884b6..93aadaf 100644
--- a/tools/vm/Makefile
+++ b/tools/vm/Makefile
@@ -3,7 +3,7 @@
TARGETS=page-types slabinfo page_owner_sort
LIB_DIR = ../lib/api
-LIBS = $(LIB_DIR)/libapikfs.a
+LIBS = $(LIB_DIR)/libapi.a
CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra -I../lib/
OpenPOWER on IntegriCloud