summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2013-01-31 10:20:14 +0100
committerIngo Molnar <mingo@kernel.org>2013-01-31 10:20:14 +0100
commit152fefa921535665f95840c08062844ab2f5593e (patch)
treeb6ff202ebeca4341a1332258a04403f8ce95e75a
parenta2d28d0c198b65fac28ea6212f5f8edc77b29c27 (diff)
parent5809fde040de2afa477a6c593ce2e8fd2c11d9d3 (diff)
downloadop-kernel-dev-152fefa921535665f95840c08062844ab2f5593e.zip
op-kernel-dev-152fefa921535665f95840c08062844ab2f5593e.tar.gz
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: . Fix some leaks in exit paths. . Use memdup where applicable . Remove some die() calls, allowing callers to handle exit paths gracefully. . Correct typo in tools Makefile, fix from Borislav Petkov. . Add 'perf bench numa mem' NUMA performance measurement suite, from Ingo Molnar. . Handle dynamic array's element size properly, fix from Jiri Olsa. . Fix memory leaks on evsel->counts, from Namhyung Kim. . Make numa benchmark optional, allowing the build in machines where required numa libraries are not present, fix from Peter Hurley. . Add interval printing in 'perf stat', from Stephane Eranian. . Fix compile warnings in tests/attr.c, from Sukadev Bhattiprolu. . Fix double free, pclose instead of fclose, leaks and double fclose errors found with the cppcheck tool, from Thomas Jarosch. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/Makefile2
-rw-r--r--tools/lib/traceevent/event-parse.c39
-rw-r--r--tools/perf/Documentation/perf-stat.txt4
-rw-r--r--tools/perf/Makefile13
-rw-r--r--tools/perf/arch/common.c1
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/numa.c1731
-rw-r--r--tools/perf/builtin-bench.c17
-rw-r--r--tools/perf/builtin-kmem.c6
-rw-r--r--tools/perf/builtin-stat.c158
-rw-r--r--tools/perf/config/feature-tests.mak11
-rw-r--r--tools/perf/tests/attr.c5
-rw-r--r--tools/perf/tests/open-syscall-all-cpus.c1
-rw-r--r--tools/perf/tests/perf-record.c12
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c4
-rw-r--r--tools/perf/ui/browser.c2
-rw-r--r--tools/perf/util/event.c4
-rw-r--r--tools/perf/util/evsel.c31
-rw-r--r--tools/perf/util/evsel.h2
-rw-r--r--tools/perf/util/header.c25
-rw-r--r--tools/perf/util/map.c118
-rw-r--r--tools/perf/util/map.h24
-rw-r--r--tools/perf/util/sort.c7
-rw-r--r--tools/perf/util/strlist.c54
-rw-r--r--tools/perf/util/strlist.h42
25 files changed, 2154 insertions, 160 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 1f9a529..798fa0e 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -15,7 +15,7 @@ help:
@echo ' x86_energy_perf_policy - Intel energy policy tool'
@echo ''
@echo 'You can do:'
- @echo ' $$ make -C tools/<tool>_install'
+ @echo ' $$ make -C tools/ <tool>_install'
@echo ''
@echo ' from the kernel command line to build and install one of'
@echo ' the tools above'
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index bb8b3db..82b0606 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1223,6 +1223,34 @@ static int field_is_long(struct format_field *field)
return 0;
}
+static unsigned int type_size(const char *name)
+{
+ /* This covers all FIELD_IS_STRING types. */
+ static struct {
+ const char *type;
+ unsigned int size;
+ } table[] = {
+ { "u8", 1 },
+ { "u16", 2 },
+ { "u32", 4 },
+ { "u64", 8 },
+ { "s8", 1 },
+ { "s16", 2 },
+ { "s32", 4 },
+ { "s64", 8 },
+ { "char", 1 },
+ { },
+ };
+ int i;
+
+ for (i = 0; table[i].type; i++) {
+ if (!strcmp(table[i].type, name))
+ return table[i].size;
+ }
+
+ return 0;
+}
+
static int event_read_fields(struct event_format *event, struct format_field **fields)
{
struct format_field *field = NULL;
@@ -1232,6 +1260,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f
int count = 0;
do {
+ unsigned int size_dynamic = 0;
+
type = read_token(&token);
if (type == EVENT_NEWLINE) {
free_token(token);
@@ -1390,6 +1420,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
field->type = new_type;
strcat(field->type, " ");
strcat(field->type, field->name);
+ size_dynamic = type_size(field->name);
free_token(field->name);
strcat(field->type, brackets);
field->name = token;
@@ -1478,10 +1509,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f
if (field->flags & FIELD_IS_ARRAY) {
if (field->arraylen)
field->elementsize = field->size / field->arraylen;
+ else if (field->flags & FIELD_IS_DYNAMIC)
+ field->elementsize = size_dynamic;
else if (field->flags & FIELD_IS_STRING)
field->elementsize = 1;
- else
- field->elementsize = event->pevent->long_size;
+ else if (field->flags & FIELD_IS_LONG)
+ field->elementsize = event->pevent ?
+ event->pevent->long_size :
+ sizeof(long);
} else
field->elementsize = field->size;
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index cf0c310..5289da3 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -114,6 +114,10 @@ with it. --append may be used here. Examples:
perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
+-I msecs::
+--interval-print msecs::
+ print count deltas every N milliseconds (minimum: 100ms)
+ example: perf stat -I 1000 -e cycles -a sleep 5
EXAMPLES
--------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index a84021a..4b1044c 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -47,6 +47,8 @@ include config/utilities.mak
# backtrace post unwind.
#
# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+#
+# Define NO_LIBNUMA if you do not want numa perf benchmark
$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
@@ -838,6 +840,17 @@ ifndef NO_BACKTRACE
endif
endif
+ifndef NO_LIBNUMA
+ FLAGS_LIBNUMA = $(ALL_CFLAGS) $(ALL_LDFLAGS) -lnuma
+ ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
+ msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
+ else
+ BASIC_CFLAGS += -DLIBNUMA_SUPPORT
+ BUILTIN_OBJS += $(OUTPUT)bench/numa.o
+ EXTLIBS += -lnuma
+ endif
+endif
+
ifdef ASCIIDOC8
export ASCIIDOC8
endif
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 3e975cb..aacef07 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -155,6 +155,7 @@ static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
if (lookup_path(buf))
goto out;
free(buf);
+ buf = NULL;
}
if (!strcmp(arch, "arm"))
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 8f89998..a5223e6 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -1,6 +1,7 @@
#ifndef BENCH_H
#define BENCH_H
+extern int bench_numa(int argc, const char **argv, const char *prefix);
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
extern int bench_mem_memcpy(int argc, const char **argv,
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
new file mode 100644
index 0000000..30d1c322
--- /dev/null
+++ b/tools/perf/bench/numa.c
@@ -0,0 +1,1731 @@
+/*
+ * numa.c
+ *
+ * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
+ */
+
+#include "../perf.h"
+#include "../builtin.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+
+#include "bench.h"
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <assert.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+
+#include <numa.h>
+#include <numaif.h>
+
+/*
+ * Regular printout to the terminal, supressed if -q is specified:
+ */
+#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
+
+/*
+ * Debug printf:
+ */
+#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
+
+struct thread_data {
+ int curr_cpu;
+ cpu_set_t bind_cpumask;
+ int bind_node;
+ u8 *process_data;
+ int process_nr;
+ int thread_nr;
+ int task_nr;
+ unsigned int loops_done;
+ u64 val;
+ u64 runtime_ns;
+ pthread_mutex_t *process_lock;
+};
+
+/* Parameters set by options: */
+
+struct params {
+ /* Startup synchronization: */
+ bool serialize_startup;
+
+ /* Task hierarchy: */
+ int nr_proc;
+ int nr_threads;
+
+ /* Working set sizes: */
+ const char *mb_global_str;
+ const char *mb_proc_str;
+ const char *mb_proc_locked_str;
+ const char *mb_thread_str;
+
+ double mb_global;
+ double mb_proc;
+ double mb_proc_locked;
+ double mb_thread;
+
+ /* Access patterns to the working set: */
+ bool data_reads;
+ bool data_writes;
+ bool data_backwards;
+ bool data_zero_memset;
+ bool data_rand_walk;
+ u32 nr_loops;
+ u32 nr_secs;
+ u32 sleep_usecs;
+
+ /* Working set initialization: */
+ bool init_zero;
+ bool init_random;
+ bool init_cpu0;
+
+ /* Misc options: */
+ int show_details;
+ int run_all;
+ int thp;
+
+ long bytes_global;
+ long bytes_process;
+ long bytes_process_locked;
+ long bytes_thread;
+
+ int nr_tasks;
+ bool show_quiet;
+
+ bool show_convergence;
+ bool measure_convergence;
+
+ int perturb_secs;
+ int nr_cpus;
+ int nr_nodes;
+
+ /* Affinity options -C and -N: */
+ char *cpu_list_str;
+ char *node_list_str;
+};
+
+
+/* Global, read-writable area, accessible to all processes and threads: */
+
+struct global_info {
+ u8 *data;
+
+ pthread_mutex_t startup_mutex;
+ int nr_tasks_started;
+
+ pthread_mutex_t startup_done_mutex;
+
+ pthread_mutex_t start_work_mutex;
+ int nr_tasks_working;
+
+ pthread_mutex_t stop_work_mutex;
+ u64 bytes_done;
+
+ struct thread_data *threads;
+
+ /* Convergence latency measurement: */
+ bool all_converged;
+ bool stop_work;
+
+ int print_once;
+
+ struct params p;
+};
+
+static struct global_info *g = NULL;
+
+static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
+static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
+
+struct params p0;
+
+static const struct option options[] = {
+ OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"),
+ OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"),
+
+ OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"),
+ OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"),
+ OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
+ OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"),
+
+ OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"),
+ OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"),
+ OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"),
+
+ OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"),
+ OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"),
+ OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"),
+ OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
+ OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"),
+
+
+ OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"),
+ OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"),
+ OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"),
+ OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"),
+
+ OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"),
+ OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"),
+ OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
+ OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
+ OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
+ OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"),
+ OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
+
+ /* Special option string parsing callbacks: */
+ OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
+ "bind the first N tasks to these specific cpus (the rest is unbound)",
+ parse_cpus_opt),
+ OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
+ "bind the first N tasks to these specific memory nodes (the rest is unbound)",
+ parse_nodes_opt),
+ OPT_END()
+};
+
+static const char * const bench_numa_usage[] = {
+ "perf bench numa <options>",
+ NULL
+};
+
+static const char * const numa_usage[] = {
+ "perf bench numa mem [<options>]",
+ NULL
+};
+
+static cpu_set_t bind_to_cpu(int target_cpu)
+{
+ cpu_set_t orig_mask, mask;
+ int ret;
+
+ ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+ BUG_ON(ret);
+
+ CPU_ZERO(&mask);
+
+ if (target_cpu == -1) {
+ int cpu;
+
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &mask);
+ } else {
+ BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
+ CPU_SET(target_cpu, &mask);
+ }
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+
+ return orig_mask;
+}
+
+static cpu_set_t bind_to_node(int target_node)
+{
+ int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
+ cpu_set_t orig_mask, mask;
+ int cpu;
+ int ret;
+
+ BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
+ BUG_ON(!cpus_per_node);
+
+ ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+ BUG_ON(ret);
+
+ CPU_ZERO(&mask);
+
+ if (target_node == -1) {
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &mask);
+ } else {
+ int cpu_start = (target_node + 0) * cpus_per_node;
+ int cpu_stop = (target_node + 1) * cpus_per_node;
+
+ BUG_ON(cpu_stop > g->p.nr_cpus);
+
+ for (cpu = cpu_start; cpu < cpu_stop; cpu++)
+ CPU_SET(cpu, &mask);
+ }
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+
+ return orig_mask;
+}
+
+static void bind_to_cpumask(cpu_set_t mask)
+{
+ int ret;
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+}
+
+static void mempol_restore(void)
+{
+ int ret;
+
+ ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
+
+ BUG_ON(ret);
+}
+
+static void bind_to_memnode(int node)
+{
+ unsigned long nodemask;
+ int ret;
+
+ if (node == -1)
+ return;
+
+ BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask));
+ nodemask = 1L << node;
+
+ ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
+ dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+
+ BUG_ON(ret);
+}
+
+#define HPSIZE (2*1024*1024)
+
+#define set_taskname(fmt...) \
+do { \
+ char name[20]; \
+ \
+ snprintf(name, 20, fmt); \
+ prctl(PR_SET_NAME, name); \
+} while (0)
+
+static u8 *alloc_data(ssize_t bytes0, int map_flags,
+ int init_zero, int init_cpu0, int thp, int init_random)
+{
+ cpu_set_t orig_mask;
+ ssize_t bytes;
+ u8 *buf;
+ int ret;
+
+ if (!bytes0)
+ return NULL;
+
+ /* Allocate and initialize all memory on CPU#0: */
+ if (init_cpu0) {
+ orig_mask = bind_to_node(0);
+ bind_to_memnode(0);
+ }
+
+ bytes = bytes0 + HPSIZE;
+
+ buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
+ BUG_ON(buf == (void *)-1);
+
+ if (map_flags == MAP_PRIVATE) {
+ if (thp > 0) {
+ ret = madvise(buf, bytes, MADV_HUGEPAGE);
+ if (ret && !g->print_once) {
+ g->print_once = 1;
+ printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
+ }
+ }
+ if (thp < 0) {
+ ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
+ if (ret && !g->print_once) {
+ g->print_once = 1;
+ printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
+ }
+ }
+ }
+
+ if (init_zero) {
+ bzero(buf, bytes);
+ } else {
+ /* Initialize random contents, different in each word: */
+ if (init_random) {
+ u64 *wbuf = (void *)buf;
+ long off = rand();
+ long i;
+
+ for (i = 0; i < bytes/8; i++)
+ wbuf[i] = i + off;
+ }
+ }
+
+ /* Align to 2MB boundary: */
+ buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
+
+ /* Restore affinity: */
+ if (init_cpu0) {
+ bind_to_cpumask(orig_mask);
+ mempol_restore();
+ }
+
+ return buf;
+}
+
+static void free_data(void *data, ssize_t bytes)
+{
+ int ret;
+
+ if (!data)
+ return;
+
+ ret = munmap(data, bytes);
+ BUG_ON(ret);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes, zeroed:
+ */
+static void * zalloc_shared_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes:
+ */
+static void * setup_shared_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Allocate process-local memory - this will either be shared between
+ * threads of this process, or only be accessed by this thread:
+ */
+static void * setup_private_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Return a process-shared (global) mutex:
+ */
+static void init_global_mutex(pthread_mutex_t *mutex)
+{
+ pthread_mutexattr_t attr;
+
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+ pthread_mutex_init(mutex, &attr);
+}
+
+static int parse_cpu_list(const char *arg)
+{
+ p0.cpu_list_str = strdup(arg);
+
+ dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
+
+ return 0;
+}
+
+static void parse_setup_cpu_list(void)
+{
+ struct thread_data *td;
+ char *str0, *str;
+ int t;
+
+ if (!g->p.cpu_list_str)
+ return;
+
+ dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+ str0 = str = strdup(g->p.cpu_list_str);
+ t = 0;
+
+ BUG_ON(!str);
+
+ tprintf("# binding tasks to CPUs:\n");
+ tprintf("# ");
+
+ while (true) {
+ int bind_cpu, bind_cpu_0, bind_cpu_1;
+ char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
+ int bind_len;
+ int step;
+ int mul;
+
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+
+ tok_end = strstr(tok, "-");
+
+ dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+ if (!tok_end) {
+ /* Single CPU specified: */
+ bind_cpu_0 = bind_cpu_1 = atol(tok);
+ } else {
+ /* CPU range specified (for example: "5-11"): */
+ bind_cpu_0 = atol(tok);
+ bind_cpu_1 = atol(tok_end + 1);
+ }
+
+ step = 1;
+ tok_step = strstr(tok, "#");
+ if (tok_step) {
+ step = atol(tok_step + 1);
+ BUG_ON(step <= 0 || step >= g->p.nr_cpus);
+ }
+
+ /*
+ * Mask length.
+ * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
+ * where the _4 means the next 4 CPUs are allowed.
+ */
+ bind_len = 1;
+ tok_len = strstr(tok, "_");
+ if (tok_len) {
+ bind_len = atol(tok_len + 1);
+ BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
+ }
+
+ /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+ mul = 1;
+ tok_mul = strstr(tok, "x");
+ if (tok_mul) {
+ mul = atol(tok_mul + 1);
+ BUG_ON(mul <= 0);
+ }
+
+ dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
+
+ BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus);
+ BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus);
+ BUG_ON(bind_cpu_0 > bind_cpu_1);
+
+ for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+ int i;
+
+ for (i = 0; i < mul; i++) {
+ int cpu;
+
+ if (t >= g->p.nr_tasks) {
+ printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
+ goto out;
+ }
+ td = g->threads + t;
+
+ if (t)
+ tprintf(",");
+ if (bind_len > 1) {
+ tprintf("%2d/%d", bind_cpu, bind_len);
+ } else {
+ tprintf("%2d", bind_cpu);
+ }
+
+ CPU_ZERO(&td->bind_cpumask);
+ for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
+ BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
+ CPU_SET(cpu, &td->bind_cpumask);
+ }
+ t++;
+ }
+ }
+ }
+out:
+
+ tprintf("\n");
+
+ if (t < g->p.nr_tasks)
+ printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+ free(str0);
+}
+
+static int parse_cpus_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ return parse_cpu_list(arg);
+}
+
+static int parse_node_list(const char *arg)
+{
+ p0.node_list_str = strdup(arg);
+
+ dprintf("got NODE list: {%s}\n", p0.node_list_str);
+
+ return 0;
+}
+
+static void parse_setup_node_list(void)
+{
+ struct thread_data *td;
+ char *str0, *str;
+ int t;
+
+ if (!g->p.node_list_str)
+ return;
+
+ dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+ str0 = str = strdup(g->p.node_list_str);
+ t = 0;
+
+ BUG_ON(!str);
+
+ tprintf("# binding tasks to NODEs:\n");
+ tprintf("# ");
+
+ while (true) {
+ int bind_node, bind_node_0, bind_node_1;
+ char *tok, *tok_end, *tok_step, *tok_mul;
+ int step;
+ int mul;
+
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+
+ tok_end = strstr(tok, "-");
+
+ dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+ if (!tok_end) {
+ /* Single NODE specified: */
+ bind_node_0 = bind_node_1 = atol(tok);
+ } else {
+ /* NODE range specified (for example: "5-11"): */
+ bind_node_0 = atol(tok);
+ bind_node_1 = atol(tok_end + 1);
+ }
+
+ step = 1;
+ tok_step = strstr(tok, "#");
+ if (tok_step) {
+ step = atol(tok_step + 1);
+ BUG_ON(step <= 0 || step >= g->p.nr_nodes);
+ }
+
+ /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+ mul = 1;
+ tok_mul = strstr(tok, "x");
+ if (tok_mul) {
+ mul = atol(tok_mul + 1);
+ BUG_ON(mul <= 0);
+ }
+
+ dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
+
+ BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes);
+ BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes);
+ BUG_ON(bind_node_0 > bind_node_1);
+
+ for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
+ int i;
+
+ for (i = 0; i < mul; i++) {
+ if (t >= g->p.nr_tasks) {
+ printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
+ goto out;
+ }
+ td = g->threads + t;
+
+ if (!t)
+ tprintf(" %2d", bind_node);
+ else
+ tprintf(",%2d", bind_node);
+
+ td->bind_node = bind_node;
+ t++;
+ }
+ }
+ }
+out:
+
+ tprintf("\n");
+
+ if (t < g->p.nr_tasks)
+ printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+ free(str0);
+}
+
+static int parse_nodes_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ return parse_node_list(arg);
+
+ return 0;
+}
+
+#define BIT(x) (1ul << x)
+
+static inline uint32_t lfsr_32(uint32_t lfsr)
+{
+ const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
+ return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
+}
+
+/*
+ * Make sure there's real data dependency to RAM (when read
+ * accesses are enabled), so the compiler, the CPU and the
+ * kernel (KSM, zero page, etc.) cannot optimize away RAM
+ * accesses:
+ */
+static inline u64 access_data(u64 *data __attribute__((unused)), u64 val)
+{
+ if (g->p.data_reads)
+ val += *data;
+ if (g->p.data_writes)
+ *data = val + 1;
+ return val;
+}
+
+/*
+ * The worker process does two types of work, a forwards going
+ * loop and a backwards going loop.
+ *
+ * We do this so that on multiprocessor systems we do not create
+ * a 'train' of processing, with highly synchronized processes,
+ * skewing the whole benchmark.
+ */
+static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
+{
+ long words = bytes/sizeof(u64);
+ u64 *data = (void *)__data;
+ long chunk_0, chunk_1;
+ u64 *d0, *d, *d1;
+ long off;
+ long i;
+
+ BUG_ON(!data && words);
+ BUG_ON(data && !words);
+
+ if (!data)
+ return val;
+
+ /* Very simple memset() work variant: */
+ if (g->p.data_zero_memset && !g->p.data_rand_walk) {
+ bzero(data, bytes);
+ return val;
+ }
+
+ /* Spread out by PID/TID nr and by loop nr: */
+ chunk_0 = words/nr_max;
+ chunk_1 = words/g->p.nr_loops;
+ off = nr*chunk_0 + loop*chunk_1;
+
+ while (off >= words)
+ off -= words;
+
+ if (g->p.data_rand_walk) {
+ u32 lfsr = nr + loop + val;
+ int j;
+
+ for (i = 0; i < words/1024; i++) {
+ long start, end;
+
+ lfsr = lfsr_32(lfsr);
+
+ start = lfsr % words;
+ end = min(start + 1024, words-1);
+
+ if (g->p.data_zero_memset) {
+ bzero(data + start, (end-start) * sizeof(u64));
+ } else {
+ for (j = start; j < end; j++)
+ val = access_data(data + j, val);
+ }
+ }
+ } else if (!g->p.data_backwards || (nr + loop) & 1) {
+
+ d0 = data + off;
+ d = data + off + 1;
+ d1 = data + words;
+
+ /* Process data forwards: */
+ for (;;) {
+ if (unlikely(d >= d1))
+ d = data;
+ if (unlikely(d == d0))
+ break;
+
+ val = access_data(d, val);
+
+ d++;
+ }
+ } else {
+ /* Process data backwards: */
+
+ d0 = data + off;
+ d = data + off - 1;
+ d1 = data + words;
+
+ /* Process data forwards: */
+ for (;;) {
+ if (unlikely(d < data))
+ d = data + words-1;
+ if (unlikely(d == d0))
+ break;
+
+ val = access_data(d, val);
+
+ d--;
+ }
+ }
+
+ return val;
+}
+
+static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
+{
+ unsigned int cpu;
+
+ cpu = sched_getcpu();
+
+ g->threads[task_nr].curr_cpu = cpu;
+ prctl(0, bytes_worked);
+}
+
+#define MAX_NR_NODES 64
+
+/*
+ * Count the number of nodes a process's threads
+ * are spread out on.
+ *
+ * A count of 1 means that the process is compressed
+ * to a single node. A count of g->p.nr_nodes means it's
+ * spread out on the whole system.
+ */
+static int count_process_nodes(int process_nr)
+{
+ char node_present[MAX_NR_NODES] = { 0, };
+ int nodes;
+ int n, t;
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ struct thread_data *td;
+ int task_nr;
+ int node;
+
+ task_nr = process_nr*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ node = numa_node_of_cpu(td->curr_cpu);
+ node_present[node] = 1;
+ }
+
+ nodes = 0;
+
+ for (n = 0; n < MAX_NR_NODES; n++)
+ nodes += node_present[n];
+
+ return nodes;
+}
+
+/*
+ * Count the number of distinct process-threads a node contains.
+ *
+ * A count of 1 means that the node contains only a single
+ * process. If all nodes on the system contain at most one
+ * process then we are well-converged.
+ */
+static int count_node_processes(int node)
+{
+ int processes = 0;
+ int t, p;
+
+ for (p = 0; p < g->p.nr_proc; p++) {
+ for (t = 0; t < g->p.nr_threads; t++) {
+ struct thread_data *td;
+ int task_nr;
+ int n;
+
+ task_nr = p*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ n = numa_node_of_cpu(td->curr_cpu);
+ if (n == node) {
+ processes++;
+ break;
+ }
+ }
+ }
+
+ return processes;
+}
+
+static void calc_convergence_compression(int *strong)
+{
+ unsigned int nodes_min, nodes_max;
+ int p;
+
+ nodes_min = -1;
+ nodes_max = 0;
+
+ for (p = 0; p < g->p.nr_proc; p++) {
+ unsigned int nodes = count_process_nodes(p);
+
+ nodes_min = min(nodes, nodes_min);
+ nodes_max = max(nodes, nodes_max);
+ }
+
+ /* Strong convergence: all threads compress on a single node: */
+ if (nodes_min == 1 && nodes_max == 1) {
+ *strong = 1;
+ } else {
+ *strong = 0;
+ tprintf(" {%d-%d}", nodes_min, nodes_max);
+ }
+}
+
+static void calc_convergence(double runtime_ns_max, double *convergence)
+{
+ unsigned int loops_done_min, loops_done_max;
+ int process_groups;
+ int nodes[MAX_NR_NODES];
+ int distance;
+ int nr_min;
+ int nr_max;
+ int strong;
+ int sum;
+ int nr;
+ int node;
+ int cpu;
+ int t;
+
+ if (!g->p.show_convergence && !g->p.measure_convergence)
+ return;
+
+ for (node = 0; node < g->p.nr_nodes; node++)
+ nodes[node] = 0;
+
+ loops_done_min = -1;
+ loops_done_max = 0;
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ unsigned int loops_done;
+
+ cpu = td->curr_cpu;
+
+ /* Not all threads have written it yet: */
+ if (cpu < 0)
+ continue;
+
+ node = numa_node_of_cpu(cpu);
+
+ nodes[node]++;
+
+ loops_done = td->loops_done;
+ loops_done_min = min(loops_done, loops_done_min);
+ loops_done_max = max(loops_done, loops_done_max);
+ }
+
+ nr_max = 0;
+ nr_min = g->p.nr_tasks;
+ sum = 0;
+
+ for (node = 0; node < g->p.nr_nodes; node++) {
+ nr = nodes[node];
+ nr_min = min(nr, nr_min);
+ nr_max = max(nr, nr_max);
+ sum += nr;
+ }
+ BUG_ON(nr_min > nr_max);
+
+ BUG_ON(sum > g->p.nr_tasks);
+
+ if (0 && (sum < g->p.nr_tasks))
+ return;
+
+ /*
+ * Count the number of distinct process groups present
+ * on nodes - when we are converged this will decrease
+ * to g->p.nr_proc:
+ */
+ process_groups = 0;
+
+ for (node = 0; node < g->p.nr_nodes; node++) {
+ int processes = count_node_processes(node);
+
+ nr = nodes[node];
+ tprintf(" %2d/%-2d", nr, processes);
+
+ process_groups += processes;
+ }
+
+ distance = nr_max - nr_min;
+
+ tprintf(" [%2d/%-2d]", distance, process_groups);
+
+ tprintf(" l:%3d-%-3d (%3d)",
+ loops_done_min, loops_done_max, loops_done_max-loops_done_min);
+
+ if (loops_done_min && loops_done_max) {
+ double skew = 1.0 - (double)loops_done_min/loops_done_max;
+
+ tprintf(" [%4.1f%%]", skew * 100.0);
+ }
+
+ calc_convergence_compression(&strong);
+
+ if (strong && process_groups == g->p.nr_proc) {
+ if (!*convergence) {
+ *convergence = runtime_ns_max;
+ tprintf(" (%6.1fs converged)\n", *convergence/1e9);
+ if (g->p.measure_convergence) {
+ g->all_converged = true;
+ g->stop_work = true;
+ }
+ }
+ } else {
+ if (*convergence) {
+ tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
+ *convergence = 0;
+ }
+ tprintf("\n");
+ }
+}
+
+static void show_summary(double runtime_ns_max, int l, double *convergence)
+{
+ tprintf("\r # %5.1f%% [%.1f mins]",
+ (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0);
+
+ calc_convergence(runtime_ns_max, convergence);
+
+ if (g->p.show_details >= 0)
+ fflush(stdout);
+}
+
+static void *worker_thread(void *__tdata)
+{
+ struct thread_data *td = __tdata;
+ struct timeval start0, start, stop, diff;
+ int process_nr = td->process_nr;
+ int thread_nr = td->thread_nr;
+ unsigned long last_perturbance;
+ int task_nr = td->task_nr;
+ int details = g->p.show_details;
+ int first_task, last_task;
+ double convergence = 0;
+ u64 val = td->val;
+ double runtime_ns_max;
+ u8 *global_data;
+ u8 *process_data;
+ u8 *thread_data;
+ u64 bytes_done;
+ long work_done;
+ u32 l;
+
+ bind_to_cpumask(td->bind_cpumask);
+ bind_to_memnode(td->bind_node);
+
+ set_taskname("thread %d/%d", process_nr, thread_nr);
+
+ global_data = g->data;
+ process_data = td->process_data;
+ thread_data = setup_private_data(g->p.bytes_thread);
+
+ bytes_done = 0;
+
+ last_task = 0;
+ if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
+ last_task = 1;
+
+ first_task = 0;
+ if (process_nr == 0 && thread_nr == 0)
+ first_task = 1;
+
+ if (details >= 2) {
+ printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
+ process_nr, thread_nr, global_data, process_data, thread_data);
+ }
+
+ if (g->p.serialize_startup) {
+ pthread_mutex_lock(&g->startup_mutex);
+ g->nr_tasks_started++;
+ pthread_mutex_unlock(&g->startup_mutex);
+
+ /* Here we will wait for the main process to start us all at once: */
+ pthread_mutex_lock(&g->start_work_mutex);
+ g->nr_tasks_working++;
+
+ /* Last one wake the main process: */
+ if (g->nr_tasks_working == g->p.nr_tasks)
+ pthread_mutex_unlock(&g->startup_done_mutex);
+
+ pthread_mutex_unlock(&g->start_work_mutex);
+ }
+
+ gettimeofday(&start0, NULL);
+
+ start = stop = start0;
+ last_perturbance = start.tv_sec;
+
+ for (l = 0; l < g->p.nr_loops; l++) {
+ start = stop;
+
+ if (g->stop_work)
+ break;
+
+ val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val);
+ val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val);
+ val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val);
+
+ if (g->p.sleep_usecs) {
+ pthread_mutex_lock(td->process_lock);
+ usleep(g->p.sleep_usecs);
+ pthread_mutex_unlock(td->process_lock);
+ }
+ /*
+ * Amount of work to be done under a process-global lock:
+ */
+ if (g->p.bytes_process_locked) {
+ pthread_mutex_lock(td->process_lock);
+ val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val);
+ pthread_mutex_unlock(td->process_lock);
+ }
+
+ work_done = g->p.bytes_global + g->p.bytes_process +
+ g->p.bytes_process_locked + g->p.bytes_thread;
+
+ update_curr_cpu(task_nr, work_done);
+ bytes_done += work_done;
+
+ if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
+ continue;
+
+ td->loops_done = l;
+
+ gettimeofday(&stop, NULL);
+
+ /* Check whether our max runtime timed out: */
+ if (g->p.nr_secs) {
+ timersub(&stop, &start0, &diff);
+ if (diff.tv_sec >= g->p.nr_secs) {
+ g->stop_work = true;
+ break;
+ }
+ }
+
+ /* Update the summary at most once per second: */
+ if (start.tv_sec == stop.tv_sec)
+ continue;
+
+ /*
+ * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
+ * by migrating to CPU#0:
+ */
+ if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
+ cpu_set_t orig_mask;
+ int target_cpu;
+ int this_cpu;
+
+ last_perturbance = stop.tv_sec;
+
+ /*
+ * Depending on where we are running, move into
+ * the other half of the system, to create some
+ * real disturbance:
+ */
+ this_cpu = g->threads[task_nr].curr_cpu;
+ if (this_cpu < g->p.nr_cpus/2)
+ target_cpu = g->p.nr_cpus-1;
+ else
+ target_cpu = 0;
+
+ orig_mask = bind_to_cpu(target_cpu);
+
+ /* Here we are running on the target CPU already */
+ if (details >= 1)
+ printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
+
+ bind_to_cpumask(orig_mask);
+ }
+
+ if (details >= 3) {
+ timersub(&stop, &start, &diff);
+ runtime_ns_max = diff.tv_sec * 1000000000;
+ runtime_ns_max += diff.tv_usec * 1000;
+
+ if (details >= 0) {
+ printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n",
+ process_nr, thread_nr, runtime_ns_max / bytes_done, val);
+ }
+ fflush(stdout);
+ }
+ if (!last_task)
+ continue;
+
+ timersub(&stop, &start0, &diff);
+ runtime_ns_max = diff.tv_sec * 1000000000ULL;
+ runtime_ns_max += diff.tv_usec * 1000ULL;
+
+ show_summary(runtime_ns_max, l, &convergence);
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start0, &diff);
+ td->runtime_ns = diff.tv_sec * 1000000000ULL;
+ td->runtime_ns += diff.tv_usec * 1000ULL;
+
+ free_data(thread_data, g->p.bytes_thread);
+
+ pthread_mutex_lock(&g->stop_work_mutex);
+ g->bytes_done += bytes_done;
+ pthread_mutex_unlock(&g->stop_work_mutex);
+
+ return NULL;
+}
+
+/*
+ * A worker process starts a couple of threads:
+ */
+static void worker_process(int process_nr)
+{
+ pthread_mutex_t process_lock;
+ struct thread_data *td;
+ pthread_t *pthreads;
+ u8 *process_data;
+ int task_nr;
+ int ret;
+ int t;
+
+ pthread_mutex_init(&process_lock, NULL);
+ set_taskname("process %d", process_nr);
+
+ /*
+ * Pick up the memory policy and the CPU binding of our first thread,
+ * so that we initialize memory accordingly:
+ */
+ task_nr = process_nr*g->p.nr_threads;
+ td = g->threads + task_nr;
+
+ bind_to_memnode(td->bind_node);
+ bind_to_cpumask(td->bind_cpumask);
+
+ pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
+ process_data = setup_private_data(g->p.bytes_process);
+
+ if (g->p.show_details >= 3) {
+ printf(" # process %2d global mem: %p, process mem: %p\n",
+ process_nr, g->data, process_data);
+ }
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ task_nr = process_nr*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ td->process_data = process_data;
+ td->process_nr = process_nr;
+ td->thread_nr = t;
+ td->task_nr = task_nr;
+ td->val = rand();
+ td->curr_cpu = -1;
+ td->process_lock = &process_lock;
+
+ ret = pthread_create(pthreads + t, NULL, worker_thread, td);
+ BUG_ON(ret);
+ }
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ ret = pthread_join(pthreads[t], NULL);
+ BUG_ON(ret);
+ }
+
+ free_data(process_data, g->p.bytes_process);
+ free(pthreads);
+}
+
+static void print_summary(void)
+{
+ if (g->p.show_details < 0)
+ return;
+
+ printf("\n ###\n");
+ printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
+ g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
+ printf(" # %5dx %5ldMB global shared mem operations\n",
+ g->p.nr_loops, g->p.bytes_global/1024/1024);
+ printf(" # %5dx %5ldMB process shared mem operations\n",
+ g->p.nr_loops, g->p.bytes_process/1024/1024);
+ printf(" # %5dx %5ldMB thread local mem operations\n",
+ g->p.nr_loops, g->p.bytes_thread/1024/1024);
+
+ printf(" ###\n");
+
+ printf("\n ###\n"); fflush(stdout);
+}
+
+static void init_thread_data(void)
+{
+ ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+ int t;
+
+ g->threads = zalloc_shared_data(size);
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ int cpu;
+
+ /* Allow all nodes by default: */
+ td->bind_node = -1;
+
+ /* Allow all CPUs by default: */
+ CPU_ZERO(&td->bind_cpumask);
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &td->bind_cpumask);
+ }
+}
+
+static void deinit_thread_data(void)
+{
+ ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+
+ free_data(g->threads, size);
+}
+
+static int init(void)
+{
+ g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
+
+ /* Copy over options: */
+ g->p = p0;
+
+ g->p.nr_cpus = numa_num_configured_cpus();
+
+ g->p.nr_nodes = numa_max_node() + 1;
+
+ /* char array in count_process_nodes(): */
+ BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+
+ if (g->p.show_quiet && !g->p.show_details)
+ g->p.show_details = -1;
+
+ /* Some memory should be specified: */
+ if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
+ return -1;
+
+ if (g->p.mb_global_str) {
+ g->p.mb_global = atof(g->p.mb_global_str);
+ BUG_ON(g->p.mb_global < 0);
+ }
+
+ if (g->p.mb_proc_str) {
+ g->p.mb_proc = atof(g->p.mb_proc_str);
+ BUG_ON(g->p.mb_proc < 0);
+ }
+
+ if (g->p.mb_proc_locked_str) {
+ g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
+ BUG_ON(g->p.mb_proc_locked < 0);
+ BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
+ }
+
+ if (g->p.mb_thread_str) {
+ g->p.mb_thread = atof(g->p.mb_thread_str);
+ BUG_ON(g->p.mb_thread < 0);
+ }
+
+ BUG_ON(g->p.nr_threads <= 0);
+ BUG_ON(g->p.nr_proc <= 0);
+
+ g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
+
+ g->p.bytes_global = g->p.mb_global *1024L*1024L;
+ g->p.bytes_process = g->p.mb_proc *1024L*1024L;
+ g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L;
+ g->p.bytes_thread = g->p.mb_thread *1024L*1024L;
+
+ g->data = setup_shared_data(g->p.bytes_global);
+
+ /* Startup serialization: */
+ init_global_mutex(&g->start_work_mutex);
+ init_global_mutex(&g->startup_mutex);
+ init_global_mutex(&g->startup_done_mutex);
+ init_global_mutex(&g->stop_work_mutex);
+
+ init_thread_data();
+
+ tprintf("#\n");
+ parse_setup_cpu_list();
+ parse_setup_node_list();
+ tprintf("#\n");
+
+ print_summary();
+
+ return 0;
+}
+
+static void deinit(void)
+{
+ free_data(g->data, g->p.bytes_global);
+ g->data = NULL;
+
+ deinit_thread_data();
+
+ free_data(g, sizeof(*g));
+ g = NULL;
+}
+
+/*
+ * Print a short or long result, depending on the verbosity setting:
+ */
+static void print_res(const char *name, double val,
+ const char *txt_unit, const char *txt_short, const char *txt_long)
+{
+ if (!name)
+ name = "main,";
+
+ if (g->p.show_quiet)
+ printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
+ else
+ printf(" %14.3f %s\n", val, txt_long);
+}
+
+static int __bench_numa(const char *name)
+{
+ struct timeval start, stop, diff;
+ u64 runtime_ns_min, runtime_ns_sum;
+ pid_t *pids, pid, wpid;
+ double delta_runtime;
+ double runtime_avg;
+ double runtime_sec_max;
+ double runtime_sec_min;
+ int wait_stat;
+ double bytes;
+ int i, t;
+
+ if (init())
+ return -1;
+
+ pids = zalloc(g->p.nr_proc * sizeof(*pids));
+ pid = -1;
+
+ /* All threads try to acquire it, this way we can wait for them to start up: */
+ pthread_mutex_lock(&g->start_work_mutex);
+
+ if (g->p.serialize_startup) {
+ tprintf(" #\n");
+ tprintf(" # Startup synchronization: ..."); fflush(stdout);
+ }
+
+ gettimeofday(&start, NULL);
+
+ for (i = 0; i < g->p.nr_proc; i++) {
+ pid = fork();
+ dprintf(" # process %2d: PID %d\n", i, pid);
+
+ BUG_ON(pid < 0);
+ if (!pid) {
+ /* Child process: */
+ worker_process(i);
+
+ exit(0);
+ }
+ pids[i] = pid;
+
+ }
+ /* Wait for all the threads to start up: */
+ while (g->nr_tasks_started != g->p.nr_tasks)
+ usleep(1000);
+
+ BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
+
+ if (g->p.serialize_startup) {
+ double startup_sec;
+
+ pthread_mutex_lock(&g->startup_done_mutex);
+
+ /* This will start all threads: */
+ pthread_mutex_unlock(&g->start_work_mutex);
+
+ /* This mutex is locked - the last started thread will wake us: */
+ pthread_mutex_lock(&g->startup_done_mutex);
+
+ gettimeofday(&stop, NULL);
+
+ timersub(&stop, &start, &diff);
+
+ startup_sec = diff.tv_sec * 1000000000.0;
+ startup_sec += diff.tv_usec * 1000.0;
+ startup_sec /= 1e9;
+
+ tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
+ tprintf(" #\n");
+
+ start = stop;
+ pthread_mutex_unlock(&g->startup_done_mutex);
+ } else {
+ gettimeofday(&start, NULL);
+ }
+
+ /* Parent process: */
+
+
+ for (i = 0; i < g->p.nr_proc; i++) {
+ wpid = waitpid(pids[i], &wait_stat, 0);
+ BUG_ON(wpid < 0);
+ BUG_ON(!WIFEXITED(wait_stat));
+
+ }
+
+ runtime_ns_sum = 0;
+ runtime_ns_min = -1LL;
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ u64 thread_runtime_ns = g->threads[t].runtime_ns;
+
+ runtime_ns_sum += thread_runtime_ns;
+ runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
+
+ tprintf("\n ###\n");
+ tprintf("\n");
+
+ runtime_sec_max = diff.tv_sec * 1000000000.0;
+ runtime_sec_max += diff.tv_usec * 1000.0;
+ runtime_sec_max /= 1e9;
+
+ runtime_sec_min = runtime_ns_min/1e9;
+
+ bytes = g->bytes_done;
+ runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
+
+ if (g->p.measure_convergence) {
+ print_res(name, runtime_sec_max,
+ "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
+ }
+
+ print_res(name, runtime_sec_max,
+ "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime");
+
+ print_res(name, runtime_sec_min,
+ "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime");
+
+ print_res(name, runtime_avg,
+ "secs,", "runtime-avg/thread", "secs average thread-runtime");
+
+ delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
+ print_res(name, delta_runtime / runtime_sec_max * 100.0,
+ "%,", "spread-runtime/thread", "% difference between max/avg runtime");
+
+ print_res(name, bytes / g->p.nr_tasks / 1e9,
+ "GB,", "data/thread", "GB data processed, per thread");
+
+ print_res(name, bytes / 1e9,
+ "GB,", "data-total", "GB data processed, total");
+
+ print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
+ "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
+
+ print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
+ "GB/sec,", "thread-speed", "GB/sec/thread speed");
+
+ print_res(name, bytes / runtime_sec_max / 1e9,
+ "GB/sec,", "total-speed", "GB/sec total speed");
+
+ free(pids);
+
+ deinit();
+
+ return 0;
+}
+
+#define MAX_ARGS 50
+
+static int command_size(const char **argv)
+{
+ int size = 0;
+
+ while (*argv) {
+ size++;
+ argv++;
+ }
+
+ BUG_ON(size >= MAX_ARGS);
+
+ return size;
+}
+
+static void init_params(struct params *p, const char *name, int argc, const char **argv)
+{
+ int i;
+
+ printf("\n # Running %s \"perf bench numa", name);
+
+ for (i = 0; i < argc; i++)
+ printf(" %s", argv[i]);
+
+ printf("\"\n");
+
+ memset(p, 0, sizeof(*p));
+
+ /* Initialize nonzero defaults: */
+
+ p->serialize_startup = 1;
+ p->data_reads = true;
+ p->data_writes = true;
+ p->data_backwards = true;
+ p->data_rand_walk = true;
+ p->nr_loops = -1;
+ p->init_random = true;
+}
+
+static int run_bench_numa(const char *name, const char **argv)
+{
+ int argc = command_size(argv);
+
+ init_params(&p0, name, argc, argv);
+ argc = parse_options(argc, argv, options, bench_numa_usage, 0);
+ if (argc)
+ goto err;
+
+ if (__bench_numa(name))
+ goto err;
+
+ return 0;
+
+err:
+ usage_with_options(numa_usage, options);
+ return -1;
+}
+
+#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk"
+#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1"
+
+#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1"
+#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1"
+
+#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1"
+#define OPT_BW_NOTHP OPT_BW, "--thp", "-1"
+
+/*
+ * The built-in test-suite executed by "perf bench numa -a".
+ *
+ * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
+ */
+static const char *tests[][MAX_ARGS] = {
+ /* Basic single-stream NUMA bandwidth measurements: */
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "0", OPT_BW_RAM },
+ { "RAM-bw-local-NOTHP,",
+ "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "1", OPT_BW_RAM },
+
+ /* 2-stream NUMA bandwidth measurements: */
+ { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
+ { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
+
+ /* Cross-stream NUMA bandwidth measurement: */
+ { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
+
+ /* Convergence latency measurements: */
+ { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
+ { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
+ { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
+ { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
+ { " 4x4-convergence-NOTHP,",
+ "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
+ { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV },
+ { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV },
+ { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV },
+ { " 8x4-convergence-NOTHP,",
+ "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
+ { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV },
+ { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV },
+ { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV },
+ { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV },
+ { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV },
+
+ /* Various NUMA process/thread layout bandwidth measurements: */
+ { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW },
+ { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW },
+ { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW },
+ { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW },
+ { " 8x1-bw-process-NOTHP,",
+ "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
+ { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
+
+ { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
+ { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
+ { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
+ { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
+
+ { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
+ { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
+ { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
+ { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
+ { " 4x8-bw-thread-NOTHP,",
+ "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
+ { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
+ { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
+
+ { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
+
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
+ { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
+ { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
+ { "numa01-bw-thread-NOTHP,",
+ "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP },
+};
+
+static int bench_all(void)
+{
+ int nr = ARRAY_SIZE(tests);
+ int ret;
+ int i;
+
+ ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
+ BUG_ON(ret < 0);
+
+ for (i = 0; i < nr; i++) {
+ if (run_bench_numa(tests[i][0], tests[i] + 1))
+ return -1;
+ }
+
+ printf("\n");
+
+ return 0;
+}
+
+int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ init_params(&p0, "main,", argc, argv);
+ argc = parse_options(argc, argv, options, bench_numa_usage, 0);
+ if (argc)
+ goto err;
+
+ if (p0.run_all)
+ return bench_all();
+
+ if (__bench_numa(NULL))
+ goto err;
+
+ return 0;
+
+err:
+ usage_with_options(numa_usage, options);
+ return -1;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index afd1255..77298bf 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -35,6 +35,18 @@ struct bench_suite {
/* sentinel: easy for help */
#define suite_all { "all", "Test all benchmark suites", NULL }
+#ifdef LIBNUMA_SUPPORT
+static struct bench_suite numa_suites[] = {
+ { "mem",
+ "Benchmark for NUMA workloads",
+ bench_numa },
+ suite_all,
+ { NULL,
+ NULL,
+ NULL }
+};
+#endif
+
static struct bench_suite sched_suites[] = {
{ "messaging",
"Benchmark for scheduler and IPC mechanisms",
@@ -68,6 +80,11 @@ struct bench_subsys {
};
static struct bench_subsys subsystems[] = {
+#ifdef LIBNUMA_SUPPORT
+ { "numa",
+ "NUMA scheduling and MM behavior",
+ numa_suites },
+#endif
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index c746108..46878da 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -17,6 +17,7 @@
#include "util/debug.h"
#include <linux/rbtree.h>
+#include <linux/string.h>
struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
@@ -618,12 +619,11 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
for (i = 0; i < NUM_AVAIL_SORTS; i++) {
if (!strcmp(avail_sorts[i]->name, tok)) {
- sort = malloc(sizeof(*sort));
+ sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
if (!sort) {
- pr_err("%s: malloc failed\n", __func__);
+ pr_err("%s: memdup failed\n", __func__);
return -1;
}
- memcpy(sort, avail_sorts[i], sizeof(*sort));
list_add_tail(&sort->list, list);
return 0;
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1c2ac14..0368a10 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,10 @@
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
+static void print_stat(int argc, const char **argv);
+static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
+static void print_counter(struct perf_evsel *counter, char *prefix);
+
static struct perf_evlist *evsel_list;
static struct perf_target target = {
@@ -87,6 +91,8 @@ static FILE *output = NULL;
static const char *pre_cmd = NULL;
static const char *post_cmd = NULL;
static bool sync_run = false;
+static unsigned int interval = 0;
+static struct timespec ref_time;
static volatile int done = 0;
@@ -94,6 +100,28 @@ struct perf_stat {
struct stats res_stats[3];
};
+static inline void diff_timespec(struct timespec *r, struct timespec *a,
+ struct timespec *b)
+{
+ r->tv_sec = a->tv_sec - b->tv_sec;
+ if (a->tv_nsec < b->tv_nsec) {
+ r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
+ r->tv_sec--;
+ } else {
+ r->tv_nsec = a->tv_nsec - b->tv_nsec ;
+ }
+}
+
+static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
+{
+ return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
+}
+
+static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
+{
+ return perf_evsel__cpus(evsel)->nr;
+}
+
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
evsel->priv = zalloc(sizeof(struct perf_stat));
@@ -106,14 +134,27 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
evsel->priv = NULL;
}
-static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
{
- return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
+ void *addr;
+ size_t sz;
+
+ sz = sizeof(*evsel->counts) +
+ (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
+
+ addr = zalloc(sz);
+ if (!addr)
+ return -ENOMEM;
+
+ evsel->prev_raw_counts = addr;
+
+ return 0;
}
-static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
{
- return perf_evsel__cpus(evsel)->nr;
+ free(evsel->prev_raw_counts);
+ evsel->prev_raw_counts = NULL;
}
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
@@ -245,16 +286,69 @@ static int read_counter(struct perf_evsel *counter)
return 0;
}
+static void print_interval(void)
+{
+ static int num_print_interval;
+ struct perf_evsel *counter;
+ struct perf_stat *ps;
+ struct timespec ts, rs;
+ char prefix[64];
+
+ if (no_aggr) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
+ ps = counter->priv;
+ memset(ps->res_stats, 0, sizeof(ps->res_stats));
+ read_counter(counter);
+ }
+ } else {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
+ ps = counter->priv;
+ memset(ps->res_stats, 0, sizeof(ps->res_stats));
+ read_counter_aggr(counter);
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ diff_timespec(&rs, &ts, &ref_time);
+ sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
+
+ if (num_print_interval == 0 && !csv_output) {
+ if (no_aggr)
+ fprintf(output, "# time CPU counts events\n");
+ else
+ fprintf(output, "# time counts events\n");
+ }
+
+ if (++num_print_interval == 25)
+ num_print_interval = 0;
+
+ if (no_aggr) {
+ list_for_each_entry(counter, &evsel_list->entries, node)
+ print_counter(counter, prefix);
+ } else {
+ list_for_each_entry(counter, &evsel_list->entries, node)
+ print_counter_aggr(counter, prefix);
+ }
+}
+
static int __run_perf_stat(int argc __maybe_unused, const char **argv)
{
char msg[512];
unsigned long long t0, t1;
struct perf_evsel *counter;
+ struct timespec ts;
int status = 0;
int child_ready_pipe[2], go_pipe[2];
const bool forks = (argc > 0);
char buf;
+ if (interval) {
+ ts.tv_sec = interval / 1000;
+ ts.tv_nsec = (interval % 1000) * 1000000;
+ } else {
+ ts.tv_sec = 1;
+ ts.tv_nsec = 0;
+ }
+
if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes");
return -1;
@@ -347,14 +441,25 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
* Enable counters and exec the command:
*/
t0 = rdclock();
+ clock_gettime(CLOCK_MONOTONIC, &ref_time);
if (forks) {
close(go_pipe[1]);
+ if (interval) {
+ while (!waitpid(child_pid, &status, WNOHANG)) {
+ nanosleep(&ts, NULL);
+ print_interval();
+ }
+ }
wait(&status);
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
} else {
- while(!done) sleep(1);
+ while (!done) {
+ nanosleep(&ts, NULL);
+ if (interval)
+ print_interval();
+ }
}
t1 = rdclock();
@@ -440,7 +545,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
- if (csv_output)
+ if (csv_output || interval)
return;
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -654,12 +759,11 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
- if (csv_output)
+ if (csv_output || interval)
return;
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = avg_stats(&runtime_cycles_stats[cpu]);
-
if (total)
ratio = avg / total;
@@ -753,12 +857,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
* Print out the results of a single counter:
* aggregated counts in system-wide mode
*/
-static void print_counter_aggr(struct perf_evsel *counter)
+static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
struct perf_stat *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
int scaled = counter->counts->scaled;
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
if (scaled == -1) {
fprintf(output, "%*s%s%*s",
csv_output ? 0 : 18,
@@ -801,7 +908,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
* Print out the results of a single counter:
* does not use aggregated count in system-wide
*/
-static void print_counter(struct perf_evsel *counter)
+static void print_counter(struct perf_evsel *counter, char *prefix)
{
u64 ena, run, val;
int cpu;
@@ -810,6 +917,10 @@ static void print_counter(struct perf_evsel *counter)
val = counter->counts->cpu[cpu].val;
ena = counter->counts->cpu[cpu].ena;
run = counter->counts->cpu[cpu].run;
+
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
if (run == 0 || ena == 0) {
fprintf(output, "CPU%*d%s%*s%s%*s",
csv_output ? 0 : -4,
@@ -871,10 +982,10 @@ static void print_stat(int argc, const char **argv)
if (no_aggr) {
list_for_each_entry(counter, &evsel_list->entries, node)
- print_counter(counter);
+ print_counter(counter, NULL);
} else {
list_for_each_entry(counter, &evsel_list->entries, node)
- print_counter_aggr(counter);
+ print_counter_aggr(counter, NULL);
}
if (!csv_output) {
@@ -895,7 +1006,7 @@ static volatile int signr = -1;
static void skip_signal(int signo)
{
- if(child_pid == -1)
+ if ((child_pid == -1) || interval)
done = 1;
signr = signo;
@@ -1115,6 +1226,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"command to run prior to the measured command"),
OPT_STRING(0, "post", &post_cmd, "command",
"command to run after to the measured command"),
+ OPT_UINTEGER('I', "interval-print", &interval,
+ "print counts at regular interval in ms (>= 100)"),
OPT_END()
};
const char * const stat_usage[] = {
@@ -1215,12 +1328,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
usage_with_options(stat_usage, options);
return -1;
}
+ if (interval && interval < 100) {
+ pr_err("print interval must be >= 100ms\n");
+ usage_with_options(stat_usage, options);
+ return -1;
+ }
list_for_each_entry(pos, &evsel_list->entries, node) {
if (perf_evsel__alloc_stat_priv(pos) < 0 ||
perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
goto out_free_fd;
}
+ if (interval) {
+ list_for_each_entry(pos, &evsel_list->entries, node) {
+ if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
+ goto out_free_fd;
+ }
+ }
/*
* We dont want to block the signals - that would cause
@@ -1230,6 +1354,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
*/
atexit(sig_atexit);
signal(SIGINT, skip_signal);
+ signal(SIGCHLD, skip_signal);
signal(SIGALRM, skip_signal);
signal(SIGABRT, skip_signal);
@@ -1242,11 +1367,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
status = run_perf_stat(argc, argv);
}
- if (status != -1)
+ if (status != -1 && !interval)
print_stat(argc, argv);
out_free_fd:
- list_for_each_entry(pos, &evsel_list->entries, node)
+ list_for_each_entry(pos, &evsel_list->entries, node) {
perf_evsel__free_stat_priv(pos);
+ perf_evsel__free_counts(pos);
+ perf_evsel__free_prev_raw_counts(pos);
+ }
perf_evlist__delete_maps(evsel_list);
out:
perf_evlist__delete(evsel_list);
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index f5ac774..b4eabb4 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -225,3 +225,14 @@ int main(void)
return on_exit(NULL, NULL);
}
endef
+
+define SOURCE_LIBNUMA
+#include <numa.h>
+#include <numaif.h>
+
+int main(void)
+{
+ numa_available();
+ return 0;
+}
+endef \ No newline at end of file
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index f61dd3f..bdcceb8 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -19,6 +19,11 @@
* permissions. All the event text files are stored there.
*/
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c
index 9b920a0..b0657a9 100644
--- a/tools/perf/tests/open-syscall-all-cpus.c
+++ b/tools/perf/tests/open-syscall-all-cpus.c
@@ -98,6 +98,7 @@ int test__open_syscall_event_on_all_cpus(void)
}
}
+ perf_evsel__free_counts(evsel);
out_close_fd:
perf_evsel__close_fd(evsel, 1, threads->nr);
out_evsel_delete:
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 6ea66cf..1e8e512 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -96,7 +96,7 @@ int test__PERF_RECORD(void)
err = perf_evlist__prepare_workload(evlist, &opts, argv);
if (err < 0) {
pr_debug("Couldn't run the workload!\n");
- goto out_delete_evlist;
+ goto out_delete_maps;
}
/*
@@ -111,7 +111,7 @@ int test__PERF_RECORD(void)
err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
if (err < 0) {
pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
- goto out_delete_evlist;
+ goto out_delete_maps;
}
cpu = err;
@@ -121,7 +121,7 @@ int test__PERF_RECORD(void)
*/
if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
pr_debug("sched_setaffinity: %s\n", strerror(errno));
- goto out_delete_evlist;
+ goto out_delete_maps;
}
/*
@@ -131,7 +131,7 @@ int test__PERF_RECORD(void)
err = perf_evlist__open(evlist);
if (err < 0) {
pr_debug("perf_evlist__open: %s\n", strerror(errno));
- goto out_delete_evlist;
+ goto out_delete_maps;
}
/*
@@ -142,7 +142,7 @@ int test__PERF_RECORD(void)
err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
- goto out_delete_evlist;
+ goto out_delete_maps;
}
/*
@@ -305,6 +305,8 @@ found_exit:
}
out_err:
perf_evlist__munmap(evlist);
+out_delete_maps:
+ perf_evlist__delete_maps(evlist);
out_delete_evlist:
perf_evlist__delete(evlist);
out:
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index a1a8442..7b4c4d2 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -44,7 +44,7 @@ int test__vmlinux_matches_kallsyms(void)
*/
if (machine__create_kernel_maps(&kallsyms) < 0) {
pr_debug("machine__create_kernel_maps ");
- return -1;
+ goto out;
}
/*
@@ -227,5 +227,7 @@ detour:
map__fprintf(pos, stderr);
}
out:
+ machine__exit(&kallsyms);
+ machine__exit(&vmlinux);
return err;
}
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 588bcb2..809ea463 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -273,6 +273,8 @@ void ui_browser__hide(struct ui_browser *browser __maybe_unused)
{
pthread_mutex_lock(&ui__lock);
ui_helpline__pop();
+ free(browser->helpline);
+ browser->helpline = NULL;
pthread_mutex_unlock(&ui__lock);
}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 3cf2c3e..5cd13d7 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -476,8 +476,10 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
}
}
- if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0)
+ if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) {
+ free(event);
return -ENOENT;
+ }
map = machine->vmlinux_maps[MAP__FUNCTION];
size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index e45332d..baa26dd 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -640,6 +640,11 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
}
}
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+ free(evsel->counts);
+}
+
void perf_evsel__exit(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
@@ -659,6 +664,28 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
+static inline void compute_deltas(struct perf_evsel *evsel,
+ int cpu,
+ struct perf_counts_values *count)
+{
+ struct perf_counts_values tmp;
+
+ if (!evsel->prev_raw_counts)
+ return;
+
+ if (cpu == -1) {
+ tmp = evsel->prev_raw_counts->aggr;
+ evsel->prev_raw_counts->aggr = *count;
+ } else {
+ tmp = evsel->prev_raw_counts->cpu[cpu];
+ evsel->prev_raw_counts->cpu[cpu] = *count;
+ }
+
+ count->val = count->val - tmp.val;
+ count->ena = count->ena - tmp.ena;
+ count->run = count->run - tmp.run;
+}
+
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
int cpu, int thread, bool scale)
{
@@ -674,6 +701,8 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
return -errno;
+ compute_deltas(evsel, cpu, &count);
+
if (scale) {
if (count.run == 0)
count.val = 0;
@@ -712,6 +741,8 @@ int __perf_evsel__read(struct perf_evsel *evsel,
}
}
+ compute_deltas(evsel, -1, aggr);
+
evsel->counts->scaled = 0;
if (scale) {
if (aggr->run == 0) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index c68d1b8..cbf4232 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -53,6 +53,7 @@ struct perf_evsel {
struct xyarray *sample_id;
u64 *id;
struct perf_counts *counts;
+ struct perf_counts *prev_raw_counts;
int idx;
u32 ids;
struct hists hists;
@@ -116,6 +117,7 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
void perf_evsel__free_fd(struct perf_evsel *evsel);
void perf_evsel__free_id(struct perf_evsel *evsel);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index fccd69d..f6081cb 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -313,7 +313,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
if (is_kallsyms) {
if (symbol_conf.kptr_restrict) {
pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
- return 0;
+ err = 0;
+ goto out_free;
}
realname = (char *) name;
} else
@@ -954,6 +955,7 @@ static int write_topo_node(int fd, int node)
}
fclose(fp);
+ fp = NULL;
ret = do_write(fd, &mem_total, sizeof(u64));
if (ret)
@@ -980,7 +982,8 @@ static int write_topo_node(int fd, int node)
ret = do_write_string(fd, buf);
done:
free(buf);
- fclose(fp);
+ if (fp)
+ fclose(fp);
return ret;
}
@@ -2921,16 +2924,22 @@ int perf_event__process_tracing_data(union perf_event *event,
session->repipe);
padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
- if (readn(session->fd, buf, padding) < 0)
- die("reading input file");
+ if (readn(session->fd, buf, padding) < 0) {
+ pr_err("%s: reading input file", __func__);
+ return -1;
+ }
if (session->repipe) {
int retw = write(STDOUT_FILENO, buf, padding);
- if (retw <= 0 || retw != padding)
- die("repiping tracing data padding");
+ if (retw <= 0 || retw != padding) {
+ pr_err("%s: repiping tracing data padding", __func__);
+ return -1;
+ }
}
- if (size_read + padding != size)
- die("tracing data size mismatch");
+ if (size_read + padding != size) {
+ pr_err("%s: tracing data size mismatch", __func__);
+ return -1;
+ }
perf_evlist__prepare_tracepoint_events(session->evlist,
session->pevent);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index ff94425..6fcb9de 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -11,6 +11,7 @@
#include "strlist.h"
#include "vdso.h"
#include "build-id.h"
+#include <linux/string.h>
const char *map_type__name[MAP__NR_TYPES] = {
[MAP__FUNCTION] = "Functions",
@@ -29,29 +30,29 @@ static inline int is_no_dso_memory(const char *filename)
!strcmp(filename, "[heap]");
}
-void map__init(struct map *self, enum map_type type,
+void map__init(struct map *map, enum map_type type,
u64 start, u64 end, u64 pgoff, struct dso *dso)
{
- self->type = type;
- self->start = start;
- self->end = end;
- self->pgoff = pgoff;
- self->dso = dso;
- self->map_ip = map__map_ip;
- self->unmap_ip = map__unmap_ip;
- RB_CLEAR_NODE(&self->rb_node);
- self->groups = NULL;
- self->referenced = false;
- self->erange_warned = false;
+ map->type = type;
+ map->start = start;
+ map->end = end;
+ map->pgoff = pgoff;
+ map->dso = dso;
+ map->map_ip = map__map_ip;
+ map->unmap_ip = map__unmap_ip;
+ RB_CLEAR_NODE(&map->rb_node);
+ map->groups = NULL;
+ map->referenced = false;
+ map->erange_warned = false;
}
struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
u64 pgoff, u32 pid, char *filename,
enum map_type type)
{
- struct map *self = malloc(sizeof(*self));
+ struct map *map = malloc(sizeof(*map));
- if (self != NULL) {
+ if (map != NULL) {
char newfilename[PATH_MAX];
struct dso *dso;
int anon, no_dso, vdso;
@@ -74,10 +75,10 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
if (dso == NULL)
goto out_delete;
- map__init(self, type, start, start + len, pgoff, dso);
+ map__init(map, type, start, start + len, pgoff, dso);
if (anon || no_dso) {
- self->map_ip = self->unmap_ip = identity__map_ip;
+ map->map_ip = map->unmap_ip = identity__map_ip;
/*
* Set memory without DSO as loaded. All map__find_*
@@ -85,12 +86,12 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
* unnecessary map__load warning.
*/
if (no_dso)
- dso__set_loaded(dso, self->type);
+ dso__set_loaded(dso, map->type);
}
}
- return self;
+ return map;
out_delete:
- free(self);
+ free(map);
return NULL;
}
@@ -113,48 +114,48 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
return map;
}
-void map__delete(struct map *self)
+void map__delete(struct map *map)
{
- free(self);
+ free(map);
}
-void map__fixup_start(struct map *self)
+void map__fixup_start(struct map *map)
{
- struct rb_root *symbols = &self->dso->symbols[self->type];
+ struct rb_root *symbols = &map->dso->symbols[map->type];
struct rb_node *nd = rb_first(symbols);
if (nd != NULL) {
struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
- self->start = sym->start;
+ map->start = sym->start;
}
}
-void map__fixup_end(struct map *self)
+void map__fixup_end(struct map *map)
{
- struct rb_root *symbols = &self->dso->symbols[self->type];
+ struct rb_root *symbols = &map->dso->symbols[map->type];
struct rb_node *nd = rb_last(symbols);
if (nd != NULL) {
struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
- self->end = sym->end;
+ map->end = sym->end;
}
}
#define DSO__DELETED "(deleted)"
-int map__load(struct map *self, symbol_filter_t filter)
+int map__load(struct map *map, symbol_filter_t filter)
{
- const char *name = self->dso->long_name;
+ const char *name = map->dso->long_name;
int nr;
- if (dso__loaded(self->dso, self->type))
+ if (dso__loaded(map->dso, map->type))
return 0;
- nr = dso__load(self->dso, self, filter);
+ nr = dso__load(map->dso, map, filter);
if (nr < 0) {
- if (self->dso->has_build_id) {
+ if (map->dso->has_build_id) {
char sbuild_id[BUILD_ID_SIZE * 2 + 1];
- build_id__sprintf(self->dso->build_id,
- sizeof(self->dso->build_id),
+ build_id__sprintf(map->dso->build_id,
+ sizeof(map->dso->build_id),
sbuild_id);
pr_warning("%s with build id %s not found",
name, sbuild_id);
@@ -184,43 +185,36 @@ int map__load(struct map *self, symbol_filter_t filter)
* Only applies to the kernel, as its symtabs aren't relative like the
* module ones.
*/
- if (self->dso->kernel)
- map__reloc_vmlinux(self);
+ if (map->dso->kernel)
+ map__reloc_vmlinux(map);
return 0;
}
-struct symbol *map__find_symbol(struct map *self, u64 addr,
+struct symbol *map__find_symbol(struct map *map, u64 addr,
symbol_filter_t filter)
{
- if (map__load(self, filter) < 0)
+ if (map__load(map, filter) < 0)
return NULL;
- return dso__find_symbol(self->dso, self->type, addr);
+ return dso__find_symbol(map->dso, map->type, addr);
}
-struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
symbol_filter_t filter)
{
- if (map__load(self, filter) < 0)
+ if (map__load(map, filter) < 0)
return NULL;
- if (!dso__sorted_by_name(self->dso, self->type))
- dso__sort_by_name(self->dso, self->type);
+ if (!dso__sorted_by_name(map->dso, map->type))
+ dso__sort_by_name(map->dso, map->type);
- return dso__find_symbol_by_name(self->dso, self->type, name);
+ return dso__find_symbol_by_name(map->dso, map->type, name);
}
-struct map *map__clone(struct map *self)
+struct map *map__clone(struct map *map)
{
- struct map *map = malloc(sizeof(*self));
-
- if (!map)
- return NULL;
-
- memcpy(map, self, sizeof(*self));
-
- return map;
+ return memdup(map, sizeof(*map));
}
int map__overlap(struct map *l, struct map *r)
@@ -237,10 +231,10 @@ int map__overlap(struct map *l, struct map *r)
return 0;
}
-size_t map__fprintf(struct map *self, FILE *fp)
+size_t map__fprintf(struct map *map, FILE *fp)
{
return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
- self->start, self->end, self->pgoff, self->dso->name);
+ map->start, map->end, map->pgoff, map->dso->name);
}
size_t map__fprintf_dsoname(struct map *map, FILE *fp)
@@ -528,9 +522,9 @@ static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
return ip - (s64)map->pgoff;
}
-void map__reloc_vmlinux(struct map *self)
+void map__reloc_vmlinux(struct map *map)
{
- struct kmap *kmap = map__kmap(self);
+ struct kmap *kmap = map__kmap(map);
s64 reloc;
if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
@@ -542,9 +536,9 @@ void map__reloc_vmlinux(struct map *self)
if (!reloc)
return;
- self->map_ip = map__reloc_map_ip;
- self->unmap_ip = map__reloc_unmap_ip;
- self->pgoff = reloc;
+ map->map_ip = map__reloc_map_ip;
+ map->unmap_ip = map__reloc_unmap_ip;
+ map->pgoff = reloc;
}
void maps__insert(struct rb_root *maps, struct map *map)
@@ -567,9 +561,9 @@ void maps__insert(struct rb_root *maps, struct map *map)
rb_insert_color(&map->rb_node, maps);
}
-void maps__remove(struct rb_root *self, struct map *map)
+void maps__remove(struct rb_root *maps, struct map *map)
{
- rb_erase(&map->rb_node, self);
+ rb_erase(&map->rb_node, maps);
}
struct map *maps__find(struct rb_root *maps, u64 ip)
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index bcb39e2..a887f2c 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -57,9 +57,9 @@ struct map_groups {
struct machine *machine;
};
-static inline struct kmap *map__kmap(struct map *self)
+static inline struct kmap *map__kmap(struct map *map)
{
- return (struct kmap *)(self + 1);
+ return (struct kmap *)(map + 1);
}
static inline u64 map__map_ip(struct map *map, u64 ip)
@@ -85,27 +85,27 @@ struct symbol;
typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
-void map__init(struct map *self, enum map_type type,
+void map__init(struct map *map, enum map_type type,
u64 start, u64 end, u64 pgoff, struct dso *dso);
struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
u64 pgoff, u32 pid, char *filename,
enum map_type type);
struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
-void map__delete(struct map *self);
-struct map *map__clone(struct map *self);
+void map__delete(struct map *map);
+struct map *map__clone(struct map *map);
int map__overlap(struct map *l, struct map *r);
-size_t map__fprintf(struct map *self, FILE *fp);
+size_t map__fprintf(struct map *map, FILE *fp);
size_t map__fprintf_dsoname(struct map *map, FILE *fp);
-int map__load(struct map *self, symbol_filter_t filter);
-struct symbol *map__find_symbol(struct map *self,
+int map__load(struct map *map, symbol_filter_t filter);
+struct symbol *map__find_symbol(struct map *map,
u64 addr, symbol_filter_t filter);
-struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
symbol_filter_t filter);
-void map__fixup_start(struct map *self);
-void map__fixup_end(struct map *self);
+void map__fixup_start(struct map *map);
+void map__fixup_end(struct map *map);
-void map__reloc_vmlinux(struct map *self);
+void map__reloc_vmlinux(struct map *map);
size_t __map_groups__fprintf_maps(struct map_groups *mg,
enum map_type type, int verbose, FILE *fp);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 7ad6239..8333661 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -249,7 +249,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
size_t size,
unsigned int width __maybe_unused)
{
- FILE *fp;
+ FILE *fp = NULL;
char cmd[PATH_MAX + 2], *path = self->srcline, *nl;
size_t line_len;
@@ -270,7 +270,6 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
if (getline(&path, &line_len, fp) < 0 || !line_len)
goto out_ip;
- fclose(fp);
self->srcline = strdup(path);
if (self->srcline == NULL)
goto out_ip;
@@ -280,8 +279,12 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
*nl = '\0';
path = self->srcline;
out_path:
+ if (fp)
+ pclose(fp);
return repsep_snprintf(bf, size, "%s", path);
out_ip:
+ if (fp)
+ pclose(fp);
return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip);
}
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
index 155d8b7..55433aa4 100644
--- a/tools/perf/util/strlist.c
+++ b/tools/perf/util/strlist.c
@@ -35,11 +35,11 @@ out_delete:
return NULL;
}
-static void str_node__delete(struct str_node *self, bool dupstr)
+static void str_node__delete(struct str_node *snode, bool dupstr)
{
if (dupstr)
- free((void *)self->s);
- free(self);
+ free((void *)snode->s);
+ free(snode);
}
static
@@ -59,12 +59,12 @@ static int strlist__node_cmp(struct rb_node *rb_node, const void *entry)
return strcmp(snode->s, str);
}
-int strlist__add(struct strlist *self, const char *new_entry)
+int strlist__add(struct strlist *slist, const char *new_entry)
{
- return rblist__add_node(&self->rblist, new_entry);
+ return rblist__add_node(&slist->rblist, new_entry);
}
-int strlist__load(struct strlist *self, const char *filename)
+int strlist__load(struct strlist *slist, const char *filename)
{
char entry[1024];
int err;
@@ -80,7 +80,7 @@ int strlist__load(struct strlist *self, const char *filename)
continue;
entry[len - 1] = '\0';
- err = strlist__add(self, entry);
+ err = strlist__add(slist, entry);
if (err != 0)
goto out;
}
@@ -107,56 +107,56 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry)
return snode;
}
-static int strlist__parse_list_entry(struct strlist *self, const char *s)
+static int strlist__parse_list_entry(struct strlist *slist, const char *s)
{
if (strncmp(s, "file://", 7) == 0)
- return strlist__load(self, s + 7);
+ return strlist__load(slist, s + 7);
- return strlist__add(self, s);
+ return strlist__add(slist, s);
}
-int strlist__parse_list(struct strlist *self, const char *s)
+int strlist__parse_list(struct strlist *slist, const char *s)
{
char *sep;
int err;
while ((sep = strchr(s, ',')) != NULL) {
*sep = '\0';
- err = strlist__parse_list_entry(self, s);
+ err = strlist__parse_list_entry(slist, s);
*sep = ',';
if (err != 0)
return err;
s = sep + 1;
}
- return *s ? strlist__parse_list_entry(self, s) : 0;
+ return *s ? strlist__parse_list_entry(slist, s) : 0;
}
-struct strlist *strlist__new(bool dupstr, const char *slist)
+struct strlist *strlist__new(bool dupstr, const char *list)
{
- struct strlist *self = malloc(sizeof(*self));
+ struct strlist *slist = malloc(sizeof(*slist));
- if (self != NULL) {
- rblist__init(&self->rblist);
- self->rblist.node_cmp = strlist__node_cmp;
- self->rblist.node_new = strlist__node_new;
- self->rblist.node_delete = strlist__node_delete;
+ if (slist != NULL) {
+ rblist__init(&slist->rblist);
+ slist->rblist.node_cmp = strlist__node_cmp;
+ slist->rblist.node_new = strlist__node_new;
+ slist->rblist.node_delete = strlist__node_delete;
- self->dupstr = dupstr;
- if (slist && strlist__parse_list(self, slist) != 0)
+ slist->dupstr = dupstr;
+ if (slist && strlist__parse_list(slist, list) != 0)
goto out_error;
}
- return self;
+ return slist;
out_error:
- free(self);
+ free(slist);
return NULL;
}
-void strlist__delete(struct strlist *self)
+void strlist__delete(struct strlist *slist)
{
- if (self != NULL)
- rblist__delete(&self->rblist);
+ if (slist != NULL)
+ rblist__delete(&slist->rblist);
}
struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx)
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index dd9f922..5c7f870 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -17,34 +17,34 @@ struct strlist {
};
struct strlist *strlist__new(bool dupstr, const char *slist);
-void strlist__delete(struct strlist *self);
+void strlist__delete(struct strlist *slist);
-void strlist__remove(struct strlist *self, struct str_node *sn);
-int strlist__load(struct strlist *self, const char *filename);
-int strlist__add(struct strlist *self, const char *str);
+void strlist__remove(struct strlist *slist, struct str_node *sn);
+int strlist__load(struct strlist *slist, const char *filename);
+int strlist__add(struct strlist *slist, const char *str);
-struct str_node *strlist__entry(const struct strlist *self, unsigned int idx);
-struct str_node *strlist__find(struct strlist *self, const char *entry);
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx);
+struct str_node *strlist__find(struct strlist *slist, const char *entry);
-static inline bool strlist__has_entry(struct strlist *self, const char *entry)
+static inline bool strlist__has_entry(struct strlist *slist, const char *entry)
{
- return strlist__find(self, entry) != NULL;
+ return strlist__find(slist, entry) != NULL;
}
-static inline bool strlist__empty(const struct strlist *self)
+static inline bool strlist__empty(const struct strlist *slist)
{
- return rblist__empty(&self->rblist);
+ return rblist__empty(&slist->rblist);
}
-static inline unsigned int strlist__nr_entries(const struct strlist *self)
+static inline unsigned int strlist__nr_entries(const struct strlist *slist)
{
- return rblist__nr_entries(&self->rblist);
+ return rblist__nr_entries(&slist->rblist);
}
/* For strlist iteration */
-static inline struct str_node *strlist__first(struct strlist *self)
+static inline struct str_node *strlist__first(struct strlist *slist)
{
- struct rb_node *rn = rb_first(&self->rblist.entries);
+ struct rb_node *rn = rb_first(&slist->rblist.entries);
return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
}
static inline struct str_node *strlist__next(struct str_node *sn)
@@ -59,21 +59,21 @@ static inline struct str_node *strlist__next(struct str_node *sn)
/**
* strlist_for_each - iterate over a strlist
* @pos: the &struct str_node to use as a loop cursor.
- * @self: the &struct strlist for loop.
+ * @slist: the &struct strlist for loop.
*/
-#define strlist__for_each(pos, self) \
- for (pos = strlist__first(self); pos; pos = strlist__next(pos))
+#define strlist__for_each(pos, slist) \
+ for (pos = strlist__first(slist); pos; pos = strlist__next(pos))
/**
* strlist_for_each_safe - iterate over a strlist safe against removal of
* str_node
* @pos: the &struct str_node to use as a loop cursor.
* @n: another &struct str_node to use as temporary storage.
- * @self: the &struct strlist for loop.
+ * @slist: the &struct strlist for loop.
*/
-#define strlist__for_each_safe(pos, n, self) \
- for (pos = strlist__first(self), n = strlist__next(pos); pos;\
+#define strlist__for_each_safe(pos, n, slist) \
+ for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
pos = n, n = strlist__next(n))
-int strlist__parse_list(struct strlist *self, const char *s);
+int strlist__parse_list(struct strlist *slist, const char *s);
#endif /* __PERF_STRLIST_H */
OpenPOWER on IntegriCloud