diff options
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/Makefile | 10 | ||||
-rw-r--r-- | tools/perf/builtin-annotate.c | 262 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 163 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 439 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 308 | ||||
-rw-r--r-- | tools/perf/builtin-top.c | 24 | ||||
-rw-r--r-- | tools/perf/perf.h | 13 | ||||
-rw-r--r-- | tools/perf/types.h | 17 | ||||
-rw-r--r-- | tools/perf/util/ctype.c | 17 | ||||
-rw-r--r-- | tools/perf/util/parse-events.c | 14 | ||||
-rw-r--r-- | tools/perf/util/string.c | 2 | ||||
-rw-r--r-- | tools/perf/util/string.h | 4 | ||||
-rw-r--r-- | tools/perf/util/symbol.c | 20 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 16 | ||||
-rw-r--r-- | tools/perf/util/util.h | 18 |
15 files changed, 998 insertions, 329 deletions
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0cbd5d6..36d7eef 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -157,10 +157,15 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') +# If we're on a 64-bit kernel, use -m64 +ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) + M64 := -m64 +endif + # CFLAGS and LDFLAGS are for the users to override from the command line. -CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 -LDFLAGS = -lpthread -lrt -lelf +CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 +LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -285,6 +290,7 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h LIB_H += perf.h +LIB_H += types.h LIB_H += util/list.h LIB_H += util/rbtree.h LIB_H += util/levenshtein.h diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index b1ed5f7..7e58e3a 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -25,6 +25,10 @@ #define SHOW_USER 2 #define SHOW_HV 4 +#define MIN_GREEN 0.5 +#define MIN_RED 5.0 + + static char const *input_name = "perf.data"; static char *vmlinux = "vmlinux"; @@ -39,40 +43,42 @@ static int dump_trace = 0; static int verbose; +static int print_line; + static unsigned long page_size; static unsigned long mmap_window = 32; struct ip_event { struct perf_event_header header; - __u64 ip; - __u32 pid, tid; + u64 ip; + u32 pid, tid; }; struct mmap_event { struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; struct comm_event { struct perf_event_header header; - __u32 pid, tid; + u32 pid, tid; char comm[16]; }; struct fork_event { struct perf_event_header header; - __u32 pid, ppid; + u32 pid, ppid; }; struct period_event { struct perf_event_header header; - __u64 time; - __u64 id; - __u64 sample_period; + u64 time; + u64 id; + u64 sample_period; }; typedef union event_union { @@ -84,6 +90,13 @@ typedef union event_union { struct period_event period; } event_t; + +struct sym_ext { + struct rb_node node; + double percent; + char *path; +}; + static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; @@ -145,7 +158,7 @@ static void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) +static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { return dso__find_symbol(kernel_dso, ip); } @@ -178,19 +191,19 @@ static int load_kernel(void) struct map { struct list_head node; - __u64 start; - __u64 end; - __u64 pgoff; - __u64 (*map_ip)(struct map *, __u64); + u64 start; + u64 end; + u64 pgoff; + u64 (*map_ip)(struct map *, u64); struct dso *dso; }; -static __u64 map__map_ip(struct map *map, __u64 ip) +static u64 map__map_ip(struct map *map, u64 ip) { return ip - map->start + map->pgoff; } -static __u64 vdso__map_ip(struct map *map, __u64 ip) +static u64 vdso__map_ip(struct map *map, u64 ip) { return ip; } @@ -373,7 +386,7 @@ static int thread__fork(struct thread *self, struct thread *parent) return 0; } -static struct map *thread__find_map(struct thread *self, __u64 ip) +static struct map *thread__find_map(struct thread *self, u64 ip) { struct map *pos; @@ -414,7 +427,7 @@ struct hist_entry { struct map *map; struct dso *dso; struct symbol *sym; - __u64 ip; + u64 ip; char level; uint32_t count; @@ -519,7 +532,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self) if (self->dso) return fprintf(fp, "%-25s", self->dso->name); - return fprintf(fp, "%016llx ", (__u64)self->ip); + return fprintf(fp, "%016llx ", (u64)self->ip); } static struct sort_entry sort_dso = { @@ -533,7 +546,7 @@ static struct sort_entry sort_dso = { static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { - __u64 ip_l, ip_r; + u64 ip_l, ip_r; if (left->sym == right->sym) return 0; @@ -550,13 +563,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self) size_t ret = 0; if (verbose) - ret += fprintf(fp, "%#018llx ", (__u64)self->ip); + ret += fprintf(fp, "%#018llx ", (u64)self->ip); if (self->sym) { ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : '.', self->sym->name); } else { - ret += fprintf(fp, "%#016llx", (__u64)self->ip); + ret += fprintf(fp, "%#016llx", (u64)self->ip); } return ret; @@ -647,7 +660,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) /* * collect histogram counts */ -static void hist_hit(struct hist_entry *he, __u64 ip) +static void hist_hit(struct hist_entry *he, u64 ip) { unsigned int sym_size, offset; struct symbol *sym = he->sym; @@ -676,7 +689,7 @@ static void hist_hit(struct hist_entry *he, __u64 ip) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, char level) + struct symbol *sym, u64 ip, char level) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -848,7 +861,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) int show = 0; struct dso *dso = NULL; struct thread *thread = threads__findnew(event->ip.pid); - __u64 ip = event->ip.ip; + u64 ip = event->ip.ip; struct map *map = NULL; dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", @@ -1030,13 +1043,33 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static char *get_color(double percent) +{ + char *color = PERF_COLOR_NORMAL; + + /* + * We color high-overhead entries in red, mid-overhead + * entries in green - and keep the low overhead places + * normal: + */ + if (percent >= MIN_RED) + color = PERF_COLOR_RED; + else { + if (percent > MIN_GREEN) + color = PERF_COLOR_GREEN; + } + return color; +} + static int -parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) +parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) { char *line = NULL, *tmp, *tmp2; + static const char *prev_line; + static const char *prev_color; unsigned int offset; size_t line_len; - __u64 line_ip; + u64 line_ip; int ret; char *c; @@ -1073,27 +1106,36 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) } if (line_ip != -1) { + const char *path = NULL; unsigned int hits = 0; double percent = 0.0; - char *color = PERF_COLOR_NORMAL; + char *color; + struct sym_ext *sym_ext = sym->priv; offset = line_ip - start; if (offset < len) hits = sym->hist[offset]; - if (sym->hist_sum) + if (offset < len && sym_ext) { + path = sym_ext[offset].path; + percent = sym_ext[offset].percent; + } else if (sym->hist_sum) percent = 100.0 * hits / sym->hist_sum; + color = get_color(percent); + /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: + * Also color the filename and line if needed, with + * the same color than the percentage. Don't print it + * twice for close colored ip with the same filename:line */ - if (percent >= 5.0) - color = PERF_COLOR_RED; - else { - if (percent > 0.5) - color = PERF_COLOR_GREEN; + if (path) { + if (!prev_line || strcmp(prev_line, path) + || color != prev_color) { + color_fprintf(stdout, color, " %s", path); + prev_line = path; + prev_color = color; + } } color_fprintf(stdout, color, " %7.2f", percent); @@ -1109,10 +1151,125 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) return 0; } +static struct rb_root root_sym_ext; + +static void insert_source_line(struct sym_ext *sym_ext) +{ + struct sym_ext *iter; + struct rb_node **p = &root_sym_ext.rb_node; + struct rb_node *parent = NULL; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct sym_ext, node); + + if (sym_ext->percent > iter->percent) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&sym_ext->node, parent, p); + rb_insert_color(&sym_ext->node, &root_sym_ext); +} + +static void free_source_line(struct symbol *sym, int len) +{ + struct sym_ext *sym_ext = sym->priv; + int i; + + if (!sym_ext) + return; + + for (i = 0; i < len; i++) + free(sym_ext[i].path); + free(sym_ext); + + sym->priv = NULL; + root_sym_ext = RB_ROOT; +} + +/* Get the filename:line for the colored entries */ +static void +get_source_line(struct symbol *sym, u64 start, int len, char *filename) +{ + int i; + char cmd[PATH_MAX * 2]; + struct sym_ext *sym_ext; + + if (!sym->hist_sum) + return; + + sym->priv = calloc(len, sizeof(struct sym_ext)); + if (!sym->priv) + return; + + sym_ext = sym->priv; + + for (i = 0; i < len; i++) { + char *path = NULL; + size_t line_len; + u64 offset; + FILE *fp; + + sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum; + if (sym_ext[i].percent <= 0.5) + continue; + + offset = start + i; + sprintf(cmd, "addr2line -e %s %016llx", filename, offset); + fp = popen(cmd, "r"); + if (!fp) + continue; + + if (getline(&path, &line_len, fp) < 0 || !line_len) + goto next; + + sym_ext[i].path = malloc(sizeof(char) * line_len + 1); + if (!sym_ext[i].path) + goto next; + + strcpy(sym_ext[i].path, path); + insert_source_line(&sym_ext[i]); + + next: + pclose(fp); + } +} + +static void print_summary(char *filename) +{ + struct sym_ext *sym_ext; + struct rb_node *node; + + printf("\nSorted summary for file %s\n", filename); + printf("----------------------------------------------\n\n"); + + if (RB_EMPTY_ROOT(&root_sym_ext)) { + printf(" Nothing higher than %1.1f%%\n", MIN_GREEN); + return; + } + + node = rb_first(&root_sym_ext); + while (node) { + double percent; + char *color; + char *path; + + sym_ext = rb_entry(node, struct sym_ext, node); + percent = sym_ext->percent; + color = get_color(percent); + path = sym_ext->path; + + color_fprintf(stdout, color, " %7.2f %s", percent, path); + node = rb_next(node); + } +} + static void annotate_sym(struct dso *dso, struct symbol *sym) { char *filename = dso->name; - __u64 start, end, len; + u64 start, end, len; char command[PATH_MAX*2]; FILE *file; @@ -1121,13 +1278,6 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) if (dso == kernel_dso) filename = vmlinux; - printf("\n------------------------------------------------\n"); - printf(" Percent | Source code & Disassembly of %s\n", filename); - printf("------------------------------------------------\n"); - - if (verbose >= 2) - printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); - start = sym->obj_start; if (!start) start = sym->start; @@ -1135,7 +1285,19 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) end = start + sym->end - sym->start + 1; len = sym->end - sym->start; - sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); + if (print_line) { + get_source_line(sym, start, len, filename); + print_summary(filename); + } + + printf("\n\n------------------------------------------------\n"); + printf(" Percent | Source code & Disassembly of %s\n", filename); + printf("------------------------------------------------\n"); + + if (verbose >= 2) + printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); + + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (u64)start, (u64)end, filename); if (verbose >= 3) printf("doing: %s\n", command); @@ -1150,6 +1312,8 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) } pclose(file); + if (print_line) + free_source_line(sym, len); } static void find_annotations(void) @@ -1308,6 +1472,8 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_BOOLEAN('l', "print-line", &print_line, + "print matching source lines (may be slow)"), OPT_END() }; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0f5771f..d7ebbd7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -37,33 +37,37 @@ static pid_t target_pid = -1; static int inherit = 1; static int force = 0; static int append_file = 0; +static int call_graph = 0; static int verbose = 0; static long samples; static struct timeval last_read; static struct timeval this_read; -static __u64 bytes_written; +static u64 bytes_written; static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; static int nr_poll; static int nr_cpu; +static int file_new = 1; +static struct perf_file_header file_header; + struct mmap_event { struct perf_event_header header; - __u32 pid; - __u32 tid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid; + u32 tid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; struct comm_event { struct perf_event_header header; - __u32 pid; - __u32 tid; + u32 pid; + u32 tid; char comm[16]; }; @@ -77,10 +81,10 @@ struct mmap_data { static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; -static unsigned int mmap_read_head(struct mmap_data *md) +static unsigned long mmap_read_head(struct mmap_data *md) { struct perf_counter_mmap_page *pc = md->base; - int head; + long head; head = pc->data_head; rmb(); @@ -88,6 +92,32 @@ static unsigned int mmap_read_head(struct mmap_data *md) return head; } +static void mmap_write_tail(struct mmap_data *md, unsigned long tail) +{ + struct perf_counter_mmap_page *pc = md->base; + + /* + * ensure all reads are done before we write the tail out. + */ + /* mb(); */ + pc->data_tail = tail; +} + +static void write_output(void *buf, size_t size) +{ + while (size) { + int ret = write(output, buf, size); + + if (ret < 0) + die("failed to write"); + + size -= ret; + buf += ret; + + bytes_written += ret; + } +} + static void mmap_read(struct mmap_data *md) { unsigned int head = mmap_read_head(md); @@ -108,7 +138,7 @@ static void mmap_read(struct mmap_data *md) * In either case, truncate and restart at head. */ diff = head - old; - if (diff > md->mask / 2 || diff < 0) { + if (diff < 0) { struct timeval iv; unsigned long msecs; @@ -136,36 +166,17 @@ static void mmap_read(struct mmap_data *md) size = md->mask + 1 - (old & md->mask); old += size; - while (size) { - int ret = write(output, buf, size); - - if (ret < 0) - die("failed to write"); - - size -= ret; - buf += ret; - - bytes_written += ret; - } + write_output(buf, size); } buf = &data[old & md->mask]; size = head - old; old += size; - while (size) { - int ret = write(output, buf, size); - - if (ret < 0) - die("failed to write"); - - size -= ret; - buf += ret; - - bytes_written += ret; - } + write_output(buf, size); md->prev = old; + mmap_write_tail(md, old); } static volatile int done = 0; @@ -191,7 +202,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) struct comm_event comm_ev; char filename[PATH_MAX]; char bf[BUFSIZ]; - int fd, ret; + int fd; size_t size; char *field, *sep; DIR *tasks; @@ -201,8 +212,12 @@ static void pid_synthesize_comm_event(pid_t pid, int full) fd = open(filename, O_RDONLY); if (fd < 0) { - fprintf(stderr, "couldn't open %s\n", filename); - exit(EXIT_FAILURE); + /* + * We raced with a task exiting - just return: + */ + if (verbose) + fprintf(stderr, "couldn't open %s\n", filename); + return; } if (read(fd, bf, sizeof(bf)) < 0) { fprintf(stderr, "couldn't read %s\n", filename); @@ -223,17 +238,13 @@ static void pid_synthesize_comm_event(pid_t pid, int full) comm_ev.pid = pid; comm_ev.header.type = PERF_EVENT_COMM; - size = ALIGN(size, sizeof(__u64)); + size = ALIGN(size, sizeof(u64)); comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); if (!full) { comm_ev.tid = pid; - ret = write(output, &comm_ev, comm_ev.header.size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } + write_output(&comm_ev, comm_ev.header.size); return; } @@ -248,11 +259,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) comm_ev.tid = pid; - ret = write(output, &comm_ev, comm_ev.header.size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } + write_output(&comm_ev, comm_ev.header.size); } closedir(tasks); return; @@ -272,8 +279,12 @@ static void pid_synthesize_mmap_samples(pid_t pid) fp = fopen(filename, "r"); if (fp == NULL) { - fprintf(stderr, "couldn't open %s\n", filename); - exit(EXIT_FAILURE); + /* + * We raced with a task exiting - just return: + */ + if (verbose) + fprintf(stderr, "couldn't open %s\n", filename); + return; } while (1) { char bf[BUFSIZ], *pbf = bf; @@ -304,17 +315,14 @@ static void pid_synthesize_mmap_samples(pid_t pid) size = strlen(execname); execname[size - 1] = '\0'; /* Remove \n */ memcpy(mmap_ev.filename, execname, size); - size = ALIGN(size, sizeof(__u64)); + size = ALIGN(size, sizeof(u64)); mmap_ev.len -= mmap_ev.start; mmap_ev.header.size = (sizeof(mmap_ev) - (sizeof(mmap_ev.filename) - size)); mmap_ev.pid = pid; mmap_ev.tid = pid; - if (write(output, &mmap_ev, mmap_ev.header.size) < 0) { - perror("failed to write"); - exit(-1); - } + write_output(&mmap_ev, mmap_ev.header.size); } } @@ -351,11 +359,25 @@ static void create_counter(int counter, int cpu, pid_t pid) int track = 1; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; + if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; attr->freq = 1; attr->sample_freq = freq; } + + if (call_graph) + attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + + if (file_new) { + file_header.sample_type = attr->sample_type; + } else { + if (file_header.sample_type != attr->sample_type) { + fprintf(stderr, "incompatible append\n"); + exit(-1); + } + } + attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; @@ -410,7 +432,7 @@ try_again: mmap_array[nr_cpu][counter].prev = 0; mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0); + PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { error("failed to mmap with %d (%s)\n", errno, strerror(errno)); exit(-1); @@ -435,6 +457,14 @@ static void open_counters(int cpu, pid_t pid) nr_cpu++; } +static void atexit_header(void) +{ + file_header.data_size += bytes_written; + + if (pwrite(output, &file_header, sizeof(file_header), 0) == -1) + perror("failed to write on file headers"); +} + static int __cmd_record(int argc, const char **argv) { int i, counter; @@ -448,6 +478,10 @@ static int __cmd_record(int argc, const char **argv) assert(nr_cpus <= MAX_NR_CPUS); assert(nr_cpus >= 0); + atexit(sig_atexit); + signal(SIGCHLD, sig_handler); + signal(SIGINT, sig_handler); + if (!stat(output_name, &st) && !force && !append_file) { fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", output_name); @@ -456,7 +490,7 @@ static int __cmd_record(int argc, const char **argv) flags = O_CREAT|O_RDWR; if (append_file) - flags |= O_APPEND; + file_new = 0; else flags |= O_TRUNC; @@ -466,15 +500,22 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } + if (!file_new) { + if (read(output, &file_header, sizeof(file_header)) == -1) { + perror("failed to read file headers"); + exit(-1); + } + + lseek(output, file_header.data_size, SEEK_CUR); + } + + atexit(atexit_header); + if (!system_wide) { open_counters(-1, target_pid != -1 ? target_pid : getpid()); } else for (i = 0; i < nr_cpus; i++) open_counters(i, target_pid); - atexit(sig_atexit); - signal(SIGCHLD, sig_handler); - signal(SIGINT, sig_handler); - if (target_pid == -1 && argc) { pid = fork(); if (pid < 0) @@ -555,6 +596,8 @@ static const struct option options[] = { "profile at this frequency"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), + OPT_BOOLEAN('g', "call-graph", &call_graph, + "do call-graph (stack chain/backtrace) recording"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_END() diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 82fa93b..5eb5566 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -36,45 +36,65 @@ static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int dump_trace = 0; #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) +#define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) static int verbose; +#define eprintf(x...) do { if (verbose) fprintf(stderr, x); } while (0) + static int full_paths; static unsigned long page_size; static unsigned long mmap_window = 32; +static char default_parent_pattern[] = "^sys_|^do_page_fault"; +static char *parent_pattern = default_parent_pattern; +static regex_t parent_regex; + +static int exclude_other = 1; + struct ip_event { struct perf_event_header header; - __u64 ip; - __u32 pid, tid; - __u64 period; + u64 ip; + u32 pid, tid; + unsigned char __more_data[]; +}; + +struct ip_callchain { + u64 nr; + u64 ips[0]; }; struct mmap_event { struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; struct comm_event { struct perf_event_header header; - __u32 pid, tid; + u32 pid, tid; char comm[16]; }; struct fork_event { struct perf_event_header header; - __u32 pid, ppid; + u32 pid, ppid; }; struct period_event { struct perf_event_header header; - __u64 time; - __u64 id; - __u64 sample_period; + u64 time; + u64 id; + u64 sample_period; +}; + +struct lost_event { + struct perf_event_header header; + u64 id; + u64 lost; }; typedef union event_union { @@ -84,6 +104,7 @@ typedef union event_union { struct comm_event comm; struct fork_event fork; struct period_event period; + struct lost_event lost; } event_t; static LIST_HEAD(dsos); @@ -119,15 +140,11 @@ static struct dso *dsos__findnew(const char *name) nr = dso__load(dso, NULL, verbose); if (nr < 0) { - if (verbose) - fprintf(stderr, "Failed to open: %s\n", name); + eprintf("Failed to open: %s\n", name); goto out_delete_dso; } - if (!nr && verbose) { - fprintf(stderr, - "No symbols found in: %s, maybe install a debug package?\n", - name); - } + if (!nr) + eprintf("No symbols found in: %s, maybe install a debug package?\n", name); dsos__add(dso); @@ -146,7 +163,7 @@ static void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) +static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { return dso__find_symbol(kernel_dso, ip); } @@ -193,19 +210,19 @@ static int strcommon(const char *pathname) struct map { struct list_head node; - __u64 start; - __u64 end; - __u64 pgoff; - __u64 (*map_ip)(struct map *, __u64); + u64 start; + u64 end; + u64 pgoff; + u64 (*map_ip)(struct map *, u64); struct dso *dso; }; -static __u64 map__map_ip(struct map *map, __u64 ip) +static u64 map__map_ip(struct map *map, u64 ip) { return ip - map->start + map->pgoff; } -static __u64 vdso__map_ip(struct map *map, __u64 ip) +static u64 vdso__map_ip(struct map *map, u64 ip) { return ip; } @@ -412,7 +429,7 @@ static int thread__fork(struct thread *self, struct thread *parent) return 0; } -static struct map *thread__find_map(struct thread *self, __u64 ip) +static struct map *thread__find_map(struct thread *self, u64 ip) { struct map *pos; @@ -453,10 +470,11 @@ struct hist_entry { struct map *map; struct dso *dso; struct symbol *sym; - __u64 ip; + struct symbol *parent; + u64 ip; char level; - __u64 count; + u64 count; }; /* @@ -473,6 +491,16 @@ struct sort_entry { size_t (*print)(FILE *fp, struct hist_entry *); }; +static int64_t cmp_null(void *l, void *r) +{ + if (!l && !r) + return 0; + else if (!l) + return -1; + else + return 1; +} + /* --sort pid */ static int64_t @@ -507,14 +535,8 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) char *comm_l = left->thread->comm; char *comm_r = right->thread->comm; - if (!comm_l || !comm_r) { - if (!comm_l && !comm_r) - return 0; - else if (!comm_l) - return -1; - else - return 1; - } + if (!comm_l || !comm_r) + return cmp_null(comm_l, comm_r); return strcmp(comm_l, comm_r); } @@ -540,14 +562,8 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) struct dso *dso_l = left->dso; struct dso *dso_r = right->dso; - if (!dso_l || !dso_r) { - if (!dso_l && !dso_r) - return 0; - else if (!dso_l) - return -1; - else - return 1; - } + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); return strcmp(dso_l->name, dso_r->name); } @@ -558,7 +574,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self) if (self->dso) return fprintf(fp, "%-25s", self->dso->name); - return fprintf(fp, "%016llx ", (__u64)self->ip); + return fprintf(fp, "%016llx ", (u64)self->ip); } static struct sort_entry sort_dso = { @@ -572,7 +588,7 @@ static struct sort_entry sort_dso = { static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { - __u64 ip_l, ip_r; + u64 ip_l, ip_r; if (left->sym == right->sym) return 0; @@ -589,13 +605,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self) size_t ret = 0; if (verbose) - ret += fprintf(fp, "%#018llx ", (__u64)self->ip); + ret += fprintf(fp, "%#018llx ", (u64)self->ip); if (self->sym) { ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : '.', self->sym->name); } else { - ret += fprintf(fp, "%#016llx", (__u64)self->ip); + ret += fprintf(fp, "%#016llx", (u64)self->ip); } return ret; @@ -607,7 +623,38 @@ static struct sort_entry sort_sym = { .print = sort__sym_print, }; +/* --sort parent */ + +static int64_t +sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct symbol *sym_l = left->parent; + struct symbol *sym_r = right->parent; + + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + return strcmp(sym_l->name, sym_r->name); +} + +static size_t +sort__parent_print(FILE *fp, struct hist_entry *self) +{ + size_t ret = 0; + + ret += fprintf(fp, "%-20s", self->parent ? self->parent->name : "[other]"); + + return ret; +} + +static struct sort_entry sort_parent = { + .header = "Parent symbol ", + .cmp = sort__parent_cmp, + .print = sort__parent_print, +}; + static int sort__need_collapse = 0; +static int sort__has_parent = 0; struct sort_dimension { char *name; @@ -620,6 +667,7 @@ static struct sort_dimension sort_dimensions[] = { { .name = "comm", .entry = &sort_comm, }, { .name = "dso", .entry = &sort_dso, }, { .name = "symbol", .entry = &sort_sym, }, + { .name = "parent", .entry = &sort_parent, }, }; static LIST_HEAD(hist_entry__sort_list); @@ -640,6 +688,19 @@ static int sort_dimension__add(char *tok) if (sd->entry->collapse) sort__need_collapse = 1; + if (sd->entry == &sort_parent) { + int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); + if (ret) { + char err[BUFSIZ]; + + regerror(ret, &parent_regex, err, sizeof(err)); + fprintf(stderr, "Invalid regex: %s\n%s", + parent_pattern, err); + exit(-1); + } + sort__has_parent = 1; + } + list_add_tail(&sd->entry->list, &hist_entry__sort_list); sd->taken = 1; @@ -684,11 +745,14 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) } static size_t -hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) +hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) { struct sort_entry *se; size_t ret; + if (exclude_other && !self->parent) + return 0; + if (total_samples) { double percent = self->count * 100.0 / total_samples; char *color = PERF_COLOR_NORMAL; @@ -711,6 +775,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) ret = fprintf(fp, "%12Ld ", self->count); list_for_each_entry(se, &hist_entry__sort_list, list) { + if (exclude_other && (se == &sort_parent)) + continue; + fprintf(fp, " "); ret += se->print(fp, self); } @@ -721,12 +788,72 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) } /* + * + */ + +static struct symbol * +resolve_symbol(struct thread *thread, struct map **mapp, + struct dso **dsop, u64 *ipp) +{ + struct dso *dso = dsop ? *dsop : NULL; + struct map *map = mapp ? *mapp : NULL; + uint64_t ip = *ipp; + + if (!thread) + return NULL; + + if (dso) + goto got_dso; + + if (map) + goto got_map; + + map = thread__find_map(thread, ip); + if (map != NULL) { + if (mapp) + *mapp = map; +got_map: + ip = map->map_ip(map, ip); + *ipp = ip; + + dso = map->dso; + } else { + /* + * If this is outside of all known maps, + * and is a negative address, try to look it + * up in the kernel dso, as it might be a + * vsyscall (which executes in user-mode): + */ + if ((long long)ip < 0) + dso = kernel_dso; + } + dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); + + if (dsop) + *dsop = dso; + + if (!dso) + return NULL; +got_dso: + return dso->find_symbol(dso, ip); +} + +static int call__match(struct symbol *sym) +{ + if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) + return 1; + + return 0; +} + +/* * collect histogram counts */ static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, char level, __u64 count) + struct symbol *sym, u64 ip, struct ip_callchain *chain, + char level, u64 count) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -739,9 +866,41 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, .ip = ip, .level = level, .count = count, + .parent = NULL, }; int cmp; + if (sort__has_parent && chain) { + u64 context = PERF_CONTEXT_MAX; + int i; + + for (i = 0; i < chain->nr; i++) { + u64 ip = chain->ips[i]; + struct dso *dso = NULL; + struct symbol *sym; + + if (ip >= PERF_CONTEXT_MAX) { + context = ip; + continue; + } + + switch (context) { + case PERF_CONTEXT_KERNEL: + dso = kernel_dso; + break; + default: + break; + } + + sym = resolve_symbol(thread, NULL, &dso, &ip); + + if (sym && call__match(sym)) { + entry.parent = sym; + break; + } + } + } + while (*p != NULL) { parent = *p; he = rb_entry(parent, struct hist_entry, rb_node); @@ -873,7 +1032,7 @@ static void output__resort(void) } } -static size_t output__fprintf(FILE *fp, __u64 total_samples) +static size_t output__fprintf(FILE *fp, u64 total_samples) { struct hist_entry *pos; struct sort_entry *se; @@ -882,18 +1041,24 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) fprintf(fp, "\n"); fprintf(fp, "#\n"); - fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples); + fprintf(fp, "# (%Ld samples)\n", (u64)total_samples); fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); - list_for_each_entry(se, &hist_entry__sort_list, list) + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (exclude_other && (se == &sort_parent)) + continue; fprintf(fp, " %s", se->header); + } fprintf(fp, "\n"); fprintf(fp, "# ........"); list_for_each_entry(se, &hist_entry__sort_list, list) { int i; + if (exclude_other && (se == &sort_parent)) + continue; + fprintf(fp, " "); for (i = 0; i < strlen(se->header); i++) fprintf(fp, "."); @@ -907,7 +1072,8 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) ret += hist_entry__fprintf(fp, pos, total_samples); } - if (!strcmp(sort_order, default_sort_order)) { + if (sort_order == default_sort_order && + parent_pattern == default_parent_pattern) { fprintf(fp, "#\n"); fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); fprintf(fp, "#\n"); @@ -932,7 +1098,21 @@ static unsigned long total = 0, total_mmap = 0, total_comm = 0, total_fork = 0, - total_unknown = 0; + total_unknown = 0, + total_lost = 0; + +static int validate_chain(struct ip_callchain *chain, event_t *event) +{ + unsigned int chain_size; + + chain_size = event->header.size; + chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; + + if (chain->nr*sizeof(u64) > chain_size) + return -1; + + return 0; +} static int process_overflow_event(event_t *event, unsigned long offset, unsigned long head) @@ -941,12 +1121,16 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) int show = 0; struct dso *dso = NULL; struct thread *thread = threads__findnew(event->ip.pid); - __u64 ip = event->ip.ip; - __u64 period = 1; + u64 ip = event->ip.ip; + u64 period = 1; struct map *map = NULL; + void *more_data = event->ip.__more_data; + struct ip_callchain *chain = NULL; - if (event->header.type & PERF_SAMPLE_PERIOD) - period = event->ip.period; + if (event->header.type & PERF_SAMPLE_PERIOD) { + period = *(u64 *)more_data; + more_data += sizeof(u64); + } dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), @@ -956,10 +1140,28 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); + if (event->header.type & PERF_SAMPLE_CALLCHAIN) { + int i; + + chain = (void *)more_data; + + dprintf("... chain: nr:%Lu\n", chain->nr); + + if (validate_chain(chain, event) < 0) { + eprintf("call-chain problem with event, skipping it.\n"); + return 0; + } + + if (dump_trace) { + for (i = 0; i < chain->nr; i++) + dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); + } + } + dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); if (thread == NULL) { - fprintf(stderr, "problem processing %d event, skipping it.\n", + eprintf("problem processing %d event, skipping it.\n", event->header.type); return -1; } @@ -977,22 +1179,6 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) show = SHOW_USER; level = '.'; - map = thread__find_map(thread, ip); - if (map != NULL) { - ip = map->map_ip(map, ip); - dso = map->dso; - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): - */ - if ((long long)ip < 0) - dso = kernel_dso; - } - dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); - } else { show = SHOW_HV; level = 'H'; @@ -1000,14 +1186,10 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) } if (show & show_mask) { - struct symbol *sym = NULL; - - if (dso) - sym = dso->find_symbol(dso, ip); + struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); - if (hist_entry__add(thread, map, dso, sym, ip, level, period)) { - fprintf(stderr, - "problem incrementing symbol count, skipping event\n"); + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { + eprintf("problem incrementing symbol count, skipping event\n"); return -1; } } @@ -1096,8 +1278,60 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) } static int +process_lost_event(event_t *event, unsigned long offset, unsigned long head) +{ + dprintf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->lost.id, + event->lost.lost); + + total_lost += event->lost.lost; + + return 0; +} + +static void trace_event(event_t *event) +{ + unsigned char *raw_event = (void *)event; + char *color = PERF_COLOR_BLUE; + int i, j; + + if (!dump_trace) + return; + + dprintf("."); + cdprintf("\n. ... raw event: size %d bytes\n", event->header.size); + + for (i = 0; i < event->header.size; i++) { + if ((i & 15) == 0) { + dprintf("."); + cdprintf(" %04x: ", i); + } + + cdprintf(" %02x", raw_event[i]); + + if (((i & 15) == 15) || i == event->header.size-1) { + cdprintf(" "); + for (j = 0; j < 15-(i & 15); j++) + cdprintf(" "); + for (j = 0; j < (i & 15); j++) { + if (isprint(raw_event[i-15+j])) + cdprintf("%c", raw_event[i-15+j]); + else + cdprintf("."); + } + cdprintf("\n"); + } + } + dprintf(".\n"); +} + +static int process_event(event_t *event, unsigned long offset, unsigned long head) { + trace_event(event); + if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) return process_overflow_event(event, offset, head); @@ -1113,6 +1347,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_PERIOD: return process_period_event(event, offset, head); + + case PERF_EVENT_LOST: + return process_lost_event(event, offset, head); + /* * We dont process them right now but they are fine: */ @@ -1128,11 +1366,13 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static struct perf_file_header file_header; + static int __cmd_report(void) { int ret, rc = EXIT_FAILURE; unsigned long offset = 0; - unsigned long head = 0; + unsigned long head = sizeof(file_header); struct stat stat; event_t *event; uint32_t size; @@ -1160,6 +1400,17 @@ static int __cmd_report(void) exit(0); } + if (read(input, &file_header, sizeof(file_header)) == -1) { + perror("failed to read file headers"); + exit(-1); + } + + if (sort__has_parent && + !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { + fprintf(stderr, "selected --sort parent, but no callchain data\n"); + exit(-1); + } + if (load_kernel() < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; @@ -1204,7 +1455,7 @@ more: size = event->header.size; - dprintf("%p [%p]: event: %d\n", + dprintf("\n%p [%p]: event: %d\n", (void *)(offset + head), (void *)(long)event->header.size, event->header.type); @@ -1231,9 +1482,13 @@ more: head += size; + if (offset + head >= sizeof(file_header) + file_header.data_size) + goto done; + if (offset + head < stat.st_size) goto more; +done: rc = EXIT_SUCCESS; close(input); @@ -1241,6 +1496,7 @@ more: dprintf(" mmap events: %10ld\n", total_mmap); dprintf(" comm events: %10ld\n", total_comm); dprintf(" fork events: %10ld\n", total_fork); + dprintf(" lost events: %10ld\n", total_lost); dprintf(" unknown events: %10ld\n", total_unknown); if (dump_trace) @@ -1273,9 +1529,13 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), + "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the pathnames taking into account the cwd"), + OPT_STRING('p', "parent", &parent_pattern, "regex", + "regex filter to identify parent, see: '--sort parent'"), + OPT_BOOLEAN('x', "exclude-other", &exclude_other, + "Only display entries with parent-match"), OPT_END() }; @@ -1304,6 +1564,11 @@ int cmd_report(int argc, const char **argv, const char *prefix) setup_sorting(); + if (parent_pattern != default_parent_pattern) + sort_dimension__add("parent"); + else + exclude_other = 0; + /* * Any (unrecognized) arguments left? */ diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c43e4a9..6d3eeac 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -43,6 +43,7 @@ #include "util/parse-events.h" #include <sys/prctl.h> +#include <math.h> static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { @@ -79,12 +80,34 @@ static const unsigned int default_count[] = { 10000, }; -static __u64 event_res[MAX_COUNTERS][3]; -static __u64 event_scaled[MAX_COUNTERS]; +#define MAX_RUN 100 -static __u64 runtime_nsecs; -static __u64 walltime_nsecs; -static __u64 runtime_cycles; +static int run_count = 1; +static int run_idx = 0; + +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + +//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; + + +static u64 runtime_nsecs[MAX_RUN]; +static u64 walltime_nsecs[MAX_RUN]; +static u64 runtime_cycles[MAX_RUN]; + +static u64 event_res_avg[MAX_COUNTERS][3]; +static u64 event_res_noise[MAX_COUNTERS][3]; + +static u64 event_scaled_avg[MAX_COUNTERS]; + +static u64 runtime_nsecs_avg; +static u64 runtime_nsecs_noise; + +static u64 walltime_nsecs_avg; +static u64 walltime_nsecs_noise; + +static u64 runtime_cycles_avg; +static u64 runtime_cycles_noise; static void create_perf_stat_counter(int counter) { @@ -135,12 +158,12 @@ static inline int nsec_counter(int counter) */ static void read_counter(int counter) { - __u64 *count, single_count[3]; + u64 *count, single_count[3]; ssize_t res; int cpu, nv; int scaled; - count = event_res[counter]; + count = event_res[run_idx][counter]; count[0] = count[1] = count[2] = 0; @@ -149,8 +172,10 @@ static void read_counter(int counter) if (fd[cpu][counter] < 0) continue; - res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); - assert(res == nv * sizeof(__u64)); + res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); + assert(res == nv * sizeof(u64)); + close(fd[cpu][counter]); + fd[cpu][counter] = -1; count[0] += single_count[0]; if (scale) { @@ -162,13 +187,13 @@ static void read_counter(int counter) scaled = 0; if (scale) { if (count[2] == 0) { - event_scaled[counter] = -1; + event_scaled[run_idx][counter] = -1; count[0] = 0; return; } if (count[2] < count[1]) { - event_scaled[counter] = 1; + event_scaled[run_idx][counter] = 1; count[0] = (unsigned long long) ((double)count[0] * count[1] / count[2] + 0.5); } @@ -178,10 +203,94 @@ static void read_counter(int counter) */ if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) - runtime_nsecs = count[0]; + runtime_nsecs[run_idx] = count[0]; if (attrs[counter].type == PERF_TYPE_HARDWARE && attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) - runtime_cycles = count[0]; + runtime_cycles[run_idx] = count[0]; +} + +static int run_perf_stat(int argc, const char **argv) +{ + unsigned long long t0, t1; + int status = 0; + int counter; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perf_stat_counter(counter); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], (char **)argv)) { + perror(argv[0]); + exit(-1); + } + } + + wait(&status); + + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + walltime_nsecs[run_idx] = t1 - t0; + + for (counter = 0; counter < nr_counters; counter++) + read_counter(counter); + + return WEXITSTATUS(status); +} + +static void print_noise(u64 *count, u64 *noise) +{ + if (run_count > 1) + fprintf(stderr, " ( +- %7.3f%% )", + (double)noise[0]/(count[0]+1)*100.0); +} + +static void nsec_printout(int counter, u64 *count, u64 *noise) +{ + double msecs = (double)count[0] / 1000000; + + fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + + if (walltime_nsecs_avg) + fprintf(stderr, " # %10.3f CPUs ", + (double)count[0] / (double)walltime_nsecs_avg); + } + print_noise(count, noise); +} + +static void abs_printout(int counter, u64 *count, u64 *noise) +{ + fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + + if (runtime_cycles_avg && + attrs[counter].type == PERF_TYPE_HARDWARE && + attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { + + fprintf(stderr, " # %10.3f IPC ", + (double)count[0] / (double)runtime_cycles_avg); + } else { + if (runtime_nsecs_avg) { + fprintf(stderr, " # %10.3f M/sec", + (double)count[0]/runtime_nsecs_avg*1000.0); + } + } + print_noise(count, noise); } /* @@ -189,11 +298,12 @@ static void read_counter(int counter) */ static void print_counter(int counter) { - __u64 *count; + u64 *count, *noise; int scaled; - count = event_res[counter]; - scaled = event_scaled[counter]; + count = event_res_avg[counter]; + noise = event_res_noise[counter]; + scaled = event_scaled_avg[counter]; if (scaled == -1) { fprintf(stderr, " %14s %-20s\n", @@ -201,75 +311,107 @@ static void print_counter(int counter) return; } - if (nsec_counter(counter)) { - double msecs = (double)count[0] / 1000000; - - fprintf(stderr, " %14.6f %-20s", - msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + if (nsec_counter(counter)) + nsec_printout(counter, count, noise); + else + abs_printout(counter, count, noise); - if (walltime_nsecs) - fprintf(stderr, " # %11.3f CPU utilization factor", - (double)count[0] / (double)walltime_nsecs); - } - } else { - fprintf(stderr, " %14Ld %-20s", - count[0], event_name(counter)); - if (runtime_nsecs) - fprintf(stderr, " # %11.3f M/sec", - (double)count[0]/runtime_nsecs*1000.0); - if (runtime_cycles && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - - fprintf(stderr, " # %1.3f per cycle", - (double)count[0] / (double)runtime_cycles); - } - } if (scaled) fprintf(stderr, " (scaled from %.2f%%)", (double) count[2] / count[1] * 100); + fprintf(stderr, "\n"); } -static int do_perf_stat(int argc, const char **argv) +/* + * normalize_noise noise values down to stddev: + */ +static void normalize_noise(u64 *val) { - unsigned long long t0, t1; - int counter; - int status; - int pid; - int i; + double res; - if (!system_wide) - nr_cpus = 1; + res = (double)*val / (run_count * sqrt((double)run_count)); - for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); + *val = (u64)res; +} - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); +static void update_avg(const char *name, int idx, u64 *avg, u64 *val) +{ + *avg += *val; - if ((pid = fork()) < 0) - perror("failed to fork"); + if (verbose > 1) + fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); +} +/* + * Calculate the averages and noises: + */ +static void calc_avg(void) +{ + int i, j; + + if (verbose > 1) + fprintf(stderr, "\n"); + + for (i = 0; i < run_count; i++) { + update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); + update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); + update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); + + for (j = 0; j < nr_counters; j++) { + update_avg("counter/0", j, + event_res_avg[j]+0, event_res[i][j]+0); + update_avg("counter/1", j, + event_res_avg[j]+1, event_res[i][j]+1); + update_avg("counter/2", j, + event_res_avg[j]+2, event_res[i][j]+2); + update_avg("scaled", j, + event_scaled_avg + j, event_scaled[i]+j); + } + } + runtime_nsecs_avg /= run_count; + walltime_nsecs_avg /= run_count; + runtime_cycles_avg /= run_count; + + for (j = 0; j < nr_counters; j++) { + event_res_avg[j][0] /= run_count; + event_res_avg[j][1] /= run_count; + event_res_avg[j][2] /= run_count; + } - if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); + for (i = 0; i < run_count; i++) { + runtime_nsecs_noise += + abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); + walltime_nsecs_noise += + abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); + runtime_cycles_noise += + abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); + + for (j = 0; j < nr_counters; j++) { + event_res_noise[j][0] += + abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); + event_res_noise[j][1] += + abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); + event_res_noise[j][2] += + abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); } } - while (wait(&status) >= 0) - ; + normalize_noise(&runtime_nsecs_noise); + normalize_noise(&walltime_nsecs_noise); + normalize_noise(&runtime_cycles_noise); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); - t1 = rdclock(); + for (j = 0; j < nr_counters; j++) { + normalize_noise(&event_res_noise[j][0]); + normalize_noise(&event_res_noise[j][1]); + normalize_noise(&event_res_noise[j][2]); + } +} - walltime_nsecs = t1 - t0; +static void print_stat(int argc, const char **argv) +{ + int i, counter; + + calc_avg(); fflush(stdout); @@ -279,22 +421,19 @@ static int do_perf_stat(int argc, const char **argv) for (i = 1; i < argc; i++) fprintf(stderr, " %s", argv[i]); - fprintf(stderr, "\':\n"); - fprintf(stderr, "\n"); - - for (counter = 0; counter < nr_counters; counter++) - read_counter(counter); + fprintf(stderr, "\'"); + if (run_count > 1) + fprintf(stderr, " (%d runs)", run_count); + fprintf(stderr, ":\n\n"); for (counter = 0; counter < nr_counters; counter++) print_counter(counter); fprintf(stderr, "\n"); - fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", - (double)(t1-t0)/1e6); + fprintf(stderr, " %14.9f seconds time elapsed.\n", + (double)walltime_nsecs_avg/1e9); fprintf(stderr, "\n"); - - return 0; } static volatile int signr = -1; @@ -332,11 +471,15 @@ static const struct option options[] = { "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_INTEGER('r', "repeat", &run_count, + "repeat command and print average + stddev (max: 100)"), OPT_END() }; int cmd_stat(int argc, const char **argv, const char *prefix) { + int status; + page_size = sysconf(_SC_PAGE_SIZE); memcpy(attrs, default_attrs, sizeof(attrs)); @@ -344,6 +487,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, options, stat_usage, 0); if (!argc) usage_with_options(stat_usage, options); + if (run_count <= 0 || run_count > MAX_RUN) + usage_with_options(stat_usage, options); if (!nr_counters) nr_counters = 8; @@ -363,5 +508,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix) signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); - return do_perf_stat(argc, argv); + status = 0; + for (run_idx = 0; run_idx < run_count; run_idx++) { + if (run_count != 1 && verbose) + fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); + status = run_perf_stat(argc, argv); + } + + print_stat(argc, argv); + + return status; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fe338d3c..5352b5e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -54,7 +54,7 @@ static int system_wide = 0; static int default_interval = 100000; -static __u64 count_filter = 5; +static u64 count_filter = 5; static int print_entries = 15; static int target_pid = -1; @@ -79,8 +79,8 @@ static int dump_symtab; * Symbols */ -static __u64 min_ip; -static __u64 max_ip = -1ll; +static u64 min_ip; +static u64 max_ip = -1ll; struct sym_entry { struct rb_node rb_node; @@ -194,7 +194,7 @@ static void print_sym_table(void) 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); if (nr_counters == 1) { - printf("%Ld", attrs[0].sample_period); + printf("%Ld", (u64)attrs[0].sample_period); if (freq) printf("Hz "); else @@ -372,7 +372,7 @@ out_delete_dso: /* * Binary search in the histogram table and record the hit: */ -static void record_ip(__u64 ip, int counter) +static void record_ip(u64 ip, int counter) { struct symbol *sym = dso__find_symbol(kernel_dso, ip); @@ -392,7 +392,7 @@ static void record_ip(__u64 ip, int counter) samples--; } -static void process_event(__u64 ip, int counter) +static void process_event(u64 ip, int counter) { samples++; @@ -463,15 +463,15 @@ static void mmap_read_counter(struct mmap_data *md) for (; old != head;) { struct ip_event { struct perf_event_header header; - __u64 ip; - __u32 pid, target_pid; + u64 ip; + u32 pid, target_pid; }; struct mmap_event { struct perf_event_header header; - __u32 pid, target_pid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid, target_pid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 87a1aca..ceb68aa 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -13,12 +13,19 @@ #define cpu_relax() asm volatile ("" ::: "memory"); #endif +#ifdef __s390__ +#include "../../arch/s390/include/asm/unistd.h" +#define rmb() asm volatile("bcr 15,0" ::: "memory") +#define cpu_relax() asm volatile("" ::: "memory"); +#endif + #include <time.h> #include <unistd.h> #include <sys/types.h> #include <sys/syscall.h> #include "../../include/linux/perf_counter.h" +#include "types.h" /* * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all @@ -65,4 +72,10 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 +struct perf_file_header { + u64 version; + u64 sample_type; + u64 data_size; +}; + #endif diff --git a/tools/perf/types.h b/tools/perf/types.h new file mode 100644 index 0000000..5e75f90 --- /dev/null +++ b/tools/perf/types.h @@ -0,0 +1,17 @@ +#ifndef _PERF_TYPES_H +#define _PERF_TYPES_H + +/* + * We define u64 as unsigned long long for every architecture + * so that we can print it with %Lx without getting warnings. + */ +typedef unsigned long long u64; +typedef signed long long s64; +typedef unsigned int u32; +typedef signed int s32; +typedef unsigned short u16; +typedef signed short s16; +typedef unsigned char u8; +typedef signed char s8; + +#endif /* _PERF_TYPES_H */ diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index b90ec00..0b791bd 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -11,16 +11,21 @@ enum { D = GIT_DIGIT, G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ + P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */ + + PS = GIT_SPACE | GIT_PRINT_EXTRA, }; unsigned char sane_ctype[256] = { +/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ - S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ - D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ - 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ - 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ + PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */ + D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */ + P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ + A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */ + P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ + A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ /* Nothing in the 128.. range */ }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5a72586..35d04da 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -13,8 +13,8 @@ int nr_counters; struct perf_counter_attr attrs[MAX_COUNTERS]; struct event_symbol { - __u8 type; - __u64 config; + u8 type; + u64 config; char *symbol; }; @@ -63,8 +63,8 @@ static char *hw_event_names[] = { }; static char *sw_event_names[] = { - "cpu-clock-ticks", - "task-clock-ticks", + "cpu-clock-msecs", + "task-clock-msecs", "page-faults", "context-switches", "CPU-migrations", @@ -96,7 +96,7 @@ static char *hw_cache_result [][MAX_ALIASES] = { char *event_name(int counter) { - __u64 config = attrs[counter].config; + u64 config = attrs[counter].config; int type = attrs[counter].type; static char buf[32]; @@ -112,7 +112,7 @@ char *event_name(int counter) return "unknown-hardware"; case PERF_TYPE_HW_CACHE: { - __u8 cache_type, cache_op, cache_result; + u8 cache_type, cache_op, cache_result; static char name[100]; cache_type = (config >> 0) & 0xff; @@ -202,7 +202,7 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a */ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) { - __u64 config, id; + u64 config, id; int type; unsigned int i; const char *sep, *pstr; diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index ec33c0c..c93eca9 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -15,7 +15,7 @@ static int hex(char ch) * While we find nice hex chars, build a long_val. * Return number of chars processed. */ -int hex2u64(const char *ptr, __u64 *long_val) +int hex2u64(const char *ptr, u64 *long_val) { const char *p = ptr; *long_val = 0; diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 72812c1..37b0325 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -1,8 +1,8 @@ #ifndef _PERF_STRING_H_ #define _PERF_STRING_H_ -#include <linux/types.h> +#include "../types.h" -int hex2u64(const char *ptr, __u64 *val); +int hex2u64(const char *ptr, u64 *val); #endif diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 49a55f8..86e1437 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -9,9 +9,9 @@ const char *sym_hist_filter; -static struct symbol *symbol__new(__u64 start, __u64 len, +static struct symbol *symbol__new(u64 start, u64 len, const char *name, unsigned int priv_size, - __u64 obj_start, int verbose) + u64 obj_start, int verbose) { size_t namelen = strlen(name) + 1; struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); @@ -21,14 +21,14 @@ static struct symbol *symbol__new(__u64 start, __u64 len, if (verbose >= 2) printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", - (__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); + (u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); self->obj_start= obj_start; self->hist = NULL; self->hist_sum = 0; if (sym_hist_filter && !strcmp(name, sym_hist_filter)) - self->hist = calloc(sizeof(__u64), len); + self->hist = calloc(sizeof(u64), len); if (priv_size) { memset(self, 0, priv_size); @@ -89,7 +89,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym) { struct rb_node **p = &self->syms.rb_node; struct rb_node *parent = NULL; - const __u64 ip = sym->start; + const u64 ip = sym->start; struct symbol *s; while (*p != NULL) { @@ -104,7 +104,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym) rb_insert_color(&sym->rb_node, &self->syms); } -struct symbol *dso__find_symbol(struct dso *self, __u64 ip) +struct symbol *dso__find_symbol(struct dso *self, u64 ip) { struct rb_node *n; @@ -151,7 +151,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb goto out_failure; while (!feof(file)) { - __u64 start; + u64 start; struct symbol *sym; int line_len, len; char symbol_type; @@ -232,7 +232,7 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verb goto out_failure; while (!feof(file)) { - __u64 start, size; + u64 start, size; struct symbol *sym; int line_len, len; @@ -353,7 +353,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, { uint32_t nr_rel_entries, idx; GElf_Sym sym; - __u64 plt_offset; + u64 plt_offset; GElf_Shdr shdr_plt; struct symbol *f; GElf_Shdr shdr_rel_plt; @@ -523,7 +523,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; - __u64 obj_start; + u64 obj_start; if (!elf_sym__is_function(&sym)) continue; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0d1292b..ea332e56 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -2,16 +2,18 @@ #define _PERF_SYMBOL_ 1 #include <linux/types.h> +#include "../types.h" #include "list.h" #include "rbtree.h" struct symbol { struct rb_node rb_node; - __u64 start; - __u64 end; - __u64 obj_start; - __u64 hist_sum; - __u64 *hist; + u64 start; + u64 end; + u64 obj_start; + u64 hist_sum; + u64 *hist; + void *priv; char name[0]; }; @@ -19,7 +21,7 @@ struct dso { struct list_head node; struct rb_root syms; unsigned int sym_priv_size; - struct symbol *(*find_symbol)(struct dso *, __u64 ip); + struct symbol *(*find_symbol)(struct dso *, u64 ip); char name[0]; }; @@ -35,7 +37,7 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) return ((void *)sym) - self->sym_priv_size; } -struct symbol *dso__find_symbol(struct dso *self, __u64 ip); +struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dso__load_kernel(struct dso *self, const char *vmlinux, symbol_filter_t filter, int verbose); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 76590a1..b8cfed7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -100,11 +100,6 @@ #include <iconv.h> #endif -#ifndef NO_OPENSSL -#include <openssl/ssl.h> -#include <openssl/err.h> -#endif - /* On most systems <limits.h> would have given us this, but * not on some systems (e.g. GNU/Hurd). */ @@ -332,17 +327,20 @@ static inline int has_extension(const char *filename, const char *ext) #undef tolower #undef toupper extern unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define GIT_REGEX_SPECIAL 0x10 +#define GIT_SPACE 0x01 +#define GIT_DIGIT 0x02 +#define GIT_ALPHA 0x04 +#define GIT_GLOB_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 +#define GIT_PRINT_EXTRA 0x20 +#define GIT_PRINT 0x3E #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) #define isascii(x) (((x) & ~0x7f) == 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) +#define isprint(x) sane_istest(x,GIT_PRINT) #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) #define tolower(x) sane_case((unsigned char)(x), 0x20) |