summaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2010-05-10 08:20:19 +0200
committerIngo Molnar <mingo@elte.hu>2010-05-10 08:20:19 +0200
commit1f0ac7183f4d270bd9ce511254ba5d931d4f29c9 (patch)
tree124b2682a249b0393f29e929537aa76ab299bb5f /tools/perf
parent232a5c948da5e23dff27e48180abf4a4238f7602 (diff)
parent76ba7e846fcc89d9d4b25b89e303c9058de96d60 (diff)
downloadop-kernel-dev-1f0ac7183f4d270bd9ce511254ba5d931d4f29c9.zip
op-kernel-dev-1f0ac7183f4d270bd9ce511254ba5d931d4f29c9.tar.gz
Merge branch 'perf/test' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing into perf/core
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/builtin-lock.c145
-rw-r--r--tools/perf/builtin-record.c34
-rw-r--r--tools/perf/util/event.h3
-rw-r--r--tools/perf/util/session.c125
-rw-r--r--tools/perf/util/session.h36
5 files changed, 238 insertions, 105 deletions
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 6605000..e18dfdc 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -387,7 +387,20 @@ static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr)
return seq;
}
-static int bad_hist[4];
+enum broken_state {
+ BROKEN_ACQUIRE,
+ BROKEN_ACQUIRED,
+ BROKEN_CONTENDED,
+ BROKEN_RELEASE,
+ BROKEN_MAX,
+};
+
+static int bad_hist[BROKEN_MAX];
+
+enum acquire_flags {
+ TRY_LOCK = 1,
+ READ_LOCK = 2,
+};
static void
report_lock_acquire_event(struct trace_acquire_event *acquire_event,
@@ -413,9 +426,9 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event,
if (!acquire_event->flag) {
seq->state = SEQ_STATE_ACQUIRING;
} else {
- if (acquire_event->flag & 1)
+ if (acquire_event->flag & TRY_LOCK)
ls->nr_trylock++;
- if (acquire_event->flag & 2)
+ if (acquire_event->flag & READ_LOCK)
ls->nr_readlock++;
seq->state = SEQ_STATE_READ_ACQUIRED;
seq->read_count = 1;
@@ -423,7 +436,7 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event,
}
break;
case SEQ_STATE_READ_ACQUIRED:
- if (acquire_event->flag & 2) {
+ if (acquire_event->flag & READ_LOCK) {
seq->read_count++;
ls->nr_acquired++;
goto end;
@@ -437,7 +450,7 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event,
broken:
/* broken lock sequence, discard it */
ls->discard = 1;
- bad_hist[0]++;
+ bad_hist[BROKEN_ACQUIRE]++;
list_del(&seq->list);
free(seq);
goto end;
@@ -481,10 +494,9 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event,
case SEQ_STATE_CONTENDED:
contended_term = timestamp - seq->prev_event_time;
ls->wait_time_total += contended_term;
-
if (contended_term < ls->wait_time_min)
ls->wait_time_min = contended_term;
- else if (ls->wait_time_max < contended_term)
+ if (ls->wait_time_max < contended_term)
ls->wait_time_max = contended_term;
break;
case SEQ_STATE_RELEASED:
@@ -492,7 +504,7 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event,
case SEQ_STATE_READ_ACQUIRED:
/* broken lock sequence, discard it */
ls->discard = 1;
- bad_hist[1]++;
+ bad_hist[BROKEN_ACQUIRED]++;
list_del(&seq->list);
free(seq);
goto end;
@@ -540,7 +552,7 @@ report_lock_contended_event(struct trace_contended_event *contended_event,
case SEQ_STATE_CONTENDED:
/* broken lock sequence, discard it */
ls->discard = 1;
- bad_hist[2]++;
+ bad_hist[BROKEN_CONTENDED]++;
list_del(&seq->list);
free(seq);
goto end;
@@ -594,7 +606,7 @@ report_lock_release_event(struct trace_release_event *release_event,
case SEQ_STATE_RELEASED:
/* broken lock sequence, discard it */
ls->discard = 1;
- bad_hist[3]++;
+ bad_hist[BROKEN_RELEASE]++;
goto free_seq;
break;
default:
@@ -713,6 +725,21 @@ process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)
process_lock_release_event(data, event, cpu, timestamp, thread);
}
+static void print_bad_events(int bad, int total)
+{
+ /* Output for debug, this have to be removed */
+ int i;
+ const char *name[4] =
+ { "acquire", "acquired", "contended", "release" };
+
+ pr_info("\n=== output for debug===\n\n");
+ pr_info("bad: %d, total: %d\n", bad, total);
+ pr_info("bad rate: %f %%\n", (double)bad / (double)total * 100);
+ pr_info("histogram of events caused bad sequence\n");
+ for (i = 0; i < BROKEN_MAX; i++)
+ pr_info(" %10s: %d\n", name[i], bad_hist[i]);
+}
+
/* TODO: various way to print, coloring, nano or milli sec */
static void print_result(void)
{
@@ -720,15 +747,15 @@ static void print_result(void)
char cut_name[20];
int bad, total;
- printf("%20s ", "Name");
- printf("%10s ", "acquired");
- printf("%10s ", "contended");
+ pr_info("%20s ", "Name");
+ pr_info("%10s ", "acquired");
+ pr_info("%10s ", "contended");
- printf("%15s ", "total wait (ns)");
- printf("%15s ", "max wait (ns)");
- printf("%15s ", "min wait (ns)");
+ pr_info("%15s ", "total wait (ns)");
+ pr_info("%15s ", "max wait (ns)");
+ pr_info("%15s ", "min wait (ns)");
- printf("\n\n");
+ pr_info("\n\n");
bad = total = 0;
while ((st = pop_from_result())) {
@@ -741,7 +768,7 @@ static void print_result(void)
if (strlen(st->name) < 16) {
/* output raw name */
- printf("%20s ", st->name);
+ pr_info("%20s ", st->name);
} else {
strncpy(cut_name, st->name, 16);
cut_name[16] = '.';
@@ -749,33 +776,40 @@ static void print_result(void)
cut_name[18] = '.';
cut_name[19] = '\0';
/* cut off name for saving output style */
- printf("%20s ", cut_name);
+ pr_info("%20s ", cut_name);
}
- printf("%10u ", st->nr_acquired);
- printf("%10u ", st->nr_contended);
+ pr_info("%10u ", st->nr_acquired);
+ pr_info("%10u ", st->nr_contended);
- printf("%15llu ", st->wait_time_total);
- printf("%15llu ", st->wait_time_max);
- printf("%15llu ", st->wait_time_min == ULLONG_MAX ?
+ pr_info("%15llu ", st->wait_time_total);
+ pr_info("%15llu ", st->wait_time_max);
+ pr_info("%15llu ", st->wait_time_min == ULLONG_MAX ?
0 : st->wait_time_min);
- printf("\n");
+ pr_info("\n");
}
- {
- /* Output for debug, this have to be removed */
- int i;
- const char *name[4] =
- { "acquire", "acquired", "contended", "release" };
+ print_bad_events(bad, total);
+}
- printf("\n=== output for debug===\n\n");
- printf("bad:%d, total:%d\n", bad, total);
- printf("bad rate:%f\n", (double)(bad / total));
+static int info_threads;
+static int info_map;
- printf("histogram of events caused bad sequence\n");
- for (i = 0; i < 4; i++)
- printf(" %10s: %d\n", name[i], bad_hist[i]);
- }
+static void dump_threads(void)
+{
+ struct thread_stat *st;
+ struct rb_node *node;
+ struct thread *t;
+
+ pr_info("%10s: comm\n", "Thread ID");
+
+ node = rb_first(&thread_stats);
+ while (node) {
+ st = container_of(node, struct thread_stat, rb);
+ t = perf_session__findnew(session, st->tid);
+ pr_info("%10d: %s\n", st->tid, t->comm);
+ node = rb_next(node);
+ };
}
static void dump_map(void)
@@ -783,13 +817,24 @@ static void dump_map(void)
unsigned int i;
struct lock_stat *st;
+ pr_info("Address of instance: name of class\n");
for (i = 0; i < LOCKHASH_SIZE; i++) {
list_for_each_entry(st, &lockhash_table[i], hash_entry) {
- printf("%p: %s\n", st->addr, st->name);
+ pr_info(" %p: %s\n", st->addr, st->name);
}
}
}
+static void dump_info(void)
+{
+ if (info_threads)
+ dump_threads();
+ else if (info_map)
+ dump_map();
+ else
+ die("Unknown type of information\n");
+}
+
static int process_sample_event(event_t *self, struct perf_session *s)
{
struct sample_data data;
@@ -858,6 +903,19 @@ static const struct option report_options[] = {
OPT_END()
};
+static const char * const info_usage[] = {
+ "perf lock info [<options>]",
+ NULL
+};
+
+static const struct option info_options[] = {
+ OPT_BOOLEAN('t', "threads", &info_threads,
+ "dump thread list in perf.data"),
+ OPT_BOOLEAN('m', "map", &info_map,
+ "map of lock instances (name:address table)"),
+ OPT_END()
+};
+
static const char * const lock_usage[] = {
"perf lock [<options>] {record|trace|report}",
NULL
@@ -872,7 +930,6 @@ static const struct option lock_options[] = {
static const char *record_args[] = {
"record",
- "-a",
"-R",
"-f",
"-m", "1024",
@@ -929,12 +986,18 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
} else if (!strcmp(argv[0], "trace")) {
/* Aliased to 'perf trace' */
return cmd_trace(argc, argv, prefix);
- } else if (!strcmp(argv[0], "map")) {
+ } else if (!strcmp(argv[0], "info")) {
+ if (argc) {
+ argc = parse_options(argc, argv,
+ info_options, info_usage, 0);
+ if (argc)
+ usage_with_options(info_usage, info_options);
+ }
/* recycling report_lock_ops */
trace_handler = &report_lock_ops;
setup_pager();
read_events();
- dump_map();
+ dump_info();
} else {
usage_with_options(lock_usage, lock_options);
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d3981ac..6b77b28 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -494,6 +494,29 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
" relocation symbol.\n", machine->pid);
}
+static struct perf_event_header finished_round_event = {
+ .size = sizeof(struct perf_event_header),
+ .type = PERF_RECORD_FINISHED_ROUND,
+};
+
+static void mmap_read_all(void)
+{
+ int i, counter, thread;
+
+ for (i = 0; i < nr_cpu; i++) {
+ for (counter = 0; counter < nr_counters; counter++) {
+ for (thread = 0; thread < thread_num; thread++) {
+ if (mmap_array[i][counter][thread].base)
+ mmap_read(&mmap_array[i][counter][thread]);
+ }
+
+ }
+ }
+
+ if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
+ write_output(&finished_round_event, sizeof(finished_round_event));
+}
+
static int __cmd_record(int argc, const char **argv)
{
int i, counter;
@@ -726,16 +749,7 @@ static int __cmd_record(int argc, const char **argv)
int hits = samples;
int thread;
- for (i = 0; i < nr_cpu; i++) {
- for (counter = 0; counter < nr_counters; counter++) {
- for (thread = 0;
- thread < thread_num; thread++) {
- if (mmap_array[i][counter][thread].base)
- mmap_read(&mmap_array[i][counter][thread]);
- }
-
- }
- }
+ mmap_read_all();
if (hits == samples) {
if (done)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b364da5..6cc1b1d 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -84,11 +84,12 @@ struct build_id_event {
char filename[];
};
-enum perf_header_event_type { /* above any possible kernel type */
+enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_HEADER_ATTR = 64,
PERF_RECORD_HEADER_EVENT_TYPE = 65,
PERF_RECORD_HEADER_TRACING_DATA = 66,
PERF_RECORD_HEADER_BUILD_ID = 67,
+ PERF_RECORD_FINISHED_ROUND = 68,
PERF_RECORD_HEADER_MAX
};
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 71bc608..c088d8f 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -98,7 +98,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
self->unknown_events = 0;
self->machines = RB_ROOT;
self->repipe = repipe;
- self->ordered_samples.flush_limit = ULLONG_MAX;
INIT_LIST_HEAD(&self->ordered_samples.samples_head);
machine__init(&self->host_machine, "", HOST_KERNEL_ID);
@@ -195,6 +194,18 @@ static int process_event_stub(event_t *event __used,
return 0;
}
+static int process_finished_round_stub(event_t *event __used,
+ struct perf_session *session __used,
+ struct perf_event_ops *ops __used)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_finished_round(event_t *event,
+ struct perf_session *session,
+ struct perf_event_ops *ops);
+
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
if (handler->sample == NULL)
@@ -223,6 +234,12 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
handler->tracing_data = process_event_stub;
if (handler->build_id == NULL)
handler->build_id = process_event_stub;
+ if (handler->finished_round == NULL) {
+ if (handler->ordered_samples)
+ handler->finished_round = process_finished_round;
+ else
+ handler->finished_round = process_finished_round_stub;
+ }
}
static const char *event__name[] = {
@@ -360,16 +377,14 @@ struct sample_queue {
struct list_head list;
};
-#define FLUSH_PERIOD (2 * NSEC_PER_SEC)
-
static void flush_sample_queue(struct perf_session *s,
struct perf_event_ops *ops)
{
struct list_head *head = &s->ordered_samples.samples_head;
- u64 limit = s->ordered_samples.flush_limit;
+ u64 limit = s->ordered_samples.next_flush;
struct sample_queue *tmp, *iter;
- if (!ops->ordered_samples)
+ if (!ops->ordered_samples || !limit)
return;
list_for_each_entry_safe(iter, tmp, head, list) {
@@ -388,6 +403,55 @@ static void flush_sample_queue(struct perf_session *s,
}
}
+/*
+ * When perf record finishes a pass on every buffers, it records this pseudo
+ * event.
+ * We record the max timestamp t found in the pass n.
+ * Assuming these timestamps are monotonic across cpus, we know that if
+ * a buffer still has events with timestamps below t, they will be all
+ * available and then read in the pass n + 1.
+ * Hence when we start to read the pass n + 2, we can safely flush every
+ * events with timestamps below t.
+ *
+ * ============ PASS n =================
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 1 | 2
+ * 2 | 3
+ * - | 4 <--- max recorded
+ *
+ * ============ PASS n + 1 ==============
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 3 | 5
+ * 4 | 6
+ * 5 | 7 <---- max recorded
+ *
+ * Flush every events below timestamp 4
+ *
+ * ============ PASS n + 2 ==============
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 6 | 8
+ * 7 | 9
+ * - | 10
+ *
+ * Flush every events below timestamp 7
+ * etc...
+ */
+static int process_finished_round(event_t *event __used,
+ struct perf_session *session,
+ struct perf_event_ops *ops)
+{
+ flush_sample_queue(session, ops);
+ session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;
+
+ return 0;
+}
+
static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
{
struct sample_queue *iter;
@@ -456,17 +520,12 @@ static void __queue_sample_event(struct sample_queue *new,
}
static int queue_sample_event(event_t *event, struct sample_data *data,
- struct perf_session *s,
- struct perf_event_ops *ops)
+ struct perf_session *s)
{
u64 timestamp = data->time;
struct sample_queue *new;
- u64 flush_limit;
- if (s->ordered_samples.flush_limit == ULLONG_MAX)
- s->ordered_samples.flush_limit = timestamp + FLUSH_PERIOD;
-
if (timestamp < s->ordered_samples.last_flush) {
printf("Warning: Timestamp below last timeslice flush\n");
return -EINVAL;
@@ -489,23 +548,8 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
__queue_sample_event(new, s);
s->ordered_samples.last_inserted = new;
- /*
- * We want to have a slice of events covering 2 * FLUSH_PERIOD
- * If FLUSH_PERIOD is big enough, it ensures every events that occured
- * in the first half of the timeslice have all been buffered and there
- * are none remaining (we need that because of the weakly ordered
- * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
- * timeslice, we flush the first half to be gentle with the memory
- * (the second half can still get new events in the middle, so wait
- * another period to flush it)
- */
- flush_limit = s->ordered_samples.flush_limit;
-
- if (new->timestamp > flush_limit &&
- new->timestamp - flush_limit > FLUSH_PERIOD) {
- s->ordered_samples.flush_limit += FLUSH_PERIOD;
- flush_sample_queue(s, ops);
- }
+ if (new->timestamp > s->ordered_samples.max_timestamp)
+ s->ordered_samples.max_timestamp = new->timestamp;
return 0;
}
@@ -521,7 +565,7 @@ static int perf_session__process_sample(event_t *event, struct perf_session *s,
bzero(&data, sizeof(struct sample_data));
event__parse_sample(event, s->sample_type, &data);
- queue_sample_event(event, &data, s, ops);
+ queue_sample_event(event, &data, s);
return 0;
}
@@ -573,6 +617,8 @@ static int perf_session__process_event(struct perf_session *self,
return ops->tracing_data(event, self);
case PERF_RECORD_HEADER_BUILD_ID:
return ops->build_id(event, self);
+ case PERF_RECORD_FINISHED_ROUND:
+ return ops->finished_round(event, self, ops);
default:
self->unknown_events++;
return -1;
@@ -651,15 +697,18 @@ more:
p = &event;
p += sizeof(struct perf_event_header);
- err = do_read(self->fd, p, size - sizeof(struct perf_event_header));
- if (err <= 0) {
- if (err == 0) {
- pr_err("unexpected end of event stream\n");
- goto done;
- }
+ if (size - sizeof(struct perf_event_header)) {
+ err = do_read(self->fd, p,
+ size - sizeof(struct perf_event_header));
+ if (err <= 0) {
+ if (err == 0) {
+ pr_err("unexpected end of event stream\n");
+ goto done;
+ }
- pr_err("failed to read event data\n");
- goto out_err;
+ pr_err("failed to read event data\n");
+ goto out_err;
+ }
}
if (size == 0 ||
@@ -787,7 +836,7 @@ more:
done:
err = 0;
/* do the final flush for ordered samples */
- self->ordered_samples.flush_limit = ULLONG_MAX;
+ self->ordered_samples.next_flush = ULLONG_MAX;
flush_sample_queue(self, ops);
out_err:
ui_progress__delete(progress);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index eb9f179..242d528 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -14,7 +14,8 @@ struct thread;
struct ordered_samples {
u64 last_flush;
- u64 flush_limit;
+ u64 next_flush;
+ u64 max_timestamp;
struct list_head samples_head;
struct sample_queue *last_inserted;
};
@@ -42,23 +43,28 @@ struct perf_session {
char filename[0];
};
+struct perf_event_ops;
+
typedef int (*event_op)(event_t *self, struct perf_session *session);
+typedef int (*event_op2)(event_t *self, struct perf_session *session,
+ struct perf_event_ops *ops);
struct perf_event_ops {
- event_op sample,
- mmap,
- comm,
- fork,
- exit,
- lost,
- read,
- throttle,
- unthrottle,
- attr,
- event_type,
- tracing_data,
- build_id;
- bool ordered_samples;
+ event_op sample,
+ mmap,
+ comm,
+ fork,
+ exit,
+ lost,
+ read,
+ throttle,
+ unthrottle,
+ attr,
+ event_type,
+ tracing_data,
+ build_id;
+ event_op2 finished_round;
+ bool ordered_samples;
};
struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe);
OpenPOWER on IntegriCloud