From d8eeb2d3b26d25c44c10f28430e2157a2d20bd1d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 31 Jul 2009 14:58:04 +0200 Subject: ring-buffer: consolidate interface of rb_buffer_peek() rb_buffer_peek() operates with struct ring_buffer_per_cpu *cpu_buffer only. Thus, instead of passing variables buffer and cpu it is better to use cpu_buffer directly. This also reduces the risk of races since cpu_buffer is not calculated twice. Signed-off-by: Robert Richter LKML-Reference: <1249045084-3028-1-git-send-email-robert.richter@amd.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 454e74e..8786c35 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2997,15 +2997,12 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) } static struct ring_buffer_event * -rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) +rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) { - struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; struct buffer_page *reader; int nr_loops = 0; - cpu_buffer = buffer->buffers[cpu]; - again: /* * We repeat when a timestamp is encountered. It is possible @@ -3049,7 +3046,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) case RINGBUF_TYPE_DATA: if (ts) { *ts = cpu_buffer->read_stamp + event->time_delta; - ring_buffer_normalize_time_stamp(buffer, + ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } return event; @@ -3168,7 +3165,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) local_irq_save(flags); if (dolock) spin_lock(&cpu_buffer->reader_lock); - event = rb_buffer_peek(buffer, cpu, ts); + event = rb_buffer_peek(cpu_buffer, ts); if (event && event->type_len == RINGBUF_TYPE_PADDING) rb_advance_reader(cpu_buffer); if (dolock) @@ -3237,7 +3234,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) if (dolock) spin_lock(&cpu_buffer->reader_lock); - event = rb_buffer_peek(buffer, cpu, ts); + event = rb_buffer_peek(cpu_buffer, ts); if (event) rb_advance_reader(cpu_buffer); -- cgit v1.1 From 478142c39c8c2f5f63038e5f2224e6729406e587 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Sep 2009 10:36:01 -0400 Subject: tracing: do not grab lock in wakeup latency function tracing The wakeup tracer, when enabled, has its own function tracer. It only traces the functions on the CPU where the task it is following is on. If a task is woken on one CPU but then migrates to another CPU before it wakes up, the latency tracer will then start tracing functions on the other CPU. To find which CPU the task is on, the wakeup function tracer performs a task_cpu(wakeup_task). But to make sure the task does not disappear it grabs the wakeup_lock, which is also taken when the task wakes up. By taking this lock, the function tracer does not need to worry about the task being freed as it checks its cpu. Jan Blunck found a problem with this approach on his 32 CPU box. When a task is being traced by the wakeup tracer, all functions take this lock. That means that on all 32 CPUs, each function call is taking this one lock to see if the task is on that CPU. This lock has just serialized all functions on all 32 CPUs. Needless to say, this caused major issues on that box. It would even lockup. This patch changes the wakeup latency to insert a probe on the migrate task tracepoint. When a task changes its CPU that it will run on, the probe will take note. Now the wakeup function tracer no longer needs to take the lock. It only compares the current CPU with a variable that holds the current CPU the task is on. We don't worry about races since it is OK to add or miss a function trace. Reported-by: Jan Blunck Tested-by: Jan Blunck Signed-off-by: Steven Rostedt --- kernel/trace/trace_sched_wakeup.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ad69f10..cf43bdb 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -24,6 +24,7 @@ static int __read_mostly tracer_enabled; static struct task_struct *wakeup_task; static int wakeup_cpu; +static int wakeup_current_cpu; static unsigned wakeup_prio = -1; static int wakeup_rt; @@ -56,33 +57,23 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); + if (cpu != wakeup_current_cpu) + goto out_enable; + data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (unlikely(disabled != 1)) goto out; local_irq_save(flags); - __raw_spin_lock(&wakeup_lock); - - if (unlikely(!wakeup_task)) - goto unlock; - - /* - * The task can't disappear because it needs to - * wake up first, and we have the wakeup_lock. - */ - if (task_cpu(wakeup_task) != cpu) - goto unlock; trace_function(tr, ip, parent_ip, flags, pc); - unlock: - __raw_spin_unlock(&wakeup_lock); local_irq_restore(flags); out: atomic_dec(&data->disabled); - + out_enable: ftrace_preempt_enable(resched); } @@ -107,6 +98,14 @@ static int report_latency(cycle_t delta) return 1; } +static void probe_wakeup_migrate_task(struct task_struct *task, int cpu) +{ + if (task != wakeup_task) + return; + + wakeup_current_cpu = cpu; +} + static void notrace probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) @@ -244,6 +243,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) __wakeup_reset(wakeup_trace); wakeup_cpu = task_cpu(p); + wakeup_current_cpu = wakeup_cpu; wakeup_prio = p->prio; wakeup_task = p; @@ -293,6 +293,13 @@ static void start_wakeup_tracer(struct trace_array *tr) goto fail_deprobe_wake_new; } + ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task); + if (ret) { + pr_info("wakeup trace: Couldn't activate tracepoint" + " probe to kernel_sched_migrate_task\n"); + return; + } + wakeup_reset(tr); /* @@ -325,6 +332,7 @@ static void stop_wakeup_tracer(struct trace_array *tr) unregister_trace_sched_switch(probe_wakeup_sched_switch); unregister_trace_sched_wakeup_new(probe_wakeup); unregister_trace_sched_wakeup(probe_wakeup); + unregister_trace_sched_migrate_task(probe_wakeup_migrate_task); } static int __wakeup_tracer_init(struct trace_array *tr) -- cgit v1.1 From e0ab5f2daee1c7a6a387591bf37f0bad4e407112 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 10 Sep 2009 09:34:19 +0800 Subject: tracing: remove dead code Removes unreachable code. Signed-off-by: Li Zefan LKML-Reference: <4AA8579B.4020706@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fa1dccb..536ae1d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -120,14 +120,6 @@ struct print_entry { char buf[]; }; -#define TRACE_OLD_SIZE 88 - -struct trace_field_cont { - unsigned char type; - /* Temporary till we get rid of this completely */ - char buf[TRACE_OLD_SIZE - 1]; -}; - struct trace_mmiotrace_rw { struct trace_entry ent; struct mmiotrace_rw rw; @@ -509,20 +501,6 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags, extern cycle_t ftrace_now(int cpu); -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -typedef void -(*tracer_switch_func_t)(void *private, - void *__rq, - struct task_struct *prev, - struct task_struct *next); - -struct tracer_switch_ops { - tracer_switch_func_t func; - void *private; - struct tracer_switch_ops *next; -}; -#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ - extern void trace_find_cmdline(int pid, char comm[]); #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.1 From bd9cfca9cb71200dd82b320bba12540dc078f4e0 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 10 Sep 2009 09:34:19 +0800 Subject: tracing: format clean ups Fix white-space formatting. Signed-off-by: Li Zefan LKML-Reference: <4AA8579B.4020706@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 536ae1d..86a0523 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -169,20 +169,20 @@ enum kmemtrace_type_id { struct kmemtrace_alloc_entry { struct trace_entry ent; - enum kmemtrace_type_id type_id; - unsigned long call_site; - const void *ptr; - size_t bytes_req; - size_t bytes_alloc; - gfp_t gfp_flags; - int node; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; + size_t bytes_req; + size_t bytes_alloc; + gfp_t gfp_flags; + int node; }; struct kmemtrace_free_entry { struct trace_entry ent; - enum kmemtrace_type_id type_id; - unsigned long call_site; - const void *ptr; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; }; struct syscall_trace_enter { @@ -203,7 +203,7 @@ struct syscall_trace_exit { * states when a trace occurs. These are: * IRQS_OFF - interrupts were disabled * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags - * NEED_RESCED - reschedule is requested + * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler */ -- cgit v1.1 From a5921c6c37d51ee2079ca3c69ea6f7b7384f5d87 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 10 Sep 2009 09:34:19 +0800 Subject: tracing: remove stats from struct tracer Remove unused field @stats from struct tracer. Signed-off-by: Li Zefan LKML-Reference: <4AA8579B.4020706@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 86a0523..2163d18 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -382,7 +382,6 @@ struct tracer { struct tracer *next; int print_max; struct tracer_flags *flags; - struct tracer_stat *stats; }; -- cgit v1.1 From 197e2eabc90c203d1086916b7f66694ba5fbb937 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 10 Sep 2009 09:34:19 +0800 Subject: tracing: move PRED macros to trace_events_filter.c Move DEFINE_COMPARISON_PRED() and DEFINE_EQUALITY_PRED() to kernel/trace/trace_events_filter.c Signed-off-by: Li Zefan LKML-Reference: <4AA8579B.4020706@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 41 -------------------------------------- kernel/trace/trace_events_filter.c | 41 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2163d18..acaa680 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -800,47 +800,6 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } -#define DEFINE_COMPARISON_PRED(type) \ -static int filter_pred_##type(struct filter_pred *pred, void *event, \ - int val1, int val2) \ -{ \ - type *addr = (type *)(event + pred->offset); \ - type val = (type)pred->val; \ - int match = 0; \ - \ - switch (pred->op) { \ - case OP_LT: \ - match = (*addr < val); \ - break; \ - case OP_LE: \ - match = (*addr <= val); \ - break; \ - case OP_GT: \ - match = (*addr > val); \ - break; \ - case OP_GE: \ - match = (*addr >= val); \ - break; \ - default: \ - break; \ - } \ - \ - return match; \ -} - -#define DEFINE_EQUALITY_PRED(size) \ -static int filter_pred_##size(struct filter_pred *pred, void *event, \ - int val1, int val2) \ -{ \ - u##size *addr = (u##size *)(event + pred->offset); \ - u##size val = (u##size)pred->val; \ - int match; \ - \ - match = (val == *addr) ^ pred->not; \ - \ - return match; \ -} - extern struct mutex event_mutex; extern struct list_head ftrace_events; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 93660fb..2324578 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -121,6 +121,47 @@ struct filter_parse_state { } operand; }; +#define DEFINE_COMPARISON_PRED(type) \ +static int filter_pred_##type(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = 0; \ + \ + switch (pred->op) { \ + case OP_LT: \ + match = (*addr < val); \ + break; \ + case OP_LE: \ + match = (*addr <= val); \ + break; \ + case OP_GT: \ + match = (*addr > val); \ + break; \ + case OP_GE: \ + match = (*addr >= val); \ + break; \ + default: \ + break; \ + } \ + \ + return match; \ +} + +#define DEFINE_EQUALITY_PRED(size) \ +static int filter_pred_##size(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + u##size *addr = (u##size *)(event + pred->offset); \ + u##size val = (u##size)pred->val; \ + int match; \ + \ + match = (val == *addr) ^ pred->not; \ + \ + return match; \ +} + DEFINE_COMPARISON_PRED(s64); DEFINE_COMPARISON_PRED(u64); DEFINE_COMPARISON_PRED(s32); -- cgit v1.1 From fc06b8520b3bf9aaeb2e27debe6719c215bd4916 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Sep 2009 17:22:44 -0400 Subject: x86/tracing: comment need for atomic nop The dynamic function tracer relys on the macro P6_NOP5 always being an atomic NOP. If for some reason it is changed to be two operations (like a nop2 nop3) it can faults within the kernel when the function tracer modifies the code. This patch adds a comment to note that the P6_NOPs are expected to be atomic. This will hopefully prevent anyone from changing that. Reported-by: Mathieu Desnoyer Signed-off-by: Steven Rostedt --- arch/x86/include/asm/nops.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index ad2668e..6d8723a 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -65,6 +65,8 @@ 6: osp nopl 0x00(%eax,%eax,1) 7: nopl 0x00000000(%eax) 8: nopl 0x00000000(%eax,%eax,1) + Note: All the above are assumed to be a single instruction. + There is kernel code that depends on this. */ #define P6_NOP1 GENERIC_NOP1 #define P6_NOP2 ".byte 0x66,0x90\n" -- cgit v1.1 From 49ff590390a22c49e9063dcdec4cd5903127526b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Sep 2009 00:30:26 -0400 Subject: tracing: add latency format to function_graph tracer While debugging something with the function_graph tracer, I found the need to see the preempt count of the traces. Unfortunately, since the function graph tracer has its own output formatting, it does not honor the latency-format option. This patch makes the function_graph tracer honor the latency-format option, but still keeps control of the output. But now we have the same details that the latency-format supplies. # tracer: function_graph # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / # |||| # CPU|||| DURATION FUNCTION CALLS # | |||| | | | | | | 3) d..1 1.333 us | idle_cpu(); 3) d.h1 | tick_check_idle() { 3) d.h1 0.550 us | tick_check_oneshot_broadcast(); 3) d.h1 | tick_nohz_stop_idle() { 3) d.h1 | ktime_get() { 3) d.h1 | ktime_get_ts() { Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 74 +++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b3749a2..ee791a9 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -364,6 +364,29 @@ print_graph_proc(struct trace_seq *s, pid_t pid) } +static enum print_line_t +print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) +{ + int hardirq, softirq; + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + + if (!trace_seq_printf(s, " %c%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? + 'X' : '.', + (entry->flags & TRACE_FLAG_NEED_RESCHED) ? + 'N' : '.', + (hardirq && softirq) ? 'H' : + hardirq ? 'h' : softirq ? 's' : '.')) + return 0; + + if (entry->preempt_count) + return trace_seq_printf(s, "%x", entry->preempt_count); + return trace_seq_puts(s, "."); +} + /* If the pid changed since the last trace, output this event */ static enum print_line_t verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) @@ -521,6 +544,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } + /* Proc */ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, pid); @@ -758,6 +782,13 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, return TRACE_TYPE_PARTIAL_LINE; } + /* Latency format */ + if (trace_flags & TRACE_ITER_LATENCY_FMT) { + ret = print_graph_lat_fmt(s, ent); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + return 0; } @@ -952,28 +983,59 @@ print_graph_function(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } +static void print_lat_header(struct seq_file *s) +{ + static const char spaces[] = " " /* 16 spaces */ + " " /* 4 spaces */ + " "; /* 17 spaces */ + int size = 0; + + if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + size += 16; + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + size += 4; + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + size += 17; + + seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); + seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); + seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); + seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); + seq_printf(s, "#%.*s||| / \n", size, spaces); + seq_printf(s, "#%.*s|||| \n", size, spaces); +} + static void print_graph_headers(struct seq_file *s) { + int lat = trace_flags & TRACE_ITER_LATENCY_FMT; + + if (lat) + print_lat_header(s); + /* 1st line */ - seq_printf(s, "# "); + seq_printf(s, "#"); if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " TIME "); if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) - seq_printf(s, "CPU"); + seq_printf(s, " CPU"); if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) - seq_printf(s, " TASK/PID "); + seq_printf(s, " TASK/PID "); + if (lat) + seq_printf(s, "||||"); if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " DURATION "); seq_printf(s, " FUNCTION CALLS\n"); /* 2nd line */ - seq_printf(s, "# "); + seq_printf(s, "#"); if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " | "); if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) - seq_printf(s, "| "); + seq_printf(s, " | "); if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) - seq_printf(s, " | | "); + seq_printf(s, " | | "); + if (lat) + seq_printf(s, "||||"); if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " | | "); seq_printf(s, " | | | |\n"); -- cgit v1.1 From 48659d31195bb76d688e99dabd816c5472fb1656 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Sep 2009 11:36:23 -0400 Subject: tracing: move tgid out of generic entry and into userstack The userstack trace required the recording of the tgid entry. Unfortunately, it was added to the generic entry where it wasted 4 bytes of every entry and was only used by one entry. This patch moves it out of the generic field and moves it into the only user (userstack_entry). Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 - kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 5 +---- kernel/trace/trace_output.c | 2 +- 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 23f7179..06c795b 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -34,7 +34,6 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; - int tgid; }; #define FTRACE_MAX_EVENT \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c75dee..1a37da2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -886,7 +886,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -1068,6 +1067,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) return; entry = ring_buffer_event_data(event); + entry->tgid = current->tgid; memset(&entry->caller, 0, sizeof(entry->caller)); trace.nr_entries = 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index acaa680..b69697b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -101,6 +101,7 @@ struct stack_entry { struct userstack_entry { struct trace_entry ent; + unsigned int tgid; unsigned long caller[FTRACE_STACK_ENTRIES]; }; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 78b1ed2..28d9202 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -86,7 +86,6 @@ int trace_define_common_fields(struct ftrace_event_call *call) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); - __common_field(int, tgid); return ret; } @@ -572,13 +571,11 @@ static int trace_write_header(struct trace_seq *s) "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", FIELD(unsigned short, type), FIELD(unsigned char, flags), FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, tgid)); + FIELD(int, pid)); } static ssize_t diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index e0c2545..be34a6a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -407,7 +407,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, * since individual threads might have already quit! */ rcu_read_lock(); - task = find_task_by_vpid(entry->ent.tgid); + task = find_task_by_vpid(entry->tgid); if (task) mm = get_task_mm(task); rcu_read_unlock(); -- cgit v1.1 From 637e7e864103a7a68c1ce43ada27dfc25c0d113f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Sep 2009 13:55:35 -0400 Subject: tracing: add lock depth to entries This patch adds the lock depth of the big kernel lock to the generic entry header. This way we can see the depth of the lock and help in removing the BKL. Example: # _------=> CPU# # / _-----=> irqs-off # | / _----=> need-resched # || / _---=> hardirq/softirq # ||| / _--=> preempt-depth # |||| /_--=> lock-depth # |||||/ delay # cmd pid |||||| time | caller # \ / |||||| \ | / -0 2.N..3 5902255250us+: lock_acquire: read rcu_read_lock -0 2.N..3 5902255253us+: lock_release: rcu_read_lock -0 2dN..3 5902255257us+: lock_acquire: xtime_lock -0 2dN..4 5902255259us : lock_acquire: clocksource_lock -0 2dN..4 5902255261us+: lock_release: clocksource_lock Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + kernel/trace/trace.c | 9 +++++---- kernel/trace/trace_events.c | 5 ++++- kernel/trace/trace_functions_graph.c | 16 ++++++++++++---- kernel/trace/trace_output.c | 10 +++++++++- 5 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 06c795b..0608b0f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -34,6 +34,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int lock_depth; }; #define FTRACE_MAX_EVENT \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1a37da2..3b91828 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -886,6 +886,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->lock_depth = (tsk) ? tsk->lock_depth : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -1530,10 +1531,10 @@ static void print_lat_help_header(struct seq_file *m) seq_puts(m, "# | / _----=> need-resched \n"); seq_puts(m, "# || / _---=> hardirq/softirq \n"); seq_puts(m, "# ||| / _--=> preempt-depth \n"); - seq_puts(m, "# |||| / \n"); - seq_puts(m, "# ||||| delay \n"); - seq_puts(m, "# cmd pid ||||| time | caller \n"); - seq_puts(m, "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# |||| /_--=> lock-depth \n"); + seq_puts(m, "# |||||/ delay \n"); + seq_puts(m, "# cmd pid |||||| time | caller \n"); + seq_puts(m, "# \\ / |||||| \\ | / \n"); } static void print_func_help_header(struct seq_file *m) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 28d9202..975f324 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -86,6 +86,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); + __common_field(int, lock_depth); return ret; } @@ -571,11 +572,13 @@ static int trace_write_header(struct trace_seq *s) "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", FIELD(unsigned short, type), FIELD(unsigned char, flags), FIELD(unsigned char, preempt_count), - FIELD(int, pid)); + FIELD(int, pid), + FIELD(int, lock_depth)); } static ssize_t diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ee791a9..48af493 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -368,6 +368,7 @@ static enum print_line_t print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { int hardirq, softirq; + int ret; hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; @@ -382,6 +383,13 @@ print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) hardirq ? 'h' : softirq ? 's' : '.')) return 0; + if (entry->lock_depth < 0) + ret = trace_seq_putc(s, '.'); + else + ret = trace_seq_printf(s, "%d", entry->lock_depth); + if (!ret) + return 0; + if (entry->preempt_count) return trace_seq_printf(s, "%x", entry->preempt_count); return trace_seq_puts(s, "."); @@ -1001,8 +1009,8 @@ static void print_lat_header(struct seq_file *s) seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); - seq_printf(s, "#%.*s||| / \n", size, spaces); - seq_printf(s, "#%.*s|||| \n", size, spaces); + seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces); + seq_printf(s, "#%.*s|||| / \n", size, spaces); } static void print_graph_headers(struct seq_file *s) @@ -1021,7 +1029,7 @@ static void print_graph_headers(struct seq_file *s) if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " TASK/PID "); if (lat) - seq_printf(s, "||||"); + seq_printf(s, "|||||"); if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " DURATION "); seq_printf(s, " FUNCTION CALLS\n"); @@ -1035,7 +1043,7 @@ static void print_graph_headers(struct seq_file *s) if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " | | "); if (lat) - seq_printf(s, "||||"); + seq_printf(s, "|||||"); if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " | | "); seq_printf(s, " | | | |\n"); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index be34a6a..29a370a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -465,6 +465,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) { int hardirq, softirq; char comm[TASK_COMM_LEN]; + int ret; trace_find_cmdline(entry->pid, comm); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; @@ -481,9 +482,16 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) hardirq ? 'h' : softirq ? 's' : '.')) return 0; + if (entry->lock_depth < 0) + ret = trace_seq_putc(s, '.'); + else + ret = trace_seq_printf(s, "%d", entry->lock_depth); + if (!ret) + return 0; + if (entry->preempt_count) return trace_seq_printf(s, "%x", entry->preempt_count); - return trace_seq_puts(s, "."); + return trace_seq_putc(s, '.'); } static unsigned long preempt_mark_thresh = 100; -- cgit v1.1 From f81c972d27c36729e65d4a815e3d7b782a540bad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Sep 2009 14:24:13 -0400 Subject: tracing: consolidate code between trace_output.c and trace_function_graph.c Both trace_output.c and trace_function_graph.c do basically the same thing to handle the printing of the latency-format. This patch moves the code into one function that both can use. Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 26 ++------------------------ kernel/trace/trace_output.c | 30 ++++++++++++++++++++++++------ kernel/trace/trace_output.h | 2 ++ 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 48af493..61f1667 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -367,32 +367,10 @@ print_graph_proc(struct trace_seq *s, pid_t pid) static enum print_line_t print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { - int hardirq, softirq; - int ret; - - hardirq = entry->flags & TRACE_FLAG_HARDIRQ; - softirq = entry->flags & TRACE_FLAG_SOFTIRQ; - - if (!trace_seq_printf(s, " %c%c%c", - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? - 'X' : '.', - (entry->flags & TRACE_FLAG_NEED_RESCHED) ? - 'N' : '.', - (hardirq && softirq) ? 'H' : - hardirq ? 'h' : softirq ? 's' : '.')) - return 0; - - if (entry->lock_depth < 0) - ret = trace_seq_putc(s, '.'); - else - ret = trace_seq_printf(s, "%d", entry->lock_depth); - if (!ret) + if (!trace_seq_putc(s, ' ')) return 0; - if (entry->preempt_count) - return trace_seq_printf(s, "%x", entry->preempt_count); - return trace_seq_puts(s, "."); + return trace_print_lat_fmt(s, entry); } /* If the pid changed since the last trace, output this event */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 29a370a..f572f44 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -460,19 +460,23 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } -static int -lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) +/** + * trace_print_lat_fmt - print the irq, preempt and lockdep fields + * @s: trace seq struct to write to + * @entry: The trace entry field from the ring buffer + * + * Prints the generic fields of irqs off, in hard or softirq, preempt + * count and lock depth. + */ +int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { int hardirq, softirq; - char comm[TASK_COMM_LEN]; int ret; - trace_find_cmdline(entry->pid, comm); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; - if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c", - comm, entry->pid, cpu, + if (!trace_seq_printf(s, "%c%c%c", (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.', @@ -494,6 +498,20 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) return trace_seq_putc(s, '.'); } +static int +lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) +{ + char comm[TASK_COMM_LEN]; + + trace_find_cmdline(entry->pid, comm); + + if (!trace_seq_printf(s, "%8.8s-%-5d %3d", + comm, entry->pid, cpu)) + return 0; + + return trace_print_lat_fmt(s, entry); +} + static unsigned long preempt_mark_thresh = 100; static int diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index d38bec4..9d91c72 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -26,6 +26,8 @@ extern struct trace_event *ftrace_find_event(int type); extern enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); +extern int +trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); /* used by module unregistering */ extern int __unregister_ftrace_event(struct trace_event *event); -- cgit v1.1 From b63f39ea50330f836e301ddda21c6a93dcf0d6a3 Mon Sep 17 00:00:00 2001 From: "jolsa@redhat.com" Date: Fri, 11 Sep 2009 17:29:27 +0200 Subject: tracing: create generic trace parser Create a "trace_parser" that can parse the user space input for separate words. struct trace_parser is the descriptor. Generic "trace_get_user" function that can be a helper to read multiple words passed in by user space. Signed-off-by: Jiri Olsa LKML-Reference: <1252682969-3366-2-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 35 +++++++++++++++++ 2 files changed, 141 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3b91828..45c3f03 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -339,6 +339,112 @@ static struct { int trace_clock_id; +/* + * trace_parser_get_init - gets the buffer for trace parser + */ +int trace_parser_get_init(struct trace_parser *parser, int size) +{ + memset(parser, 0, sizeof(*parser)); + + parser->buffer = kmalloc(size, GFP_KERNEL); + if (!parser->buffer) + return 1; + + parser->size = size; + return 0; +} + +/* + * trace_parser_put - frees the buffer for trace parser + */ +void trace_parser_put(struct trace_parser *parser) +{ + kfree(parser->buffer); +} + +/* + * trace_get_user - reads the user input string separated by space + * (matched by isspace(ch)) + * + * For each string found the 'struct trace_parser' is updated, + * and the function returns. + * + * Returns number of bytes read. + * + * See kernel/trace/trace.h for 'struct trace_parser' details. + */ +int trace_get_user(struct trace_parser *parser, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char ch; + size_t read = 0; + ssize_t ret; + + if (!*ppos) + trace_parser_clear(parser); + + ret = get_user(ch, ubuf++); + if (ret) + goto out; + + read++; + cnt--; + + /* + * The parser is not finished with the last write, + * continue reading the user input without skipping spaces. + */ + if (!parser->cont) { + /* skip white space */ + while (cnt && isspace(ch)) { + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + /* only spaces were written */ + if (isspace(ch)) { + *ppos += read; + ret = read; + goto out; + } + + parser->idx = 0; + } + + /* read the non-space input */ + while (cnt && !isspace(ch)) { + if (parser->idx < parser->size) + parser->buffer[parser->idx++] = ch; + else { + ret = -EINVAL; + goto out; + } + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + /* We either got finished input or we have to wait for another call. */ + if (isspace(ch)) { + parser->buffer[parser->idx] = 0; + parser->cont = false; + } else { + parser->cont = true; + parser->buffer[parser->idx++] = ch; + } + + *ppos += read; + ret = read; + +out: + return ret; +} + ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) { int len; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b69697b..28247ce 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -616,6 +616,41 @@ static inline int ftrace_trace_task(struct task_struct *task) #endif /* + * struct trace_parser - servers for reading the user input separated by spaces + * @cont: set if the input is not complete - no final space char was found + * @buffer: holds the parsed user input + * @idx: user input lenght + * @size: buffer size + */ +struct trace_parser { + bool cont; + char *buffer; + unsigned idx; + unsigned size; +}; + +static inline bool trace_parser_loaded(struct trace_parser *parser) +{ + return (parser->idx != 0); +} + +static inline bool trace_parser_cont(struct trace_parser *parser) +{ + return parser->cont; +} + +static inline void trace_parser_clear(struct trace_parser *parser) +{ + parser->cont = false; + parser->idx = 0; +} + +extern int trace_parser_get_init(struct trace_parser *parser, int size); +extern void trace_parser_put(struct trace_parser *parser); +extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, + size_t cnt, loff_t *ppos); + +/* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. * -- cgit v1.1 From 489663644c35d50a20f58d468a7cbc705e6a29ce Mon Sep 17 00:00:00 2001 From: "jolsa@redhat.com" Date: Fri, 11 Sep 2009 17:29:28 +0200 Subject: tracing: trace parser support for set_event Convert the parsing of the file 'set_event' to use the generic trace_praser 'trace_get_user' function. Signed-off-by: Jiri Olsa LKML-Reference: <1252682969-3366-3-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 60 ++++++++++----------------------------------- 1 file changed, 13 insertions(+), 47 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 975f324..f46d14c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -230,11 +230,9 @@ static ssize_t ftrace_event_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { + struct trace_parser parser; size_t read = 0; - int i, set = 1; ssize_t ret; - char *buf; - char ch; if (!cnt || cnt < 0) return 0; @@ -243,60 +241,28 @@ ftrace_event_write(struct file *file, const char __user *ubuf, if (ret < 0) return ret; - ret = get_user(ch, ubuf++); - if (ret) - return ret; - read++; - cnt--; - - /* skip white space */ - while (cnt && isspace(ch)) { - ret = get_user(ch, ubuf++); - if (ret) - return ret; - read++; - cnt--; - } - - /* Only white space found? */ - if (isspace(ch)) { - file->f_pos += read; - ret = read; - return ret; - } - - buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL); - if (!buf) + if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) return -ENOMEM; - if (cnt > EVENT_BUF_SIZE) - cnt = EVENT_BUF_SIZE; + read = trace_get_user(&parser, ubuf, cnt, ppos); + + if (trace_parser_loaded((&parser))) { + int set = 1; - i = 0; - while (cnt && !isspace(ch)) { - if (!i && ch == '!') + if (*parser.buffer == '!') set = 0; - else - buf[i++] = ch; - ret = get_user(ch, ubuf++); + parser.buffer[parser.idx] = 0; + + ret = ftrace_set_clr_event(parser.buffer + !set, set); if (ret) - goto out_free; - read++; - cnt--; + goto out_put; } - buf[i] = 0; - - file->f_pos += read; - - ret = ftrace_set_clr_event(buf, set); - if (ret) - goto out_free; ret = read; - out_free: - kfree(buf); + out_put: + trace_parser_put(&parser); return ret; } -- cgit v1.1 From 689fd8b65d669b96d612ccc37d6fb87bf7ed6907 Mon Sep 17 00:00:00 2001 From: "jolsa@redhat.com" Date: Fri, 11 Sep 2009 17:29:29 +0200 Subject: tracing: trace parser support for function and graph Convert the writing to 'set_graph_function', 'set_ftrace_filter' and 'set_ftrace_notrace' to use the generic trace_parser 'trace_get_user' function. Removed FTRACE_ITER_CONT flag, since it's not needed after this change. Minor fix in set_graph_function display - g_show function. Signed-off-by: Jiri Olsa LKML-Reference: <1252682969-3366-4-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 150 ++++++++++++++------------------------------------ 1 file changed, 40 insertions(+), 110 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8c804e2..8b23d56 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1323,11 +1323,10 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) enum { FTRACE_ITER_FILTER = (1 << 0), - FTRACE_ITER_CONT = (1 << 1), - FTRACE_ITER_NOTRACE = (1 << 2), - FTRACE_ITER_FAILURES = (1 << 3), - FTRACE_ITER_PRINTALL = (1 << 4), - FTRACE_ITER_HASH = (1 << 5), + FTRACE_ITER_NOTRACE = (1 << 1), + FTRACE_ITER_FAILURES = (1 << 2), + FTRACE_ITER_PRINTALL = (1 << 3), + FTRACE_ITER_HASH = (1 << 4), }; #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ @@ -1337,8 +1336,7 @@ struct ftrace_iterator { int hidx; int idx; unsigned flags; - unsigned char buffer[FTRACE_BUFF_MAX+1]; - unsigned buffer_idx; + struct trace_parser parser; }; static void * @@ -1604,6 +1602,11 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) if (!iter) return -ENOMEM; + if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) { + kfree(iter); + return -ENOMEM; + } + mutex_lock(&ftrace_regex_lock); if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) @@ -2196,9 +2199,8 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos, int enable) { struct ftrace_iterator *iter; - char ch; - size_t read = 0; - ssize_t ret; + struct trace_parser *parser; + ssize_t ret, read; if (!cnt || cnt < 0) return 0; @@ -2211,72 +2213,23 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, } else iter = file->private_data; - if (!*ppos) { - iter->flags &= ~FTRACE_ITER_CONT; - iter->buffer_idx = 0; - } - - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; - - /* - * If the parser haven't finished with the last write, - * continue reading the user input without skipping spaces. - */ - if (!(iter->flags & FTRACE_ITER_CONT)) { - /* skip white space */ - while (cnt && isspace(ch)) { - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; - } + parser = &iter->parser; + read = trace_get_user(parser, ubuf, cnt, ppos); - /* only spaces were written */ - if (isspace(ch)) { - *ppos += read; - ret = read; - goto out; - } - - iter->buffer_idx = 0; - } - - while (cnt && !isspace(ch)) { - if (iter->buffer_idx < FTRACE_BUFF_MAX) - iter->buffer[iter->buffer_idx++] = ch; - else { - ret = -EINVAL; - goto out; - } - ret = get_user(ch, ubuf++); + if (trace_parser_loaded(parser) && + !trace_parser_cont(parser)) { + ret = ftrace_process_regex(parser->buffer, + parser->idx, enable); if (ret) goto out; - read++; - cnt--; - } - if (isspace(ch)) { - iter->buffer[iter->buffer_idx] = 0; - ret = ftrace_process_regex(iter->buffer, - iter->buffer_idx, enable); - if (ret) - goto out; - iter->buffer_idx = 0; - } else { - iter->flags |= FTRACE_ITER_CONT; - iter->buffer[iter->buffer_idx++] = ch; + trace_parser_clear(parser); } - *ppos += read; ret = read; - out: - mutex_unlock(&ftrace_regex_lock); + mutex_unlock(&ftrace_regex_lock); +out: return ret; } @@ -2381,6 +2334,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) { struct seq_file *m = (struct seq_file *)file->private_data; struct ftrace_iterator *iter; + struct trace_parser *parser; mutex_lock(&ftrace_regex_lock); if (file->f_mode & FMODE_READ) { @@ -2390,9 +2344,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) } else iter = file->private_data; - if (iter->buffer_idx) { - iter->buffer[iter->buffer_idx] = 0; - ftrace_match_records(iter->buffer, iter->buffer_idx, enable); + parser = &iter->parser; + if (trace_parser_loaded(parser)) { + parser->buffer[parser->idx] = 0; + ftrace_match_records(parser->buffer, parser->idx, enable); } mutex_lock(&ftrace_lock); @@ -2400,7 +2355,9 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_lock); + trace_parser_put(parser); kfree(iter); + mutex_unlock(&ftrace_regex_lock); return 0; } @@ -2499,7 +2456,7 @@ static int g_show(struct seq_file *m, void *v) return 0; } - seq_printf(m, "%pf\n", v); + seq_printf(m, "%pf\n", (void *)*ptr); return 0; } @@ -2602,12 +2559,10 @@ static ssize_t ftrace_graph_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { - unsigned char buffer[FTRACE_BUFF_MAX+1]; + struct trace_parser parser; unsigned long *array; size_t read = 0; ssize_t ret; - int index = 0; - char ch; if (!cnt || cnt < 0) return 0; @@ -2625,51 +2580,26 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, } else array = file->private_data; - ret = get_user(ch, ubuf++); - if (ret) + if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { + ret = -ENOMEM; goto out; - read++; - cnt--; - - /* skip white space */ - while (cnt && isspace(ch)) { - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; } - if (isspace(ch)) { - *ppos += read; - ret = read; - goto out; - } + read = trace_get_user(&parser, ubuf, cnt, ppos); - while (cnt && !isspace(ch)) { - if (index < FTRACE_BUFF_MAX) - buffer[index++] = ch; - else { - ret = -EINVAL; - goto out; - } - ret = get_user(ch, ubuf++); + if (trace_parser_loaded((&parser))) { + parser.buffer[parser.idx] = 0; + + /* we allow only one expression at a time */ + ret = ftrace_set_func(array, &ftrace_graph_count, + parser.buffer); if (ret) goto out; - read++; - cnt--; } - buffer[index] = 0; - - /* we allow only one expression at a time */ - ret = ftrace_set_func(array, &ftrace_graph_count, buffer); - if (ret) - goto out; - - file->f_pos += read; ret = read; out: + trace_parser_put(&parser); mutex_unlock(&graph_lock); return ret; -- cgit v1.1 From fe63b94a430c0c8058de317f0a8ce921c69dbee4 Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Sat, 12 Sep 2009 00:05:37 +0200 Subject: tracing: prevent NULL pointer dereference in ftrace_raw_event_block_bio_bounce Booting 2.6.31 and executing echo 1 >/sys/kernel/debug/tracing/events/enable leads to BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] ftrace_raw_event_block_bio_bounce+0x4b/0xb9 Apparently, bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); is called in block/blk-map.c:58 where bio->bi_bdev in set to NULL and still is NULL when an attempt is made to evaluate bio->bi_bdev->bd_dev in include/trace/events/block.h:189. The tracepoint should ensure bio->bi_bdev is not dereferenced, if NULL. Signed-off-by: Carsten Emde LKML-Reference: <4AAAC9B1.9060505@osadl.org> Signed-off-by: Steven Rostedt --- include/trace/events/block.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 9a74b46..d86af94 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -171,6 +171,7 @@ TRACE_EVENT(block_rq_complete, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->errors) ); + TRACE_EVENT(block_bio_bounce, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -186,7 +187,8 @@ TRACE_EVENT(block_bio_bounce, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio->bi_bdev ? + bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); -- cgit v1.1 From 41dfba4367109b92d92ec6e059be6950497d932f Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Sun, 13 Sep 2009 01:41:31 +0200 Subject: tracing: remove unused local variables in tracer probe functions When the nsecs_to_usecs() conversion in probe_wakeup_sched_switch() and check_critical_timing() was moved to a later stage in order to avoid unnecessary computing, it was overlooked to remove the original variables, assignments and comments.. Signed-off-by: Carsten Emde Signed-off-by: Steven Rostedt --- kernel/trace/trace_irqsoff.c | 12 +----------- kernel/trace/trace_sched_wakeup.c | 10 ---------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5555b75..06f8ea9 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -129,15 +129,10 @@ check_critical_timing(struct trace_array *tr, unsigned long parent_ip, int cpu) { - unsigned long latency, t0, t1; cycle_t T0, T1, delta; unsigned long flags; int pc; - /* - * usecs conversion is slow so we try to delay the conversion - * as long as possible: - */ T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); delta = T1-T0; @@ -157,17 +152,12 @@ check_critical_timing(struct trace_array *tr, trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); - latency = nsecs_to_usecs(delta); - if (data->critical_sequence != max_sequence) goto out_unlock; - tracing_max_latency = delta; - t0 = nsecs_to_usecs(T0); - t1 = nsecs_to_usecs(T1); - data->critical_end = parent_ip; + tracing_max_latency = delta; update_max_tr_single(tr, current, cpu); max_sequence++; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index cf43bdb..6e1529b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -110,7 +110,6 @@ static void notrace probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { - unsigned long latency = 0, t0 = 0, t1 = 0; struct trace_array_cpu *data; cycle_t T0, T1, delta; unsigned long flags; @@ -156,10 +155,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); - /* - * usecs conversion is slow so we try to delay the conversion - * as long as possible: - */ T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); delta = T1-T0; @@ -167,12 +162,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (!report_latency(delta)) goto out_unlock; - latency = nsecs_to_usecs(delta); - tracing_max_latency = delta; - t0 = nsecs_to_usecs(T0); - t1 = nsecs_to_usecs(T1); - update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); out_unlock: -- cgit v1.1 From b5130b1e7d3717d03ab1916b198bf0d49fa0a619 Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Sun, 13 Sep 2009 01:43:07 +0200 Subject: tracing: do not update tracing_max_latency when tracer is stopped The state of the function pair tracing_stop()/tracing_start() is correctly considered when tracer data are updated. However, the global and externally accessible variable tracing_max_latency is always updated - even when tracing is stopped. The update should only occur, if tracing was not stopped. Signed-off-by: Carsten Emde Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 +++++ kernel/trace/trace.h | 1 + kernel/trace/trace_irqsoff.c | 6 ++++-- kernel/trace/trace_sched_wakeup.c | 6 ++++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 45c3f03..ef82a7f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -825,6 +825,11 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } +int is_tracing_stopped(void) +{ + return trace_stop_count; +} + /** * ftrace_off_permanent - disable all ftrace code permanently * diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 28247ce..4ad4e1d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -461,6 +461,7 @@ void tracing_stop_sched_switch_record(void); void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); +int is_tracing_stopped(void); extern unsigned long nsecs_to_usecs(unsigned long nsecs); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 06f8ea9..3aa7eaa 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -157,8 +157,10 @@ check_critical_timing(struct trace_array *tr, data->critical_end = parent_ip; - tracing_max_latency = delta; - update_max_tr_single(tr, current, cpu); + if (likely(!is_tracing_stopped())) { + tracing_max_latency = delta; + update_max_tr_single(tr, current, cpu); + } max_sequence++; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 6e1529b..26185d7 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -162,8 +162,10 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (!report_latency(delta)) goto out_unlock; - tracing_max_latency = delta; - update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); + if (likely(!is_tracing_stopped())) { + tracing_max_latency = delta; + update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); + } out_unlock: __wakeup_reset(wakeup_trace); -- cgit v1.1 From 4b3b4c5e64ce26612646867ee354373620063534 Mon Sep 17 00:00:00 2001 From: John Reiser Date: Mon, 27 Jul 2009 11:23:50 -0700 Subject: ftrace: __start_mcount_loc should be .init.rodata __start_mcount_loc[] is unused after init, yet occupies RAM forever as part of .rodata. 152kiB is typical on a 64-bit architecture. Instead, __start_mcount_loc should be in the interval [__init_begin, __init_end) so that the space is reclaimed after init. __start_mcount_loc[] is generated during the load portion of kernel build, and is used only by ftrace_init(). ftrace_init is declared '__init' and is in .init.text, which is freed after init. __start_mcount_loc is placed into .rodata by a call to MCOUNT_REC inside the RO_DATA macro of include/asm-generic/vmlinux.lds.h. The array *is* read-only, but more importantly it is not used after init. So the call to MCOUNT_REC should be moved from RO_DATA to INIT_DATA. This patch has been tested on x86_64 with CONFIG_DEBUG_PAGEALLOC=y which verifies that the address range never is accessed after init. Signed-off-by: John Reiser LKML-Reference: <4A6DF0B6.7080402@bitwagon.com> Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 6ad76bf..98b37cf 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -91,7 +91,8 @@ #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD -#define MCOUNT_REC() VMLINUX_SYMBOL(__start_mcount_loc) = .; \ +#define MCOUNT_REC() . = ALIGN(8); \ + VMLINUX_SYMBOL(__start_mcount_loc) = .; \ *(__mcount_loc) \ VMLINUX_SYMBOL(__stop_mcount_loc) = .; #else @@ -331,7 +332,6 @@ /* __*init sections */ \ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ - MCOUNT_REC() \ DEV_KEEP(init.rodata) \ DEV_KEEP(exit.rodata) \ CPU_KEEP(init.rodata) \ @@ -455,6 +455,7 @@ MEM_DISCARD(init.data) \ KERNEL_CTORS() \ *(.init.rodata) \ + MCOUNT_REC() \ DEV_DISCARD(init.rodata) \ CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) -- cgit v1.1 From 4818d80942b7c0021d213b7c5f1a14a832820a01 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 29 Jul 2009 10:58:37 +0200 Subject: tracing/function-graph: x86_64 stack allocation cleanup Only 24 bytes needs to be reserved on the stack for the function graph tracer on x86_64. Signed-off-by: Jiri Olsa LKML-Reference: <20090729085837.GB4998@jolsa.lab.eng.brq.redhat.com> Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c251be7..d59fe32 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -146,7 +146,7 @@ ENTRY(ftrace_graph_caller) END(ftrace_graph_caller) GLOBAL(return_to_handler) - subq $80, %rsp + subq $24, %rsp /* Save the return values */ movq %rax, (%rsp) @@ -155,10 +155,10 @@ GLOBAL(return_to_handler) call ftrace_return_to_handler - movq %rax, 72(%rsp) + movq %rax, 16(%rsp) movq 8(%rsp), %rdx movq (%rsp), %rax - addq $72, %rsp + addq $16, %rsp retq #endif -- cgit v1.1 From 558e6547e4b8a2b13608a24a9d3679802f91c4c7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 24 Aug 2009 12:19:47 +0800 Subject: tracing/profile: fix profile_disable vs module_unload If the correspoding module is unloaded before ftrace_profile_disable() is called, event->profile_disable() won't be called, which can cause oops: # insmod trace-events-sample.ko # perf record -f -a -e sample:foo_bar sleep 3 & # sleep 1 # rmmod trace_events_sample # insmod trace-events-sample.ko OOPS! Signed-off-by: Li Zefan LKML-Reference: <4A9214E3.2070807@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_event_profile.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 11ba5bb..55a25c9 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -5,6 +5,7 @@ * */ +#include #include "trace.h" int ftrace_profile_enable(int event_id) @@ -14,7 +15,8 @@ int ftrace_profile_enable(int event_id) mutex_lock(&event_mutex); list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id && event->profile_enable) { + if (event->id == event_id && event->profile_enable && + try_module_get(event->mod)) { ret = event->profile_enable(event); break; } @@ -32,6 +34,7 @@ void ftrace_profile_disable(int event_id) list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id) { event->profile_disable(event); + module_put(event->mod); break; } } -- cgit v1.1 From 95b696088c1cf152c3bb62560bbe3a441922d68b Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 10 Sep 2009 23:13:51 -0500 Subject: tracing/filters: add filter Documentation Documentation for event filters and formats. v2 changes: fix a few problems noticed by Randy Dunlap. Reviewed-by: Li Zefan Acked-by: Randy Dunlap Signed-off-by: Tom Zanussi LKML-Reference: <1252642431.8016.9.camel@tropicana> Signed-off-by: Steven Rostedt --- Documentation/trace/events.txt | 184 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 183 insertions(+), 1 deletion(-) diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 2bcc8d4..6e5f35e 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -1,7 +1,7 @@ Event Tracing Documentation written by Theodore Ts'o - Updated by Li Zefan + Updated by Li Zefan and Tom Zanussi 1. Introduction =============== @@ -97,3 +97,185 @@ The format of this boot option is the same as described in section 2.1. See The example provided in samples/trace_events +4. Event formats +================ + +Each trace event has a 'format' file associated with it that contains +a description of each field in a logged event. This information can +be used to parse the binary trace stream, and is also the place to +find the field names that can be used in event filters (see section 5). + +It also displays the format string that will be used to print the +event in text mode, along with the event name and ID used for +profiling. + +Every event has a set of 'common' fields associated with it; these are +the fields prefixed with 'common_'. The other fields vary between +events and correspond to the fields defined in the TRACE_EVENT +definition for that event. + +Each field in the format has the form: + + field:field-type field-name; offset:N; size:N; + +where offset is the offset of the field in the trace record and size +is the size of the data item, in bytes. + +For example, here's the information displayed for the 'sched_wakeup' +event: + +# cat /debug/tracing/events/sched/sched_wakeup/format + +name: sched_wakeup +ID: 60 +format: + field:unsigned short common_type; offset:0; size:2; + field:unsigned char common_flags; offset:2; size:1; + field:unsigned char common_preempt_count; offset:3; size:1; + field:int common_pid; offset:4; size:4; + field:int common_tgid; offset:8; size:4; + + field:char comm[TASK_COMM_LEN]; offset:12; size:16; + field:pid_t pid; offset:28; size:4; + field:int prio; offset:32; size:4; + field:int success; offset:36; size:4; + field:int cpu; offset:40; size:4; + +print fmt: "task %s:%d [%d] success=%d [%03d]", REC->comm, REC->pid, + REC->prio, REC->success, REC->cpu + +This event contains 10 fields, the first 5 common and the remaining 5 +event-specific. All the fields for this event are numeric, except for +'comm' which is a string, a distinction important for event filtering. + +5. Event filtering +================== + +Trace events can be filtered in the kernel by associating boolean +'filter expressions' with them. As soon as an event is logged into +the trace buffer, its fields are checked against the filter expression +associated with that event type. An event with field values that +'match' the filter will appear in the trace output, and an event whose +values don't match will be discarded. An event with no filter +associated with it matches everything, and is the default when no +filter has been set for an event. + +5.1 Expression syntax +--------------------- + +A filter expression consists of one or more 'predicates' that can be +combined using the logical operators '&&' and '||'. A predicate is +simply a clause that compares the value of a field contained within a +logged event with a constant value and returns either 0 or 1 depending +on whether the field value matched (1) or didn't match (0): + + field-name relational-operator value + +Parentheses can be used to provide arbitrary logical groupings and +double-quotes can be used to prevent the shell from interpreting +operators as shell metacharacters. + +The field-names available for use in filters can be found in the +'format' files for trace events (see section 4). + +The relational-operators depend on the type of the field being tested: + +The operators available for numeric fields are: + +==, !=, <, <=, >, >= + +And for string fields they are: + +==, != + +Currently, only exact string matches are supported. + +Currently, the maximum number of predicates in a filter is 16. + +5.2 Setting filters +------------------- + +A filter for an individual event is set by writing a filter expression +to the 'filter' file for the given event. + +For example: + +# cd /debug/tracing/events/sched/sched_wakeup +# echo "common_preempt_count > 4" > filter + +A slightly more involved example: + +# cd /debug/tracing/events/sched/sched_signal_send +# echo "((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter + +If there is an error in the expression, you'll get an 'Invalid +argument' error when setting it, and the erroneous string along with +an error message can be seen by looking at the filter e.g.: + +# cd /debug/tracing/events/sched/sched_signal_send +# echo "((sig >= 10 && sig < 15) || dsig == 17) && comm != bash" > filter +-bash: echo: write error: Invalid argument +# cat filter +((sig >= 10 && sig < 15) || dsig == 17) && comm != bash +^ +parse_error: Field not found + +Currently the caret ('^') for an error always appears at the beginning of +the filter string; the error message should still be useful though +even without more accurate position info. + +5.3 Clearing filters +-------------------- + +To clear the filter for an event, write a '0' to the event's filter +file. + +To clear the filters for all events in a subsystem, write a '0' to the +subsystem's filter file. + +5.3 Subsystem filters +--------------------- + +For convenience, filters for every event in a subsystem can be set or +cleared as a group by writing a filter expression into the filter file +at the root of the subsytem. Note however, that if a filter for any +event within the subsystem lacks a field specified in the subsystem +filter, or if the filter can't be applied for any other reason, the +filter for that event will retain its previous setting. This can +result in an unintended mixture of filters which could lead to +confusing (to the user who might think different filters are in +effect) trace output. Only filters that reference just the common +fields can be guaranteed to propagate successfully to all events. + +Here are a few subsystem filter examples that also illustrate the +above points: + +Clear the filters on all events in the sched subsytem: + +# cd /sys/kernel/debug/tracing/events/sched +# echo 0 > filter +# cat sched_switch/filter +none +# cat sched_wakeup/filter +none + +Set a filter using only common fields for all events in the sched +subsytem (all events end up with the same filter): + +# cd /sys/kernel/debug/tracing/events/sched +# echo common_pid == 0 > filter +# cat sched_switch/filter +common_pid == 0 +# cat sched_wakeup/filter +common_pid == 0 + +Attempt to set a filter using a non-common field for all events in the +sched subsytem (all events but those that have a prev_pid field retain +their old filters): + +# cd /sys/kernel/debug/tracing/events/sched +# echo prev_pid == 0 > filter +# cat sched_switch/filter +prev_pid == 0 +# cat sched_wakeup/filter +common_pid == 0 -- cgit v1.1 From 16bb8eb1b73bf940d30ff88cae622bfcd3790f61 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 12 Sep 2009 19:04:54 -0400 Subject: tracing: allow filter predicates to handle ksym symbols This patch increases the max string used by predicates to handle KSYM_SYMBOL_LEN. Also moves an include to look nicer. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0608b0f..bd099ba 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -1,8 +1,8 @@ #ifndef _LINUX_FTRACE_EVENT_H #define _LINUX_FTRACE_EVENT_H -#include #include +#include #include struct trace_array; @@ -135,7 +135,7 @@ struct ftrace_event_call { }; #define MAX_FILTER_PRED 32 -#define MAX_FILTER_STR_VAL 128 +#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); -- cgit v1.1 From 0a1c49db8d91c538f104f8d70e560c6fdd589bd4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 12 Sep 2009 19:17:15 -0400 Subject: tracing: use macros to create internal ftrace entry ring buffer structures The entries used by ftrace internal code (plugins) currently have their formats manually exported to userspace. That is, the format files in debugfs/tracing/events/ftrace/*/format are currently created by hand. This is a maintenance nightmare, and can easily become out of sync with what is actually shown. This patch uses the methodology of the TRACE_EVENT macros to build the structures so that their formats can be automated and this will keep the structures in sync with what users can see. This patch only changes the way the structures are created. Further patches will build off of this to automate the format files. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 160 ++++---------------- kernel/trace/trace_entries.h | 342 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 369 insertions(+), 133 deletions(-) create mode 100644 kernel/trace/trace_entries.h diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4ad4e1d..d308195 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -42,150 +42,45 @@ enum trace_type { __TRACE_LAST_TYPE, }; -/* - * Function trace entry - function address and parent function addres: - */ -struct ftrace_entry { - struct trace_entry ent; - unsigned long ip; - unsigned long parent_ip; -}; - -/* Function call entry */ -struct ftrace_graph_ent_entry { - struct trace_entry ent; - struct ftrace_graph_ent graph_ent; +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ }; -/* Function return entry */ -struct ftrace_graph_ret_entry { - struct trace_entry ent; - struct ftrace_graph_ret ret; -}; extern struct tracer boot_tracer; -/* - * Context switch trace entry - which task (and prio) we switched from/to: - */ -struct ctx_switch_entry { - struct trace_entry ent; - unsigned int prev_pid; - unsigned char prev_prio; - unsigned char prev_state; - unsigned int next_pid; - unsigned char next_prio; - unsigned char next_state; - unsigned int next_cpu; -}; - -/* - * Special (free-form) trace entry: - */ -struct special_entry { - struct trace_entry ent; - unsigned long arg1; - unsigned long arg2; - unsigned long arg3; -}; - -/* - * Stack-trace entry: - */ +#undef __field +#define __field(type, item) type item; -#define FTRACE_STACK_ENTRIES 8 +#undef __array +#define __array(type, item, size) type item[size]; -struct stack_entry { - struct trace_entry ent; - unsigned long caller[FTRACE_STACK_ENTRIES]; -}; +#undef __dynamic_array +#define __dynamic_array(type, item) type item[]; -struct userstack_entry { - struct trace_entry ent; - unsigned int tgid; - unsigned long caller[FTRACE_STACK_ENTRIES]; -}; - -/* - * trace_printk entry: - */ -struct bprint_entry { - struct trace_entry ent; - unsigned long ip; - const char *fmt; - u32 buf[]; -}; - -struct print_entry { - struct trace_entry ent; - unsigned long ip; - char buf[]; -}; +#undef F_STRUCT +#define F_STRUCT(args...) args -struct trace_mmiotrace_rw { - struct trace_entry ent; - struct mmiotrace_rw rw; -}; - -struct trace_mmiotrace_map { - struct trace_entry ent; - struct mmiotrace_map map; -}; - -struct trace_boot_call { - struct trace_entry ent; - struct boot_trace_call boot_call; -}; - -struct trace_boot_ret { - struct trace_entry ent; - struct boot_trace_ret boot_ret; -}; - -#define TRACE_FUNC_SIZE 30 -#define TRACE_FILE_SIZE 20 -struct trace_branch { - struct trace_entry ent; - unsigned line; - char func[TRACE_FUNC_SIZE+1]; - char file[TRACE_FILE_SIZE+1]; - char correct; -}; - -struct hw_branch_entry { - struct trace_entry ent; - u64 from; - u64 to; -}; - -struct trace_power { - struct trace_entry ent; - struct power_trace state_data; -}; +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ + struct struct_name { \ + struct trace_entry ent; \ + tstruct \ + } -enum kmemtrace_type_id { - KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ - KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ - KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ -}; +#undef TP_ARGS +#define TP_ARGS(args...) args -struct kmemtrace_alloc_entry { - struct trace_entry ent; - enum kmemtrace_type_id type_id; - unsigned long call_site; - const void *ptr; - size_t bytes_req; - size_t bytes_alloc; - gfp_t gfp_flags; - int node; -}; +#undef FTRACE_ENTRY_DUP +#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) -struct kmemtrace_free_entry { - struct trace_entry ent; - enum kmemtrace_type_id type_id; - unsigned long call_site; - const void *ptr; -}; +#include "trace_entries.h" +/* + * syscalls are special, and need special handling, this is why + * they are not included in trace_entries.h + */ struct syscall_trace_enter { struct trace_entry ent; int nr; @@ -198,7 +93,6 @@ struct syscall_trace_exit { unsigned long ret; }; - /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h new file mode 100644 index 0000000..82c51fd --- /dev/null +++ b/kernel/trace/trace_entries.h @@ -0,0 +1,342 @@ +/* + * This file defines the trace event structures that go into the ring + * buffer directly. They are created via macros so that changes for them + * appear in the format file. Using macros will automate this process. + * + * The macro used to create a ftrace data structure is: + * + * FTRACE_ENTRY( name, struct_name, id, structure, print ) + * + * @name: the name used the event name, as well as the name of + * the directory that holds the format file. + * + * @struct_name: the name of the structure that is created. + * + * @id: The event identifier that is used to detect what event + * this is from the ring buffer. + * + * @structure: the structure layout + * + * - __field( type, item ) + * This is equivalent to declaring + * type item; + * in the structure. + * - __array( type, item, size ) + * This is equivalent to declaring + * type item[size]; + * in the structure. + * + * @print: the print format shown to users in the format file. + */ + +/* + * Function trace entry - function address and parent function addres: + */ +FTRACE_ENTRY(function, ftrace_entry, + + TRACE_FN, + + F_STRUCT( + __field( unsigned long, ip ) + __field( unsigned long, parent_ip ) + ), + + F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip) +); + +/* Function call entry */ +FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, + + TRACE_GRAPH_ENT, + + F_STRUCT( + __field( struct ftrace_graph_ent, graph_ent ) + ), + + F_printk("--> %lx (%d)", __entry->graph_ent.func, __entry->depth) +); + +/* Function return entry */ +FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, + + TRACE_GRAPH_RET, + + F_STRUCT( + __field( struct ftrace_graph_ret, ret ) + ), + + F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", + __entry->func, __entry->depth, + __entry->calltime, __entry->rettim, + __entrty->depth) +); + +/* + * Context switch trace entry - which task (and prio) we switched from/to: + * + * This is used for both wakeup and context switches. We only want + * to create one structure, but we need two outputs for it. + */ +#define FTRACE_CTX_FIELDS \ + __field( unsigned int, prev_pid ) \ + __field( unsigned char, prev_prio ) \ + __field( unsigned char, prev_state ) \ + __field( unsigned int, next_pid ) \ + __field( unsigned char, next_prio ) \ + __field( unsigned char, next_state ) \ + __field( unsigned int, next_cpu ) + +#if 0 +FTRACE_ENTRY_STRUCT_ONLY(ctx_switch_entry, + + F_STRUCT( + FTRACE_CTX_FIELDS + ) +); +#endif + +FTRACE_ENTRY(context_switch, ctx_switch_entry, + + TRACE_CTX, + + F_STRUCT( + FTRACE_CTX_FIELDS + ), + + F_printk(b"%u:%u:%u ==> %u:%u:%u [%03u]", + __entry->prev_pid, __entry->prev_prio, __entry->prev_state, + __entry->next_pid, __entry->next_prio, __entry->next_state, + __entry->next_cpu + ) +); + +/* + * FTRACE_ENTRY_DUP only creates the format file, it will not + * create another structure. + */ +FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, + + TRACE_WAKE, + + F_STRUCT( + FTRACE_CTX_FIELDS + ), + + F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", + __entry->prev_pid, __entry->prev_prio, __entry->prev_state, + __entry->next_pid, __entry->next_prio, __entry->next_state, + __entry->next_cpu + ) +); + +/* + * Special (free-form) trace entry: + */ +FTRACE_ENTRY(special, special_entry, + + TRACE_SPECIAL, + + F_STRUCT( + __field( unsigned long, arg1 ) + __field( unsigned long, arg2 ) + __field( unsigned long, arg3 ) + ), + + F_printk("(%08lx) (%08lx) (%08lx)", + __entry->arg1, __entry->arg2, __entry->arg3) +); + +/* + * Stack-trace entry: + */ + +#define FTRACE_STACK_ENTRIES 8 + +FTRACE_ENTRY(kernel_stack, stack_entry, + + TRACE_STACK, + + F_STRUCT( + __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) + ), + + F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", + __entry->caller[0], __entry->caller[1], __entry->caller[2], + __entry->caller[3], __entry->caller[4], __entry->caller[5], + __entry->caller[6], __entry->caller[7]) +); + +FTRACE_ENTRY(user_stack, userstack_entry, + + TRACE_USER_STACK, + + F_STRUCT( + __field( unsigned int, tgid ) + __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) + ), + + F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", + __entry->caller[0], __entry->caller[1], __entry->caller[2], + __entry->caller[3], __entry->caller[4], __entry->caller[5], + __entry->caller[6], __entry->caller[7]) +); + +/* + * trace_printk entry: + */ +FTRACE_ENTRY(bprint, bprint_entry, + + TRACE_BPRINT, + + F_STRUCT( + __field( unsigned long, ip ) + __field( const char *, fmt ) + __dynamic_array( u32, buf ) + ), + + F_printk("%08lx fmt:%p", + __entry->ip, __entry->fmt) +); + +FTRACE_ENTRY(print, print_entry, + + TRACE_PRINT, + + F_STRUCT( + __field( unsigned long, ip ) + __dynamic_array( char, buf ) + ), + + F_printk("%08lx %s", + __entry->ip, __entry->buf) +); + +FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, + + TRACE_MMIO_RW, + + F_STRUCT( + __field( struct mmiotrace_rw, rw ) + ), + + F_printk("%lx %lx %lx %d %lx %lx", + __entry->phs, __entry->value, __entry->pc, + __entry->map_id, __entry->opcode, __entry->width) +); + +FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, + + TRACE_MMIO_MAP, + + F_STRUCT( + __field( struct mmiotrace_map, map ) + ), + + F_printk("%lx %lx %lx %d %lx", + __entry->phs, __entry->virt, __entry->len, + __entry->map_id, __entry->opcode) +); + +FTRACE_ENTRY(boot_call, trace_boot_call, + + TRACE_BOOT_CALL, + + F_STRUCT( + __field( struct boot_trace_call, boot_call ) + ), + + F_printk("%d %s", __entry->caller, __entry->func) +); + +FTRACE_ENTRY(boot_ret, trace_boot_ret, + + TRACE_BOOT_RET, + + F_STRUCT( + __field( struct boot_trace_ret, boot_ret ) + ), + + F_printk("%s %d %lx", + __entry->func, __entry->result, __entry->duration) +); + +#define TRACE_FUNC_SIZE 30 +#define TRACE_FILE_SIZE 20 + +FTRACE_ENTRY(branch, trace_branch, + + TRACE_BRANCH, + + F_STRUCT( + __field( unsigned int, line ) + __array( char, func, TRACE_FUNC_SIZE+1 ) + __array( char, file, TRACE_FILE_SIZE+1 ) + __field( char, correct ) + ), + + F_printk("%u:%s:%s (%u)", + __entry->line, + __entry->func, __entry->file, __entry->correct) +); + +FTRACE_ENTRY(hw_branch, hw_branch_entry, + + TRACE_HW_BRANCHES, + + F_STRUCT( + __field( u64, from ) + __field( u64, to ) + ), + + F_printk("from: %llx to: %llx", __entry->from, __entry->to) +); + +FTRACE_ENTRY(power, trace_power, + + TRACE_POWER, + + F_STRUCT( + __field( struct power_trace, state_data ) + ), + + F_printk("%llx->%llx type:%u state:%u", + __entry->stamp, __entry->end, + __entry->type, __entry->state) +); + +FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, + + TRACE_KMEM_ALLOC, + + F_STRUCT( + __field( enum kmemtrace_type_id, type_id ) + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + F_printk("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" + " flags:%x node:%d", + __entry->type_id, __entry->call_site, __entry->ptr, + __entry->bytes_req, __entry->bytes_alloc, + __entry->gfp_flags, __entry->node) +); + +FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, + + TRACE_KMEM_FREE, + + F_STRUCT( + __field( enum kmemtrace_type_id, type_id ) + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + F_printk("type:%u call_site:%lx ptr:%p", + __entry->type_id, __entry->call_site, __entry->ptr) +); -- cgit v1.1 From d73150943cf47b6cabcb4f4e52dd25975e820ae2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 12 Sep 2009 19:22:23 -0400 Subject: tracing: show details of structures within the ftrace structures Some of the internal ftrace structures use structures within. The output of a field saying it is just a structure is useless for a format file. A binary reader of the ring buffer needs to know more about how the fields are broken up. This patch adds to the ftrace structure macros new fields to describe the structures inside a structure. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 9 +++++++ kernel/trace/trace_entries.h | 64 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d308195..b0d287d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -53,9 +53,18 @@ extern struct tracer boot_tracer; #undef __field #define __field(type, item) type item; +#undef __field_struct +#define __field_struct(type, item) __field(type, item) + +#undef __field_desc +#define __field_desc(type, container, item) + #undef __array #define __array(type, item, size) type item[size]; +#undef __array_desc +#define __array_desc(type, container, item, size) + #undef __dynamic_array #define __dynamic_array(type, item) type item[]; diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 82c51fd..c866d34 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -26,6 +26,29 @@ * type item[size]; * in the structure. * + * * for structures within structures, the format of the internal + * structure is layed out. This allows the internal structure + * to be deciphered for the format file. Although these macros + * may become out of sync with the internal structure, they + * will create a compile error if it happens. Since the + * internel structures are just tracing helpers, this is not + * an issue. + * + * When an internal structure is used, it should use: + * + * __field_struct( type, item ) + * + * instead of __field. This will prevent it from being shown in + * the output file. The fields in the structure should use. + * + * __field_desc( type, container, item ) + * __array_desc( type, container, item, len ) + * + * type, item and len are the same as __field and __array, but + * container is added. This is the name of the item in + * __field_struct that this is describing. + * + * * @print: the print format shown to users in the format file. */ @@ -50,7 +73,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, TRACE_GRAPH_ENT, F_STRUCT( - __field( struct ftrace_graph_ent, graph_ent ) + __field_struct( struct ftrace_graph_ent, graph_ent ) + __field_desc( unsigned long, graph_ent, func ) + __field_desc( int, graph_ent, depth ) ), F_printk("--> %lx (%d)", __entry->graph_ent.func, __entry->depth) @@ -62,7 +87,12 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, TRACE_GRAPH_RET, F_STRUCT( - __field( struct ftrace_graph_ret, ret ) + __field_struct( struct ftrace_graph_ret, ret ) + __field_desc( unsigned long, ret, func ) + __field_desc( unsigned long long, ret, calltime) + __field_desc( unsigned long long, ret, rettime ) + __field_desc( unsigned long, ret, overrun ) + __field_desc( int, ret, depth ) ), F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", @@ -218,7 +248,13 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, TRACE_MMIO_RW, F_STRUCT( - __field( struct mmiotrace_rw, rw ) + __field_struct( struct mmiotrace_rw, rw ) + __field_desc( resource_size_t, rw, phys ) + __field_desc( unsigned long, rw, value ) + __field_desc( unsigned long, rw, pc ) + __field_desc( int, rw, map_id ) + __field_desc( unsigned char, rw, opcode ) + __field_desc( unsigned char, rw, width ) ), F_printk("%lx %lx %lx %d %lx %lx", @@ -231,7 +267,12 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, TRACE_MMIO_MAP, F_STRUCT( - __field( struct mmiotrace_map, map ) + __field_struct( struct mmiotrace_map, map ) + __field_desc( resource_size_t, map, phys ) + __field_desc( unsigned long, map, virt ) + __field_desc( unsigned long, map, len ) + __field_desc( int, map, map_id ) + __field_desc( unsigned char, map, opcode ) ), F_printk("%lx %lx %lx %d %lx", @@ -244,7 +285,9 @@ FTRACE_ENTRY(boot_call, trace_boot_call, TRACE_BOOT_CALL, F_STRUCT( - __field( struct boot_trace_call, boot_call ) + __field_struct( struct boot_trace_call, boot_call ) + __field_desc( pid_t, boot_call, caller ) + __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN) ), F_printk("%d %s", __entry->caller, __entry->func) @@ -255,7 +298,10 @@ FTRACE_ENTRY(boot_ret, trace_boot_ret, TRACE_BOOT_RET, F_STRUCT( - __field( struct boot_trace_ret, boot_ret ) + __field_struct( struct boot_trace_ret, boot_ret ) + __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN) + __field_desc( int, boot_ret, result ) + __field_desc( unsigned long, boot_ret, duration ) ), F_printk("%s %d %lx", @@ -298,7 +344,11 @@ FTRACE_ENTRY(power, trace_power, TRACE_POWER, F_STRUCT( - __field( struct power_trace, state_data ) + __field_struct( struct power_trace, state_data ) + __field_desc( s64, state_data, stamp ) + __field_desc( s64, state_data, end ) + __field_desc( int, state_data, type ) + __field_desc( int, state_data, state ) ), F_printk("%llx->%llx type:%u state:%u", -- cgit v1.1 From 4e5292ea1ac0c2939e815e6c44fad3d8696ea281 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 12 Sep 2009 19:26:21 -0400 Subject: tracing: use the new trace_entries.h to create format files This patch changes the way the format files in debugfs/tracing/events/ftrace/*/format are created. It uses the new trace_entries.h file to automate the creation of the format files to ensure that they are always in sync with the actual structures. This is the same methodology used to create the format files for the TRACE_EVENT macro. This also updates the filter creation that was built on the creation of the format files. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 12 ++- kernel/trace/trace_events.c | 1 + kernel/trace/trace_export.c | 241 +++++++++++++++++++++++--------------------- 3 files changed, 136 insertions(+), 118 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b0d287d..86bcff9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -746,11 +747,12 @@ extern struct list_head ftrace_events; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ extern struct ftrace_event_call event_##call; -#undef TRACE_EVENT_FORMAT_NOFILTER -#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) -#include "trace_event_types.h" +#undef FTRACE_ENTRY_DUP +#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) +#include "trace_entries.h" #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f46d14c..adbed12 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -21,6 +21,7 @@ #include "trace_output.h" +#undef TRACE_SYSTEM #define TRACE_SYSTEM "TRACE_SYSTEM" DEFINE_MUTEX(event_mutex); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index df1bf6e..4cb29d8 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -15,82 +15,163 @@ #include "trace_output.h" +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ftrace -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +/* not needed for this file */ +#undef __field_struct +#define __field_struct(type, item) -extern void __bad_type_size(void); +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%zu;\tsize:%zu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - if (sizeof(type) != sizeof(field.item)) \ - __bad_type_size(); \ +#undef __field_desc +#define __field_desc(type, container, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + "offset:%zu;\tsize:%zu;\n", \ + offsetof(typeof(field), container.item), \ + sizeof(field.container.item)); \ if (!ret) \ return 0; +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%zu;\tsize:%zu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ - ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ +#undef __array_desc +#define __array_desc(type, container, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%zu;\tsize:%zu;\n", \ + offsetof(typeof(field), container.item), \ + sizeof(field.container.item)); \ if (!ret) \ return 0; -#undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ - "offset:%u;\tsize:0;\n", \ - (unsigned int)offsetof(typeof(field), item)); \ +#undef __dynamic_array +#define __dynamic_array(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%zu;\tsize:0;\n", \ + offsetof(typeof(field), item)); \ if (!ret) \ return 0; -#undef TRACE_FIELD_SIGN -#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ - TRACE_FIELD(type, item, assign) +#undef F_printk +#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) -#undef TP_RAW_FMT -#define TP_RAW_FMT(args...) args +#undef __entry +#define __entry REC -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +ftrace_format_##name(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ - struct args field; \ - int ret; \ + struct struct_name field __attribute__((unused)); \ + int ret = 0; \ \ tstruct; \ \ - trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + trace_seq_printf(s, "\nprint fmt: " print); \ \ return ret; \ } -#undef TRACE_EVENT_FORMAT_NOFILTER -#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ - tpfmt) \ -static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +#undef FTRACE_ENTRY_DUP +#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) + +#include "trace_entries.h" + + +#undef __field +#define __field(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ + if (ret) \ + return ret; + +#undef __field_desc +#define __field_desc(type, container, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), \ + container.item), \ + sizeof(field.container.item), \ + is_signed_type(type), FILTER_OTHER); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), 0, FILTER_OTHER); \ + if (ret) \ + return ret; + +#undef __array_desc +#define __array_desc(type, container, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), \ + container.item), \ + sizeof(field.container.item), 0, \ + FILTER_OTHER); \ + if (ret) \ + return ret; + +#undef __dynamic_array +#define __dynamic_array(type, item) + +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ +int \ +ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ { \ - struct args field; \ + struct struct_name field; \ int ret; \ \ - tstruct; \ + ret = trace_define_common_fields(event_call); \ + if (ret) \ + return ret; \ \ - trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + tstruct; \ \ return ret; \ } -#include "trace_event_types.h" +#include "trace_entries.h" + + +#undef __field +#define __field(type, item) + +#undef __field_desc +#define __field_desc(type, container, item) + +#undef __array +#define __array(type, item, len) + +#undef __array_desc +#define __array_desc(type, container, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item) + #undef TRACE_ZERO_CHAR #define TRACE_ZERO_CHAR(arg) @@ -117,16 +198,15 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ cmd; -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \ +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ static int ftrace_raw_init_event_##call(void); \ \ struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ - .id = proto, \ + .id = type, \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event_##call, \ .show_format = ftrace_format_##call, \ @@ -138,69 +218,4 @@ static int ftrace_raw_init_event_##call(void) \ return 0; \ } \ -#undef TRACE_EVENT_FORMAT_NOFILTER -#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ - tpfmt) \ - \ -struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .id = proto, \ - .system = __stringify(TRACE_SYSTEM), \ - .show_format = ftrace_format_##call, \ -}; - -#include "trace_event_types.h" - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), \ - is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; - -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ - if (ret) \ - return ret; - -#undef TRACE_FIELD_SIGN -#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed, \ - FILTER_OTHER); \ - if (ret) \ - return ret; - -#undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -int \ -ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ -{ \ - struct args field; \ - int ret; \ - \ - ret = trace_define_common_fields(event_call); \ - if (ret) \ - return ret; \ - \ - tstruct; \ - \ - return ret; \ -} - -#undef TRACE_EVENT_FORMAT_NOFILTER -#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ - tpfmt) - -#include "trace_event_types.h" +#include "trace_entries.h" -- cgit v1.1 From 51df5fcbc1296a84cf1c093c6cb56d40ca3e697e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 12 Sep 2009 20:29:22 -0400 Subject: tracing: remove trace_event_types.h The macros in trace_entries.h have made the code in trace_event_types.h obsolete. The file is no longer used, so this patch removes it. Signed-off-by: Steven Rostedt --- kernel/trace/trace_event_types.h | 178 --------------------------------------- 1 file changed, 178 deletions(-) delete mode 100644 kernel/trace/trace_event_types.h diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h deleted file mode 100644 index 6db005e..0000000 --- a/kernel/trace/trace_event_types.h +++ /dev/null @@ -1,178 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ftrace - -/* - * We cheat and use the proto type field as the ID - * and args as the entry type (minus 'struct') - */ -TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD(unsigned long, parent_ip, parent_ip) - ), - TP_RAW_FMT(" %lx <-- %lx") -); - -TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, - ftrace_graph_ent_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, graph_ent.func, func) - TRACE_FIELD(int, graph_ent.depth, depth) - ), - TP_RAW_FMT("--> %lx (%d)") -); - -TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, - ftrace_graph_ret_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ret.func, func) - TRACE_FIELD(unsigned long long, ret.calltime, calltime) - TRACE_FIELD(unsigned long long, ret.rettime, rettime) - TRACE_FIELD(unsigned long, ret.overrun, overrun) - TRACE_FIELD(int, ret.depth, depth) - ), - TP_RAW_FMT("<-- %lx (%d)") -); - -TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned int, prev_pid, prev_pid) - TRACE_FIELD(unsigned char, prev_prio, prev_prio) - TRACE_FIELD(unsigned char, prev_state, prev_state) - TRACE_FIELD(unsigned int, next_pid, next_pid) - TRACE_FIELD(unsigned char, next_prio, next_prio) - TRACE_FIELD(unsigned char, next_state, next_state) - TRACE_FIELD(unsigned int, next_cpu, next_cpu) - ), - TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") -); - -TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned int, prev_pid, prev_pid) - TRACE_FIELD(unsigned char, prev_prio, prev_prio) - TRACE_FIELD(unsigned char, prev_state, prev_state) - TRACE_FIELD(unsigned int, next_pid, next_pid) - TRACE_FIELD(unsigned char, next_prio, next_prio) - TRACE_FIELD(unsigned char, next_state, next_state) - TRACE_FIELD(unsigned int, next_cpu, next_cpu) - ), - TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") -); - -TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, arg1, arg1) - TRACE_FIELD(unsigned long, arg2, arg2) - TRACE_FIELD(unsigned long, arg3, arg3) - ), - TP_RAW_FMT("(%08lx) (%08lx) (%08lx)") -); - -/* - * Stack-trace entry: - */ - -/* #define FTRACE_STACK_ENTRIES 8 */ - -TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, caller[0], stack0) - TRACE_FIELD(unsigned long, caller[1], stack1) - TRACE_FIELD(unsigned long, caller[2], stack2) - TRACE_FIELD(unsigned long, caller[3], stack3) - TRACE_FIELD(unsigned long, caller[4], stack4) - TRACE_FIELD(unsigned long, caller[5], stack5) - TRACE_FIELD(unsigned long, caller[6], stack6) - TRACE_FIELD(unsigned long, caller[7], stack7) - ), - TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" - "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") -); - -TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, caller[0], stack0) - TRACE_FIELD(unsigned long, caller[1], stack1) - TRACE_FIELD(unsigned long, caller[2], stack2) - TRACE_FIELD(unsigned long, caller[3], stack3) - TRACE_FIELD(unsigned long, caller[4], stack4) - TRACE_FIELD(unsigned long, caller[5], stack5) - TRACE_FIELD(unsigned long, caller[6], stack6) - TRACE_FIELD(unsigned long, caller[7], stack7) - ), - TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" - "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") -); - -TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD(char *, fmt, fmt) - TRACE_FIELD_ZERO_CHAR(buf) - ), - TP_RAW_FMT("%08lx (%d) fmt:%p %s") -); - -TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD_ZERO_CHAR(buf) - ), - TP_RAW_FMT("%08lx (%d) fmt:%p %s") -); - -TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned int, line, line) - TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, - TRACE_FUNC_SIZE+1, func) - TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, - TRACE_FUNC_SIZE+1, file) - TRACE_FIELD(char, correct, correct) - ), - TP_RAW_FMT("%u:%s:%s (%u)") -); - -TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(u64, from, from) - TRACE_FIELD(u64, to, to) - ), - TP_RAW_FMT("from: %llx to: %llx") -); - -TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, - TRACE_STRUCT( - TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1) - TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1) - TRACE_FIELD(int, state_data.type, type) - TRACE_FIELD(int, state_data.state, state) - ), - TP_RAW_FMT("%llx->%llx type:%u state:%u") -); - -TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) - TRACE_FIELD(unsigned long, call_site, call_site) - TRACE_FIELD(const void *, ptr, ptr) - TRACE_FIELD(size_t, bytes_req, bytes_req) - TRACE_FIELD(size_t, bytes_alloc, bytes_alloc) - TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) - TRACE_FIELD(int, node, node) - ), - TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" - " flags:%x node:%d") -); - -TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) - TRACE_FIELD(unsigned long, call_site, call_site) - TRACE_FIELD(const void *, ptr, ptr) - ), - TP_RAW_FMT("type:%u call_site:%lx ptr:%p") -); - -#undef TRACE_SYSTEM -- cgit v1.1 From 60ba77022712c7cda0eda286154bae160446b24a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 12 Sep 2009 23:34:04 -0400 Subject: tracing: add filter event logic to special, mmiotrace and boot tracers Now that the pluging tracers use macros to create the structures and automate the exporting of their formats to the format files, they also automatically get a filter file. This patch adds the code to implement the filter logic in the trace recordings. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 ++++- kernel/trace/trace_boot.c | 8 ++++++-- kernel/trace/trace_mmiotrace.c | 10 ++++++++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ef82a7f..fd52a19 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1206,6 +1206,7 @@ ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { + struct ftrace_event_call *call = &event_special; struct ring_buffer_event *event; struct trace_array *tr = __tr; struct ring_buffer *buffer = tr->buffer; @@ -1219,7 +1220,9 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - trace_buffer_unlock_commit(buffer, event, 0, pc); + + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, pc); } void diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 19bfc75..c21d5f3 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -129,6 +129,7 @@ struct tracer boot_tracer __read_mostly = void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { + struct ftrace_event_call *call = &event_boot_call; struct ring_buffer_event *event; struct ring_buffer *buffer; struct trace_boot_call *entry; @@ -150,13 +151,15 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) goto out; entry = ring_buffer_event_data(event); entry->boot_call = *bt; - trace_buffer_unlock_commit(buffer, event, 0, 0); + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, 0); out: preempt_enable(); } void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { + struct ftrace_event_call *call = &event_boot_ret; struct ring_buffer_event *event; struct ring_buffer *buffer; struct trace_boot_ret *entry; @@ -175,7 +178,8 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) goto out; entry = ring_buffer_event_data(event); entry->boot_ret = *bt; - trace_buffer_unlock_commit(buffer, event, 0, 0); + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, 0); out: preempt_enable(); } diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index c4c9bbd..0acd834 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -307,6 +307,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_rw *rw) { + struct ftrace_event_call *call = &event_mmiotrace_rw; struct ring_buffer *buffer = tr->buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; @@ -320,7 +321,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, } entry = ring_buffer_event_data(event); entry->rw = *rw; - trace_buffer_unlock_commit(buffer, event, 0, pc); + + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, pc); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -334,6 +337,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_map *map) { + struct ftrace_event_call *call = &event_mmiotrace_map; struct ring_buffer *buffer = tr->buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; @@ -347,7 +351,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr, } entry = ring_buffer_event_data(event); entry->map = *map; - trace_buffer_unlock_commit(buffer, event, 0, pc); + + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, pc); } void mmio_trace_mapping(struct mmiotrace_map *map) -- cgit v1.1 From 08a408161749d2406f94f4e3d47cfdbc826ad1cc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Sep 2009 09:31:35 -0400 Subject: ring-buffer: typecast cmpxchg to fix PowerPC warning The cmpxchg used by PowerPC does the following: ({ \ __typeof__(*(ptr)) _o_ = (o); \ __typeof__(*(ptr)) _n_ = (n); \ (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ (unsigned long)_n_, sizeof(*(ptr))); \ }) This does a type check of *ptr to both o and n. Unfortunately, the code in ring-buffer.c assigns longs to pointers and pointers to longs and causes a warning on PowerPC: ring_buffer.c: In function 'rb_head_page_set': ring_buffer.c:704: warning: initialization makes pointer from integer without a cast ring_buffer.c:704: warning: initialization makes pointer from integer without a cast ring_buffer.c: In function 'rb_head_page_replace': ring_buffer.c:797: warning: initialization makes integer from pointer without a cast This patch adds the typecasts inside cmpxchg to annotate that a long is being cast to a pointer and a pointer is being casted to a long and this removes the PowerPC warnings. Reported-by: Stephen Rothwell Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8786c35..6eef389 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -701,8 +701,8 @@ static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, val &= ~RB_FLAG_MASK; - ret = (unsigned long)cmpxchg(&list->next, - val | old_flag, val | new_flag); + ret = cmpxchg((unsigned long *)&list->next, + val | old_flag, val | new_flag); /* check if the reader took the page */ if ((ret & ~RB_FLAG_MASK) != val) @@ -794,7 +794,7 @@ static int rb_head_page_replace(struct buffer_page *old, val = *ptr & ~RB_FLAG_MASK; val |= RB_PAGE_HEAD; - ret = cmpxchg(ptr, val, &new->list); + ret = cmpxchg(ptr, val, (unsigned long)&new->list); return ret == val; } -- cgit v1.1 From ec827c7ece8901044e6b3f92aeea489be9e1bcf7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Sep 2009 10:50:23 -0400 Subject: tracing: add static to generated TRACE_EVENT functions Some of the generated functions used in the TRACE_EVENT macros are not declared static, but they are not global. Discovered by sparse. Reported-by: Jaswinder Singh Rajput Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ++-- kernel/trace/trace_events.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 308bafd..fa8ce03 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -254,7 +254,7 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -enum print_line_t \ +static enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ { \ struct trace_seq *s = &iter->seq; \ @@ -317,7 +317,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -int \ +static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ struct ftrace_raw_##call field; \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index adbed12..0fa8f9f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1154,7 +1154,7 @@ static int trace_module_notify(struct notifier_block *self, } #endif /* CONFIG_MODULES */ -struct notifier_block trace_module_nb = { +static struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, .priority = 0, }; -- cgit v1.1 From a48f494e1dbdcf4fb7c02100ae3208c4c1daecbf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Sep 2009 11:18:02 -0400 Subject: tracing: have TRACE_EVENT macro use __flags to not shadow parameter The generated functions of TRACE_EVENT uses "flags" in one of the sub macros which shadows a parameter in the outside macro. Simple fix is to make the submacro use __flags instead. Discovered by sparse. Reported-by: Jaswinder Singh Rajput Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index fa8ce03..72a3b43 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -239,9 +239,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ ({ \ - static const struct trace_print_flags flags[] = \ + static const struct trace_print_flags __flags[] = \ { flag_array, { -1, NULL }}; \ - ftrace_print_flags_seq(p, delim, flag, flags); \ + ftrace_print_flags_seq(p, delim, flag, __flags); \ }) #undef __print_symbolic -- cgit v1.1 From c16de8fd7a608aba8708dd056cf6e4d9462e800a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 14 Sep 2009 15:51:39 +0800 Subject: tracing: fix F_printk() typos I found some typos in F_printk(), so I wrote compile-time check for it, and triggered some compile errors and warnings. I've fixed them on x86_32, but I have no x86_64 in my hand, so there may still be some compile warnings on 64bits. Signed-off-by: Li Zefan LKML-Reference: <4AADF60B.5070407@cn.fujitsu.com> [ tested on x86_64, and works fine ] Signed-off-by: Steven Rostedt --- kernel/trace/trace_entries.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index c866d34..0c10095 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -78,7 +78,7 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, __field_desc( int, graph_ent, depth ) ), - F_printk("--> %lx (%d)", __entry->graph_ent.func, __entry->depth) + F_printk("--> %lx (%d)", __entry->func, __entry->depth) ); /* Function return entry */ @@ -97,8 +97,8 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", __entry->func, __entry->depth, - __entry->calltime, __entry->rettim, - __entrty->depth) + __entry->calltime, __entry->rettime, + __entry->depth) ); /* @@ -133,7 +133,7 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry, FTRACE_CTX_FIELDS ), - F_printk(b"%u:%u:%u ==> %u:%u:%u [%03u]", + F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", __entry->prev_pid, __entry->prev_prio, __entry->prev_state, __entry->next_pid, __entry->next_prio, __entry->next_state, __entry->next_cpu @@ -257,8 +257,8 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, __field_desc( unsigned char, rw, width ) ), - F_printk("%lx %lx %lx %d %lx %lx", - __entry->phs, __entry->value, __entry->pc, + F_printk("%lx %lx %lx %d %x %x", + (unsigned long)__entry->phys, __entry->value, __entry->pc, __entry->map_id, __entry->opcode, __entry->width) ); @@ -275,8 +275,8 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, __field_desc( unsigned char, map, opcode ) ), - F_printk("%lx %lx %lx %d %lx", - __entry->phs, __entry->virt, __entry->len, + F_printk("%lx %lx %lx %d %x", + (unsigned long)__entry->phys, __entry->virt, __entry->len, __entry->map_id, __entry->opcode) ); @@ -370,7 +370,7 @@ FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, __field( int, node ) ), - F_printk("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" + F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi" " flags:%x node:%d", __entry->type_id, __entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, -- cgit v1.1 From 05ffa2d02066c2cb169c02d5417308ee8ecab638 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 14 Sep 2009 15:54:52 +0800 Subject: ftrace: add compile-time check on F_printk() Make sure F_printk() has corrent format and args, and make sure changes in F_STRUCT() won't break F_printk(). Signed-off-by: Li Zefan LKML-Reference: <4AADF6CC.1060809@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_export.c | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4cb29d8..2435d55 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -23,6 +23,47 @@ #define __field_struct(type, item) #undef __field +#define __field(type, item) type item; + +#undef __field_desc +#define __field_desc(type, container, item) type item; + +#undef __array +#define __array(type, item, size) type item[size]; + +#undef __array_desc +#define __array_desc(type, container, item, size) type item[size]; + +#undef __dynamic_array +#define __dynamic_array(type, item) type item[]; + +#undef F_STRUCT +#define F_STRUCT(args...) args + +#undef F_printk +#define F_printk(fmt, args...) fmt, args + +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ +struct ____ftrace_##name { \ + tstruct \ +}; \ +static void __used ____ftrace_check_##name(void) \ +{ \ + struct ____ftrace_##name *__entry = NULL; \ + \ + /* force cmpile-time check on F_printk() */ \ + printk(print); \ +} + +#undef FTRACE_ENTRY_DUP +#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) + +#include "trace_entries.h" + + +#undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ "offset:%zu;\tsize:%zu;\n", \ @@ -88,10 +129,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ return ret; \ } -#undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) - #include "trace_entries.h" -- cgit v1.1 From 20a58a77231c5f5f61470932503b889303e8d4f3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 14 Sep 2009 15:55:18 +0800 Subject: tracing: remove some unused macros - remove FTRACE_ENTRY_STRUCT_ONLY() - remove TRACE_XXX() macros Signed-off-by: Li Zefan LKML-Reference: <4AADF6E6.3080606@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_entries.h | 9 --------- kernel/trace/trace_export.c | 26 -------------------------- 2 files changed, 35 deletions(-) diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 0c10095..a431748 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -116,15 +116,6 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, __field( unsigned char, next_state ) \ __field( unsigned int, next_cpu ) -#if 0 -FTRACE_ENTRY_STRUCT_ONLY(ctx_switch_entry, - - F_STRUCT( - FTRACE_CTX_FIELDS - ) -); -#endif - FTRACE_ENTRY(context_switch, ctx_switch_entry, TRACE_CTX, diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 2435d55..9753fcc 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -209,32 +209,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #undef __dynamic_array #define __dynamic_array(type, item) - -#undef TRACE_ZERO_CHAR -#define TRACE_ZERO_CHAR(arg) - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TRACE_FIELD_SIGN -#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ - TRACE_FIELD(type, item, assign) - -#undef TP_CMD -#define TP_CMD(cmd...) cmd - -#undef TRACE_ENTRY -#define TRACE_ENTRY entry - -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ - cmd; - #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ static int ftrace_raw_init_event_##call(void); \ -- cgit v1.1 From 1f5a6b45416694ff8c0d04625f1a438a0e380add Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Sep 2009 11:58:24 -0400 Subject: tracing: make testing syscall events a separate configuration Parag noticed that the number of event tests has increased tremendously: grep "Testing event" dmesg.31rc9 |wc -l 100 grep "Testing event" dmesg.31git |wc -l 1172 This is due to the testing of every syscall event when ftrace self test is enabled. This adds a bit more time to kernel boot up and can affect development by slowing down the time it takes between reboots. This option makes the testing of the syscall events into a separate config, to still be able to test most of ftrace internals at boot up but not have to wait for all the syscall events to be tested. The syscall event testing only tests the enabling and disabling of the trace point, since the syscalls are not executed. What really needs to be done is to somehow have a userspace tool test the syscall tracepoints as well. Reported-by: Parag Warudkar LKML-Reference: Signed-off-by: Steven Rostedt --- kernel/trace/Kconfig | 12 ++++++++++++ kernel/trace/trace_events.c | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1ea0d12..aa002ce 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -469,6 +469,18 @@ config FTRACE_STARTUP_TEST functioning properly. It will do tests on all the configured tracers of ftrace. +config EVENT_TRACE_TEST_SYSCALLS + bool "Run selftest on syscall events" + depends on FTRACE_STARTUP_TEST + help + This option will also enable testing every syscall event. + It only enables the event and disables it and runs various loads + with the event enabled. This adds a bit more time for kernel boot + up since it runs this on every system call defined. + + TBD - enable a way to actually call the syscalls as we test their + events + config MMIOTRACE bool "Memory mapped IO tracing" depends on HAVE_MMIOTRACE_SUPPORT && PCI diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0fa8f9f..787f0fb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1326,6 +1326,18 @@ static __init void event_trace_self_tests(void) if (!call->regfunc) continue; +/* + * Testing syscall events here is pretty useless, but + * we still do it if configured. But this is time consuming. + * What we really need is a user thread to perform the + * syscalls as we test. + */ +#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS + if (call->system && + strcmp(call->system, "syscalls") == 0) + continue; +#endif + pr_info("Testing event %s: ", call->name); /* -- cgit v1.1 From 555f386c98cc93890f48fdea098936755270304b Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 14 Sep 2009 20:10:15 -0400 Subject: ftrace: document function and function graph implementation While implementing function tracer and function tracer graph support, I found the exact arch implementation details to be a bit lacking (and my x86 foo ain't great). So after pounding out support for the Blackfin arch, start documenting the requirements/details. Signed-off-by: Mike Frysinger LKML-Reference: <1252973415-21264-1-git-send-email-vapier@gentoo.org> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 233 ++++++++++++++++++++++++++++++++++ Documentation/trace/ftrace.txt | 6 + kernel/trace/Kconfig | 16 ++- 3 files changed, 252 insertions(+), 3 deletions(-) create mode 100644 Documentation/trace/ftrace-design.txt diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt new file mode 100644 index 0000000..7003e10 --- /dev/null +++ b/Documentation/trace/ftrace-design.txt @@ -0,0 +1,233 @@ + function tracer guts + ==================== + +Introduction +------------ + +Here we will cover the architecture pieces that the common function tracing +code relies on for proper functioning. Things are broken down into increasing +complexity so that you can start simple and at least get basic functionality. + +Note that this focuses on architecture implementation details only. If you +want more explanation of a feature in terms of common code, review the common +ftrace.txt file. + + +Prerequisites +------------- + +Ftrace relies on these features being implemented: + STACKTRACE_SUPPORT - implement save_stack_trace() + TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h + + +HAVE_FUNCTION_TRACER +-------------------- + +You will need to implement the mcount and the ftrace_stub functions. + +The exact mcount symbol name will depend on your toolchain. Some call it +"mcount", "_mcount", or even "__mcount". You can probably figure it out by +running something like: + $ echo 'main(){}' | gcc -x c -S -o - - -pg | grep mcount + call mcount +We'll make the assumption below that the symbol is "mcount" just to keep things +nice and simple in the examples. + +Keep in mind that the ABI that is in effect inside of the mcount function is +*highly* architecture/toolchain specific. We cannot help you in this regard, +sorry. Dig up some old documentation and/or find someone more familiar than +you to bang ideas off of. Typically, register usage (argument/scratch/etc...) +is a major issue at this point, especially in relation to the location of the +mcount call (before/after function prologue). You might also want to look at +how glibc has implemented the mcount function for your architecture. It might +be (semi-)relevant. + +The mcount function should check the function pointer ftrace_trace_function +to see if it is set to ftrace_stub. If it is, there is nothing for you to do, +so return immediately. If it isn't, then call that function in the same way +the mcount function normally calls __mcount_internal -- the first argument is +the "frompc" while the second argument is the "selfpc" (adjusted to remove the +size of the mcount call that is embedded in the function). + +For example, if the function foo() calls bar(), when the bar() function calls +mcount(), the arguments mcount() will pass to the tracer are: + "frompc" - the address bar() will use to return to foo() + "selfpc" - the address bar() (with _mcount() size adjustment) + +Also keep in mind that this mcount function will be called *a lot*, so +optimizing for the default case of no tracer will help the smooth running of +your system when tracing is disabled. So the start of the mcount function is +typically the bare min with checking things before returning. That also means +the code flow should usually kept linear (i.e. no branching in the nop case). +This is of course an optimization and not a hard requirement. + +Here is some pseudo code that should help (these functions should actually be +implemented in assembly): + +void ftrace_stub(void) +{ + return; +} + +void mcount(void) +{ + /* save any bare state needed in order to do initial checking */ + + extern void (*ftrace_trace_function)(unsigned long, unsigned long); + if (ftrace_trace_function != ftrace_stub) + goto do_trace; + + /* restore any bare state */ + + return; + +do_trace: + + /* save all state needed by the ABI (see paragraph above) */ + + unsigned long frompc = ...; + unsigned long selfpc = - MCOUNT_INSN_SIZE; + ftrace_trace_function(frompc, selfpc); + + /* restore all state needed by the ABI */ +} + +Don't forget to export mcount for modules ! +extern void mcount(void); +EXPORT_SYMBOL(mcount); + + +HAVE_FUNCTION_TRACE_MCOUNT_TEST +------------------------------- + +This is an optional optimization for the normal case when tracing is turned off +in the system. If you do not enable this Kconfig option, the common ftrace +code will take care of doing the checking for you. + +To support this feature, you only need to check the function_trace_stop +variable in the mcount function. If it is non-zero, there is no tracing to be +done at all, so you can return. + +This additional pseudo code would simply be: +void mcount(void) +{ + /* save any bare state needed in order to do initial checking */ + ++ if (function_trace_stop) ++ return; + + extern void (*ftrace_trace_function)(unsigned long, unsigned long); + if (ftrace_trace_function != ftrace_stub) +... + + +HAVE_FUNCTION_GRAPH_TRACER +-------------------------- + +Deep breath ... time to do some real work. Here you will need to update the +mcount function to check ftrace graph function pointers, as well as implement +some functions to save (hijack) and restore the return address. + +The mcount function should check the function pointers ftrace_graph_return +(compare to ftrace_stub) and ftrace_graph_entry (compare to +ftrace_graph_entry_stub). If either of those are not set to the relevant stub +function, call the arch-specific function ftrace_graph_caller which in turn +calls the arch-specific function prepare_ftrace_return. Neither of these +function names are strictly required, but you should use them anyways to stay +consistent across the architecture ports -- easier to compare & contrast +things. + +The arguments to prepare_ftrace_return are slightly different than what are +passed to ftrace_trace_function. The second argument "selfpc" is the same, +but the first argument should be a pointer to the "frompc". Typically this is +located on the stack. This allows the function to hijack the return address +temporarily to have it point to the arch-specific function return_to_handler. +That function will simply call the common ftrace_return_to_handler function and +that will return the original return address with which, you can return to the +original call site. + +Here is the updated mcount pseudo code: +void mcount(void) +{ +... + if (ftrace_trace_function != ftrace_stub) + goto do_trace; + ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ extern void (*ftrace_graph_return)(...); ++ extern void (*ftrace_graph_entry)(...); ++ if (ftrace_graph_return != ftrace_stub || ++ ftrace_graph_entry != ftrace_graph_entry_stub) ++ ftrace_graph_caller(); ++#endif + + /* restore any bare state */ +... + +Here is the pseudo code for the new ftrace_graph_caller assembly function: +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void ftrace_graph_caller(void) +{ + /* save all state needed by the ABI */ + + unsigned long *frompc = &...; + unsigned long selfpc = - MCOUNT_INSN_SIZE; + prepare_ftrace_return(frompc, selfpc); + + /* restore all state needed by the ABI */ +} +#endif + +For information on how to implement prepare_ftrace_return(), simply look at +the x86 version. The only architecture-specific piece in it is the setup of +the fault recovery table (the asm(...) code). The rest should be the same +across architectures. + +Here is the pseudo code for the new return_to_handler assembly function. Note +that the ABI that applies here is different from what applies to the mcount +code. Since you are returning from a function (after the epilogue), you might +be able to skimp on things saved/restored (usually just registers used to pass +return values). + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void return_to_handler(void) +{ + /* save all state needed by the ABI (see paragraph above) */ + + void (*original_return_point)(void) = ftrace_return_to_handler(); + + /* restore all state needed by the ABI */ + + /* this is usually either a return or a jump */ + original_return_point(); +} +#endif + + +HAVE_FTRACE_NMI_ENTER +--------------------- + +If you can't trace NMI functions, then skip this option. + +
+ + +HAVE_FTRACE_SYSCALLS +--------------------- + +
+ + +HAVE_FTRACE_MCOUNT_RECORD +------------------------- + +See scripts/recordmcount.pl for more info. + +
+ + +HAVE_DYNAMIC_FTRACE +--------------------- + +
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 355d0f1..1b6292b 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -26,6 +26,12 @@ disabled, and more (ftrace allows for tracer plugins, which means that the list of tracers can always grow). +Implementation Details +---------------------- + +See ftrace-design.txt for details for arch porters and such. + + The File System --------------- diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index aa002ce..e716346 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -11,12 +11,18 @@ config NOP_TRACER config HAVE_FTRACE_NMI_ENTER bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_FUNCTION_TRACER bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_FUNCTION_GRAPH_TRACER bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_FUNCTION_GRAPH_FP_TEST bool @@ -28,21 +34,25 @@ config HAVE_FUNCTION_GRAPH_FP_TEST config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool help - This gets selected when the arch tests the function_trace_stop - variable at the mcount call site. Otherwise, this variable - is tested by the called function. + See Documentation/trace/ftrace-implementation.txt config HAVE_DYNAMIC_FTRACE bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_FTRACE_MCOUNT_RECORD bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_HW_BRANCH_TRACER bool config HAVE_SYSCALL_TRACEPOINTS bool + help + See Documentation/trace/ftrace-implementation.txt config TRACER_MAX_TRACE bool -- cgit v1.1 From 983f2163e7fdf11a15e05816de243f93f07eafca Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 15 Sep 2009 12:29:20 +0200 Subject: MAINTAINERS: Update tracing tree details Acked-by: Steven Rostedt Acked-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- MAINTAINERS | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8dca9d8..1505129 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2118,13 +2118,16 @@ F: Documentation/filesystems/caching/ F: fs/fscache/ F: include/linux/fscache*.h -FTRACE +TRACING M: Steven Rostedt +M: Frederic Weisbecker +M: Ingo Molnar +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core S: Maintained F: Documentation/trace/ftrace.txt F: arch/*/*/*/ftrace.h F: arch/*/kernel/ftrace.c -F: include/*/ftrace.h +F: include/*/ftrace.h include/trace/ include/linux/trace*.h F: kernel/trace/ FUJITSU FR-V (FRV) PORT -- cgit v1.1 From 6ca6cca31ddc7cc8b1dc38b12d7593d2667defe8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 15 Sep 2009 12:24:22 -0400 Subject: tracing: optimize global_trace_clock cachelines The prev_trace_clock_time is only read or written to when the trace_clock_lock is taken. For better perfomance, they should share the same cache line. Reported-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- kernel/trace/trace_clock.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index b588fd8..20c5f92 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -66,10 +66,14 @@ u64 notrace trace_clock(void) * Used by plugins that need globally coherent timestamps. */ -static u64 prev_trace_clock_time; - -static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; +/* keep prev_time and lock in the same cacheline. */ +static struct { + u64 prev_time; + raw_spinlock_t lock; +} trace_clock_struct ____cacheline_aligned_in_smp = + { + .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, + }; u64 notrace trace_clock_global(void) { @@ -88,19 +92,19 @@ u64 notrace trace_clock_global(void) if (unlikely(in_nmi())) goto out; - __raw_spin_lock(&trace_clock_lock); + __raw_spin_lock(&trace_clock_struct.lock); /* * TODO: if this happens often then maybe we should reset - * my_scd->clock to prev_trace_clock_time+1, to make sure + * my_scd->clock to prev_time+1, to make sure * we start ticking with the local clock from now on? */ - if ((s64)(now - prev_trace_clock_time) < 0) - now = prev_trace_clock_time + 1; + if ((s64)(now - trace_clock_struct.prev_time) < 0) + now = trace_clock_struct.prev_time + 1; - prev_trace_clock_time = now; + trace_clock_struct.prev_time = now; - __raw_spin_unlock(&trace_clock_lock); + __raw_spin_unlock(&trace_clock_struct.lock); out: raw_local_irq_restore(flags); -- cgit v1.1 From fe832a3a48737b24f95fab202b1c67fb588b071d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 15 Sep 2009 23:51:31 -0400 Subject: tracing: remove notrace from __kprobes annotation When ftrace had issues with NMIs, it was needed to annotate all the areas that kprobes had issues with notrace. Now that ftrace is NMI safe, the functions that limit ftrace from tracing are just a small few. Kprobes is too big of a set for ftrace not to trace. Remove the coupling. Signed-off-by: Steven Rostedt --- include/linux/kprobes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index bcd9c07..3a46b7b 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -48,13 +48,13 @@ #define KPROBE_HIT_SSDONE 0x00000008 /* Attach to insert probes on any functions which should be ignored*/ -#define __kprobes __attribute__((__section__(".kprobes.text"))) notrace +#define __kprobes __attribute__((__section__(".kprobes.text"))) #else /* CONFIG_KPROBES */ typedef int kprobe_opcode_t; struct arch_specific_insn { int dummy; }; -#define __kprobes notrace +#define __kprobes #endif /* CONFIG_KPROBES */ struct kprobe; -- cgit v1.1 From b36461da2a0389149d7f88f3cbc05a30d1db9faa Mon Sep 17 00:00:00 2001 From: Atsushi Tsuji Date: Tue, 15 Sep 2009 19:06:30 +0900 Subject: tracing: Fix minor bugs for __unregister_ftrace_function_probe Fix the condition of strcmp for "*". Also fix NULL pointer dereference when glob is NULL. Signed-off-by: Atsushi Tsuji LKML-Reference: <4AAF6726.5090905@bk.jp.nec.com> Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8b23d56..f7ab7fc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2062,9 +2062,9 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, int i, len = 0; char *search; - if (glob && (strcmp(glob, "*") || !strlen(glob))) + if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) glob = NULL; - else { + else if (glob) { int not; type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); -- cgit v1.1 From 91adcd2c4b104a8ce2973e6e84b01fd48735ffc6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 16 Sep 2009 20:03:06 -0400 Subject: vsprintf: add %ps that is the same as %pS but is like %pf On PowerPC64 function pointers do not point directly at the functions, but instead point to pointers to the functions. The output of %pF expects to point to a pointer to the function, whereas %pS will show the function itself. mcount returns the direct pointer to the function and not the pointer to the pointer. Thus %pS must be used to show this. The function tracer requires printing of the functions without offsets and uses the %pf instead. %pF produces run_local_timers+0x4/0x1f %pf produces just run_local_timers For PowerPC64, we need to use the direct pointer, and we only have %pS which will produce .run_local_timers+0x4/0x1f This patch creates a %ps that matches the %pf as %pS matches %pF. Cc: Linus Torvalds Cc: Zhao Lei Acked-by: Benjamin Herrenschmidt Signed-off-by: Steven Rostedt --- lib/vsprintf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index cb8a112..c8f3ed6 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -581,7 +581,7 @@ static char *symbol_string(char *buf, char *end, void *ptr, unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; - if (ext != 'f') + if (ext != 'f' && ext != 's') sprint_symbol(sym, value); else kallsyms_lookup(value, NULL, NULL, NULL, sym); @@ -822,6 +822,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'F': case 'f': ptr = dereference_function_descriptor(ptr); + case 's': /* Fallthrough */ case 'S': return symbol_string(buf, end, ptr, spec, *fmt); @@ -1063,7 +1064,8 @@ qualifier: * @args: Arguments for the format string * * This function follows C99 vsnprintf, but has some extensions: - * %pS output the name of a text symbol + * %pS output the name of a text symbol with offset + * %ps output the name of a text symbol without offset * %pF output the name of a function pointer with its offset * %pf output the name of a function pointer without its offset * %pR output the address range in a struct resource -- cgit v1.1 From b375a11a239e9e1cac40c7f3ff28b343d9f7ac51 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 17 Sep 2009 00:05:58 -0400 Subject: tracing: switch function prints from %pf to %ps For direct function pointers (like what mcount provides) PowerPC64 requires the use of %ps, otherwise nothing is printed. This patch converts all prints of functions retrieved through mcount to use the %ps format from the %pf. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 +++--- kernel/trace/trace_functions.c | 2 +- kernel/trace/trace_functions_graph.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f7ab7fc..cc615f8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1405,7 +1405,7 @@ static int t_hash_show(struct seq_file *m, void *v) if (rec->ops->print) return rec->ops->print(m, rec->ip, rec->ops, rec->data); - seq_printf(m, "%pf:%pf", (void *)rec->ip, (void *)rec->ops->func); + seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func); if (rec->data) seq_printf(m, ":%p", rec->data); @@ -1515,7 +1515,7 @@ static int t_show(struct seq_file *m, void *v) if (!rec) return 0; - seq_printf(m, "%pf\n", (void *)rec->ip); + seq_printf(m, "%ps\n", (void *)rec->ip); return 0; } @@ -2456,7 +2456,7 @@ static int g_show(struct seq_file *m, void *v) return 0; } - seq_printf(m, "%pf\n", (void *)*ptr); + seq_printf(m, "%ps\n", (void *)*ptr); return 0; } diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 5b01b94..b3f3776 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -290,7 +290,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, { long count = (long)data; - seq_printf(m, "%pf:", (void *)ip); + seq_printf(m, "%ps:", (void *)ip); if (ops == &traceon_probe_ops) seq_printf(m, "traceon"); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 61f1667..45e6c01 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -124,7 +124,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, if (unlikely(current->ret_stack[index].fp != frame_pointer)) { ftrace_graph_stop(); WARN(1, "Bad frame pointer: expected %lx, received %lx\n" - " from func %pF return to %lx\n", + " from func %ps return to %lx\n", current->ret_stack[index].fp, frame_pointer, (void *)current->ret_stack[index].func, @@ -669,7 +669,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } - ret = trace_seq_printf(s, "%pf();\n", (void *)call->func); + ret = trace_seq_printf(s, "%ps();\n", (void *)call->func); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -712,7 +712,7 @@ print_graph_entry_nested(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } - ret = trace_seq_printf(s, "%pf() {\n", (void *)call->func); + ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func); if (!ret) return TRACE_TYPE_PARTIAL_LINE; -- cgit v1.1 From 9a5963eb494c8a7be65199e15854551df7d2d939 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 16 Sep 2009 21:56:49 +0200 Subject: oprofile: fix oprofile regression: select RING_BUFFER_ALLOW_SWAP commit 85bac32c4a52c592b857f2c360cc5ec93a097d70 ring-buffer: only enable ring_buffer_swap_cpu when needed broke oprofile (at least on s390, but likely on all platforms). this patch lets oprofile select RING_BUFER_ALLOW_SWAP to make ring_buffer_swap_cpu usable for oprofile. Signed-off-by: Christian Borntraeger LKML-Reference: <200909162156.49239.borntraeger@de.ibm.com> Cc: Ingo Molnar Cc: Robert Richter Signed-off-by: Steven Rostedt --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index beea3cc..7f418bb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -9,6 +9,7 @@ config OPROFILE depends on TRACING_SUPPORT select TRACING select RING_BUFFER + select RING_BUFFER_ALLOW_SWAP help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, -- cgit v1.1 From 5dd4de587fd9c25cb32a7a0fe9feec3647509b6f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 17 Sep 2009 17:38:32 +0800 Subject: softirq: add BLOCK_IOPOLL to softirq_to_name With BLOCK_IOPOLL_SOFTIRQ added, softirq_to_name[] and show_softirq_name() needs to be updated. Signed-off-by: Li Zefan LKML-Reference: <4AB20398.8070209@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/events/irq.h | 21 +++++++++++---------- kernel/softirq.c | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 1cb0c3a..b89f9db 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -8,16 +8,17 @@ #include #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } -#define show_softirq_name(val) \ - __print_symbolic(val, \ - softirq_name(HI), \ - softirq_name(TIMER), \ - softirq_name(NET_TX), \ - softirq_name(NET_RX), \ - softirq_name(BLOCK), \ - softirq_name(TASKLET), \ - softirq_name(SCHED), \ - softirq_name(HRTIMER), \ +#define show_softirq_name(val) \ + __print_symbolic(val, \ + softirq_name(HI), \ + softirq_name(TIMER), \ + softirq_name(NET_TX), \ + softirq_name(NET_RX), \ + softirq_name(BLOCK), \ + softirq_name(BLOCK_IOPOLL), \ + softirq_name(TASKLET), \ + softirq_name(SCHED), \ + softirq_name(HRTIMER), \ softirq_name(RCU)) /** diff --git a/kernel/softirq.c b/kernel/softirq.c index 7db2506..f8749e5 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -57,7 +57,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { - "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", + "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", "TASKLET", "SCHED", "HRTIMER", "RCU" }; -- cgit v1.1 From 0efb4d20723d58edbad29d1ff98a86b631adb5e6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 17 Sep 2009 09:27:29 -0400 Subject: vsnprintf: remove duplicate comment of vsnprintf Remove the duplicate comment of bstr_printf that is the same as the vsnprintf. Add the 's' option to the comment for the pointer function. This is more of an internal function so the little duplication of the comment here is OK. Reported-by: Zhaolei Cc: Linus Torvalds Signed-off-by: Steven Rostedt --- lib/vsprintf.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c8f3ed6..d320c18 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -794,7 +794,8 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr, * * - 'F' For symbolic function descriptor pointers with offset * - 'f' For simple symbolic function names without offset - * - 'S' For symbolic direct pointers + * - 'S' For symbolic direct pointers with offset + * - 's' For symbolic direct pointers without offset * - 'R' For a struct resource pointer, it prints the range of * addresses (not the name nor the flags) * - 'M' For a 6-byte MAC address, it prints the address in the @@ -1069,6 +1070,7 @@ qualifier: * %pF output the name of a function pointer with its offset * %pf output the name of a function pointer without its offset * %pR output the address range in a struct resource + * %n is ignored * * The return value is the number of characters which would * be generated for the given input, excluding the trailing @@ -1524,11 +1526,7 @@ EXPORT_SYMBOL_GPL(vbin_printf); * a binary buffer that generated by vbin_printf. * * The format follows C99 vsnprintf, but has some extensions: - * %pS output the name of a text symbol - * %pF output the name of a function pointer with its offset - * %pf output the name of a function pointer without its offset - * %pR output the address range in a struct resource - * %n is ignored + * see vsnprintf comment for details. * * The return value is the number of characters which would * be generated for the given input, excluding the trailing -- cgit v1.1