From 6cc8a7c1d8560c042f486b23318a6291569ab96b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 19 Mar 2010 01:23:53 +0100 Subject: perf: Fetch hot regs from the template caller Trace events can be defined from a template using DECLARE_EVENT_CLASS/DEFINE_EVENT or directly with TRACE_EVENT. In both cases we have a template tracepoint handler, used to record the trace, to which we pass our ftrace event instance. In the function level, if the class is named "foo" and the event is named "blah", we have the following chain of calls: perf_trace_blah() -> perf_trace_templ_foo() In the case we have several events sharing the class "blah", we'll have multiple users of perf_trace_templ_foo(), and it won't be inlined by the compiler. This is usually what happens with the DECLARE_EVENT_CLASS/DEFINE_EVENT based definition. But if perf_trace_blah() is the only caller of perf_trace_templ_foo() there are fair chances that it will be inlined. The problem is that we fetch the regs from perf_trace_templ_foo() after we rewinded the frame pointer to the second caller, we want to reach the caller of perf_trace_blah() to get the right source of the event. And we do this by always assuming that perf_trace_templ_foo() is not inlined. But as shown above this is not always true. And if it is inlined we miss the first caller, losing the most important level of precision. We get: 61.31% ls [kernel.kallsyms] [k] do_softirq | --- do_softirq irq_exit do_IRQ common_interrupt | |--25.00%-- tty_buffer_request_room Instead of: 61.31% ls [kernel.kallsyms] [k] __do_softirq | --- __do_softirq do_softirq irq_exit do_IRQ common_interrupt | |--25.00%-- tty_buffer_request_room To fix this, we fetch the regs from perf_trace_blah() rather than perf_trace_templ_foo() so that we don't have to deal with inlining surprises. That also bring us the advantage of having the true source of the event even if we don't have frame pointers. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Ingo Molnar --- include/trace/ftrace.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index ea6f9d4..882c648 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -758,13 +758,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static notrace void \ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ - proto) \ + struct pt_regs *__regs, proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ - struct pt_regs *__regs; \ int __entry_size; \ int __data_size; \ int rctx; \ @@ -785,20 +784,22 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ \ { assign; } \ \ - __regs = &__get_cpu_var(perf_trace_regs); \ - perf_fetch_caller_regs(__regs, 2); \ - \ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ __count, irq_flags, __regs); \ } #undef DEFINE_EVENT -#define DEFINE_EVENT(template, call, proto, args) \ -static notrace void perf_trace_##call(proto) \ -{ \ - struct ftrace_event_call *event_call = &event_##call; \ - \ - perf_trace_templ_##template(event_call, args); \ +#define DEFINE_EVENT(template, call, proto, args) \ +static notrace void perf_trace_##call(proto) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \ + \ + perf_fetch_caller_regs(__regs, 1); \ + \ + perf_trace_templ_##template(event_call, __regs, args); \ + \ + put_cpu_var(perf_trace_regs); \ } #undef DEFINE_EVENT_PRINT -- cgit v1.1 From 883a2a3189dae9d2912c417e47152f51cb922a3f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 May 2010 06:16:11 +0200 Subject: tracing: Drop lock_acquired waittime field Drop the waittime field from the lock_acquired event, we can calculate it by substracting the lock_acquired event timestamp with the matching lock_acquire one. It is not needed and takes useless space in the traces. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Hitoshi Mitake Cc: Steven Rostedt --- include/trace/events/lock.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h index 5c1dcfc..17ca287 100644 --- a/include/trace/events/lock.h +++ b/include/trace/events/lock.h @@ -78,24 +78,21 @@ TRACE_EVENT(lock_contended, ); TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip, waittime), + TP_ARGS(lock, ip), TP_STRUCT__entry( __string(name, lock->name) - __field(s64, wait_nsec) __field(void *, lockdep_addr) ), TP_fast_assign( __assign_str(name, lock->name); - __entry->wait_nsec = waittime; __entry->lockdep_addr = lock; ), - TP_printk("%p %s (%llu ns)", __entry->lockdep_addr, - __get_str(name), - __entry->wait_nsec) + TP_printk("%p %s", __entry->lockdep_addr, + __get_str(name)) ); #endif -- cgit v1.1 From 93135439459920c4d856f4ab8f068c030085c8df Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 May 2010 06:24:25 +0200 Subject: tracing: Drop the nested field from lock_release event Drop the nested field as we don't use it. Every nested state can be computed from a state machine on post processing already. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Hitoshi Mitake Cc: Steven Rostedt --- include/trace/events/lock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h index 17ca287..fde4c38 100644 --- a/include/trace/events/lock.h +++ b/include/trace/events/lock.h @@ -37,9 +37,9 @@ TRACE_EVENT(lock_acquire, TRACE_EVENT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, nested, ip), + TP_ARGS(lock, ip), TP_STRUCT__entry( __string(name, lock->name) -- cgit v1.1 From 2c193c736803ceb547daec725e5c5d992d039f20 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 May 2010 06:36:02 +0200 Subject: tracing: Factorize lock events in a lock class lock_acquired, lock_contended and lock_release now share the same prototype and format. Let's factorize them into a lock event class. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Hitoshi Mitake Cc: Steven Rostedt --- include/trace/events/lock.h | 48 ++++++++++++++------------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h index fde4c38..2821b86 100644 --- a/include/trace/events/lock.h +++ b/include/trace/events/lock.h @@ -35,15 +35,15 @@ TRACE_EVENT(lock_acquire, __get_str(name)) ); -TRACE_EVENT(lock_release, +DECLARE_EVENT_CLASS(lock, TP_PROTO(struct lockdep_map *lock, unsigned long ip), TP_ARGS(lock, ip), TP_STRUCT__entry( - __string(name, lock->name) - __field(void *, lockdep_addr) + __string( name, lock->name ) + __field( void *, lockdep_addr ) ), TP_fast_assign( @@ -51,48 +51,30 @@ TRACE_EVENT(lock_release, __entry->lockdep_addr = lock; ), - TP_printk("%p %s", - __entry->lockdep_addr, __get_str(name)) + TP_printk("%p %s", __entry->lockdep_addr, __get_str(name)) ); -#ifdef CONFIG_LOCK_STAT - -TRACE_EVENT(lock_contended, +DEFINE_EVENT(lock, lock_release, TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - __field(void *, lockdep_addr) - ), + TP_ARGS(lock, ip) +); - TP_fast_assign( - __assign_str(name, lock->name); - __entry->lockdep_addr = lock; - ), +#ifdef CONFIG_LOCK_STAT - TP_printk("%p %s", - __entry->lockdep_addr, __get_str(name)) -); +DEFINE_EVENT(lock, lock_contended, -TRACE_EVENT(lock_acquired, TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), + TP_ARGS(lock, ip) +); - TP_STRUCT__entry( - __string(name, lock->name) - __field(void *, lockdep_addr) - ), +DEFINE_EVENT(lock, lock_acquired, - TP_fast_assign( - __assign_str(name, lock->name); - __entry->lockdep_addr = lock; - ), - TP_printk("%p %s", __entry->lockdep_addr, - __get_str(name)) + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip) ); #endif -- cgit v1.1