From ad71d889b88055e61e3970a6744a271a51a94f42 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 30 Apr 2013 15:46:14 -0400 Subject: tracing: Add function probe to trigger a ftrace dump to console Add the "dump" command to have the ftrace buffer dumped to console if a function is hit. This is useful when debugging a tripple fault, where you have an idea of a function that is called just before the tripple fault occurs, and can tell ftrace to dump its content out to the console before it continues. Format is: :dump echo 'bad_address:dump' > /debug/tracing/set_ftrace_filter To remove this: echo '!bad_address:dump' > /debug/tracing/set_ftrace_filter Requested-by: Luis Claudio R. Goncalves Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 7 +++++ kernel/trace/trace_functions.c | 59 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index bfe8c29..cc9ec57 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -2430,6 +2430,13 @@ The following commands are supported: echo '!schedule:disable_event:sched:sched_switch' > \ set_ftrace_filter +- dump + When the function is hit, it will dump the contents of the ftrace + ring buffer to the console. This is useful if you need to debug + something, and want to dump the trace when a certain function + is hit. Perhaps its a function that is called before a tripple + fault happens and does not allow you to get a regular dump. + trace_pipe ---------- diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c4d6d71..d7c8719 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -290,6 +290,13 @@ ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) trace_dump_stack(STACK_SKIP); } +static void +ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data) +{ + if (update_count(data)) + ftrace_dump(DUMP_ALL); +} + static int ftrace_probe_print(const char *name, struct seq_file *m, unsigned long ip, void *data) @@ -327,6 +334,13 @@ ftrace_stacktrace_print(struct seq_file *m, unsigned long ip, return ftrace_probe_print("stacktrace", m, ip, data); } +static int +ftrace_dump_print(struct seq_file *m, unsigned long ip, + struct ftrace_probe_ops *ops, void *data) +{ + return ftrace_probe_print("dump", m, ip, data); +} + static struct ftrace_probe_ops traceon_count_probe_ops = { .func = ftrace_traceon_count, .print = ftrace_traceon_print, @@ -342,6 +356,11 @@ static struct ftrace_probe_ops stacktrace_count_probe_ops = { .print = ftrace_stacktrace_print, }; +static struct ftrace_probe_ops dump_probe_ops = { + .func = ftrace_dump_probe, + .print = ftrace_dump_print, +}; + static struct ftrace_probe_ops traceon_probe_ops = { .func = ftrace_traceon, .print = ftrace_traceon_print, @@ -425,6 +444,19 @@ ftrace_stacktrace_callback(struct ftrace_hash *hash, param, enable); } +static int +ftrace_dump_callback(struct ftrace_hash *hash, + char *glob, char *cmd, char *param, int enable) +{ + struct ftrace_probe_ops *ops; + + ops = &dump_probe_ops; + + /* Only dump once. */ + return ftrace_trace_probe_callback(ops, hash, glob, cmd, + "1", enable); +} + static struct ftrace_func_command ftrace_traceon_cmd = { .name = "traceon", .func = ftrace_trace_onoff_callback, @@ -440,6 +472,11 @@ static struct ftrace_func_command ftrace_stacktrace_cmd = { .func = ftrace_stacktrace_callback, }; +static struct ftrace_func_command ftrace_dump_cmd = { + .name = "dump", + .func = ftrace_dump_callback, +}; + static int __init init_func_cmd_traceon(void) { int ret; @@ -450,13 +487,25 @@ static int __init init_func_cmd_traceon(void) ret = register_ftrace_command(&ftrace_traceon_cmd); if (ret) - unregister_ftrace_command(&ftrace_traceoff_cmd); + goto out_free_traceoff; ret = register_ftrace_command(&ftrace_stacktrace_cmd); - if (ret) { - unregister_ftrace_command(&ftrace_traceoff_cmd); - unregister_ftrace_command(&ftrace_traceon_cmd); - } + if (ret) + goto out_free_traceon; + + ret = register_ftrace_command(&ftrace_dump_cmd); + if (ret) + goto out_free_stacktrace; + + return 0; + + out_free_stacktrace: + unregister_ftrace_command(&ftrace_stacktrace_cmd); + out_free_traceon: + unregister_ftrace_command(&ftrace_traceon_cmd); + out_free_traceoff: + unregister_ftrace_command(&ftrace_traceoff_cmd); + return ret; } #else -- cgit v1.1 From 90e3c03c3a09a7b176b3fe59d78f5d9755ac8e37 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 30 Apr 2013 19:00:46 -0400 Subject: tracing: Add function probe to trigger a ftrace dump of current CPU trace Add the "cpudump" command to have the current CPU ftrace buffer dumped to console if a function is hit. This is useful when debugging a tripple fault, where you have an idea of a function that is called just before the tripple fault occurs, and can tell ftrace to dump its content out to the console before it continues. This differs from the "dump" command as it only dumps the content of the ring buffer for the currently executing CPU, and does not show the contents of the other CPUs. Format is: :cpudump echo 'bad_address:cpudump' > /debug/tracing/set_ftrace_filter To remove this: echo '!bad_address:cpudump' > /debug/tracing/set_ftrace_filter Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 6 ++++++ kernel/trace/trace_functions.c | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index cc9ec57..b937c6e 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -2437,6 +2437,12 @@ The following commands are supported: is hit. Perhaps its a function that is called before a tripple fault happens and does not allow you to get a regular dump. +- cpudump + When the function is hit, it will dump the contents of the ftrace + ring buffer for the current CPU to the console. Unlike the "dump" + command, it only prints out the contents of the ring buffer for the + CPU that executed the function that triggered the dump. + trace_pipe ---------- diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index d7c8719..b863f93 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -297,6 +297,14 @@ ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data) ftrace_dump(DUMP_ALL); } +/* Only dump the current CPU buffer. */ +static void +ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data) +{ + if (update_count(data)) + ftrace_dump(DUMP_ORIG); +} + static int ftrace_probe_print(const char *name, struct seq_file *m, unsigned long ip, void *data) @@ -341,6 +349,13 @@ ftrace_dump_print(struct seq_file *m, unsigned long ip, return ftrace_probe_print("dump", m, ip, data); } +static int +ftrace_cpudump_print(struct seq_file *m, unsigned long ip, + struct ftrace_probe_ops *ops, void *data) +{ + return ftrace_probe_print("cpudump", m, ip, data); +} + static struct ftrace_probe_ops traceon_count_probe_ops = { .func = ftrace_traceon_count, .print = ftrace_traceon_print, @@ -361,6 +376,11 @@ static struct ftrace_probe_ops dump_probe_ops = { .print = ftrace_dump_print, }; +static struct ftrace_probe_ops cpudump_probe_ops = { + .func = ftrace_cpudump_probe, + .print = ftrace_cpudump_print, +}; + static struct ftrace_probe_ops traceon_probe_ops = { .func = ftrace_traceon, .print = ftrace_traceon_print, @@ -457,6 +477,19 @@ ftrace_dump_callback(struct ftrace_hash *hash, "1", enable); } +static int +ftrace_cpudump_callback(struct ftrace_hash *hash, + char *glob, char *cmd, char *param, int enable) +{ + struct ftrace_probe_ops *ops; + + ops = &cpudump_probe_ops; + + /* Only dump once. */ + return ftrace_trace_probe_callback(ops, hash, glob, cmd, + "1", enable); +} + static struct ftrace_func_command ftrace_traceon_cmd = { .name = "traceon", .func = ftrace_trace_onoff_callback, @@ -477,6 +510,11 @@ static struct ftrace_func_command ftrace_dump_cmd = { .func = ftrace_dump_callback, }; +static struct ftrace_func_command ftrace_cpudump_cmd = { + .name = "cpudump", + .func = ftrace_cpudump_callback, +}; + static int __init init_func_cmd_traceon(void) { int ret; @@ -497,8 +535,14 @@ static int __init init_func_cmd_traceon(void) if (ret) goto out_free_stacktrace; + ret = register_ftrace_command(&ftrace_cpudump_cmd); + if (ret) + goto out_free_dump; + return 0; + out_free_dump: + unregister_ftrace_command(&ftrace_dump_cmd); out_free_stacktrace: unregister_ftrace_command(&ftrace_stacktrace_cmd); out_free_traceon: -- cgit v1.1 From 8092e808a31839c502a52d391b15f31c1d8764f5 Mon Sep 17 00:00:00 2001 From: Harsh Prateek Bora Date: Fri, 24 May 2013 12:52:17 +0530 Subject: tracing/trivial: Consolidate error return condition Consolidate the checks for !enabled and !param to return -EINVAL in event_enable_func(). Link: http://lkml.kernel.org/r/1369380137-12452-1-git-send-email-harsh@linux.vnet.ibm.com Signed-off-by: Harsh Prateek Bora Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 27963e2..db086f1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2011,10 +2011,7 @@ event_enable_func(struct ftrace_hash *hash, int ret; /* hash funcs only work with set_ftrace_filter */ - if (!enabled) - return -EINVAL; - - if (!param) + if (!enabled || !param) return -EINVAL; system = strsep(¶m, ":"); -- cgit v1.1 From 238ae93d699d59876b470bf6455de22bcfaa9a1b Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Sun, 26 May 2013 16:52:01 +0800 Subject: tracing: Fix file mode of free_buffer Commit 4f271a2a60c748599b30bb4dafff30d770439b96 (tracing: Add a proc file to stop tracing and free buffer) implement a method to free up ring buffer in kernel memory in the release code path of free_buffer's fd. Then we don't need read/write support for free_buffer, indeed we just have a dummy write fop, and don't implement read fop. So the 0200 is more reasonable file mode for free_buffer than the current file mode 0644. Link: http://lkml.kernel.org/r/20130526085201.GA3183@udknight Acked-by: Vaibhav Nagarnaik Acked-by: David Sharp Signed-off-by: Wang YanQing Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1a41023..5f4a09c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5935,7 +5935,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("buffer_total_size_kb", 0444, d_tracer, tr, &tracing_total_entries_fops); - trace_create_file("free_buffer", 0644, d_tracer, + trace_create_file("free_buffer", 0200, d_tracer, tr, &tracing_free_buffer_fops); trace_create_file("trace_marker", 0220, d_tracer, -- cgit v1.1 From 7614c3dc74733dff4b0e774f7a894b9ea6ec508c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 May 2013 20:01:16 -0400 Subject: ftrace: Use schedule_on_each_cpu() as a heavy synchronize_sched() The function tracer uses preempt_disable/enable_notrace() for synchronization between reading registered ftrace_ops and unregistering them. Most of the ftrace_ops are global permanent structures that do not require this synchronization. That is, ops may be added and removed from the hlist but are never freed, and wont hurt if a synchronization is missed. But this is not true for dynamically created ftrace_ops or control_ops, which are used by the perf function tracing. The problem here is that the function tracer can be used to trace kernel/user context switches as well as going to and from idle. Basically, it can be used to trace blind spots of the RCU subsystem. This means that even though preempt_disable() is done, a synchronize_sched() will ignore CPUs that haven't made it out of user space or idle. These can include functions that are being traced just before entering or exiting the kernel sections. To implement the RCU synchronization, instead of using synchronize_sched() the use of schedule_on_each_cpu() is performed. This means that when a dynamically allocated ftrace_ops, or a control ops is being unregistered, all CPUs must be touched and execute a ftrace_sync() stub function via the work queues. This will rip CPUs out from idle or in dynamic tick mode. This only happens when a user disables perf function tracing or other dynamically allocated function tracers, but it allows us to continue to debug RCU and context tracking with function tracing. Link: http://lkml.kernel.org/r/1369785676.15552.55.camel@gandalf.local.home Cc: "Paul E. McKenney" Cc: Tejun Heo Cc: Ingo Molnar Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c508ff..800a8a2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -413,6 +413,17 @@ static int __register_ftrace_function(struct ftrace_ops *ops) return 0; } +static void ftrace_sync(struct work_struct *work) +{ + /* + * This function is just a stub to implement a hard force + * of synchronize_sched(). This requires synchronizing + * tasks even in userspace and idle. + * + * Yes, function tracing is rude. + */ +} + static int __unregister_ftrace_function(struct ftrace_ops *ops) { int ret; @@ -440,8 +451,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) * so there'll be no new users. We must ensure * all current users are done before we free * the control data. + * Note synchronize_sched() is not enough, as we + * use preempt_disable() to do RCU, but the function + * tracer can be called where RCU is not active + * (before user_exit()). */ - synchronize_sched(); + schedule_on_each_cpu(ftrace_sync); control_ops_free(ops); } } else @@ -456,9 +471,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. + * + * Again, normal synchronize_sched() is not good enough. + * We need to do a hard force of sched synchronization. */ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - synchronize_sched(); + schedule_on_each_cpu(ftrace_sync); + return 0; } -- cgit v1.1 From 1b3d0623cd9da35fe1d52281ed06c2ff543cd660 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Jun 2013 17:02:53 +0800 Subject: ftrace: Remove ftrace_regex_lseek() This is a leftover after ftrace_regex_lseek() was renamed to ftrace_filter_lseek() and then ftrace_filter_lseek() was moved out side of CONFIG_DYNAMIC_FTRACE. Link: http://lkml.kernel.org/r/51B1A1BD.40905@huawei.com Signed-off-by: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 99d0fbc..e48ed1d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -566,10 +566,6 @@ static inline ssize_t ftrace_filter_write(struct file *file, const char __user * size_t cnt, loff_t *ppos) { return -ENODEV; } static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { return -ENODEV; } -static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence) -{ - return -ENODEV; -} static inline int ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; } #endif /* CONFIG_DYNAMIC_FTRACE */ -- cgit v1.1 From aaf6ac0f0871cb7fc0f28f3a00edf329bc7adc29 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 7 Jun 2013 15:07:48 +0900 Subject: tracing: Do not call kmem_cache_free() on allocation failure There's no point calling it when _alloc() failed. Link: http://lkml.kernel.org/r/1370585268-29169-1-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index db086f1..f57b015 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -97,7 +97,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field = kmem_cache_alloc(field_cachep, GFP_TRACE); if (!field) - goto err; + return -ENOMEM; field->name = name; field->type = type; @@ -114,11 +114,6 @@ static int __trace_define_field(struct list_head *head, const char *type, list_add(&field->link, head); return 0; - -err: - kmem_cache_free(field_cachep, field); - - return -ENOMEM; } int trace_define_field(struct ftrace_event_call *call, const char *type, -- cgit v1.1 From 1a891cf19cdfb645827969cc6aeaeebdefeb87b2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Jun 2013 13:16:25 -0400 Subject: tracing: Add binary '&' filter for events There are some cases when filtering on a set flag of a field of a tracepoint is useful. But currently the only filtering commands for numbered fields is ==, !=, <, <=, >, >=. This does not help when you just want to trace if a specific flag is set. For example: > # sudo trace-cmd record -e brcmfmac:brcmf_dbg -f 'level & 0x40000' > disable all > enable brcmfmac:brcmf_dbg > path = /sys/kernel/debug/tracing/events/brcmfmac/brcmf_dbg/enable > (level & 0x40000) > ^ > parse_error: Invalid operator > When trying to trace brcmf_dbg when level has its 1 << 18 bit set, the filter fails to perform. By allowing a binary '&' operation, this gives the user the ability to test a bit. Note, a binary '|' is not added, as it doesn't make sense as fields must be compared to constants (for now), and ORing a constant will always return true. Link: http://lkml.kernel.org/r/1371057385.9844.261.camel@gandalf.local.home Suggested-by: Arend van Spriel Tested-by: Arend van Spriel Signed-off-by: Steven Rostedt --- Documentation/trace/events.txt | 2 +- kernel/trace/trace_events_filter.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index bb24c2a0e..4191124 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -183,7 +183,7 @@ The relational-operators depend on the type of the field being tested: The operators available for numeric fields are: -==, !=, <, <=, >, >= +==, !=, <, <=, >, >=, & And for string fields they are: diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1b653f..0d883dc 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -44,6 +44,7 @@ enum filter_op_ids OP_LE, OP_GT, OP_GE, + OP_BAND, OP_NONE, OP_OPEN_PAREN, }; @@ -54,6 +55,7 @@ struct filter_op { int precedence; }; +/* Order must be the same as enum filter_op_ids above */ static struct filter_op filter_ops[] = { { OP_OR, "||", 1 }, { OP_AND, "&&", 2 }, @@ -64,6 +66,7 @@ static struct filter_op filter_ops[] = { { OP_LE, "<=", 5 }, { OP_GT, ">", 5 }, { OP_GE, ">=", 5 }, + { OP_BAND, "&", 6 }, { OP_NONE, "OP_NONE", 0 }, { OP_OPEN_PAREN, "(", 0 }, }; @@ -156,6 +159,9 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ case OP_GE: \ match = (*addr >= val); \ break; \ + case OP_BAND: \ + match = (*addr & val); \ + break; \ default: \ break; \ } \ -- cgit v1.1 From c3e13c7c0605677a2c94957b39157f4501cea9a8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 17 Jun 2013 10:59:17 -0400 Subject: tracing: Update documentation on tracepoint glob matching b0f1a59a "tracing/filters: Use a different op for glob match" added glob matching to tracepoint filter strings. It uses the ftrace function tracing glob matching facility that allows for the wild card character (*) to be used at the start and/or end of the matching string. But the documentation still states that the filtering only allows for exact string matches. Cc: Li Zefan Signed-off-by: Steven Rostedt --- Documentation/trace/events.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 4191124..37732a2 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -187,9 +187,18 @@ The operators available for numeric fields are: And for string fields they are: -==, != +==, !=, ~ -Currently, only exact string matches are supported. +The glob (~) only accepts a wild card character (*) at the start and or +end of the string. For example: + + prev_comm ~ "*sh" + prev_comm ~ "sh*" + prev_comm ~ "*sh*" + +But does not allow for it to be within the string: + + prev_comm ~ "ba*sh" <-- is invalid 5.2 Setting filters ------------------- -- cgit v1.1 From de7edd31457b626e54a0b2a7e8ff4d65492f01ad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 14 Jun 2013 16:21:43 -0400 Subject: tracing: Disable tracing on warning Add a traceoff_on_warning option in both the kernel command line as well as a sysctl option. When set, any WARN*() function that is hit will cause the tracing_on variable to be cleared, which disables writing to the ring buffer. This is useful especially when tracing a bug with function tracing. When a warning is hit, the print caused by the warning can flood the trace with the functions that producing the output for the warning. This can make the resulting trace useless by either hiding where the bug happened, or worse, by overflowing the buffer and losing the trace of the bug totally. Acked-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- Documentation/kernel-parameters.txt | 13 +++++++++++++ include/linux/ftrace.h | 5 +++++ kernel/panic.c | 3 +++ kernel/sysctl.c | 7 +++++++ kernel/trace/trace.c | 17 +++++++++++++++++ 5 files changed, 45 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6e3b18a..729d0b9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3069,6 +3069,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See also Documentation/trace/ftrace.txt "trace options" section. + traceoff_on_warning + [FTRACE] enable this option to disable tracing when a + warning is hit. This turns off "tracing_on". Tracing can + be enabled again by echoing '1' into the "tracing_on" + file located in /sys/kernel/debug/tracing/ + + This option is useful, as it disables the trace before + the WARNING dump is called, which prevents the trace to + be filled with content caused by the warning output. + + This option can also be set at run time via the sysctl + option: kernel/traceoff_on_warning + transparent_hugepage= [KNL] Format: [always|madvise|never] diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e48ed1d..9f15c00 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -824,10 +824,15 @@ enum ftrace_dump_mode; extern enum ftrace_dump_mode ftrace_dump_on_oops; +extern void disable_trace_on_warning(void); +extern int __disable_trace_on_warning; + #ifdef CONFIG_PREEMPT #define INIT_TRACE_RECURSION .trace_recursion = 0, #endif +#else /* CONFIG_TRACING */ +static inline void disable_trace_on_warning(void) { } #endif /* CONFIG_TRACING */ #ifndef INIT_TRACE_RECURSION diff --git a/kernel/panic.c b/kernel/panic.c index 167ec097..4cea6cc 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -399,6 +400,8 @@ struct slowpath_args { static void warn_slowpath_common(const char *file, int line, void *caller, unsigned taint, struct slowpath_args *args) { + disable_trace_on_warning(); + printk(KERN_WARNING "------------[ cut here ]------------\n"); printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9edcf45..5b0f18c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -600,6 +600,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "traceoff_on_warning", + .data = &__disable_trace_on_warning, + .maxlen = sizeof(__disable_trace_on_warning), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_MODULES { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5f4a09c..c4c9296 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -115,6 +115,9 @@ cpumask_var_t __read_mostly tracing_buffer_mask; enum ftrace_dump_mode ftrace_dump_on_oops; +/* When set, tracing will stop when a WARN*() is hit */ +int __disable_trace_on_warning; + static int tracing_set_tracer(const char *buf); #define MAX_TRACER_SIZE 100 @@ -149,6 +152,13 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); +static int __init stop_trace_on_warning(char *str) +{ + __disable_trace_on_warning = 1; + return 1; +} +__setup("traceoff_on_warning=", stop_trace_on_warning); + static int __init boot_alloc_snapshot(char *str) { allocate_snapshot = true; @@ -170,6 +180,7 @@ static int __init set_trace_boot_options(char *str) } __setup("trace_options=", set_trace_boot_options); + unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; @@ -562,6 +573,12 @@ void tracing_off(void) } EXPORT_SYMBOL_GPL(tracing_off); +void disable_trace_on_warning(void) +{ + if (__disable_trace_on_warning) + tracing_off(); +} + /** * tracing_is_on - show state of ring buffers enabled */ -- cgit v1.1 From 195a84d91e92ee3fe571a2086a6db7e17bf5bc7c Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Fri, 14 Jun 2013 10:10:38 +0800 Subject: tracing/kprobes: Remove unnecessary checking of trace_probe_is_enabled Since tp->flags assignment was moved into function enable_trace_probe(), there is no need to use trace_probe_is_enabled to check flags in the same function. Remove the unnecessary checking. Link: http://lkml.kernel.org/r/51BA7B9E.3040807@huawei.com Acked-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9f46e98..f237417 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -240,8 +240,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) } else tp->flags |= TP_FLAG_PROFILE; - if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && - !trace_probe_has_gone(tp)) { + if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { if (trace_probe_is_return(tp)) ret = enable_kretprobe(&tp->rp); else -- cgit v1.1 From 52d85d763086594f139bf7d3a5641abeb91d9f57 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 12 Jun 2013 12:03:18 +0200 Subject: ftrace: Fix stddev calculation in function profiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When FUNCTION_GRAPH_TRACER is enabled, ftrace can profile kernel functions and print basic statistics about them. Unfortunately, running stddev calculation is wrong. This patch corrects it implementing Welford’s method: s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) . Link: http://lkml.kernel.org/r/1371031398-24048-1-git-send-email-juri.lelli@gmail.com Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Juri Lelli Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 800a8a2..26e1910 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -641,12 +641,18 @@ static int function_stat_show(struct seq_file *m, void *v) if (rec->counter <= 1) stddev = 0; else { - stddev = rec->time_squared - rec->counter * avg * avg; + /* + * Apply Welford's method: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + */ + stddev = rec->counter * rec->time_squared - + rec->time * rec->time; + /* * Divide only 1000 for ns^2 -> us^2 conversion. * trace_print_graph_duration will divide 1000 again. */ - do_div(stddev, (rec->counter - 1) * 1000); + do_div(stddev, rec->counter * (rec->counter - 1) * 1000); } trace_seq_init(&s); -- cgit v1.1 From 6e94a780374ed31b280f939d4757e8d7858dff16 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jun 2013 10:58:31 -0400 Subject: tracing: Failed to create system directory Running the following: # cd /sys/kernel/debug/tracing # echo p:i do_sys_open > kprobe_events # echo p:j schedule >> kprobe_events # cat kprobe_events p:kprobes/i do_sys_open p:kprobes/j schedule # echo p:i do_sys_open >> kprobe_events # cat kprobe_events p:kprobes/j schedule p:kprobes/i do_sys_open # ls /sys/kernel/debug/tracing/events/kprobes/ enable filter j Notice that the 'i' is missing from the kprobes directory. The console produces: "Failed to create system directory kprobes" This is because kprobes passes in a allocated name for the system and the ftrace event subsystem saves off that name instead of creating a duplicate for it. But the kprobes may free the system name making the pointer to it invalid. This bug was introduced by 92edca073c37 "tracing: Use direct field, type and system names" which switched from using kstrdup() on the system name in favor of just keeping apointer to it, as the internal ftrace event system names are static and exist for the life of the computer being booted. Instead of reverting back to duplicating system names again, we can use core_kernel_data() to determine if the passed in name was allocated or static. Then use the MSB of the ref_count to be a flag to keep track if the name was allocated or not. Then we can still save from having to duplicate strings that will always exist, but still copy the ones that may be freed. Cc: stable@vger.kernel.org # 3.10 Reported-by: "zhangwei(Jovi)" Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f57b015..903a0bf 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields); static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; +#define SYSTEM_FL_FREE_NAME (1 << 31) + +static inline int system_refcount(struct event_subsystem *system) +{ + return system->ref_count & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_inc(struct event_subsystem *system) +{ + return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; +} + +static int system_refcount_dec(struct event_subsystem *system) +{ + return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; +} + /* Double loops, do not use break, only goto's work */ #define do_for_each_event_file(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ @@ -344,8 +361,8 @@ static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; - WARN_ON_ONCE(system->ref_count == 0); - if (--system->ref_count) + WARN_ON_ONCE(system_refcount(system) == 0); + if (system_refcount_dec(system)) return; list_del(&system->list); @@ -354,13 +371,15 @@ static void __put_system(struct event_subsystem *system) kfree(filter->filter_string); kfree(filter); } + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); } static void __get_system(struct event_subsystem *system) { - WARN_ON_ONCE(system->ref_count == 0); - system->ref_count++; + WARN_ON_ONCE(system_refcount(system) == 0); + system_refcount_inc(system); } static void __get_system_dir(struct ftrace_subsystem_dir *dir) @@ -374,7 +393,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ - WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); + WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); __put_system(dir->subsystem); if (!--dir->ref_count) @@ -1274,7 +1293,15 @@ create_new_subsystem(const char *name) return NULL; system->ref_count = 1; - system->name = name; + + /* Only allocate if dynamic (kprobes and modules) */ + if (!core_kernel_data((unsigned long)name)) { + system->ref_count |= SYSTEM_FL_FREE_NAME; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) + goto out_free; + } else + system->name = name; system->filter = NULL; @@ -1287,6 +1314,8 @@ create_new_subsystem(const char *name) return system; out_free: + if (system->ref_count & SYSTEM_FL_FREE_NAME) + kfree(system->name); kfree(system); return NULL; } -- cgit v1.1 From 288e984e622336bab8bc3dfdf2f190816362d9a1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 20 Jun 2013 19:38:06 +0200 Subject: tracing/kprobes: Avoid perf_trace_buf_*() if ->perf_events is empty perf_trace_buf_prepare() + perf_trace_buf_submit() make no sense if this task/CPU has no active counters. Change kprobe_perf_func() and kretprobe_perf_func() to check call->perf_events beforehand and return if this list is empty. For example, "perf record -e some_probe -p1". Only /sbin/init will report, all other threads which hit the same probe will do perf_trace_buf_prepare/perf_trace_buf_submit just to realize that nobody wants perf_swevent_event(). Link: http://lkml.kernel.org/r/20130620173806.GA13151@redhat.com Acked-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f237417..c35bebe 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1156,6 +1156,10 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) int size, __size, dsize; int rctx; + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + return; + dsize = __get_data_size(tp, regs); __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); @@ -1171,8 +1175,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) entry->ip = (unsigned long)tp->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - - head = this_cpu_ptr(call->perf_events); perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL); } @@ -1188,6 +1190,10 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, int size, __size, dsize; int rctx; + head = this_cpu_ptr(call->perf_events); + if (hlist_empty(head)) + return; + dsize = __get_data_size(tp, regs); __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); @@ -1203,8 +1209,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - - head = this_cpu_ptr(call->perf_events); perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head, NULL); } -- cgit v1.1 From 3fe3d6193e7cd7b4dd2bde10772f048bdefea4ee Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 20 Jun 2013 19:38:09 +0200 Subject: tracing/kprobes: Kill probe_enable_lock enable_trace_probe() and disable_trace_probe() should not worry about serialization, the caller (perf_trace_init or __ftrace_set_clr_event) holds event_mutex. They are also called by kprobe_trace_self_tests_init(), but this __init function can't race with itself or trace_events.c And note that this code depended on event_mutex even before 41a7dd420c which introduced probe_enable_lock. In fact it assumes that the caller kprobe_register() can never race with itself. Otherwise, say, tp->flags manipulations are racy. Link: http://lkml.kernel.org/r/20130620173809.GA13158@redhat.com Acked-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c35bebe..282f86c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -183,16 +183,15 @@ static struct trace_probe *find_trace_probe(const char *event, return NULL; } +/* + * This and enable_trace_probe/disable_trace_probe rely on event_mutex + * held by the caller, __ftrace_set_clr_event(). + */ static int trace_probe_nr_files(struct trace_probe *tp) { - struct ftrace_event_file **file; + struct ftrace_event_file **file = rcu_dereference_raw(tp->files); int ret = 0; - /* - * Since all tp->files updater is protected by probe_enable_lock, - * we don't need to lock an rcu_read_lock. - */ - file = rcu_dereference_raw(tp->files); if (file) while (*(file++)) ret++; @@ -200,8 +199,6 @@ static int trace_probe_nr_files(struct trace_probe *tp) return ret; } -static DEFINE_MUTEX(probe_enable_lock); - /* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. @@ -211,8 +208,6 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - mutex_lock(&probe_enable_lock); - if (file) { struct ftrace_event_file **new, **old; int n = trace_probe_nr_files(tp); @@ -223,7 +218,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) GFP_KERNEL); if (!new) { ret = -ENOMEM; - goto out_unlock; + goto out; } memcpy(new, old, n * sizeof(struct ftrace_event_file *)); new[n] = file; @@ -246,10 +241,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) else ret = enable_kprobe(&tp->rp.kp); } - - out_unlock: - mutex_unlock(&probe_enable_lock); - + out: return ret; } @@ -282,8 +274,6 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - mutex_lock(&probe_enable_lock); - if (file) { struct ftrace_event_file **new, **old; int n = trace_probe_nr_files(tp); @@ -292,7 +282,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) old = rcu_dereference_raw(tp->files); if (n == 0 || trace_probe_file_index(tp, file) < 0) { ret = -EINVAL; - goto out_unlock; + goto out; } if (n == 1) { /* Remove the last file */ @@ -303,7 +293,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) GFP_KERNEL); if (!new) { ret = -ENOMEM; - goto out_unlock; + goto out; } /* This copy & check loop copies the NULL stopper too */ @@ -326,10 +316,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) else disable_kprobe(&tp->rp.kp); } - - out_unlock: - mutex_unlock(&probe_enable_lock); - + out: return ret; } @@ -1214,6 +1201,12 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, } #endif /* CONFIG_PERF_EVENTS */ +/* + * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex. + * + * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe + * lockless, but we can't race with this __init function. + */ static __kprobes int kprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) @@ -1379,6 +1372,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) return NULL; } +/* + * Nobody but us can call enable_trace_probe/disable_trace_probe at this + * stage, we can do this lockless. + */ static __init int kprobe_trace_self_tests_init(void) { int ret, warn = 0; -- cgit v1.1 From a439059610ecd257dba29a612729132e470d118f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sat, 29 Jun 2013 00:08:04 -0500 Subject: tracing: Simplify code for showing of soft disabled flag Rather than enumerating each permutation, build the enable state string up from the combination of states. This also allows for the simpler addition of more states. Link: http://lkml.kernel.org/r/9aff5af6dee2f5a40ca30df41c39d5f33e998d7a.1372479499.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 903a0bf..7ee08b95 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -638,17 +638,17 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_file *file = filp->private_data; - char *buf; + char buf[4] = "0"; - if (file->flags & FTRACE_EVENT_FL_ENABLED) { - if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) - buf = "0*\n"; - else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) - buf = "1*\n"; - else - buf = "1\n"; - } else - buf = "0\n"; + if (file->flags & FTRACE_EVENT_FL_ENABLED && + !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + strcpy(buf, "1"); + + if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED || + file->flags & FTRACE_EVENT_FL_SOFT_MODE) + strcat(buf, "*"); + + strcat(buf, "\n"); return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); } -- cgit v1.1 From 44a6a4ee1aed0eada8f21e6db81b4cd099788f82 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sat, 29 Jun 2013 00:08:05 -0500 Subject: tracing: Add missing syscall_metadata comment Add the missing syscall_metadata description for the enter_fields struct member. Link: http://lkml.kernel.org/r/74c3407cd1e5d37f2c5aaca637aa4d35f66f1aa2.1372479499.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt --- include/trace/syscall.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 84bc419..fed853f 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -16,6 +16,7 @@ * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) + * @enter_fields: list of fields for syscall_enter trace event * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ -- cgit v1.1 From 3baa5e4cf224b8a55220cc841bb475e164b84ceb Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sat, 29 Jun 2013 00:08:07 -0500 Subject: tracing: Fix disabling of soft disable The comment on the soft disable 'disable' case of __ftrace_event_enable_disable() states that the soft disable bit should be cleared in that case, but currently only the soft mode bit is actually cleared. This essentially leaves the standard non-soft-enable enable/disable paths as the only way to clear the soft disable flag, but the soft disable bit should also be cleared when removing a trigger with '!'. Also, the SOFT_DISABLED bit should never be set if SOFT_MODE is cleared. This fixes the above discrepancies. Link: http://lkml.kernel.org/r/b9c68dd50bc07019e6c67d3f9b29be4ef1b2badb.1372479499.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7ee08b95..5892470 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -291,9 +291,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, } call->class->reg(call, TRACE_REG_UNREGISTER, file); } - /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */ + /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + else + clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: /* -- cgit v1.1 From b04d52e368e2cf526abb2bab61f304eaea126af2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 20 Jun 2013 19:38:14 +0200 Subject: tracing/kprobes: Turn trace_probe->files into list_head I think that "ftrace_event_file *trace_probe[]" complicates the code for no reason, turn it into list_head to simplify the code. enable_trace_probe() no longer needs synchronize_sched(). This needs the extra sizeof(list_head) memory for every attached ftrace_event_file, hopefully not a problem in this case. Link: http://lkml.kernel.org/r/20130620173814.GA13165@redhat.com Acked-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 138 ++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 101 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 282f86c..405b5b0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,12 +35,17 @@ struct trace_probe { const char *symbol; /* symbol name */ struct ftrace_event_class class; struct ftrace_event_call call; - struct ftrace_event_file * __rcu *files; + struct list_head files; ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; }; +struct event_file_link { + struct ftrace_event_file *file; + struct list_head list; +}; + #define SIZEOF_TRACE_PROBE(n) \ (offsetof(struct trace_probe, args) + \ (sizeof(struct probe_arg) * (n))) @@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, goto error; INIT_LIST_HEAD(&tp->list); + INIT_LIST_HEAD(&tp->files); return tp; error: kfree(tp->call.name); @@ -184,22 +190,6 @@ static struct trace_probe *find_trace_probe(const char *event, } /* - * This and enable_trace_probe/disable_trace_probe rely on event_mutex - * held by the caller, __ftrace_set_clr_event(). - */ -static int trace_probe_nr_files(struct trace_probe *tp) -{ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); - int ret = 0; - - if (file) - while (*(file++)) - ret++; - - return ret; -} - -/* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. */ @@ -209,29 +199,18 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) int ret = 0; if (file) { - struct ftrace_event_file **new, **old; - int n = trace_probe_nr_files(tp); - - old = rcu_dereference_raw(tp->files); - /* 1 is for new one and 1 is for stopper */ - new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *), - GFP_KERNEL); - if (!new) { + struct event_file_link *link; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) { ret = -ENOMEM; goto out; } - memcpy(new, old, n * sizeof(struct ftrace_event_file *)); - new[n] = file; - /* The last one keeps a NULL */ - rcu_assign_pointer(tp->files, new); - tp->flags |= TP_FLAG_TRACE; + link->file = file; + list_add_tail_rcu(&link->list, &tp->files); - if (old) { - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); - } + tp->flags |= TP_FLAG_TRACE; } else tp->flags |= TP_FLAG_PROFILE; @@ -245,24 +224,16 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) return ret; } -static int -trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) +static struct event_file_link * +find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) { - struct ftrace_event_file **files; - int i; + struct event_file_link *link; - /* - * Since all tp->files updater is protected by probe_enable_lock, - * we don't need to lock an rcu_read_lock. - */ - files = rcu_dereference_raw(tp->files); - if (files) { - for (i = 0; files[i]; i++) - if (files[i] == file) - return i; - } + list_for_each_entry(link, &tp->files, list) + if (link->file == file) + return link; - return -1; + return NULL; } /* @@ -275,38 +246,23 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) int ret = 0; if (file) { - struct ftrace_event_file **new, **old; - int n = trace_probe_nr_files(tp); - int i, j; + struct event_file_link *link; - old = rcu_dereference_raw(tp->files); - if (n == 0 || trace_probe_file_index(tp, file) < 0) { + link = find_event_file_link(tp, file); + if (!link) { ret = -EINVAL; goto out; } - if (n == 1) { /* Remove the last file */ - tp->flags &= ~TP_FLAG_TRACE; - new = NULL; - } else { - new = kzalloc(n * sizeof(struct ftrace_event_file *), - GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto out; - } - - /* This copy & check loop copies the NULL stopper too */ - for (i = 0, j = 0; j < n && i < n + 1; i++) - if (old[i] != file) - new[j++] = old[i]; - } + list_del_rcu(&link->list); + /* synchronize with kprobe_trace_func/kretprobe_trace_func */ + synchronize_sched(); + kfree(link); - rcu_assign_pointer(tp->files, new); + if (!list_empty(&tp->files)) + goto out; - /* Make sure the probe is done with old files */ - synchronize_sched(); - kfree(old); + tp->flags &= ~TP_FLAG_TRACE; } else tp->flags &= ~TP_FLAG_PROFILE; @@ -871,20 +827,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, static __kprobes void kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) { - /* - * Note: preempt is already disabled around the kprobe handler. - * However, we still need an smp_read_barrier_depends() corresponding - * to smp_wmb() in rcu_assign_pointer() to access the pointer. - */ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); - - if (unlikely(!file)) - return; + struct event_file_link *link; - while (*file) { - __kprobe_trace_func(tp, regs, *file); - file++; - } + list_for_each_entry_rcu(link, &tp->files, list) + __kprobe_trace_func(tp, regs, link->file); } /* Kretprobe handler */ @@ -931,20 +877,10 @@ static __kprobes void kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, struct pt_regs *regs) { - /* - * Note: preempt is already disabled around the kprobe handler. - * However, we still need an smp_read_barrier_depends() corresponding - * to smp_wmb() in rcu_assign_pointer() to access the pointer. - */ - struct ftrace_event_file **file = rcu_dereference_raw(tp->files); - - if (unlikely(!file)) - return; + struct event_file_link *link; - while (*file) { - __kretprobe_trace_func(tp, ri, regs, *file); - file++; - } + list_for_each_entry_rcu(link, &tp->files, list) + __kretprobe_trace_func(tp, ri, regs, link->file); } /* Event entry printers */ -- cgit v1.1 From 10246fa35d4ffdfe472185d4cbf9c2dfd9a9f023 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Jul 2013 15:58:24 -0400 Subject: tracing: Use flag buffer_disabled for irqsoff tracer If the ring buffer is disabled and the irqsoff tracer records a trace it will clear out its buffer and lose the data it had previously recorded. Currently there's a callback when writing to the tracing_of file, but if tracing is disabled via the function tracer trigger, it will not inform the irqsoff tracer to stop recording. By using the "mirror" flag (buffer_disabled) in the trace_array, that keeps track of the status of the trace_array's buffer, it gives the irqsoff tracer a fast way to know if it should record a new trace or not. The flag may be a little behind the real state of the buffer, but it should not affect the trace too much. It's more important for the irqsoff tracer to be fast. Reported-by: Dave Jones Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 101 ++++++++++++++++++++++++++++++------------- kernel/trace/trace_irqsoff.c | 4 +- 2 files changed, 72 insertions(+), 33 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c4c9296..0dc5071 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -226,9 +226,24 @@ cycle_t ftrace_now(int cpu) return ts; } +/** + * tracing_is_enabled - Show if global_trace has been disabled + * + * Shows if the global trace has been enabled or not. It uses the + * mirror flag "buffer_disabled" to be used in fast paths such as for + * the irqsoff tracer. But it may be inaccurate due to races. If you + * need to know the accurate state, use tracing_is_on() which is a little + * slower, but accurate. + */ int tracing_is_enabled(void) { - return tracing_is_on(); + /* + * For quick access (irqsoff uses this in fast path), just + * return the mirror variable of the state of the ring buffer. + * It's a little racy, but we don't really care. + */ + smp_rmb(); + return !global_trace.buffer_disabled; } /* @@ -341,6 +356,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; +void tracer_tracing_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_on(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 0; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_on - enable tracing buffers * @@ -349,15 +381,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | */ void tracing_on(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_on(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 0; + tracer_tracing_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_on); @@ -551,6 +575,23 @@ void tracing_snapshot_alloc(void) EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); #endif /* CONFIG_TRACER_SNAPSHOT */ +void tracer_tracing_off(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + ring_buffer_record_off(tr->trace_buffer.buffer); + /* + * This flag is looked at when buffers haven't been allocated + * yet, or by some tracers (like irqsoff), that just want to + * know if the ring buffer has been disabled, but it can handle + * races of where it gets disabled but we still do a record. + * As the check is in the fast path of the tracers, it is more + * important to be fast than accurate. + */ + tr->buffer_disabled = 1; + /* Make the flag seen by readers */ + smp_wmb(); +} + /** * tracing_off - turn off tracing buffers * @@ -561,15 +602,7 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); */ void tracing_off(void) { - if (global_trace.trace_buffer.buffer) - ring_buffer_record_off(global_trace.trace_buffer.buffer); - /* - * This flag is only looked at when buffers haven't been - * allocated yet. We don't really care about the race - * between setting this flag and actually turning - * on the buffer. - */ - global_trace.buffer_disabled = 1; + tracer_tracing_off(&global_trace); } EXPORT_SYMBOL_GPL(tracing_off); @@ -580,13 +613,24 @@ void disable_trace_on_warning(void) } /** + * tracer_tracing_is_on - show real state of ring buffer enabled + * @tr : the trace array to know if ring buffer is enabled + * + * Shows real state of the ring buffer if it is enabled or not. + */ +int tracer_tracing_is_on(struct trace_array *tr) +{ + if (tr->trace_buffer.buffer) + return ring_buffer_record_is_on(tr->trace_buffer.buffer); + return !tr->buffer_disabled; +} + +/** * tracing_is_on - show state of ring buffers enabled */ int tracing_is_on(void) { - if (global_trace.trace_buffer.buffer) - return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); - return !global_trace.buffer_disabled; + return tracer_tracing_is_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_is_on); @@ -3958,7 +4002,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_enabled() && iter->pos) + if (!tracing_is_on() && iter->pos) break; } @@ -5631,15 +5675,10 @@ rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - struct ring_buffer *buffer = tr->trace_buffer.buffer; char buf[64]; int r; - if (buffer) - r = ring_buffer_record_is_on(buffer); - else - r = 0; - + r = tracer_tracing_is_on(tr); r = sprintf(buf, "%d\n", r); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -5661,11 +5700,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (buffer) { mutex_lock(&trace_types_lock); if (val) { - ring_buffer_record_on(buffer); + tracer_tracing_on(tr); if (tr->current_trace->start) tr->current_trace->start(tr); } else { - ring_buffer_record_off(buffer); + tracer_tracing_off(tr); if (tr->current_trace->stop) tr->current_trace->stop(tr); } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b19d065..2aefbee 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; - if (likely(!tracer_enabled)) + if (!tracer_enabled || !tracing_is_enabled()) return; cpu = raw_smp_processor_id(); @@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) else return; - if (!tracer_enabled) + if (!tracer_enabled || !tracing_is_enabled()) return; data = per_cpu_ptr(tr->trace_buffer.data, cpu); -- cgit v1.1 From cf6735a4b103b801753748531e3658cdc8cafa5e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 20 Jun 2013 19:38:11 +0200 Subject: tracing/kprobes: Don't pass addr=ip to perf_trace_buf_submit() kprobe_perf_func() and kretprobe_perf_func() pass addr=ip to perf_trace_buf_submit() for no reason. This sets perf_sample_data->addr for PERF_SAMPLE_ADDR, we already have perf_sample_data->ip initialized if PERF_SAMPLE_IP. Link: http://lkml.kernel.org/r/20130620173811.GA13161@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 405b5b0..7ed6976 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1098,8 +1098,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) entry->ip = (unsigned long)tp->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, - entry->ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } /* Kretprobe profile handler */ @@ -1132,8 +1131,7 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); - perf_trace_buf_submit(entry, size, rctx, - entry->ret_ip, 1, regs, head, NULL); + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.1 From f1ed7c741fcd0c3d7d318e7c19813d89934b9296 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 27 Jun 2013 22:18:06 -0400 Subject: ftrace: Do not run selftest if command line parameter is set If the kernel command line ftrace filter parameters are set (ftrace_filter or ftrace_notrace), force the function self test to pass, with a warning why it was forced. If the user adds a filter to the kernel command line, it is assumed that they know what they are doing, and the self test should just not run instead of failing (which disables function tracing) or clearing the filter, as that will probably annoy the user. If the user wants the selftest to run, the message will tell them why it did not. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 5 +++++ kernel/trace/trace.h | 1 + kernel/trace/trace_selftest.c | 18 ++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 26e1910..67708f4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3537,8 +3537,12 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; +/* Used by function selftest to not test if filter is set */ +bool ftrace_filter_param __initdata; + static int __init set_ftrace_notrace(char *str) { + ftrace_filter_param = true; strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); return 1; } @@ -3546,6 +3550,7 @@ __setup("ftrace_notrace=", set_ftrace_notrace); static int __init set_ftrace_filter(char *str) { + ftrace_filter_param = true; strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); return 1; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 711ca7d..a88939e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -776,6 +776,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER +extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct task_struct *task) { if (list_empty(&ftrace_pids)) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 2901e3b..a7329b7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -640,13 +640,20 @@ out: * Enable ftrace, sleep 1/10 second, and then read the trace * buffer to see if all is in order. */ -int +__init int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) { int save_ftrace_enabled = ftrace_enabled; unsigned long count; int ret; +#ifdef CONFIG_DYNAMIC_FTRACE + if (ftrace_filter_param) { + printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); + return 0; + } +#endif + /* make sure msleep has been recorded */ msleep(1); @@ -727,13 +734,20 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) * Pretty much the same than for the function tracer from which the selftest * has been borrowed. */ -int +__init int trace_selftest_startup_function_graph(struct tracer *trace, struct trace_array *tr) { int ret; unsigned long count; +#ifdef CONFIG_DYNAMIC_FTRACE + if (ftrace_filter_param) { + printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); + return 0; + } +#endif + /* * Simulate the init() callback but we attach a watchdog callback * to detect and recover from possible hangs -- cgit v1.1 From 2d71619c59fac95a5415a326162fa046161b938c Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Mon, 1 Jul 2013 15:31:24 -0700 Subject: tracing: Make trace_marker use the correct per-instance buffer The trace_marker file was present for each new instance created, but it added the trace mark to the global trace buffer instead of to the instance's buffer. Link: http://lkml.kernel.org/r/1372717885-4543-2-git-send-email-azl@google.com Cc: David Sharp Cc: Vaibhav Nagarnaik Cc: Alexander Z Lam Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Alexander Z Lam Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0dc5071..e04e711 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4391,6 +4391,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { unsigned long addr = (unsigned long)ubuf; + struct trace_array *tr = filp->private_data; struct ring_buffer_event *event; struct ring_buffer *buffer; struct print_entry *entry; @@ -4450,7 +4451,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* possible \n added */ - buffer = global_trace.trace_buffer.buffer; + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, irq_flags, preempt_count()); if (!event) { -- cgit v1.1 From a82274151af2b075163e3c42c828529dee311487 Mon Sep 17 00:00:00 2001 From: Alexander Z Lam Date: Mon, 1 Jul 2013 19:37:54 -0700 Subject: tracing: Protect ftrace_trace_arrays list in trace_events.c There are multiple places where the ftrace_trace_arrays list is accessed in trace_events.c without the trace_types_lock held. Link: http://lkml.kernel.org/r/1372732674-22726-1-git-send-email-azl@google.com Cc: Vaibhav Nagarnaik Cc: David Sharp Cc: Alexander Z Lam Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Alexander Z Lam Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 2 ++ kernel/trace/trace_events.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e04e711..e36da7f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -266,7 +266,7 @@ static struct tracer *trace_types __read_mostly; /* * trace_types_lock is used to protect the trace_types list. */ -static DEFINE_MUTEX(trace_types_lock); +DEFINE_MUTEX(trace_types_lock); /* * serialize the access of the ring buffer diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a88939e..2c3cba59 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -224,6 +224,8 @@ enum { extern struct list_head ftrace_trace_arrays; +extern struct mutex trace_types_lock; + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5892470..35c6f23 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1008,6 +1008,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) int ret; /* Make sure the system still exists */ + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { @@ -1023,6 +1024,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) } exit_loop: mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); if (!system) return -ENODEV; @@ -1617,6 +1619,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call, int trace_add_event_call(struct ftrace_event_call *call) { int ret; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); ret = __register_event(call, NULL); @@ -1624,11 +1627,13 @@ int trace_add_event_call(struct ftrace_event_call *call) __add_event_to_tracers(call, NULL); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return ret; } /* - * Must be called under locking both of event_mutex and trace_event_sem. + * Must be called under locking of trace_types_lock, event_mutex and + * trace_event_sem. */ static void __trace_remove_event_call(struct ftrace_event_call *call) { @@ -1640,11 +1645,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) /* Remove an event_call */ void trace_remove_event_call(struct ftrace_event_call *call) { + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); down_write(&trace_event_sem); __trace_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); } #define for_each_event(event, start, end) \ @@ -1788,6 +1795,7 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; + mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); switch (val) { case MODULE_STATE_COMING: @@ -1798,6 +1806,7 @@ static int trace_module_notify(struct notifier_block *self, break; } mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); return 0; } -- cgit v1.1 From ff451961a8b2a17667a7bfa39c86fb9b351445db Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Jul 2013 22:50:29 -0400 Subject: tracing: Add trace_array_get/put() to handle instance refs better Commit a695cb58162 "tracing: Prevent deleting instances when they are being read" tried to fix a race between deleting a trace instance and reading contents of a trace file. But it wasn't good enough. The following could crash the kernel: # cd /sys/kernel/debug/tracing/instances # ( while :; do mkdir foo; rmdir foo; done ) & # ( while :; do cat foo/trace &> /dev/null; done ) & Luckily this can only be done by root user, but it should be fixed regardless. The problem is that a delete of the file can happen after the reader starts to open the file but before it grabs the trace_types_mutex. The solution is to validate the trace array before using it. If the trace array does not exist in the list of trace arrays, then it returns -ENODEV. There's a possibility that a trace_array could be deleted and a new one created and the open would open its file instead. But that is very minor as it will just return the data of the new trace array, it may confuse the user but it will not crash the system. As this can only be done by root anyway, the race will only occur if root is deleting what its trying to read at the same time. Cc: stable@vger.kernel.org # 3.10 Reported-by: Alexander Lam Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 83 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e36da7f..6be9df1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -204,6 +204,37 @@ static struct trace_array global_trace; LIST_HEAD(ftrace_trace_arrays); +int trace_array_get(struct trace_array *this_tr) +{ + struct trace_array *tr; + int ret = -ENODEV; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr == this_tr) { + tr->ref++; + ret = 0; + break; + } + } + mutex_unlock(&trace_types_lock); + + return ret; +} + +static void __trace_array_put(struct trace_array *this_tr) +{ + WARN_ON(!this_tr->ref); + this_tr->ref--; +} + +void trace_array_put(struct trace_array *this_tr) +{ + mutex_lock(&trace_types_lock); + __trace_array_put(this_tr); + mutex_unlock(&trace_types_lock); +} + int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event) @@ -2831,10 +2862,9 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file, bool snapshot) +__tracing_open(struct trace_array *tr, struct trace_cpu *tc, + struct inode *inode, struct file *file, bool snapshot) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; struct trace_iterator *iter; int cpu; @@ -2913,8 +2943,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) tracing_iter_reset(iter, cpu); } - tr->ref++; - mutex_unlock(&trace_types_lock); return iter; @@ -2944,17 +2972,20 @@ static int tracing_release(struct inode *inode, struct file *file) struct trace_array *tr; int cpu; - if (!(file->f_mode & FMODE_READ)) + /* Writes do not use seq_file, need to grab tr from inode */ + if (!(file->f_mode & FMODE_READ)) { + struct trace_cpu *tc = inode->i_private; + + trace_array_put(tc->tr); return 0; + } iter = m->private; tr = iter->tr; + trace_array_put(tr); mutex_lock(&trace_types_lock); - WARN_ON(!tr->ref); - tr->ref--; - for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); @@ -2973,20 +3004,23 @@ static int tracing_release(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter->buffer_iter); seq_release_private(inode, file); + return 0; } static int tracing_open(struct inode *inode, struct file *file) { + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - if (tc->cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->trace_buffer); else @@ -2994,12 +3028,16 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, false); + iter = __tracing_open(tr, tc, inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; } + + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -4575,12 +4613,16 @@ struct ftrace_buffer_info { static int tracing_snapshot_open(struct inode *inode, struct file *file) { struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; struct trace_iterator *iter; struct seq_file *m; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file, true); + iter = __tracing_open(tr, tc, inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { @@ -4593,13 +4635,16 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) kfree(m); return -ENOMEM; } - iter->tr = tc->tr; + iter->tr = tr; iter->trace_buffer = &tc->tr->max_buffer; iter->cpu_file = tc->cpu; m->private = iter; file->private_data = m; } + if (ret < 0) + trace_array_put(tr); + return ret; } @@ -4680,9 +4725,12 @@ out: static int tracing_snapshot_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; + int ret; + + ret = tracing_release(inode, file); if (file->f_mode & FMODE_READ) - return tracing_release(inode, file); + return ret; /* If write only, the seq_file is just a stub */ if (m) @@ -4927,8 +4975,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); - WARN_ON(!iter->tr->ref); - iter->tr->ref--; + __trace_array_put(iter->tr); if (info->spare) ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); -- cgit v1.1 From 7b85af63034818e43aee6c1d7bf1c7c6796a9073 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Jul 2013 23:34:22 -0400 Subject: tracing: Get trace_array ref counts when accessing trace files When a trace file is opened that may access a trace array, it must increment its ref count to prevent it from being deleted. Cc: stable@vger.kernel.org # 3.10 Reported-by: Alexander Lam Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 112 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6be9df1..6d9bd9b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2965,6 +2965,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } +/* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +int tracing_open_generic_tr(struct inode *inode, struct file *filp) +{ + struct trace_array *tr = inode->i_private; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + +int tracing_open_generic_tc(struct inode *inode, struct file *filp) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + if (tracing_disabled) + return -ENODEV; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + filp->private_data = inode->i_private; + + return 0; + +} + static int tracing_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; @@ -3008,6 +3045,32 @@ static int tracing_release(struct inode *inode, struct file *file) return 0; } +static int tracing_release_generic_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + return 0; +} + +static int tracing_release_generic_tc(struct inode *inode, struct file *file) +{ + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; + + trace_array_put(tr); + return 0; +} + +static int tracing_single_release_tr(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return single_release(inode, file); +} + static int tracing_open(struct inode *inode, struct file *file) { struct trace_cpu *tc = inode->i_private; @@ -3394,9 +3457,14 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static int tracing_trace_options_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + return single_open(file, tracing_trace_options_show, inode->i_private); } @@ -3404,7 +3472,7 @@ static const struct file_operations tracing_iter_fops = { .open = tracing_trace_options_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_trace_options_write, }; @@ -3892,6 +3960,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + mutex_lock(&trace_types_lock); /* create a buffer to store the information to pass to userspace */ @@ -3944,6 +4015,7 @@ out: fail: kfree(iter->trace); kfree(iter); + __trace_array_put(tr); mutex_unlock(&trace_types_lock); return ret; } @@ -3951,6 +4023,8 @@ fail: static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + struct trace_cpu *tc = inode->i_private; + struct trace_array *tr = tc->tr; mutex_lock(&trace_types_lock); @@ -3964,6 +4038,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) kfree(iter->trace); kfree(iter); + trace_array_put(tr); + return 0; } @@ -4421,6 +4497,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); + trace_array_put(tr); + return 0; } @@ -4597,10 +4675,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, static int tracing_clock_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; + int ret; + if (tracing_disabled) return -ENODEV; - return single_open(file, tracing_clock_show, inode->i_private); + if (trace_array_get(tr)) + return -ENODEV; + + ret = single_open(file, tracing_clock_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } struct ftrace_buffer_info { @@ -4796,34 +4884,38 @@ static const struct file_operations tracing_pipe_fops = { }; static const struct file_operations tracing_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tc, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tc, }; static const struct file_operations tracing_total_entries_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_total_entries_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_free_buffer_fops = { + .open = tracing_open_generic_tr, .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .write = tracing_mark_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_tr, .write = tracing_clock_write, }; @@ -4851,13 +4943,19 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) struct trace_cpu *tc = inode->i_private; struct trace_array *tr = tc->tr; struct ftrace_buffer_info *info; + int ret; if (tracing_disabled) return -ENODEV; + if (trace_array_get(tr) < 0) + return -ENODEV; + info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) + if (!info) { + trace_array_put(tr); return -ENOMEM; + } mutex_lock(&trace_types_lock); @@ -4875,7 +4973,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_unlock(&trace_types_lock); - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret < 0) + trace_array_put(tr); + + return ret; } static unsigned int @@ -5765,9 +5867,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, } static const struct file_operations rb_simple_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = rb_simple_read, .write = rb_simple_write, + .release = tracing_release_generic_tr, .llseek = default_llseek, }; -- cgit v1.1 From 8e2e2fa47129532a30cff6c25a47078dc97d9260 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Jul 2013 15:30:53 -0400 Subject: tracing: Add trace_array_get/put() to event handling Commit a695cb58162 "tracing: Prevent deleting instances when they are being read" tried to fix a race between deleting a trace instance and reading contents of a trace file. But it wasn't good enough. The following could crash the kernel: # cd /sys/kernel/debug/tracing/instances # ( while :; do mkdir foo; rmdir foo; done ) & # ( while :; do echo 1 > foo/events/sched/sched_switch 2> /dev/null; done ) & Luckily this can only be done by root user, but it should be fixed regardless. The problem is that a delete of the file can happen after the write to the event is opened, but before the enabling happens. The solution is to make sure the trace_array is available before succeeding in opening for write, and incerment the ref counter while opened. Now the instance can be deleted when the events are writing to the buffer, but the deletion of the instance will disable all events before the instance is actually deleted. Cc: stable@vger.kernel.org # 3.10 Reported-by: Alexander Lam Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 3 +++ kernel/trace/trace_events.c | 55 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2c3cba59..c7fbf93 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -226,6 +226,9 @@ extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; +extern int trace_array_get(struct trace_array *tr); +extern void trace_array_put(struct trace_array *tr); + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 35c6f23..920e08f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -410,6 +410,35 @@ static void put_system(struct ftrace_subsystem_dir *dir) } /* + * Open and update trace_array ref count. + * Must have the current trace_array passed to it. + */ +static int tracing_open_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + ret = tracing_open_generic(inode, filp); + if (ret < 0) + trace_array_put(tr); + return ret; +} + +static int tracing_release_generic_file(struct inode *inode, struct file *filp) +{ + struct ftrace_event_file *file = inode->i_private; + struct trace_array *tr = file->tr; + + trace_array_put(tr); + + return 0; +} + +/* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, @@ -1032,9 +1061,17 @@ static int subsystem_open(struct inode *inode, struct file *filp) /* Some versions of gcc think dir can be uninitialized here */ WARN_ON(!dir); + /* Still need to increment the ref count of the system */ + if (trace_array_get(tr) < 0) { + put_system(dir); + return -ENODEV; + } + ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); put_system(dir); + } return ret; } @@ -1045,16 +1082,23 @@ static int system_tr_open(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; int ret; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); - if (!dir) + if (!dir) { + trace_array_put(tr); return -ENOMEM; + } dir->tr = tr; ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0) { + trace_array_put(tr); kfree(dir); + } filp->private_data = dir; @@ -1065,6 +1109,8 @@ static int subsystem_release(struct inode *inode, struct file *file) { struct ftrace_subsystem_dir *dir = file->private_data; + trace_array_put(dir->tr); + /* * If dir->subsystem is NULL, then this is a temporary * descriptor that was made for a trace_array to enable @@ -1192,9 +1238,10 @@ static const struct file_operations ftrace_set_event_fops = { }; static const struct file_operations ftrace_enable_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_file, .read = event_enable_read, .write = event_enable_write, + .release = tracing_release_generic_file, .llseek = default_llseek, }; -- cgit v1.1 From 2a6c24afab70dbcfee49f4c76e1511eec1a3298b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Jul 2013 14:48:23 -0400 Subject: tracing: Fix race between deleting buffer and setting events While analyzing the code, I discovered that there's a potential race between deleting a trace instance and setting events. There are a few races that can occur if events are being traced as the buffer is being deleted. Mostly the problem comes with freeing the descriptor used by the trace event callback. To prevent problems like this, the events are disabled before the buffer is deleted. The problem with the current solution is that the event_mutex is let go between disabling the events and freeing the files, which means that the events could be enabled again while the freeing takes place. Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 920e08f..7d85429 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -441,14 +441,14 @@ static int tracing_release_generic_file(struct inode *inode, struct file *filp) /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ -static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, - const char *sub, const char *event, int set) +static int +__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) { struct ftrace_event_file *file; struct ftrace_event_call *call; int ret = -EINVAL; - mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; @@ -474,6 +474,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, ret = 0; } + + return ret; +} + +static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, + const char *sub, const char *event, int set) +{ + int ret; + + mutex_lock(&event_mutex); + ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); mutex_unlock(&event_mutex); return ret; @@ -2408,11 +2419,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) int event_trace_del_tracer(struct trace_array *tr) { - /* Disable any running events */ - __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); - mutex_lock(&event_mutex); + /* Disable any running events */ + __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); + down_write(&trace_event_sem); __trace_remove_event_dirs(tr); debugfs_remove_recursive(tr->event_dir); -- cgit v1.1 From fa44063f9ef163c3a4c8d8c0465bb8a056b42035 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 13 Jun 2013 14:21:51 +0800 Subject: uprobes: Fix return value in error handling path When wrong argument is passed into uprobe_events it does not return an error: [root@jovi tracing]# echo 'p:myprobe /bin/bash' > uprobe_events [root@jovi tracing]# The proper response is: [root@jovi tracing]# echo 'p:myprobe /bin/bash' > uprobe_events -bash: echo: write error: Invalid argument Link: http://lkml.kernel.org/r/51B964FF.5000106@huawei.com Cc: Frederic Weisbecker Cc: Cc: stable@vger.kernel.org # 3.5+ Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace_uprobe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 32494fb0..d5d0cd3 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv) return -EINVAL; } arg = strchr(argv[1], ':'); - if (!arg) + if (!arg) { + ret = -EINVAL; goto fail_address_parse; + } *arg++ = '\0'; filename = argv[1]; -- cgit v1.1 From 11034ae9c20f4057a6127fc965906417978e69b2 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Wed, 10 Apr 2013 11:26:23 +0800 Subject: tracing: Fix irqs-off tag display in syscall tracing All syscall tracing irqs-off tags are wrong, the syscall enter entry doesn't disable irqs. [root@jovi tracing]#echo "syscalls:sys_enter_open" > set_event [root@jovi tracing]# cat trace # tracer: nop # # entries-in-buffer/entries-written: 13/13 #P:2 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | irqbalance-513 [000] d... 56115.496766: sys_open(filename: 804e1a6, flags: 0, mode: 1b6) irqbalance-513 [000] d... 56115.497008: sys_open(filename: 804e1bb, flags: 0, mode: 1b6) sendmail-771 [000] d... 56115.827982: sys_open(filename: b770e6d1, flags: 0, mode: 1b6) The reason is syscall tracing doesn't record irq_flags into buffer. The proper display is: [root@jovi tracing]#echo "syscalls:sys_enter_open" > set_event [root@jovi tracing]# cat trace # tracer: nop # # entries-in-buffer/entries-written: 14/14 #P:2 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | irqbalance-514 [001] .... 46.213921: sys_open(filename: 804e1a6, flags: 0, mode: 1b6) irqbalance-514 [001] .... 46.214160: sys_open(filename: 804e1bb, flags: 0, mode: 1b6) <...>-920 [001] .... 47.307260: sys_open(filename: 4e82a0c5, flags: 80000, mode: 0) Link: http://lkml.kernel.org/r/1365564393-10972-3-git-send-email-jovi.zhangwei@huawei.com Cc: stable@vger.kernel.org # 2.6.35 Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace_syscalls.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8f2ac73..322e164 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; int size; @@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, 0, 0); + sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) return; @@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) if (!filter_current_check_discard(buffer, sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct ring_buffer *buffer; + unsigned long irq_flags; + int pc; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; + local_save_flags(irq_flags); + pc = preempt_count(); + buffer = tr->trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->exit_event->event.type, sizeof(*entry), 0, 0); + sys_data->exit_event->event.type, sizeof(*entry), + irq_flags, pc); if (!event) return; @@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!filter_current_check_discard(buffer, sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } static int reg_event_syscall_enter(struct ftrace_event_file *file, -- cgit v1.1 From 5280bcef91e706770cc1706eb97353e3513322b9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Jul 2013 19:59:57 -0400 Subject: tracing: Make tracer_tracing_{off,on,is_on}() static I have patches that will use tracer_tracing_on/off/is_on() in other files, but as they are not ready to be merged yet, and Fengguang Wu's sparse scripts pointed out that these functions were not declared anywhere, I'll make them static for now. When these functions are required to be used elsewhere, I'll remove the static then. Reported-by: kbuild test robot Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6d9bd9b..48aceb8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -387,7 +387,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; -void tracer_tracing_on(struct trace_array *tr) +static void tracer_tracing_on(struct trace_array *tr) { if (tr->trace_buffer.buffer) ring_buffer_record_on(tr->trace_buffer.buffer); @@ -606,7 +606,7 @@ void tracing_snapshot_alloc(void) EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); #endif /* CONFIG_TRACER_SNAPSHOT */ -void tracer_tracing_off(struct trace_array *tr) +static void tracer_tracing_off(struct trace_array *tr) { if (tr->trace_buffer.buffer) ring_buffer_record_off(tr->trace_buffer.buffer); @@ -649,7 +649,7 @@ void disable_trace_on_warning(void) * * Shows real state of the ring buffer if it is enabled or not. */ -int tracer_tracing_is_on(struct trace_array *tr) +static int tracer_tracing_is_on(struct trace_array *tr) { if (tr->trace_buffer.buffer) return ring_buffer_record_is_on(tr->trace_buffer.buffer); -- cgit v1.1 From 4480361c3c592fcbce3ef74e030719f0715e3a7e Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Wed, 10 Apr 2013 11:26:28 +0800 Subject: tracing: Remove TRACE_EVENT_TYPE enum definition TRACE_EVENT_TYPE enum is not used at present, remove it. Link: http://lkml.kernel.org/r/1365564393-10972-8-git-send-email-jovi.zhangwei@huawei.com Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c7fbf93..1cbba04 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -907,12 +907,6 @@ static inline void trace_branch_disable(void) /* set ring buffers to default size if not already done so */ int tracing_update_buffers(void); -/* trace event type bit fields, not numeric */ -enum { - TRACE_EVENT_TYPE_PRINTF = 1, - TRACE_EVENT_TYPE_RAW = 2, -}; - struct ftrace_event_field { struct list_head link; const char *name; -- cgit v1.1 From 8de1eb02778b64f8b292db531cf39a429f84315f Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Wed, 10 Apr 2013 11:26:30 +0800 Subject: tracing: Remove ftrace() function The only caller of function ftrace(...) was removed a long time ago, so remove the function body as well. Link: http://lkml.kernel.org/r/1365564393-10972-10-git-send-email-jovi.zhangwei@huawei.com Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 9 --------- kernel/trace/trace.h | 5 ----- 2 files changed, 14 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 48aceb8..f6fed9e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1637,15 +1637,6 @@ trace_function(struct trace_array *tr, __buffer_unlock_commit(buffer, event); } -void -ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags, - int pc) -{ - if (likely(!atomic_read(&data->disabled))) - trace_function(tr, ip, parent_ip, flags, pc); -} - #ifdef CONFIG_STACKTRACE #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1cbba04..a4ed382 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -559,11 +559,6 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu); void poll_wait_pipe(struct trace_iterator *iter); -void ftrace(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long ip, - unsigned long parent_ip, - unsigned long flags, int pc); void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, -- cgit v1.1 From dcc302232c1f9b3ca16f6b8ee190eb0b1a8a0da3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 2 Jul 2013 20:30:52 -0400 Subject: tracing: Make tracing_open_generic_{tr,tc}() static I have patches that will use tracing_open_generic_tr/tc() in other files, but as they are not ready to be merged yet, and Fengguang Wu's sparse scripts pointed out that these functions were not declared anywhere, I'll make them static for now. When these functions are required to be used elsewhere, I'll remove the static then. Reported-by: kbuild test robot Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f6fed9e..dc473b5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2960,7 +2960,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp) * Open and update trace_array ref count. * Must have the current trace_array passed to it. */ -int tracing_open_generic_tr(struct inode *inode, struct file *filp) +static int tracing_open_generic_tr(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; @@ -2976,7 +2976,7 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) } -int tracing_open_generic_tc(struct inode *inode, struct file *filp) +static int tracing_open_generic_tc(struct inode *inode, struct file *filp) { struct trace_cpu *tc = inode->i_private; struct trace_array *tr = tc->tr; -- cgit v1.1