diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 4 | ||||
-rw-r--r-- | kernel/extable.c | 5 | ||||
-rw-r--r-- | kernel/fork.c | 11 | ||||
-rw-r--r-- | kernel/lockdep.c | 1 | ||||
-rw-r--r-- | kernel/module.c | 2 | ||||
-rw-r--r-- | kernel/sched.c | 2 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 42 | ||||
-rw-r--r-- | kernel/trace/Makefile | 4 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 663 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 311 | ||||
-rw-r--r-- | kernel/trace/trace.c | 148 | ||||
-rw-r--r-- | kernel/trace/trace.h | 103 | ||||
-rw-r--r-- | kernel/trace/trace_branch.c | 5 | ||||
-rw-r--r-- | kernel/trace/trace_bts.c | 276 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 611 | ||||
-rw-r--r-- | kernel/trace/trace_functions_return.c | 98 | ||||
-rw-r--r-- | kernel/trace/trace_mmiotrace.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_power.c | 179 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 13 |
19 files changed, 2132 insertions, 348 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 010ccb3..6a212b8 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -20,10 +20,6 @@ CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg endif -ifdef CONFIG_FUNCTION_RET_TRACER -CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address() -CFLAGS_REMOVE_module.o = -pg # For __module_text_address() -endif obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o diff --git a/kernel/extable.c b/kernel/extable.c index adf0cc9..e136ed8 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -17,6 +17,7 @@ */ #include <linux/module.h> #include <linux/init.h> +#include <linux/ftrace.h> #include <asm/uaccess.h> #include <asm/sections.h> @@ -40,7 +41,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -int core_kernel_text(unsigned long addr) +__notrace_funcgraph int core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) @@ -53,7 +54,7 @@ int core_kernel_text(unsigned long addr) return 0; } -int __kernel_text_address(unsigned long addr) +__notrace_funcgraph int __kernel_text_address(unsigned long addr) { if (core_kernel_text(addr)) return 1; diff --git a/kernel/fork.c b/kernel/fork.c index d6e1a32..7407ab3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -140,7 +140,7 @@ void free_task(struct task_struct *tsk) prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); - ftrace_retfunc_exit_task(tsk); + ftrace_graph_exit_task(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -1137,6 +1137,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, } } + ftrace_graph_init_task(p); + p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) @@ -1145,7 +1147,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) - goto bad_fork_free_pid; + goto bad_fork_free_graph; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; @@ -1238,7 +1240,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_free_pid; + goto bad_fork_free_graph; } if (clone_flags & CLONE_THREAD) { @@ -1271,11 +1273,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); - ftrace_retfunc_init_task(p); proc_fork_connector(p); cgroup_post_fork(p); return p; +bad_fork_free_graph: + ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e4bdda8..c4c7df2 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -25,6 +25,7 @@ * Thanks to Arjan van de Ven for coming up with the initial idea of * mapping lock dependencies runtime. */ +#define DISABLE_BRANCH_PROFILING #include <linux/mutex.h> #include <linux/sched.h> #include <linux/delay.h> diff --git a/kernel/module.c b/kernel/module.c index 89bcf7c..dd2a541 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2704,7 +2704,7 @@ int is_module_address(unsigned long addr) /* Is this a valid kernel address? */ -struct module *__module_text_address(unsigned long addr) +__notrace_funcgraph struct module *__module_text_address(unsigned long addr) { struct module *mod; diff --git a/kernel/sched.c b/kernel/sched.c index 8050a61..4ed9f58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5893,7 +5893,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; - ftrace_retfunc_init_task(idle); + ftrace_graph_init_task(idle); } /* diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9cbf776..bde6f03 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,7 +12,7 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool -config HAVE_FUNCTION_RET_TRACER +config HAVE_FUNCTION_GRAPH_TRACER bool config HAVE_FUNCTION_TRACE_MCOUNT_TEST @@ -28,6 +28,9 @@ config HAVE_DYNAMIC_FTRACE config HAVE_FTRACE_MCOUNT_RECORD bool +config HAVE_HW_BRANCH_TRACER + bool + config TRACER_MAX_TRACE bool @@ -60,15 +63,19 @@ config FUNCTION_TRACER (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. -config FUNCTION_RET_TRACER - bool "Kernel Function return Tracer" - depends on HAVE_FUNCTION_RET_TRACER +config FUNCTION_GRAPH_TRACER + bool "Kernel Function Graph Tracer" + depends on HAVE_FUNCTION_GRAPH_TRACER depends on FUNCTION_TRACER + default y help - Enable the kernel to trace a function at its return. - It's first purpose is to trace the duration of functions. - This is done by setting the current return address on the thread - info structure of the current task. + Enable the kernel to trace a function at both its return + and its entry. + It's first purpose is to trace the duration of functions and + draw a call graph for each thread with some informations like + the return value. + This is done by setting the current return address on the current + task structure into a stack of calls. config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" @@ -214,6 +221,17 @@ config BRANCH_TRACER Say N if unsure. +config POWER_TRACER + bool "Trace power consumption behavior" + depends on DEBUG_KERNEL + depends on X86 + select TRACING + help + This tracer helps developers to analyze and optimize the kernels + power management decisions, specifically the C-state and P-state + behavior. + + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER @@ -233,6 +251,14 @@ config STACK_TRACER Say N if unsure. +config BTS_TRACER + depends on HAVE_HW_BRANCH_TRACER + bool "Trace branches" + select TRACING + help + This tracer records all branches on the system in a circular + buffer giving access to the last N branches for each cpu. + config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 1a8c925..62dc561 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -29,7 +29,9 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o -obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o +obj-$(CONFIG_BTS_TRACER) += trace_bts.o +obj-$(CONFIG_POWER_TRACER) += trace_power.o libftrace-y := ftrace.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 53042f1..a12f80e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -47,12 +47,13 @@ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; +/* set when tracing only a pid */ +struct pid *ftrace_pid_trace; +static struct pid * const ftrace_swapper_pid = &init_struct_pid; + /* Quick disabling of function tracer. */ int function_trace_stop; -/* By default, current tracing type is normal tracing. */ -enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER; - /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -61,6 +62,7 @@ static int ftrace_disabled __read_mostly; static DEFINE_SPINLOCK(ftrace_lock); static DEFINE_MUTEX(ftrace_sysctl_lock); +static DEFINE_MUTEX(ftrace_start_lock); static struct ftrace_ops ftrace_list_end __read_mostly = { @@ -70,6 +72,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly = static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; +ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { @@ -86,6 +89,21 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) }; } +static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) +{ + if (!test_tsk_trace_trace(current)) + return; + + ftrace_pid_function(ip, parent_ip); +} + +static void set_ftrace_pid_function(ftrace_func_t func) +{ + /* do not set ftrace_pid_function to itself! */ + if (func != ftrace_pid_func) + ftrace_pid_function = func; +} + /** * clear_ftrace_function - reset the ftrace function * @@ -96,6 +114,7 @@ void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; __ftrace_trace_function = ftrace_stub; + ftrace_pid_function = ftrace_stub; } #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST @@ -128,20 +147,26 @@ static int __register_ftrace_function(struct ftrace_ops *ops) ftrace_list = ops; if (ftrace_enabled) { + ftrace_func_t func; + + if (ops->next == &ftrace_list_end) + func = ops->func; + else + func = ftrace_list_func; + + if (ftrace_pid_trace) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } + /* * For one func, simply call it directly. * For more than one func, call the chain. */ #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - if (ops->next == &ftrace_list_end) - ftrace_trace_function = ops->func; - else - ftrace_trace_function = ftrace_list_func; + ftrace_trace_function = func; #else - if (ops->next == &ftrace_list_end) - __ftrace_trace_function = ops->func; - else - __ftrace_trace_function = ftrace_list_func; + __ftrace_trace_function = func; ftrace_trace_function = ftrace_test_stop_func; #endif } @@ -182,8 +207,19 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) { /* If we only have one func left, then call that directly */ - if (ftrace_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_list->func; + if (ftrace_list->next == &ftrace_list_end) { + ftrace_func_t func = ftrace_list->func; + + if (ftrace_pid_trace) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + ftrace_trace_function = func; +#else + __ftrace_trace_function = func; +#endif + } } out: @@ -192,6 +228,36 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) return ret; } +static void ftrace_update_pid_func(void) +{ + ftrace_func_t func; + + /* should not be called from interrupt context */ + spin_lock(&ftrace_lock); + + if (ftrace_trace_function == ftrace_stub) + goto out; + + func = ftrace_trace_function; + + if (ftrace_pid_trace) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } else { + if (func == ftrace_pid_func) + func = ftrace_pid_function; + } + +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + ftrace_trace_function = func; +#else + __ftrace_trace_function = func; +#endif + + out: + spin_unlock(&ftrace_lock); +} + #ifdef CONFIG_DYNAMIC_FTRACE #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD @@ -211,6 +277,8 @@ enum { FTRACE_UPDATE_TRACE_FUNC = (1 << 2), FTRACE_ENABLE_MCOUNT = (1 << 3), FTRACE_DISABLE_MCOUNT = (1 << 4), + FTRACE_START_FUNC_RET = (1 << 5), + FTRACE_STOP_FUNC_RET = (1 << 6), }; static int ftrace_filtered; @@ -395,14 +463,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ip, fl; unsigned long ftrace_addr; -#ifdef CONFIG_FUNCTION_RET_TRACER - if (ftrace_tracing_type == FTRACE_TYPE_ENTER) - ftrace_addr = (unsigned long)ftrace_caller; - else - ftrace_addr = (unsigned long)ftrace_return_caller; -#else ftrace_addr = (unsigned long)ftrace_caller; -#endif ip = rec->ip; @@ -535,6 +596,11 @@ static int __ftrace_modify_code(void *data) if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); + if (*command & FTRACE_START_FUNC_RET) + ftrace_enable_ftrace_graph_caller(); + else if (*command & FTRACE_STOP_FUNC_RET) + ftrace_disable_ftrace_graph_caller(); + return 0; } @@ -545,12 +611,22 @@ static void ftrace_run_update_code(int command) static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; -static DEFINE_MUTEX(ftrace_start_lock); -static void ftrace_startup(void) +static void ftrace_startup_enable(int command) { - int command = 0; + if (saved_ftrace_func != ftrace_trace_function) { + saved_ftrace_func = ftrace_trace_function; + command |= FTRACE_UPDATE_TRACE_FUNC; + } + if (!command || !ftrace_enabled) + return; + + ftrace_run_update_code(command); +} + +static void ftrace_startup(int command) +{ if (unlikely(ftrace_disabled)) return; @@ -558,23 +634,13 @@ static void ftrace_startup(void) ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; - if (saved_ftrace_func != ftrace_trace_function) { - saved_ftrace_func = ftrace_trace_function; - command |= FTRACE_UPDATE_TRACE_FUNC; - } + ftrace_startup_enable(command); - if (!command || !ftrace_enabled) - goto out; - - ftrace_run_update_code(command); - out: mutex_unlock(&ftrace_start_lock); } -static void ftrace_shutdown(void) +static void ftrace_shutdown(int command) { - int command = 0; - if (unlikely(ftrace_disabled)) return; @@ -719,7 +785,6 @@ enum { #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ struct ftrace_iterator { - loff_t pos; struct ftrace_page *pg; unsigned idx; unsigned flags; @@ -744,6 +809,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) iter->pg = iter->pg->next; iter->idx = 0; goto retry; + } else { + iter->idx = -1; } } else { rec = &iter->pg->records[iter->idx++]; @@ -766,8 +833,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } spin_unlock(&ftrace_lock); - iter->pos = *pos; - return rec; } @@ -775,13 +840,15 @@ static void *t_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; void *p = NULL; - loff_t l = -1; - if (*pos > iter->pos) - *pos = iter->pos; + if (*pos > 0) { + if (iter->idx < 0) + return p; + (*pos)--; + iter->idx--; + } - l = *pos; - p = t_next(m, p, &l); + p = t_next(m, p, pos); return p; } @@ -792,21 +859,15 @@ static void t_stop(struct seq_file *m, void *p) static int t_show(struct seq_file *m, void *v) { - struct ftrace_iterator *iter = m->private; struct dyn_ftrace *rec = v; char str[KSYM_SYMBOL_LEN]; - int ret = 0; if (!rec) return 0; kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); - ret = seq_printf(m, "%s\n", str); - if (ret < 0) { - iter->pos--; - iter->idx--; - } + seq_printf(m, "%s\n", str); return 0; } @@ -832,7 +893,6 @@ ftrace_avail_open(struct inode *inode, struct file *file) return -ENOMEM; iter->pg = ftrace_pages_start; - iter->pos = 0; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -919,7 +979,6 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; - iter->pos = 0; iter->flags = enable ? FTRACE_ITER_FILTER : FTRACE_ITER_NOTRACE; @@ -1262,12 +1321,233 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; -static __init int ftrace_init_debugfs(void) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +static DEFINE_MUTEX(graph_lock); + +int ftrace_graph_count; +unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; + +static void * +g_next(struct seq_file *m, void *v, loff_t *pos) { - struct dentry *d_tracer; - struct dentry *entry; + unsigned long *array = m->private; + int index = *pos; - d_tracer = tracing_init_dentry(); + (*pos)++; + + if (index >= ftrace_graph_count) + return NULL; + + return &array[index]; +} + +static void *g_start(struct seq_file *m, loff_t *pos) +{ + void *p = NULL; + + mutex_lock(&graph_lock); + + p = g_next(m, p, pos); + + return p; +} + +static void g_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&graph_lock); +} + +static int g_show(struct seq_file *m, void *v) +{ + unsigned long *ptr = v; + char str[KSYM_SYMBOL_LEN]; + + if (!ptr) + return 0; + + kallsyms_lookup(*ptr, NULL, NULL, NULL, str); + + seq_printf(m, "%s\n", str); + + return 0; +} + +static struct seq_operations ftrace_graph_seq_ops = { + .start = g_start, + .next = g_next, + .stop = g_stop, + .show = g_show, +}; + +static int +ftrace_graph_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + mutex_lock(&graph_lock); + if ((file->f_mode & FMODE_WRITE) && + !(file->f_flags & O_APPEND)) { + ftrace_graph_count = 0; + memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); + } + + if (file->f_mode & FMODE_READ) { + ret = seq_open(file, &ftrace_graph_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = ftrace_graph_funcs; + } + } else + file->private_data = ftrace_graph_funcs; + mutex_unlock(&graph_lock); + + return ret; +} + +static ssize_t +ftrace_graph_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + if (file->f_mode & FMODE_READ) + return seq_read(file, ubuf, cnt, ppos); + else + return -EPERM; +} + +static int +ftrace_set_func(unsigned long *array, int idx, char *buffer) +{ + char str[KSYM_SYMBOL_LEN]; + struct dyn_ftrace *rec; + struct ftrace_page *pg; + int found = 0; + int i, j; + + if (ftrace_disabled) + return -ENODEV; + + /* should not be called from interrupt context */ + spin_lock(&ftrace_lock); + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + for (i = 0; i < pg->index; i++) { + rec = &pg->records[i]; + + if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) + continue; + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + if (strcmp(str, buffer) == 0) { + found = 1; + for (j = 0; j < idx; j++) + if (array[j] == rec->ip) { + found = 0; + break; + } + if (found) + array[idx] = rec->ip; + break; + } + } + } + spin_unlock(&ftrace_lock); + + return found ? 0 : -EINVAL; +} + +static ssize_t +ftrace_graph_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned char buffer[FTRACE_BUFF_MAX+1]; + unsigned long *array; + size_t read = 0; + ssize_t ret; + int index = 0; + char ch; + + if (!cnt || cnt < 0) + return 0; + + mutex_lock(&graph_lock); + + if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { + ret = -EBUSY; + goto out; + } + + if (file->f_mode & FMODE_READ) { + struct seq_file *m = file->private_data; + array = m->private; + } else + array = file->private_data; + + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + + /* skip white space */ + while (cnt && isspace(ch)) { + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + if (isspace(ch)) { + *ppos += read; + ret = read; + goto out; + } + + while (cnt && !isspace(ch)) { + if (index < FTRACE_BUFF_MAX) + buffer[index++] = ch; + else { + ret = -EINVAL; + goto out; + } + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + buffer[index] = 0; + + /* we allow only one at a time */ + ret = ftrace_set_func(array, ftrace_graph_count, buffer); + if (ret) + goto out; + + ftrace_graph_count++; + + file->f_pos += read; + + ret = read; + out: + mutex_unlock(&graph_lock); + + return ret; +} + +static const struct file_operations ftrace_graph_fops = { + .open = ftrace_graph_open, + .read = ftrace_graph_read, + .write = ftrace_graph_write, +}; +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) +{ + struct dentry *entry; entry = debugfs_create_file("available_filter_functions", 0444, d_tracer, NULL, &ftrace_avail_fops); @@ -1292,11 +1572,18 @@ static __init int ftrace_init_debugfs(void) pr_warning("Could not create debugfs " "'set_ftrace_notrace' entry\n"); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + entry = debugfs_create_file("set_graph_function", 0444, d_tracer, + NULL, + &ftrace_graph_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_graph_function' entry\n"); +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + return 0; } -fs_initcall(ftrace_init_debugfs); - static int ftrace_convert_nops(struct module *mod, unsigned long *start, unsigned long *end) @@ -1382,12 +1669,186 @@ static int __init ftrace_nodyn_init(void) } device_initcall(ftrace_nodyn_init); -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) +static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } +static inline void ftrace_startup_enable(int command) { } +/* Keep as macros so we do not need to define the commands */ +# define ftrace_startup(command) do { } while (0) +# define ftrace_shutdown(command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ +static ssize_t +ftrace_pid_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + if (ftrace_pid_trace == ftrace_swapper_pid) + r = sprintf(buf, "swapper tasks\n"); + else if (ftrace_pid_trace) + r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace)); + else + r = sprintf(buf, "no pid\n"); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static void clear_ftrace_swapper(void) +{ + struct task_struct *p; + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) { + p = idle_task(cpu); + clear_tsk_trace_trace(p); + } + put_online_cpus(); +} + +static void set_ftrace_swapper(void) +{ + struct task_struct *p; + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) { + p = idle_task(cpu); + set_tsk_trace_trace(p); + } + put_online_cpus(); +} + +static void clear_ftrace_pid(struct pid *pid) +{ + struct task_struct *p; + + do_each_pid_task(pid, PIDTYPE_PID, p) { + clear_tsk_trace_trace(p); + } while_each_pid_task(pid, PIDTYPE_PID, p); + put_pid(pid); +} + +static void set_ftrace_pid(struct pid *pid) +{ + struct task_struct *p; + + do_each_pid_task(pid, PIDTYPE_PID, p) { + set_tsk_trace_trace(p); + } while_each_pid_task(pid, PIDTYPE_PID, p); +} + +static void clear_ftrace_pid_task(struct pid **pid) +{ + if (*pid == ftrace_swapper_pid) + clear_ftrace_swapper(); + else + clear_ftrace_pid(*pid); + + *pid = NULL; +} + +static void set_ftrace_pid_task(struct pid *pid) +{ + if (pid == ftrace_swapper_pid) + set_ftrace_swapper(); + else + set_ftrace_pid(pid); +} + +static ssize_t +ftrace_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct pid *pid; + char buf[64]; + long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtol(buf, 10, &val); + if (ret < 0) + return ret; + + mutex_lock(&ftrace_start_lock); + if (val < 0) { + /* disable pid tracing */ + if (!ftrace_pid_trace) + goto out; + + clear_ftrace_pid_task(&ftrace_pid_trace); + + } else { + /* swapper task is special */ + if (!val) { + pid = ftrace_swapper_pid; + if (pid == ftrace_pid_trace) + goto out; + } else { + pid = find_get_pid(val); + + if (pid == ftrace_pid_trace) { + put_pid(pid); + goto out; + } + } + + if (ftrace_pid_trace) + clear_ftrace_pid_task(&ftrace_pid_trace); + + if (!pid) + goto out; + + ftrace_pid_trace = pid; + + set_ftrace_pid_task(ftrace_pid_trace); + } + + /* update the function call */ + ftrace_update_pid_func(); + ftrace_startup_enable(0); + + out: + mutex_unlock(&ftrace_start_lock); + + return cnt; +} + +static struct file_operations ftrace_pid_fops = { + .read = ftrace_pid_read, + .write = ftrace_pid_write, +}; + +static __init int ftrace_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; + + ftrace_init_dyn_debugfs(d_tracer); + + entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, + NULL, &ftrace_pid_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_ftrace_pid' entry\n"); + return 0; +} + +fs_initcall(ftrace_init_debugfs); + /** * ftrace_kill - kill ftrace * @@ -1422,15 +1883,9 @@ int register_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_sysctl_lock); - if (ftrace_tracing_type == FTRACE_TYPE_RETURN) { - ret = -EBUSY; - goto out; - } - ret = __register_ftrace_function(ops); - ftrace_startup(); + ftrace_startup(0); -out: mutex_unlock(&ftrace_sysctl_lock); return ret; } @@ -1447,7 +1902,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); - ftrace_shutdown(); + ftrace_shutdown(0); mutex_unlock(&ftrace_sysctl_lock); return ret; @@ -1496,14 +1951,19 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, return ret; } -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER -static atomic_t ftrace_retfunc_active; +static atomic_t ftrace_graph_active; -/* The callback that hooks the return of a function */ -trace_function_return_t ftrace_function_return = - (trace_function_return_t)ftrace_stub; +int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) +{ + return 0; +} +/* The callbacks that hook a function */ +trace_func_graph_ret_t ftrace_graph_return = + (trace_func_graph_ret_t)ftrace_stub; +trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -1534,8 +1994,11 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } if (t->ret_stack == NULL) { - t->ret_stack = ret_stack_list[start++]; t->curr_ret_stack = -1; + /* Make sure IRQs see the -1 first: */ + barrier(); + t->ret_stack = ret_stack_list[start++]; + atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); } } while_each_thread(g, t); @@ -1549,7 +2012,7 @@ free: } /* Allocate a return stack for each task */ -static int start_return_tracing(void) +static int start_graph_tracing(void) { struct ftrace_ret_stack **ret_stack_list; int ret; @@ -1569,64 +2032,59 @@ static int start_return_tracing(void) return ret; } -int register_ftrace_return(trace_function_return_t func) +int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc) { int ret = 0; mutex_lock(&ftrace_sysctl_lock); - /* - * Don't launch return tracing if normal function - * tracing is already running. - */ - if (ftrace_trace_function != ftrace_stub) { - ret = -EBUSY; - goto out; - } - atomic_inc(&ftrace_retfunc_active); - ret = start_return_tracing(); + atomic_inc(&ftrace_graph_active); + ret = start_graph_tracing(); if (ret) { - atomic_dec(&ftrace_retfunc_active); + atomic_dec(&ftrace_graph_active); goto out; } - ftrace_tracing_type = FTRACE_TYPE_RETURN; - ftrace_function_return = func; - ftrace_startup(); + + ftrace_graph_return = retfunc; + ftrace_graph_entry = entryfunc; + + ftrace_startup(FTRACE_START_FUNC_RET); out: mutex_unlock(&ftrace_sysctl_lock); return ret; } -void unregister_ftrace_return(void) +void unregister_ftrace_graph(void) { mutex_lock(&ftrace_sysctl_lock); - atomic_dec(&ftrace_retfunc_active); - ftrace_function_return = (trace_function_return_t)ftrace_stub; - ftrace_shutdown(); - /* Restore normal tracing type */ - ftrace_tracing_type = FTRACE_TYPE_ENTER; + atomic_dec(&ftrace_graph_active); + ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; + ftrace_graph_entry = ftrace_graph_entry_stub; + ftrace_shutdown(FTRACE_STOP_FUNC_RET); mutex_unlock(&ftrace_sysctl_lock); } /* Allocate a return stack for newly created task */ -void ftrace_retfunc_init_task(struct task_struct *t) +void ftrace_graph_init_task(struct task_struct *t) { - if (atomic_read(&ftrace_retfunc_active)) { + if (atomic_read(&ftrace_graph_active)) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); if (!t->ret_stack) return; t->curr_ret_stack = -1; + atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); } else t->ret_stack = NULL; } -void ftrace_retfunc_exit_task(struct task_struct *t) +void ftrace_graph_exit_task(struct task_struct *t) { struct ftrace_ret_stack *ret_stack = t->ret_stack; @@ -1636,7 +2094,10 @@ void ftrace_retfunc_exit_task(struct task_struct *t) kfree(ret_stack); } -#endif - +void ftrace_graph_stop(void) +{ + ftrace_stop(); +} +#endif diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e206951..7f69cfea 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -195,20 +195,24 @@ void *ring_buffer_event_data(struct ring_buffer_event *event) #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) -/* - * This hack stolen from mm/slob.c. - * We can store per page timing information in the page frame of the page. - * Thanks to Peter Zijlstra for suggesting this idea. - */ -struct buffer_page { +struct buffer_data_page { u64 time_stamp; /* page time stamp */ - local_t write; /* index for next write */ local_t commit; /* write commited index */ + unsigned char data[]; /* data of buffer page */ +}; + +struct buffer_page { + local_t write; /* index for next write */ unsigned read; /* index for next read */ struct list_head list; /* list of free pages */ - void *page; /* Actual data page */ + struct buffer_data_page *page; /* Actual data page */ }; +static void rb_init_page(struct buffer_data_page *bpage) +{ + local_set(&bpage->commit, 0); +} + /* * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing * this issue out. @@ -230,7 +234,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE PAGE_SIZE +#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) /* * head_page == tail_page && head == tail then buffer is empty. @@ -294,19 +298,19 @@ struct ring_buffer_iter { static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *page, *tmp; + struct buffer_page *bpage, *tmp; if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) return -1; if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) return -1; - list_for_each_entry_safe(page, tmp, head, list) { + list_for_each_entry_safe(bpage, tmp, head, list) { if (RB_WARN_ON(cpu_buffer, - page->list.next->prev != &page->list)) + bpage->list.next->prev != &bpage->list)) return -1; if (RB_WARN_ON(cpu_buffer, - page->list.prev->next != &page->list)) + bpage->list.prev->next != &bpage->list)) return -1; } @@ -317,22 +321,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *page, *tmp; + struct buffer_page *bpage, *tmp; unsigned long addr; LIST_HEAD(pages); unsigned i; for (i = 0; i < nr_pages; i++) { - page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), + bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); - if (!page) + if (!bpage) goto free_pages; - list_add(&page->list, &pages); + list_add(&bpage->list, &pages); addr = __get_free_page(GFP_KERNEL); if (!addr) goto free_pages; - page->page = (void *)addr; + bpage->page = (void *)addr; + rb_init_page(bpage->page); } list_splice(&pages, head); @@ -342,9 +347,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, return 0; free_pages: - list_for_each_entry_safe(page, tmp, &pages, list) { - list_del_init(&page->list); - free_buffer_page(page); + list_for_each_entry_safe(bpage, tmp, &pages, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); } return -ENOMEM; } @@ -353,7 +358,7 @@ static struct ring_buffer_per_cpu * rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - struct buffer_page *page; + struct buffer_page *bpage; unsigned long addr; int ret; @@ -368,16 +373,17 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&cpu_buffer->pages); - page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), + bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); - if (!page) + if (!bpage) goto fail_free_buffer; - cpu_buffer->reader_page = page; + cpu_buffer->reader_page = bpage; addr = __get_free_page(GFP_KERNEL); if (!addr) goto fail_free_reader; - page->page = (void *)addr; + bpage->page = (void *)addr; + rb_init_page(bpage->page); INIT_LIST_HEAD(&cpu_buffer->reader_page->list); @@ -402,14 +408,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = &cpu_buffer->pages; - struct buffer_page *page, *tmp; + struct buffer_page *bpage, *tmp; list_del_init(&cpu_buffer->reader_page->list); free_buffer_page(cpu_buffer->reader_page); - list_for_each_entry_safe(page, tmp, head, list) { - list_del_init(&page->list); - free_buffer_page(page); + list_for_each_entry_safe(bpage, tmp, head, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); } kfree(cpu_buffer); } @@ -506,7 +512,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); static void rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) { - struct buffer_page *page; + struct buffer_page *bpage; struct list_head *p; unsigned i; @@ -517,9 +523,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) return; p = cpu_buffer->pages.next; - page = list_entry(p, struct buffer_page, list); - list_del_init(&page->list); - free_buffer_page(page); + bpage = list_entry(p, struct buffer_page, list); + list_del_init(&bpage->list); + free_buffer_page(bpage); } if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) return; @@ -536,7 +542,7 @@ static void rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, struct list_head *pages, unsigned nr_pages) { - struct buffer_page *page; + struct buffer_page *bpage; struct list_head *p; unsigned i; @@ -547,9 +553,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, if (RB_WARN_ON(cpu_buffer, list_empty(pages))) return; p = pages->next; - page = list_entry(p, struct buffer_page, list); - list_del_init(&page->list); - list_add_tail(&page->list, &cpu_buffer->pages); + bpage = list_entry(p, struct buffer_page, list); + list_del_init(&bpage->list); + list_add_tail(&bpage->list, &cpu_buffer->pages); } rb_reset_cpu(cpu_buffer); @@ -576,7 +582,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) { struct ring_buffer_per_cpu *cpu_buffer; unsigned nr_pages, rm_pages, new_pages; - struct buffer_page *page, *tmp; + struct buffer_page *bpage, *tmp; unsigned long buffer_size; unsigned long addr; LIST_HEAD(pages); @@ -637,16 +643,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) for_each_buffer_cpu(buffer, cpu) { for (i = 0; i < new_pages; i++) { - page = kzalloc_node(ALIGN(sizeof(*page), + bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); - if (!page) + if (!bpage) goto free_pages; - list_add(&page->list, &pages); + list_add(&bpage->list, &pages); addr = __get_free_page(GFP_KERNEL); if (!addr) goto free_pages; - page->page = (void *)addr; + bpage->page = (void *)addr; + rb_init_page(bpage->page); } } @@ -667,9 +674,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) return size; free_pages: - list_for_each_entry_safe(page, tmp, &pages, list) { - list_del_init(&page->list); - free_buffer_page(page); + list_for_each_entry_safe(bpage, tmp, &pages, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); } mutex_unlock(&buffer->mutex); return -ENOMEM; @@ -680,9 +687,15 @@ static inline int rb_null_event(struct ring_buffer_event *event) return event->type == RINGBUF_TYPE_PADDING; } -static inline void *__rb_page_index(struct buffer_page *page, unsigned index) +static inline void * +__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) { - return page->page + index; + return bpage->data + index; +} + +static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) +{ + return bpage->page->data + index; } static inline struct ring_buffer_event * @@ -712,7 +725,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage) static inline unsigned rb_page_commit(struct buffer_page *bpage) { - return local_read(&bpage->commit); + return local_read(&bpage->page->commit); } /* Size is determined by what has been commited */ @@ -758,14 +771,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) } static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page **page) + struct buffer_page **bpage) { - struct list_head *p = (*page)->list.next; + struct list_head *p = (*bpage)->list.next; if (p == &cpu_buffer->pages) p = p->next; - *page = list_entry(p, struct buffer_page, list); + *bpage = list_entry(p, struct buffer_page, list); } static inline unsigned @@ -804,14 +817,15 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, if (RB_WARN_ON(cpu_buffer, cpu_buffer->commit_page == cpu_buffer->tail_page)) return; - cpu_buffer->commit_page->commit = + cpu_buffer->commit_page->page->commit = cpu_buffer->commit_page->write; rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; } /* Now set the commit to the event's index */ - local_set(&cpu_buffer->commit_page->commit, index); + local_set(&cpu_buffer->commit_page->page->commit, index); } static inline void @@ -826,16 +840,17 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) * assign the commit to the tail. */ while (cpu_buffer->commit_page != cpu_buffer->tail_page) { - cpu_buffer->commit_page->commit = + cpu_buffer->commit_page->page->commit = cpu_buffer->commit_page->write; rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; /* add barrier to keep gcc from optimizing too much */ barrier(); } while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { - cpu_buffer->commit_page->commit = + cpu_buffer->commit_page->page->commit = cpu_buffer->commit_page->write; barrier(); } @@ -843,7 +858,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { - cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; + cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; cpu_buffer->reader_page->read = 0; } @@ -862,7 +877,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter) else rb_inc_page(cpu_buffer, &iter->head_page); - iter->read_stamp = iter->head_page->time_stamp; + iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; } @@ -998,12 +1013,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, */ if (tail_page == cpu_buffer->tail_page) { local_set(&next_page->write, 0); - local_set(&next_page->commit, 0); + local_set(&next_page->page->commit, 0); cpu_buffer->tail_page = next_page; /* reread the time stamp */ *ts = ring_buffer_time_stamp(cpu_buffer->cpu); - cpu_buffer->tail_page->time_stamp = *ts; + cpu_buffer->tail_page->page->time_stamp = *ts; } /* @@ -1048,7 +1063,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * this page's time stamp. */ if (!tail && rb_is_commit(cpu_buffer, event)) - cpu_buffer->commit_page->time_stamp = *ts; + cpu_buffer->commit_page->page->time_stamp = *ts; return event; @@ -1099,7 +1114,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, event->time_delta = *delta & TS_MASK; event->array[0] = *delta >> TS_SHIFT; } else { - cpu_buffer->commit_page->time_stamp = *ts; + cpu_buffer->commit_page->page->time_stamp = *ts; event->time_delta = 0; event->array[0] = 0; } @@ -1552,7 +1567,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) if (iter->head) iter->read_stamp = cpu_buffer->read_stamp; else - iter->read_stamp = iter->head_page->time_stamp; + iter->read_stamp = iter->head_page->page->time_stamp; } /** @@ -1696,7 +1711,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->list.prev = reader->list.prev; local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->commit, 0); + local_set(&cpu_buffer->reader_page->page->commit, 0); /* Make the reader page now replace the head */ reader->list.prev->next = &cpu_buffer->reader_page->list; @@ -2088,7 +2103,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->head_page = list_entry(cpu_buffer->pages.next, struct buffer_page, list); local_set(&cpu_buffer->head_page->write, 0); - local_set(&cpu_buffer->head_page->commit, 0); + local_set(&cpu_buffer->head_page->page->commit, 0); cpu_buffer->head_page->read = 0; @@ -2097,7 +2112,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) INIT_LIST_HEAD(&cpu_buffer->reader_page->list); local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->commit, 0); + local_set(&cpu_buffer->reader_page->page->commit, 0); cpu_buffer->reader_page->read = 0; cpu_buffer->overrun = 0; @@ -2223,6 +2238,166 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, return 0; } +static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_data_page *bpage) +{ + struct ring_buffer_event *event; + unsigned long head; + + __raw_spin_lock(&cpu_buffer->lock); + for (head = 0; head < local_read(&bpage->commit); + head += rb_event_length(event)) { + + event = __rb_data_page_index(bpage, head); + if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) + return; + /* Only count data entries */ + if (event->type != RINGBUF_TYPE_DATA) + continue; + cpu_buffer->entries--; + } + __raw_spin_unlock(&cpu_buffer->lock); +} + +/** + * ring_buffer_alloc_read_page - allocate a page to read from buffer + * @buffer: the buffer to allocate for. + * + * This function is used in conjunction with ring_buffer_read_page. + * When reading a full page from the ring buffer, these functions + * can be used to speed up the process. The calling function should + * allocate a few pages first with this function. Then when it + * needs to get pages from the ring buffer, it passes the result + * of this function into ring_buffer_read_page, which will swap + * the page that was allocated, with the read page of the buffer. + * + * Returns: + * The page allocated, or NULL on error. + */ +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) +{ + unsigned long addr; + struct buffer_data_page *bpage; + + addr = __get_free_page(GFP_KERNEL); + if (!addr) + return NULL; + + bpage = (void *)addr; + + return bpage; +} + +/** + * ring_buffer_free_read_page - free an allocated read page + * @buffer: the buffer the page was allocate for + * @data: the page to free + * + * Free a page allocated from ring_buffer_alloc_read_page. + */ +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) +{ + free_page((unsigned long)data); +} + +/** + * ring_buffer_read_page - extract a page from the ring buffer + * @buffer: buffer to extract from + * @data_page: the page to use allocated from ring_buffer_alloc_read_page + * @cpu: the cpu of the buffer to extract + * @full: should the extraction only happen when the page is full. + * + * This function will pull out a page from the ring buffer and consume it. + * @data_page must be the address of the variable that was returned + * from ring_buffer_alloc_read_page. This is because the page might be used + * to swap with a page in the ring buffer. + * + * for example: + * rpage = ring_buffer_alloc_page(buffer); + * if (!rpage) + * return error; + * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); + * if (ret) + * process_page(rpage); + * + * When @full is set, the function will not return true unless + * the writer is off the reader page. + * + * Note: it is up to the calling functions to handle sleeps and wakeups. + * The ring buffer can be used anywhere in the kernel and can not + * blindly call wake_up. The layer that uses the ring buffer must be + * responsible for that. + * + * Returns: + * 1 if data has been transferred + * 0 if no data has been transferred. + */ +int ring_buffer_read_page(struct ring_buffer *buffer, + void **data_page, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_event *event; + struct buffer_data_page *bpage; + unsigned long flags; + int ret = 0; + + if (!data_page) + return 0; + + bpage = *data_page; + if (!bpage) + return 0; + + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + + /* + * rb_buffer_peek will get the next ring buffer if + * the current reader page is empty. + */ + event = rb_buffer_peek(buffer, cpu, NULL); + if (!event) + goto out; + + /* check for data */ + if (!local_read(&cpu_buffer->reader_page->page->commit)) + goto out; + /* + * If the writer is already off of the read page, then simply + * switch the read page with the given page. Otherwise + * we need to copy the data from the reader to the writer. + */ + if (cpu_buffer->reader_page == cpu_buffer->commit_page) { + unsigned int read = cpu_buffer->reader_page->read; + + if (full) + goto out; + /* The writer is still on the reader page, we must copy */ + bpage = cpu_buffer->reader_page->page; + memcpy(bpage->data, + cpu_buffer->reader_page->page->data + read, + local_read(&bpage->commit) - read); + + /* consume what was read */ + cpu_buffer->reader_page += read; + + } else { + /* swap the pages */ + rb_init_page(bpage); + bpage = cpu_buffer->reader_page->page; + cpu_buffer->reader_page->page = *data_page; + cpu_buffer->reader_page->read = 0; + *data_page = bpage; + } + ret = 1; + + /* update the entry counter */ + rb_remove_entries(cpu_buffer, bpage); + out: + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + return ret; +} + static ssize_t rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a45b59e..8ebe007 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -44,6 +44,15 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +/* + * We need to change this state when a selftest is running. + * A selftest will lurk into the ring-buffer to count the + * entries inserted during the selftest although some concurrent + * insertions into the ring-buffer such as ftrace_printk could occurred + * at the same time, giving false positive or negative results. + */ +static bool __read_mostly tracing_selftest_running; + /* For tracers that don't implement custom flags */ static struct tracer_opt dummy_tracer_opt[] = { { } @@ -566,6 +575,8 @@ int register_tracer(struct tracer *type) unlock_kernel(); mutex_lock(&trace_types_lock); + tracing_selftest_running = true; + for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -589,6 +600,7 @@ int register_tracer(struct tracer *type) struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; int i; + /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current @@ -624,6 +636,7 @@ int register_tracer(struct tracer *type) max_tracer_type_len = len; out: + tracing_selftest_running = false; mutex_unlock(&trace_types_lock); lock_kernel(); @@ -804,7 +817,7 @@ static void trace_save_cmdline(struct task_struct *tsk) spin_unlock(&trace_cmdline_lock); } -static char *trace_find_cmdline(int pid) +char *trace_find_cmdline(int pid) { char *cmdline = "<...>"; unsigned map; @@ -878,15 +891,39 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -static void __trace_function_return(struct trace_array *tr, +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void __trace_graph_entry(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_graph_ent *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_graph_ent_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_GRAPH_ENT; + entry->graph_ent = *trace; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} + +static void __trace_graph_return(struct trace_array *tr, struct trace_array_cpu *data, - struct ftrace_retfunc *trace, + struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_ret_entry *entry; + struct ftrace_graph_ret_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -898,12 +935,8 @@ static void __trace_function_return(struct trace_array *tr, return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_FN_RET; - entry->ip = trace->func; - entry->parent_ip = trace->ret; - entry->rettime = trace->rettime; - entry->calltime = trace->calltime; - entry->overrun = trace->overrun; + entry->ent.type = TRACE_GRAPH_RET; + entry->ret = *trace; ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); } #endif @@ -963,6 +996,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, int pc) { +#ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; @@ -988,6 +1022,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, save_stack_trace_user(&trace); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +#endif } void __trace_userstack(struct trace_array *tr, @@ -1177,8 +1212,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) local_irq_restore(flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -void trace_function_return(struct ftrace_retfunc *trace) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +int trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1187,18 +1222,52 @@ void trace_function_return(struct ftrace_retfunc *trace) int cpu; int pc; - raw_local_irq_save(flags); + if (!ftrace_trace_task(current)) + return 0; + + if (!ftrace_graph_addr(trace->func)) + return 0; + + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_return(tr, data, trace, flags, pc); + __trace_graph_entry(tr, data, trace, flags, pc); } + /* Only do the atomic if it is not already set */ + if (!test_tsk_trace_graph(current)) + set_tsk_trace_graph(current); atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); + + return 1; } -#endif /* CONFIG_FUNCTION_RET_TRACER */ + +void trace_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, data, trace, flags, pc); + } + if (!trace->depth) + clear_tsk_trace_graph(current); + atomic_dec(&data->disabled); + local_irq_restore(flags); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -2000,9 +2069,11 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } - case TRACE_FN_RET: { - return print_return_function(iter); - break; + case TRACE_GRAPH_RET: { + return print_graph_function(iter); + } + case TRACE_GRAPH_ENT: { + return print_graph_function(iter); } case TRACE_BRANCH: { struct trace_branch *field; @@ -2298,7 +2369,9 @@ static int s_show(struct seq_file *m, void *v) seq_printf(m, "# tracer: %s\n", iter->trace->name); seq_puts(m, "#\n"); } - if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + if (iter->trace && iter->trace->print_header) + iter->trace->print_header(m); + else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) return 0; @@ -2350,6 +2423,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; + /* Notify the tracer early; before we stop tracing. */ + if (iter->trace && iter->trace->open) + iter->trace->open(iter); + /* Annotate start of buffers if we had overruns */ if (ring_buffer_overruns(iter->tr->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; @@ -2375,9 +2452,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) /* stop the trace while dumping */ tracing_stop(); - if (iter->trace && iter->trace->open) - iter->trace->open(iter); - mutex_unlock(&trace_types_lock); out: @@ -2597,7 +2671,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - raw_local_irq_disable(); + local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); for_each_tracing_cpu(cpu) { /* @@ -2614,7 +2688,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, } } __raw_spin_unlock(&ftrace_max_lock); - raw_local_irq_enable(); + local_irq_enable(); tracing_cpumask = tracing_cpumask_new; @@ -3285,7 +3359,7 @@ static int mark_printk(const char *fmt, ...) int ret; va_list args; va_start(args, fmt); - ret = trace_vprintk(0, fmt, args); + ret = trace_vprintk(0, -1, fmt, args); va_end(args); return ret; } @@ -3514,7 +3588,7 @@ static __init int tracer_init_debugfs(void) return 0; } -int trace_vprintk(unsigned long ip, const char *fmt, va_list args) +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; @@ -3522,11 +3596,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - struct print_entry *entry; - unsigned long flags, irq_flags; int cpu, len = 0, size, pc; + struct print_entry *entry; + unsigned long irq_flags; - if (tracing_disabled) + if (tracing_disabled || tracing_selftest_running) return 0; pc = preempt_count(); @@ -3537,7 +3611,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) if (unlikely(atomic_read(&data->disabled))) goto out; - spin_lock_irqsave(&trace_buf_lock, flags); + pause_graph_tracing(); + spin_lock_irqsave(&trace_buf_lock, irq_flags); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -3548,17 +3623,18 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); + tracing_generic_entry_update(&entry->ent, irq_flags, pc); entry->ent.type = TRACE_PRINT; entry->ip = ip; + entry->depth = depth; memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, flags); - + spin_unlock_irqrestore(&trace_buf_lock, irq_flags); + unpause_graph_tracing(); out: preempt_enable_notrace(); @@ -3575,7 +3651,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - ret = trace_vprintk(ip, fmt, ap); + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); va_end(ap); return ret; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 28c15c2..5ac6970 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -25,8 +25,11 @@ enum trace_type { TRACE_BRANCH, TRACE_BOOT_CALL, TRACE_BOOT_RET, - TRACE_FN_RET, + TRACE_GRAPH_RET, + TRACE_GRAPH_ENT, TRACE_USER_STACK, + TRACE_BTS, + TRACE_POWER, __TRACE_LAST_TYPE }; @@ -55,14 +58,16 @@ struct ftrace_entry { unsigned long parent_ip; }; +/* Function call entry */ +struct ftrace_graph_ent_entry { + struct trace_entry ent; + struct ftrace_graph_ent graph_ent; +}; + /* Function return entry */ -struct ftrace_ret_entry { - struct trace_entry ent; - unsigned long ip; - unsigned long parent_ip; - unsigned long long calltime; - unsigned long long rettime; - unsigned long overrun; +struct ftrace_graph_ret_entry { + struct trace_entry ent; + struct ftrace_graph_ret ret; }; extern struct tracer boot_tracer; @@ -112,6 +117,7 @@ struct userstack_entry { struct print_entry { struct trace_entry ent; unsigned long ip; + int depth; char buf[]; }; @@ -153,6 +159,17 @@ struct trace_branch { char correct; }; +struct bts_entry { + struct trace_entry ent; + unsigned long from; + unsigned long to; +}; + +struct trace_power { + struct trace_entry ent; + struct power_trace state_data; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -257,7 +274,12 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ + IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ + TRACE_GRAPH_ENT); \ + IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ + TRACE_GRAPH_RET); \ + IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ + IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ __ftrace_bad_type(); \ } while (0) @@ -311,6 +333,7 @@ struct tracer { int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif + void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ int (*set_flag)(u32 old_flags, u32 bit, int set); @@ -388,8 +411,12 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); -void -trace_function_return(struct ftrace_retfunc *trace); + +void trace_graph_return(struct ftrace_graph_ret *trace); +int trace_graph_entry(struct ftrace_graph_ent *trace); +void trace_bts(struct trace_array *tr, + unsigned long from, + unsigned long to); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); @@ -431,6 +458,7 @@ struct tracer_switch_ops { struct tracer_switch_ops *next; }; +char *trace_find_cmdline(int pid); #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE @@ -471,20 +499,63 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt); extern long ns2usecs(cycle_t nsec); -extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); +extern int +trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ -#ifdef CONFIG_FUNCTION_RET_TRACER -extern enum print_line_t print_return_function(struct trace_iterator *iter); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern enum print_line_t print_graph_function(struct trace_iterator *iter); + +#ifdef CONFIG_DYNAMIC_FTRACE +/* TODO: make this variable */ +#define FTRACE_GRAPH_MAX_FUNCS 32 +extern int ftrace_graph_count; +extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; + +static inline int ftrace_graph_addr(unsigned long addr) +{ + int i; + + if (!ftrace_graph_count || test_tsk_trace_graph(current)) + return 1; + + for (i = 0; i < ftrace_graph_count; i++) { + if (addr == ftrace_graph_funcs[i]) + return 1; + } + + return 0; +} #else +static inline int ftrace_trace_addr(unsigned long addr) +{ + return 1; +} +static inline int ftrace_graph_addr(unsigned long addr) +{ + return 1; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t -print_return_function(struct trace_iterator *iter) +print_graph_function(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } -#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +extern struct pid *ftrace_pid_trace; + +static inline int ftrace_trace_task(struct task_struct *task) +{ + if (!ftrace_pid_trace) + return 1; + + return test_tsk_trace_trace(task); +} /* * trace_iterator_flags is an enumeration that defines bit diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 877ee88..6c00feb 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -6,6 +6,7 @@ #include <linux/kallsyms.h> #include <linux/seq_file.h> #include <linux/spinlock.h> +#include <linux/irqflags.h> #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/module.h> @@ -41,7 +42,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; @@ -73,7 +74,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) out: atomic_dec(&tr->data[cpu]->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } static inline diff --git a/kernel/trace/trace_bts.c b/kernel/trace/trace_bts.c new file mode 100644 index 0000000..23b76e4 --- /dev/null +++ b/kernel/trace/trace_bts.c @@ -0,0 +1,276 @@ +/* + * BTS tracer + * + * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com> + * + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <linux/ftrace.h> +#include <linux/kallsyms.h> + +#include <asm/ds.h> + +#include "trace.h" + + +#define SIZEOF_BTS (1 << 13) + +static DEFINE_PER_CPU(struct bts_tracer *, tracer); +static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); + +#define this_tracer per_cpu(tracer, smp_processor_id()) +#define this_buffer per_cpu(buffer, smp_processor_id()) + + +/* + * Information to interpret a BTS record. + * This will go into an in-kernel BTS interface. + */ +static unsigned char sizeof_field; +static unsigned long debugctl_mask; + +#define sizeof_bts (3 * sizeof_field) + +static void bts_trace_cpuinit(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { + case 0x0 ... 0xC: + break; + case 0xD: + case 0xE: /* Pentium M */ + sizeof_field = sizeof(long); + debugctl_mask = (1<<6)|(1<<7); + break; + default: + sizeof_field = 8; + debugctl_mask = (1<<6)|(1<<7); + break; + } + break; + case 0xF: + switch (c->x86_model) { + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + sizeof_field = sizeof(long); + debugctl_mask = (1<<2)|(1<<3); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} + +static inline void bts_enable(void) +{ + unsigned long debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | debugctl_mask); +} + +static inline void bts_disable(void) +{ + unsigned long debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl & ~debugctl_mask); +} + +static void bts_trace_reset(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); +} + +static void bts_trace_start_cpu(void *arg) +{ + this_tracer = + ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS, + /* ovfl = */ NULL, /* th = */ (size_t)-1); + if (IS_ERR(this_tracer)) { + this_tracer = NULL; + return; + } + + bts_enable(); +} + +static void bts_trace_start(struct trace_array *tr) +{ + int cpu; + + bts_trace_reset(tr); + + for_each_cpu_mask(cpu, cpu_possible_map) + smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); +} + +static void bts_trace_stop_cpu(void *arg) +{ + if (this_tracer) { + bts_disable(); + + ds_release_bts(this_tracer); + this_tracer = NULL; + } +} + +static void bts_trace_stop(struct trace_array *tr) +{ + int cpu; + + for_each_cpu_mask(cpu, cpu_possible_map) + smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); +} + +static int bts_trace_init(struct trace_array *tr) +{ + bts_trace_cpuinit(&boot_cpu_data); + bts_trace_reset(tr); + bts_trace_start(tr); + + return 0; +} + +static void bts_trace_print_header(struct seq_file *m) +{ +#ifdef __i386__ + seq_puts(m, "# CPU# FROM TO FUNCTION\n"); + seq_puts(m, "# | | | |\n"); +#else + seq_puts(m, + "# CPU# FROM TO FUNCTION\n"); + seq_puts(m, + "# | | | |\n"); +#endif +} + +static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *seq = &iter->seq; + struct bts_entry *it; + + trace_assign_type(it, entry); + + if (entry->type == TRACE_BTS) { + int ret; +#ifdef CONFIG_KALLSYMS + char function[KSYM_SYMBOL_LEN]; + sprint_symbol(function, it->from); +#else + char *function = "<unknown>"; +#endif + + ret = trace_seq_printf(seq, "%4d 0x%lx -> 0x%lx [%s]\n", + entry->cpu, it->from, it->to, function); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE;; + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to) +{ + struct ring_buffer_event *event; + struct bts_entry *entry; + unsigned long irq; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, from); + entry->ent.type = TRACE_BTS; + entry->ent.cpu = smp_processor_id(); + entry->from = from; + entry->to = to; + ring_buffer_unlock_commit(tr->buffer, event, irq); +} + +static void trace_bts_at(struct trace_array *tr, size_t index) +{ + const void *raw = NULL; + unsigned long from, to; + int err; + + err = ds_access_bts(this_tracer, index, &raw); + if (err < 0) + return; + + from = *(const unsigned long *)raw; + to = *(const unsigned long *)((const char *)raw + sizeof_field); + + trace_bts(tr, from, to); +} + +static void trace_bts_cpu(void *arg) +{ + struct trace_array *tr = (struct trace_array *) arg; + size_t index = 0, end = 0, i; + int err; + + if (!this_tracer) + return; + + bts_disable(); + + err = ds_get_bts_index(this_tracer, &index); + if (err < 0) + goto out; + + err = ds_get_bts_end(this_tracer, &end); + if (err < 0) + goto out; + + for (i = index; i < end; i++) + trace_bts_at(tr, i); + + for (i = 0; i < index; i++) + trace_bts_at(tr, i); + +out: + bts_enable(); +} + +static void trace_bts_prepare(struct trace_iterator *iter) +{ + int cpu; + + for_each_cpu_mask(cpu, cpu_possible_map) + smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1); +} + +struct tracer bts_tracer __read_mostly = +{ + .name = "bts", + .init = bts_trace_init, + .reset = bts_trace_stop, + .print_header = bts_trace_print_header, + .print_line = bts_trace_print_line, + .start = bts_trace_start, + .stop = bts_trace_stop, + .open = trace_bts_prepare +}; + +__init static int init_bts_trace(void) +{ + return register_tracer(&bts_tracer); +} +device_initcall(init_bts_trace); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c new file mode 100644 index 0000000..af60eef --- /dev/null +++ b/kernel/trace/trace_functions_graph.c @@ -0,0 +1,611 @@ +/* + * + * Function graph tracer. + * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com> + * Mostly borrowed from function tracer which + * is Copyright (c) Steven Rostedt <srostedt@redhat.com> + * + */ +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/ftrace.h> +#include <linux/fs.h> + +#include "trace.h" + +#define TRACE_GRAPH_INDENT 2 + +/* Flag options */ +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +#define TRACE_GRAPH_PRINT_CPU 0x2 +#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 +#define TRACE_GRAPH_PRINT_PROC 0x8 + +static struct tracer_opt trace_opts[] = { + /* Display overruns ? */ + { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, + /* Display CPU ? */ + { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, + /* Display Overhead ? */ + { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, + /* Display proc name/pid */ + { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + /* Don't display overruns and proc by default */ + .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, + .opts = trace_opts +}; + +/* pid on the last trace processed */ +static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 }; + +static int graph_trace_init(struct trace_array *tr) +{ + int cpu, ret; + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + ret = register_ftrace_graph(&trace_graph_return, + &trace_graph_entry); + if (ret) + return ret; + tracing_start_cmdline_record(); + + return 0; +} + +static void graph_trace_reset(struct trace_array *tr) +{ + tracing_stop_cmdline_record(); + unregister_ftrace_graph(); +} + +static inline int log10_cpu(int nb) +{ + if (nb / 100) + return 3; + if (nb / 10) + return 2; + return 1; +} + +static enum print_line_t +print_graph_cpu(struct trace_seq *s, int cpu) +{ + int i; + int ret; + int log10_this = log10_cpu(cpu); + int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map)); + + + /* + * Start with a space character - to make it stand out + * to the right a bit when trace output is pasted into + * email: + */ + ret = trace_seq_printf(s, " "); + + /* + * Tricky - we space the CPU field according to the max + * number of online CPUs. On a 2-cpu system it would take + * a maximum of 1 digit - on a 128 cpu system it would + * take up to 3 digits: + */ + for (i = 0; i < log10_all - log10_this; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + ret = trace_seq_printf(s, "%d) ", cpu); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +#define TRACE_GRAPH_PROCINFO_LENGTH 14 + +static enum print_line_t +print_graph_proc(struct trace_seq *s, pid_t pid) +{ + int i; + int ret; + int len; + char comm[8]; + int spaces = 0; + /* sign + log10(MAX_INT) + '\0' */ + char pid_str[11]; + + strncpy(comm, trace_find_cmdline(pid), 7); + comm[7] = '\0'; + sprintf(pid_str, "%d", pid); + + /* 1 stands for the "-" character */ + len = strlen(comm) + strlen(pid_str) + 1; + + if (len < TRACE_GRAPH_PROCINFO_LENGTH) + spaces = TRACE_GRAPH_PROCINFO_LENGTH - len; + + /* First spaces to align center */ + for (i = 0; i < spaces / 2; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "%s-%s", comm, pid_str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Last spaces to align center */ + for (i = 0; i < spaces - (spaces / 2); i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + return TRACE_TYPE_HANDLED; +} + + +/* If the pid changed since the last trace, output this event */ +static enum print_line_t +verif_pid(struct trace_seq *s, pid_t pid, int cpu) +{ + pid_t prev_pid; + int ret; + + if (last_pid[cpu] != -1 && last_pid[cpu] == pid) + return TRACE_TYPE_HANDLED; + + prev_pid = last_pid[cpu]; + last_pid[cpu] = pid; + +/* + * Context-switch trace line: + + ------------------------------------------ + | 1) migration/0--1 => sshd-1755 + ------------------------------------------ + + */ + ret = trace_seq_printf(s, + " ------------------------------------------\n"); + if (!ret) + TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_cpu(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_proc(s, prev_pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " => "); + if (!ret) + TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_proc(s, pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, + "\n ------------------------------------------\n\n"); + if (!ret) + TRACE_TYPE_PARTIAL_LINE; + + return ret; +} + +static bool +trace_branch_is_leaf(struct trace_iterator *iter, + struct ftrace_graph_ent_entry *curr) +{ + struct ring_buffer_iter *ring_iter; + struct ring_buffer_event *event; + struct ftrace_graph_ret_entry *next; + + ring_iter = iter->buffer_iter[iter->cpu]; + + if (!ring_iter) + return false; + + event = ring_buffer_iter_peek(ring_iter, NULL); + + if (!event) + return false; + + next = ring_buffer_event_data(event); + + if (next->ent.type != TRACE_GRAPH_RET) + return false; + + if (curr->ent.pid != next->ent.pid || + curr->graph_ent.func != next->ret.func) + return false; + + return true; +} + + +static enum print_line_t +print_graph_duration(unsigned long long duration, struct trace_seq *s) +{ + unsigned long nsecs_rem = do_div(duration, 1000); + /* log10(ULONG_MAX) + '\0' */ + char msecs_str[21]; + char nsecs_str[5]; + int ret, len; + int i; + + sprintf(msecs_str, "%lu", (unsigned long) duration); + + /* Print msecs */ + ret = trace_seq_printf(s, msecs_str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + len = strlen(msecs_str); + + /* Print nsecs (we don't want to exceed 7 numbers) */ + if (len < 7) { + snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); + ret = trace_seq_printf(s, ".%s", nsecs_str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + len += strlen(nsecs_str); + } + + ret = trace_seq_printf(s, " us "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Print remaining spaces to fit the row's width */ + for (i = len; i < 7; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "| "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; + +} + +/* Signal a overhead of time execution to the output */ +static int +print_graph_overhead(unsigned long long duration, struct trace_seq *s) +{ + /* Duration exceeded 100 msecs */ + if (duration > 100000ULL) + return trace_seq_printf(s, "! "); + + /* Duration exceeded 10 msecs */ + if (duration > 10000ULL) + return trace_seq_printf(s, "+ "); + + return trace_seq_printf(s, " "); +} + +/* Case of a leaf function on its call entry */ +static enum print_line_t +print_graph_entry_leaf(struct trace_iterator *iter, + struct ftrace_graph_ent_entry *entry, struct trace_seq *s) +{ + struct ftrace_graph_ret_entry *ret_entry; + struct ftrace_graph_ret *graph_ret; + struct ring_buffer_event *event; + struct ftrace_graph_ent *call; + unsigned long long duration; + int ret; + int i; + + event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); + ret_entry = ring_buffer_event_data(event); + graph_ret = &ret_entry->ret; + call = &entry->graph_ent; + duration = graph_ret->rettime - graph_ret->calltime; + + /* Overhead */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + ret = print_graph_overhead(duration, s); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Duration */ + ret = print_graph_duration(duration, s); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + /* Function */ + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = seq_print_ip_sym(s, call->func, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, "();\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_entry_nested(struct ftrace_graph_ent_entry *entry, + struct trace_seq *s) +{ + int i; + int ret; + struct ftrace_graph_ent *call = &entry->graph_ent; + + /* No overhead */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* No time */ + ret = trace_seq_printf(s, " | "); + + /* Function */ + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = seq_print_ip_sym(s, call->func, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, "() {\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, + struct trace_iterator *iter, int cpu) +{ + int ret; + struct trace_entry *ent = iter->ent; + + /* Pid */ + if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + /* Cpu */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + ret = print_graph_cpu(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Proc */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + ret = print_graph_proc(s, ent->pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + if (trace_branch_is_leaf(iter, field)) + return print_graph_entry_leaf(iter, field, s); + else + return print_graph_entry_nested(field, s); + +} + +static enum print_line_t +print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, + struct trace_entry *ent, int cpu) +{ + int i; + int ret; + unsigned long long duration = trace->rettime - trace->calltime; + + /* Pid */ + if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + /* Cpu */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + ret = print_graph_cpu(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Proc */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + ret = print_graph_proc(s, ent->pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Overhead */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + ret = print_graph_overhead(duration, s); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Duration */ + ret = print_graph_duration(duration, s); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + /* Closing brace */ + for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "}\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Overrun */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)\n", + trace->overrun); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_comment(struct print_entry *trace, struct trace_seq *s, + struct trace_entry *ent, struct trace_iterator *iter) +{ + int i; + int ret; + + /* Pid */ + if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + /* Cpu */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + ret = print_graph_cpu(s, iter->cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Proc */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + ret = print_graph_proc(s, ent->pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* No overhead */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* No time */ + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Indentation */ + if (trace->depth > 0) + for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* The comment */ + ret = trace_seq_printf(s, "/* %s", trace->buf); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (ent->flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + + ret = trace_seq_printf(s, " */\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + + +enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + + switch (entry->type) { + case TRACE_GRAPH_ENT: { + struct ftrace_graph_ent_entry *field; + trace_assign_type(field, entry); + return print_graph_entry(field, s, iter, + iter->cpu); + } + case TRACE_GRAPH_RET: { + struct ftrace_graph_ret_entry *field; + trace_assign_type(field, entry); + return print_graph_return(&field->ret, s, entry, iter->cpu); + } + case TRACE_PRINT: { + struct print_entry *field; + trace_assign_type(field, entry); + return print_graph_comment(field, s, entry, iter); + } + default: + return TRACE_TYPE_UNHANDLED; + } +} + +static void print_graph_headers(struct seq_file *s) +{ + /* 1st line */ + seq_printf(s, "# "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + seq_printf(s, "CPU "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + seq_printf(s, "TASK/PID "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) + seq_printf(s, "OVERHEAD/"); + seq_printf(s, "DURATION FUNCTION CALLS\n"); + + /* 2nd line */ + seq_printf(s, "# "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + seq_printf(s, "| "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + seq_printf(s, "| | "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + seq_printf(s, "| "); + seq_printf(s, "| | | | |\n"); + } else + seq_printf(s, " | | | | |\n"); +} +static struct tracer graph_trace __read_mostly = { + .name = "function_graph", + .init = graph_trace_init, + .reset = graph_trace_reset, + .print_line = print_graph_function, + .print_header = print_graph_headers, + .flags = &tracer_flags, +}; + +static __init int init_graph_trace(void) +{ + return register_tracer(&graph_trace); +} + +device_initcall(init_graph_trace); diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c deleted file mode 100644 index e00d645..0000000 --- a/kernel/trace/trace_functions_return.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * - * Function return tracer. - * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com> - * Mostly borrowed from function tracer which - * is Copyright (c) Steven Rostedt <srostedt@redhat.com> - * - */ -#include <linux/debugfs.h> -#include <linux/uaccess.h> -#include <linux/ftrace.h> -#include <linux/fs.h> - -#include "trace.h" - - -#define TRACE_RETURN_PRINT_OVERRUN 0x1 -static struct tracer_opt trace_opts[] = { - /* Display overruns or not */ - { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) }, - { } /* Empty entry */ -}; - -static struct tracer_flags tracer_flags = { - .val = 0, /* Don't display overruns by default */ - .opts = trace_opts -}; - - -static int return_trace_init(struct trace_array *tr) -{ - int cpu; - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); - - return register_ftrace_return(&trace_function_return); -} - -static void return_trace_reset(struct trace_array *tr) -{ - unregister_ftrace_return(); -} - - -enum print_line_t -print_return_function(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct ftrace_ret_entry *field; - int ret; - - if (entry->type == TRACE_FN_RET) { - trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = seq_print_ip_sym(s, field->ip, - trace_flags & TRACE_ITER_SYM_MASK); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, " (%llu ns)", - field->rettime - field->calltime); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)", - field->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = trace_seq_printf(s, "\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; - } - return TRACE_TYPE_UNHANDLED; -} - -static struct tracer return_trace __read_mostly = { - .name = "return", - .init = return_trace_init, - .reset = return_trace_reset, - .print_line = print_return_function, - .flags = &tracer_flags, -}; - -static __init int init_return_trace(void) -{ - return register_tracer(&return_trace); -} - -device_initcall(init_return_trace); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 2a98a20..2fb6da6 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -366,5 +366,5 @@ void mmio_trace_mapping(struct mmiotrace_map *map) int mmio_trace_printk(const char *fmt, va_list args) { - return trace_vprintk(0, fmt, args); + return trace_vprintk(0, -1, fmt, args); } diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c new file mode 100644 index 0000000..a7172a3 --- /dev/null +++ b/kernel/trace/trace_power.c @@ -0,0 +1,179 @@ +/* + * ring buffer based C-state tracer + * + * Arjan van de Ven <arjan@linux.intel.com> + * Copyright (C) 2008 Intel Corporation + * + * Much is borrowed from trace_boot.c which is + * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> + * + */ + +#include <linux/init.h> +#include <linux/debugfs.h> +#include <linux/ftrace.h> +#include <linux/kallsyms.h> +#include <linux/module.h> + +#include "trace.h" + +static struct trace_array *power_trace; +static int __read_mostly trace_power_enabled; + + +static void start_power_trace(struct trace_array *tr) +{ + trace_power_enabled = 1; +} + +static void stop_power_trace(struct trace_array *tr) +{ + trace_power_enabled = 0; +} + + +static int power_trace_init(struct trace_array *tr) +{ + int cpu; + power_trace = tr; + + trace_power_enabled = 1; + + for_each_cpu_mask(cpu, cpu_possible_map) + tracing_reset(tr, cpu); + return 0; +} + +static enum print_line_t power_print_line(struct trace_iterator *iter) +{ + int ret = 0; + struct trace_entry *entry = iter->ent; + struct trace_power *field ; + struct power_trace *it; + struct trace_seq *s = &iter->seq; + struct timespec stamp; + struct timespec duration; + + trace_assign_type(field, entry); + it = &field->state_data; + stamp = ktime_to_timespec(it->stamp); + duration = ktime_to_timespec(ktime_sub(it->end, it->stamp)); + + if (entry->type == TRACE_POWER) { + if (it->type == POWER_CSTATE) + ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n", + stamp.tv_sec, + stamp.tv_nsec, + it->state, iter->cpu, + duration.tv_sec, + duration.tv_nsec); + if (it->type == POWER_PSTATE) + ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n", + stamp.tv_sec, + stamp.tv_nsec, + it->state, iter->cpu); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer power_tracer __read_mostly = +{ + .name = "power", + .init = power_trace_init, + .start = start_power_trace, + .stop = stop_power_trace, + .reset = stop_power_trace, + .print_line = power_print_line, +}; + +static int init_power_trace(void) +{ + return register_tracer(&power_tracer); +} +device_initcall(init_power_trace); + +void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int level) +{ + if (!trace_power_enabled) + return; + + memset(it, 0, sizeof(struct power_trace)); + it->state = level; + it->type = type; + it->stamp = ktime_get(); +} +EXPORT_SYMBOL_GPL(trace_power_start); + + +void trace_power_end(struct power_trace *it) +{ + struct ring_buffer_event *event; + struct trace_power *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = power_trace; + + if (!trace_power_enabled) + return; + + preempt_disable(); + it->end = ktime_get(); + data = tr->data[smp_processor_id()]; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_POWER; + entry->state_data = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} +EXPORT_SYMBOL_GPL(trace_power_end); + +void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int level) +{ + struct ring_buffer_event *event; + struct trace_power *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = power_trace; + + if (!trace_power_enabled) + return; + + memset(it, 0, sizeof(struct power_trace)); + it->state = level; + it->type = type; + it->stamp = ktime_get(); + preempt_disable(); + it->end = it->stamp; + data = tr->data[smp_processor_id()]; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_POWER; + entry->state_data = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} +EXPORT_SYMBOL_GPL(trace_power_mark); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index fde3be1..0b863f2 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -48,7 +48,7 @@ static inline void check_stack(void) if (!object_is_on_stack(&this_size)) return; - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&max_stack_lock); /* a race could have already updated it */ @@ -78,6 +78,7 @@ static inline void check_stack(void) * on a new max, so it is far from a fast path. */ while (i < max_stack_trace.nr_entries) { + int found = 0; stack_dump_index[i] = this_size; p = start; @@ -86,17 +87,19 @@ static inline void check_stack(void) if (*p == stack_dump_trace[i]) { this_size = stack_dump_index[i++] = (top - p) * sizeof(unsigned long); + found = 1; /* Start the search from here */ start = p + 1; } } - i++; + if (!found) + i++; } out: __raw_spin_unlock(&max_stack_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); } static void @@ -162,11 +165,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, if (ret < 0) return ret; - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&max_stack_lock); *ptr = val; __raw_spin_unlock(&max_stack_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); return count; } |