diff options
Diffstat (limited to 'kernel/trace')
30 files changed, 2060 insertions, 1481 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5efeb42..06c3d5b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -11,12 +11,18 @@ config NOP_TRACER config HAVE_FTRACE_NMI_ENTER bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_FUNCTION_TRACER bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_FUNCTION_GRAPH_TRACER bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_FUNCTION_GRAPH_FP_TEST bool @@ -28,21 +34,25 @@ config HAVE_FUNCTION_GRAPH_FP_TEST config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool help - This gets selected when the arch tests the function_trace_stop - variable at the mcount call site. Otherwise, this variable - is tested by the called function. + See Documentation/trace/ftrace-implementation.txt config HAVE_DYNAMIC_FTRACE bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_FTRACE_MCOUNT_RECORD bool + help + See Documentation/trace/ftrace-implementation.txt config HAVE_HW_BRANCH_TRACER bool config HAVE_SYSCALL_TRACEPOINTS bool + help + See Documentation/trace/ftrace-implementation.txt config TRACER_MAX_TRACE bool @@ -73,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP # This allows those options to appear when no other tracer is selected. But the # options do not appear when something else selects it. We need the two options # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the -# hidding of the automatic options options. +# hidding of the automatic options. config TRACING bool @@ -490,6 +500,18 @@ config FTRACE_STARTUP_TEST functioning properly. It will do tests on all the configured tracers of ftrace. +config EVENT_TRACE_TEST_SYSCALLS + bool "Run selftest on syscall events" + depends on FTRACE_STARTUP_TEST + help + This option will also enable testing every syscall event. + It only enables the event and disables it and runs various loads + with the event enabled. This adds a bit more time for kernel boot + up since it runs this on every system call defined. + + TBD - enable a way to actually call the syscalls as we test their + events + config MMIOTRACE bool "Memory mapped IO tracing" depends on HAVE_MMIOTRACE_SUPPORT && PCI diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index ce3b1cd..0f84c52 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o -obj-$(CONFIG_POWER_TRACER) += trace_power.o obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o @@ -55,5 +54,6 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o +obj-$(CONFIG_EVENT_TRACING) += power-traces.o libftrace-y := ftrace.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 3eb159c..d9d6206 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -856,6 +856,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, } /** + * blk_add_trace_rq_remap - Add a trace for a request-remap operation + * @q: queue the io is for + * @rq: the source request + * @dev: target device + * @from: source sector + * + * Description: + * Device mapper remaps request to other devices. + * Add a trace for that action. + * + **/ +static void blk_add_trace_rq_remap(struct request_queue *q, + struct request *rq, dev_t dev, + sector_t from) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device_from = cpu_to_be32(dev); + r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); + r.sector_from = cpu_to_be64(from); + + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, + sizeof(r), &r); +} + +/** * blk_add_driver_data - Add binary message with driver-specific data * @q: queue the io is for * @rq: io request @@ -922,10 +953,13 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_remap(blk_add_trace_remap); WARN_ON(ret); + ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); + WARN_ON(ret); } static void blk_unregister_tracepoints(void) { + unregister_trace_block_rq_remap(blk_add_trace_rq_remap); unregister_trace_block_remap(blk_add_trace_remap); unregister_trace_block_split(blk_add_trace_split); unregister_trace_block_unplug_io(blk_add_trace_unplug_io); @@ -1657,6 +1691,11 @@ int blk_trace_init_sysfs(struct device *dev) return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); } +void blk_trace_remove_sysfs(struct device *dev) +{ + sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); +} + #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_EVENT_TRACING diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8c804e2..b10c0d9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -60,6 +60,13 @@ static int last_ftrace_enabled; /* Quick disabling of function tracer. */ int function_trace_stop; +/* List for set_ftrace_pid's pids. */ +LIST_HEAD(ftrace_pids); +struct ftrace_pid { + struct list_head list; + struct pid *pid; +}; + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -78,6 +85,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); +#endif + static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op = ftrace_list; @@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) else func = ftrace_list_func; - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } @@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_list->next == &ftrace_list_end) { ftrace_func_t func = ftrace_list->func; - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } @@ -225,9 +236,13 @@ static void ftrace_update_pid_func(void) if (ftrace_trace_function == ftrace_stub) return; +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST func = ftrace_trace_function; +#else + func = __ftrace_trace_function; +#endif - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } else { @@ -817,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) } #endif /* CONFIG_FUNCTION_PROFILER */ -/* set when tracing only a pid */ -struct pid *ftrace_pid_trace; static struct pid * const ftrace_swapper_pid = &init_struct_pid; #ifdef CONFIG_DYNAMIC_FTRACE @@ -1074,14 +1087,9 @@ static void ftrace_replace_code(int enable) failed = __ftrace_replace_code(rec, enable); if (failed) { rec->flags |= FTRACE_FL_FAILED; - if ((system_state == SYSTEM_BOOTING) || - !core_kernel_text(rec->ip)) { - ftrace_free_rec(rec); - } else { - ftrace_bug(failed, rec->ip); - /* Stop processing */ - return; - } + ftrace_bug(failed, rec->ip); + /* Stop processing */ + return; } } while_for_each_ftrace_rec(); } @@ -1262,12 +1270,34 @@ static int ftrace_update_code(struct module *mod) ftrace_new_addrs = p->newlist; p->flags = 0L; - /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(mod, p)) { - p->flags |= FTRACE_FL_CONVERTED; - ftrace_update_cnt++; - } else + /* + * Do the initial record convertion from mcount jump + * to the NOP instructions. + */ + if (!ftrace_code_disable(mod, p)) { ftrace_free_rec(p); + continue; + } + + p->flags |= FTRACE_FL_CONVERTED; + ftrace_update_cnt++; + + /* + * If the tracing is enabled, go ahead and enable the record. + * + * The reason not to enable the record immediatelly is the + * inherent check of ftrace_make_nop/ftrace_make_call for + * correct previous instructions. Making first the NOP + * conversion puts the module to the correct state, thus + * passing the ftrace_make_call check. + */ + if (ftrace_start_up) { + int failed = __ftrace_replace_code(p, 1); + if (failed) { + ftrace_bug(failed, p->ip); + ftrace_free_rec(p); + } + } } stop = ftrace_now(raw_smp_processor_id()); @@ -1323,11 +1353,10 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) enum { FTRACE_ITER_FILTER = (1 << 0), - FTRACE_ITER_CONT = (1 << 1), - FTRACE_ITER_NOTRACE = (1 << 2), - FTRACE_ITER_FAILURES = (1 << 3), - FTRACE_ITER_PRINTALL = (1 << 4), - FTRACE_ITER_HASH = (1 << 5), + FTRACE_ITER_NOTRACE = (1 << 1), + FTRACE_ITER_FAILURES = (1 << 2), + FTRACE_ITER_PRINTALL = (1 << 3), + FTRACE_ITER_HASH = (1 << 4), }; #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ @@ -1337,8 +1366,7 @@ struct ftrace_iterator { int hidx; int idx; unsigned flags; - unsigned char buffer[FTRACE_BUFF_MAX+1]; - unsigned buffer_idx; + struct trace_parser parser; }; static void * @@ -1407,7 +1435,7 @@ static int t_hash_show(struct seq_file *m, void *v) if (rec->ops->print) return rec->ops->print(m, rec->ip, rec->ops, rec->data); - seq_printf(m, "%pf:%pf", (void *)rec->ip, (void *)rec->ops->func); + seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func); if (rec->data) seq_printf(m, ":%p", rec->data); @@ -1517,12 +1545,12 @@ static int t_show(struct seq_file *m, void *v) if (!rec) return 0; - seq_printf(m, "%pf\n", (void *)rec->ip); + seq_printf(m, "%ps\n", (void *)rec->ip); return 0; } -static struct seq_operations show_ftrace_seq_ops = { +static const struct seq_operations show_ftrace_seq_ops = { .start = t_start, .next = t_next, .stop = t_stop, @@ -1604,6 +1632,11 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) if (!iter) return -ENOMEM; + if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) { + kfree(iter); + return -ENOMEM; + } + mutex_lock(&ftrace_regex_lock); if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) @@ -1618,8 +1651,10 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) if (!ret) { struct seq_file *m = file->private_data; m->private = iter; - } else + } else { + trace_parser_put(&iter->parser); kfree(iter); + } } else file->private_data = iter; mutex_unlock(&ftrace_regex_lock); @@ -1652,60 +1687,6 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin) return ret; } -enum { - MATCH_FULL, - MATCH_FRONT_ONLY, - MATCH_MIDDLE_ONLY, - MATCH_END_ONLY, -}; - -/* - * (static function - no need for kernel doc) - * - * Pass in a buffer containing a glob and this function will - * set search to point to the search part of the buffer and - * return the type of search it is (see enum above). - * This does modify buff. - * - * Returns enum type. - * search returns the pointer to use for comparison. - * not returns 1 if buff started with a '!' - * 0 otherwise. - */ -static int -ftrace_setup_glob(char *buff, int len, char **search, int *not) -{ - int type = MATCH_FULL; - int i; - - if (buff[0] == '!') { - *not = 1; - buff++; - len--; - } else - *not = 0; - - *search = buff; - - for (i = 0; i < len; i++) { - if (buff[i] == '*') { - if (!i) { - *search = buff + 1; - type = MATCH_END_ONLY; - } else { - if (type == MATCH_END_ONLY) - type = MATCH_MIDDLE_ONLY; - else - type = MATCH_FRONT_ONLY; - buff[i] = 0; - break; - } - } - } - - return type; -} - static int ftrace_match(char *str, char *regex, int len, int type) { int matched = 0; @@ -1754,7 +1735,7 @@ static void ftrace_match_records(char *buff, int len, int enable) int not; flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; - type = ftrace_setup_glob(buff, len, &search, ¬); + type = filter_parse_regex(buff, len, &search, ¬); search_len = strlen(search); @@ -1822,7 +1803,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) } if (strlen(buff)) { - type = ftrace_setup_glob(buff, strlen(buff), &search, ¬); + type = filter_parse_regex(buff, strlen(buff), &search, ¬); search_len = strlen(search); } @@ -1987,7 +1968,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, int count = 0; char *search; - type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); + type = filter_parse_regex(glob, strlen(glob), &search, ¬); len = strlen(search); /* we do not support '!' for function probes */ @@ -2059,12 +2040,12 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, int i, len = 0; char *search; - if (glob && (strcmp(glob, "*") || !strlen(glob))) + if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) glob = NULL; - else { + else if (glob) { int not; - type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); + type = filter_parse_regex(glob, strlen(glob), &search, ¬); len = strlen(search); /* we do not support '!' for function probes */ @@ -2196,11 +2177,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos, int enable) { struct ftrace_iterator *iter; - char ch; - size_t read = 0; - ssize_t ret; + struct trace_parser *parser; + ssize_t ret, read; - if (!cnt || cnt < 0) + if (!cnt) return 0; mutex_lock(&ftrace_regex_lock); @@ -2211,72 +2191,23 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, } else iter = file->private_data; - if (!*ppos) { - iter->flags &= ~FTRACE_ITER_CONT; - iter->buffer_idx = 0; - } - - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; + parser = &iter->parser; + read = trace_get_user(parser, ubuf, cnt, ppos); - /* - * If the parser haven't finished with the last write, - * continue reading the user input without skipping spaces. - */ - if (!(iter->flags & FTRACE_ITER_CONT)) { - /* skip white space */ - while (cnt && isspace(ch)) { - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; - } - - /* only spaces were written */ - if (isspace(ch)) { - *ppos += read; - ret = read; - goto out; - } - - iter->buffer_idx = 0; - } - - while (cnt && !isspace(ch)) { - if (iter->buffer_idx < FTRACE_BUFF_MAX) - iter->buffer[iter->buffer_idx++] = ch; - else { - ret = -EINVAL; - goto out; - } - ret = get_user(ch, ubuf++); + if (read >= 0 && trace_parser_loaded(parser) && + !trace_parser_cont(parser)) { + ret = ftrace_process_regex(parser->buffer, + parser->idx, enable); if (ret) goto out; - read++; - cnt--; - } - if (isspace(ch)) { - iter->buffer[iter->buffer_idx] = 0; - ret = ftrace_process_regex(iter->buffer, - iter->buffer_idx, enable); - if (ret) - goto out; - iter->buffer_idx = 0; - } else { - iter->flags |= FTRACE_ITER_CONT; - iter->buffer[iter->buffer_idx++] = ch; + trace_parser_clear(parser); } - *ppos += read; ret = read; - out: - mutex_unlock(&ftrace_regex_lock); + mutex_unlock(&ftrace_regex_lock); +out: return ret; } @@ -2343,6 +2274,7 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset) #define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; +static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; static int __init set_ftrace_notrace(char *str) { @@ -2358,6 +2290,31 @@ static int __init set_ftrace_filter(char *str) } __setup("ftrace_filter=", set_ftrace_filter); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int __init set_graph_function(char *str) +{ + strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); + return 1; +} +__setup("ftrace_graph_filter=", set_graph_function); + +static void __init set_ftrace_early_graph(char *buf) +{ + int ret; + char *func; + + while (buf) { + func = strsep(&buf, ","); + /* we allow only one expression at a time */ + ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, + func); + if (ret) + printk(KERN_DEBUG "ftrace: function %s not " + "traceable\n", func); + } +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static void __init set_ftrace_early_filter(char *buf, int enable) { char *func; @@ -2374,6 +2331,10 @@ static void __init set_ftrace_early_filters(void) set_ftrace_early_filter(ftrace_filter_buf, 1); if (ftrace_notrace_buf[0]) set_ftrace_early_filter(ftrace_notrace_buf, 0); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (ftrace_graph_buf[0]) + set_ftrace_early_graph(ftrace_graph_buf); +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ } static int @@ -2381,6 +2342,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) { struct seq_file *m = (struct seq_file *)file->private_data; struct ftrace_iterator *iter; + struct trace_parser *parser; mutex_lock(&ftrace_regex_lock); if (file->f_mode & FMODE_READ) { @@ -2390,9 +2352,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) } else iter = file->private_data; - if (iter->buffer_idx) { - iter->buffer[iter->buffer_idx] = 0; - ftrace_match_records(iter->buffer, iter->buffer_idx, enable); + parser = &iter->parser; + if (trace_parser_loaded(parser)) { + parser->buffer[parser->idx] = 0; + ftrace_match_records(parser->buffer, parser->idx, enable); } mutex_lock(&ftrace_lock); @@ -2400,7 +2363,9 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_lock); + trace_parser_put(parser); kfree(iter); + mutex_unlock(&ftrace_regex_lock); return 0; } @@ -2457,11 +2422,9 @@ unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; static void * __g_next(struct seq_file *m, loff_t *pos) { - unsigned long *array = m->private; - if (*pos >= ftrace_graph_count) return NULL; - return &array[*pos]; + return &ftrace_graph_funcs[*pos]; } static void * @@ -2499,12 +2462,12 @@ static int g_show(struct seq_file *m, void *v) return 0; } - seq_printf(m, "%pf\n", v); + seq_printf(m, "%ps\n", (void *)*ptr); return 0; } -static struct seq_operations ftrace_graph_seq_ops = { +static const struct seq_operations ftrace_graph_seq_ops = { .start = g_start, .next = g_next, .stop = g_stop, @@ -2525,16 +2488,10 @@ ftrace_graph_open(struct inode *inode, struct file *file) ftrace_graph_count = 0; memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); } + mutex_unlock(&graph_lock); - if (file->f_mode & FMODE_READ) { + if (file->f_mode & FMODE_READ) ret = seq_open(file, &ftrace_graph_seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = ftrace_graph_funcs; - } - } else - file->private_data = ftrace_graph_funcs; - mutex_unlock(&graph_lock); return ret; } @@ -2563,7 +2520,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) return -ENODEV; /* decode regex */ - type = ftrace_setup_glob(buffer, strlen(buffer), &search, ¬); + type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); if (not) return -EINVAL; @@ -2602,12 +2559,8 @@ static ssize_t ftrace_graph_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { - unsigned char buffer[FTRACE_BUFF_MAX+1]; - unsigned long *array; - size_t read = 0; - ssize_t ret; - int index = 0; - char ch; + struct trace_parser parser; + ssize_t read, ret; if (!cnt || cnt < 0) return 0; @@ -2616,60 +2569,31 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { ret = -EBUSY; - goto out; + goto out_unlock; } - if (file->f_mode & FMODE_READ) { - struct seq_file *m = file->private_data; - array = m->private; - } else - array = file->private_data; - - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; - - /* skip white space */ - while (cnt && isspace(ch)) { - ret = get_user(ch, ubuf++); - if (ret) - goto out; - read++; - cnt--; + if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { + ret = -ENOMEM; + goto out_unlock; } - if (isspace(ch)) { - *ppos += read; - ret = read; - goto out; - } + read = trace_get_user(&parser, ubuf, cnt, ppos); - while (cnt && !isspace(ch)) { - if (index < FTRACE_BUFF_MAX) - buffer[index++] = ch; - else { - ret = -EINVAL; - goto out; - } - ret = get_user(ch, ubuf++); + if (read >= 0 && trace_parser_loaded((&parser))) { + parser.buffer[parser.idx] = 0; + + /* we allow only one expression at a time */ + ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, + parser.buffer); if (ret) - goto out; - read++; - cnt--; + goto out_free; } - buffer[index] = 0; - - /* we allow only one expression at a time */ - ret = ftrace_set_func(array, &ftrace_graph_count, buffer); - if (ret) - goto out; - - file->f_pos += read; ret = read; - out: + +out_free: + trace_parser_put(&parser); +out_unlock: mutex_unlock(&graph_lock); return ret; @@ -2707,7 +2631,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) return 0; } -static int ftrace_convert_nops(struct module *mod, +static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) { @@ -2740,19 +2664,17 @@ static int ftrace_convert_nops(struct module *mod, } #ifdef CONFIG_MODULES -void ftrace_release(void *start, void *end) +void ftrace_release_mod(struct module *mod) { struct dyn_ftrace *rec; struct ftrace_page *pg; - unsigned long s = (unsigned long)start; - unsigned long e = (unsigned long)end; - if (ftrace_disabled || !start || start == end) + if (ftrace_disabled) return; mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { - if ((rec->ip >= s) && (rec->ip < e)) { + if (within_module_core(rec->ip, mod)) { /* * rec->ip is changed in ftrace_free_rec() * It should not between s and e if record was freed. @@ -2769,7 +2691,7 @@ static void ftrace_init_module(struct module *mod, { if (ftrace_disabled || start == end) return; - ftrace_convert_nops(mod, start, end); + ftrace_process_locs(mod, start, end); } static int ftrace_module_notify(struct notifier_block *self, @@ -2784,9 +2706,7 @@ static int ftrace_module_notify(struct notifier_block *self, mod->num_ftrace_callsites); break; case MODULE_STATE_GOING: - ftrace_release(mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); + ftrace_release_mod(mod); break; } @@ -2832,7 +2752,7 @@ void __init ftrace_init(void) last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_convert_nops(NULL, + ret = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc); @@ -2865,23 +2785,6 @@ static inline void ftrace_startup_enable(int command) { } # define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ -static ssize_t -ftrace_pid_read(struct file *file, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - int r; - - if (ftrace_pid_trace == ftrace_swapper_pid) - r = sprintf(buf, "swapper tasks\n"); - else if (ftrace_pid_trace) - r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace)); - else - r = sprintf(buf, "no pid\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - static void clear_ftrace_swapper(void) { struct task_struct *p; @@ -2932,14 +2835,12 @@ static void set_ftrace_pid(struct pid *pid) rcu_read_unlock(); } -static void clear_ftrace_pid_task(struct pid **pid) +static void clear_ftrace_pid_task(struct pid *pid) { - if (*pid == ftrace_swapper_pid) + if (pid == ftrace_swapper_pid) clear_ftrace_swapper(); else - clear_ftrace_pid(*pid); - - *pid = NULL; + clear_ftrace_pid(pid); } static void set_ftrace_pid_task(struct pid *pid) @@ -2950,11 +2851,140 @@ static void set_ftrace_pid_task(struct pid *pid) set_ftrace_pid(pid); } +static int ftrace_pid_add(int p) +{ + struct pid *pid; + struct ftrace_pid *fpid; + int ret = -EINVAL; + + mutex_lock(&ftrace_lock); + + if (!p) + pid = ftrace_swapper_pid; + else + pid = find_get_pid(p); + + if (!pid) + goto out; + + ret = 0; + + list_for_each_entry(fpid, &ftrace_pids, list) + if (fpid->pid == pid) + goto out_put; + + ret = -ENOMEM; + + fpid = kmalloc(sizeof(*fpid), GFP_KERNEL); + if (!fpid) + goto out_put; + + list_add(&fpid->list, &ftrace_pids); + fpid->pid = pid; + + set_ftrace_pid_task(pid); + + ftrace_update_pid_func(); + ftrace_startup_enable(0); + + mutex_unlock(&ftrace_lock); + return 0; + +out_put: + if (pid != ftrace_swapper_pid) + put_pid(pid); + +out: + mutex_unlock(&ftrace_lock); + return ret; +} + +static void ftrace_pid_reset(void) +{ + struct ftrace_pid *fpid, *safe; + + mutex_lock(&ftrace_lock); + list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) { + struct pid *pid = fpid->pid; + + clear_ftrace_pid_task(pid); + + list_del(&fpid->list); + kfree(fpid); + } + + ftrace_update_pid_func(); + ftrace_startup_enable(0); + + mutex_unlock(&ftrace_lock); +} + +static void *fpid_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&ftrace_lock); + + if (list_empty(&ftrace_pids) && (!*pos)) + return (void *) 1; + + return seq_list_start(&ftrace_pids, *pos); +} + +static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) +{ + if (v == (void *)1) + return NULL; + + return seq_list_next(v, &ftrace_pids, pos); +} + +static void fpid_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&ftrace_lock); +} + +static int fpid_show(struct seq_file *m, void *v) +{ + const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list); + + if (v == (void *)1) { + seq_printf(m, "no pid\n"); + return 0; + } + + if (fpid->pid == ftrace_swapper_pid) + seq_printf(m, "swapper tasks\n"); + else + seq_printf(m, "%u\n", pid_vnr(fpid->pid)); + + return 0; +} + +static const struct seq_operations ftrace_pid_sops = { + .start = fpid_start, + .next = fpid_next, + .stop = fpid_stop, + .show = fpid_show, +}; + +static int +ftrace_pid_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + ftrace_pid_reset(); + + if (file->f_mode & FMODE_READ) + ret = seq_open(file, &ftrace_pid_sops); + + return ret; +} + static ssize_t ftrace_pid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct pid *pid; char buf[64]; long val; int ret; @@ -2967,57 +2997,38 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; + /* + * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid" + * to clean the filter quietly. + */ + strstrip(buf); + if (strlen(buf) == 0) + return 1; + ret = strict_strtol(buf, 10, &val); if (ret < 0) return ret; - mutex_lock(&ftrace_lock); - if (val < 0) { - /* disable pid tracing */ - if (!ftrace_pid_trace) - goto out; - - clear_ftrace_pid_task(&ftrace_pid_trace); - - } else { - /* swapper task is special */ - if (!val) { - pid = ftrace_swapper_pid; - if (pid == ftrace_pid_trace) - goto out; - } else { - pid = find_get_pid(val); - - if (pid == ftrace_pid_trace) { - put_pid(pid); - goto out; - } - } - - if (ftrace_pid_trace) - clear_ftrace_pid_task(&ftrace_pid_trace); - - if (!pid) - goto out; + ret = ftrace_pid_add(val); - ftrace_pid_trace = pid; - - set_ftrace_pid_task(ftrace_pid_trace); - } - - /* update the function call */ - ftrace_update_pid_func(); - ftrace_startup_enable(0); + return ret ? ret : cnt; +} - out: - mutex_unlock(&ftrace_lock); +static int +ftrace_pid_release(struct inode *inode, struct file *file) +{ + if (file->f_mode & FMODE_READ) + seq_release(inode, file); - return cnt; + return 0; } static const struct file_operations ftrace_pid_fops = { - .read = ftrace_pid_read, - .write = ftrace_pid_write, + .open = ftrace_pid_open, + .write = ftrace_pid_write, + .read = seq_read, + .llseek = seq_lseek, + .release = ftrace_pid_release, }; static __init int ftrace_init_debugfs(void) @@ -3100,7 +3111,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -3110,7 +3121,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_lock(&ftrace_lock); - ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 81b1645..a91da69 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -501,7 +501,7 @@ static int __init init_kmem_tracer(void) return 1; } - if (!register_tracer(&kmem_tracer)) { + if (register_tracer(&kmem_tracer) != 0) { pr_warning("Warning: could not register the kmem tracer\n"); return 1; } diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c new file mode 100644 index 0000000..e06c6e3 --- /dev/null +++ b/kernel/trace/power-traces.c @@ -0,0 +1,20 @@ +/* + * Power trace points + * + * Copyright (C) 2009 Arjan van de Ven <arjan@linux.intel.com> + */ + +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/slab.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/power.h> + +EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); +EXPORT_TRACEPOINT_SYMBOL_GPL(power_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); + diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 454e74e..e43c928 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -201,8 +201,6 @@ int tracing_is_on(void) } EXPORT_SYMBOL_GPL(tracing_is_on); -#include "trace.h" - #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) @@ -399,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s) int ret; ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\n", - (unsigned int)sizeof(field.time_stamp)); + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); ret = trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit)); + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); ret = trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE); + (unsigned int)BUF_PAGE_SIZE, + (unsigned int)is_signed_type(char)); return ret; } @@ -701,8 +702,8 @@ static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, val &= ~RB_FLAG_MASK; - ret = (unsigned long)cmpxchg(&list->next, - val | old_flag, val | new_flag); + ret = cmpxchg((unsigned long *)&list->next, + val | old_flag, val | new_flag); /* check if the reader took the page */ if ((ret & ~RB_FLAG_MASK) != val) @@ -794,7 +795,7 @@ static int rb_head_page_replace(struct buffer_page *old, val = *ptr & ~RB_FLAG_MASK; val |= RB_PAGE_HEAD; - ret = cmpxchg(ptr, val, &new->list); + ret = cmpxchg(ptr, val, (unsigned long)&new->list); return ret == val; } @@ -2997,15 +2998,12 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) } static struct ring_buffer_event * -rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) +rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) { - struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; struct buffer_page *reader; int nr_loops = 0; - cpu_buffer = buffer->buffers[cpu]; - again: /* * We repeat when a timestamp is encountered. It is possible @@ -3049,7 +3047,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) case RINGBUF_TYPE_DATA: if (ts) { *ts = cpu_buffer->read_stamp + event->time_delta; - ring_buffer_normalize_time_stamp(buffer, + ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } return event; @@ -3168,7 +3166,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) local_irq_save(flags); if (dolock) spin_lock(&cpu_buffer->reader_lock); - event = rb_buffer_peek(buffer, cpu, ts); + event = rb_buffer_peek(cpu_buffer, ts); if (event && event->type_len == RINGBUF_TYPE_PADDING) rb_advance_reader(cpu_buffer); if (dolock) @@ -3237,7 +3235,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) if (dolock) spin_lock(&cpu_buffer->reader_lock); - event = rb_buffer_peek(buffer, cpu, ts); + event = rb_buffer_peek(cpu_buffer, ts); if (event) rb_advance_reader(cpu_buffer); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c75dee..026e715 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -125,19 +125,19 @@ int ftrace_dump_on_oops; static int tracing_set_tracer(const char *buf); -#define BOOTUP_TRACER_SIZE 100 -static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; +#define MAX_TRACER_SIZE 100 +static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; static char *default_bootup_tracer; -static int __init set_ftrace(char *str) +static int __init set_cmdline_ftrace(char *str) { - strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); + strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; /* We are using ftrace early, expand it */ ring_buffer_expanded = 1; return 1; } -__setup("ftrace=", set_ftrace); +__setup("ftrace=", set_cmdline_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { @@ -242,13 +242,6 @@ static struct tracer *trace_types __read_mostly; static struct tracer *current_trace __read_mostly; /* - * max_tracer_type_len is used to simplify the allocating of - * buffers to read userspace tracer names. We keep track of - * the longest tracer name registered. - */ -static int max_tracer_type_len; - -/* * trace_types_lock is used to protect the trace_types list. * This lock is also used to keep user access serialized. * Accesses from userspace will grab this lock while userspace @@ -275,12 +268,18 @@ static DEFINE_SPINLOCK(tracing_start_lock); */ void trace_wake_up(void) { + int cpu; + + if (trace_flags & TRACE_ITER_BLOCK) + return; /* * The runqueue_is_locked() can fail, but this is the best we * have for now: */ - if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked()) + cpu = get_cpu(); + if (!runqueue_is_locked(cpu)) wake_up(&trace_wait); + put_cpu(); } static int __init set_buf_size(char *str) @@ -339,6 +338,112 @@ static struct { int trace_clock_id; +/* + * trace_parser_get_init - gets the buffer for trace parser + */ +int trace_parser_get_init(struct trace_parser *parser, int size) +{ + memset(parser, 0, sizeof(*parser)); + + parser->buffer = kmalloc(size, GFP_KERNEL); + if (!parser->buffer) + return 1; + + parser->size = size; + return 0; +} + +/* + * trace_parser_put - frees the buffer for trace parser + */ +void trace_parser_put(struct trace_parser *parser) +{ + kfree(parser->buffer); +} + +/* + * trace_get_user - reads the user input string separated by space + * (matched by isspace(ch)) + * + * For each string found the 'struct trace_parser' is updated, + * and the function returns. + * + * Returns number of bytes read. + * + * See kernel/trace/trace.h for 'struct trace_parser' details. + */ +int trace_get_user(struct trace_parser *parser, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char ch; + size_t read = 0; + ssize_t ret; + + if (!*ppos) + trace_parser_clear(parser); + + ret = get_user(ch, ubuf++); + if (ret) + goto out; + + read++; + cnt--; + + /* + * The parser is not finished with the last write, + * continue reading the user input without skipping spaces. + */ + if (!parser->cont) { + /* skip white space */ + while (cnt && isspace(ch)) { + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + /* only spaces were written */ + if (isspace(ch)) { + *ppos += read; + ret = read; + goto out; + } + + parser->idx = 0; + } + + /* read the non-space input */ + while (cnt && !isspace(ch)) { + if (parser->idx < parser->size - 1) + parser->buffer[parser->idx++] = ch; + else { + ret = -EINVAL; + goto out; + } + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + /* We either got finished input or we have to wait for another call. */ + if (isspace(ch)) { + parser->buffer[parser->idx] = 0; + parser->cont = false; + } else { + parser->cont = true; + parser->buffer[parser->idx++] = ch; + } + + *ppos += read; + ret = read; + +out: + return ret; +} + ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) { int len; @@ -513,7 +618,6 @@ __releases(kernel_lock) __acquires(kernel_lock) { struct tracer *t; - int len; int ret = 0; if (!type->name) { @@ -521,6 +625,11 @@ __acquires(kernel_lock) return -1; } + if (strlen(type->name) > MAX_TRACER_SIZE) { + pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); + return -1; + } + /* * When this gets called we hold the BKL which means that * preemption is disabled. Various trace selftests however @@ -535,7 +644,7 @@ __acquires(kernel_lock) for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ - pr_info("Trace %s already registered\n", + pr_info("Tracer %s already registered\n", type->name); ret = -1; goto out; @@ -586,9 +695,6 @@ __acquires(kernel_lock) type->next = trace_types; trace_types = type; - len = strlen(type->name); - if (len > max_tracer_type_len) - max_tracer_type_len = len; out: tracing_selftest_running = false; @@ -597,7 +703,7 @@ __acquires(kernel_lock) if (ret || !default_bootup_tracer) goto out_unlock; - if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) + if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) goto out_unlock; printk(KERN_INFO "Starting tracer '%s'\n", type->name); @@ -619,14 +725,13 @@ __acquires(kernel_lock) void unregister_tracer(struct tracer *type) { struct tracer **t; - int len; mutex_lock(&trace_types_lock); for (t = &trace_types; *t; t = &(*t)->next) { if (*t == type) goto found; } - pr_info("Trace %s not registered\n", type->name); + pr_info("Tracer %s not registered\n", type->name); goto out; found: @@ -639,17 +744,7 @@ void unregister_tracer(struct tracer *type) current_trace->stop(&global_trace); current_trace = &nop_trace; } - - if (strlen(type->name) != max_tracer_type_len) - goto out; - - max_tracer_type_len = 0; - for (t = &trace_types; *t; t = &(*t)->next) { - len = strlen((*t)->name); - if (len > max_tracer_type_len) - max_tracer_type_len = len; - } - out: +out: mutex_unlock(&trace_types_lock); } @@ -719,6 +814,11 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } +int is_tracing_stopped(void) +{ + return trace_stop_count; +} + /** * ftrace_off_permanent - disable all ftrace code permanently * @@ -886,7 +986,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->tgid = (tsk) ? tsk->tgid : 0; + entry->lock_depth = (tsk) ? tsk->lock_depth : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -1068,6 +1168,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) return; entry = ring_buffer_event_data(event); + entry->tgid = current->tgid; memset(&entry->caller, 0, sizeof(entry->caller)); trace.nr_entries = 0; @@ -1094,6 +1195,7 @@ ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { + struct ftrace_event_call *call = &event_special; struct ring_buffer_event *event; struct trace_array *tr = __tr; struct ring_buffer *buffer = tr->buffer; @@ -1107,7 +1209,9 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - trace_buffer_unlock_commit(buffer, event, 0, pc); + + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, pc); } void @@ -1289,7 +1393,7 @@ int trace_array_vprintk(struct trace_array *tr, int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { - return trace_array_printk(&global_trace, ip, fmt, args); + return trace_array_vprintk(&global_trace, ip, fmt, args); } EXPORT_SYMBOL_GPL(trace_vprintk); @@ -1530,10 +1634,10 @@ static void print_lat_help_header(struct seq_file *m) seq_puts(m, "# | / _----=> need-resched \n"); seq_puts(m, "# || / _---=> hardirq/softirq \n"); seq_puts(m, "# ||| / _--=> preempt-depth \n"); - seq_puts(m, "# |||| / \n"); - seq_puts(m, "# ||||| delay \n"); - seq_puts(m, "# cmd pid ||||| time | caller \n"); - seq_puts(m, "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# |||| /_--=> lock-depth \n"); + seq_puts(m, "# |||||/ delay \n"); + seq_puts(m, "# cmd pid |||||| time | caller \n"); + seq_puts(m, "# \\ / |||||| \\ | / \n"); } static void print_func_help_header(struct seq_file *m) @@ -1845,7 +1949,7 @@ static int s_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations tracer_seq_ops = { +static const struct seq_operations tracer_seq_ops = { .start = s_start, .next = s_next, .stop = s_stop, @@ -1880,11 +1984,9 @@ __tracing_open(struct inode *inode, struct file *file) if (current_trace) *iter->trace = *current_trace; - if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) + if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; - cpumask_clear(iter->started); - if (current_trace && current_trace->print_max) iter->tr = &max_tr; else @@ -2059,7 +2161,7 @@ static int t_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations show_traces_seq_ops = { +static const struct seq_operations show_traces_seq_ops = { .start = t_start, .next = t_next, .stop = t_stop, @@ -2489,7 +2591,7 @@ static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - char buf[max_tracer_type_len+2]; + char buf[MAX_TRACER_SIZE+2]; int r; mutex_lock(&trace_types_lock); @@ -2639,15 +2741,15 @@ static ssize_t tracing_set_trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - char buf[max_tracer_type_len+1]; + char buf[MAX_TRACER_SIZE+1]; int i; size_t ret; int err; ret = cnt; - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; + if (cnt > MAX_TRACER_SIZE) + cnt = MAX_TRACER_SIZE; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -4285,7 +4387,7 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; - if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) goto out_free_tracing_cpumask; /* To save memory, keep the ring buffer size to its minimum */ @@ -4296,7 +4398,6 @@ __init static int tracer_alloc_buffers(void) cpumask_copy(tracing_buffer_mask, cpu_possible_mask); cpumask_copy(tracing_cpumask, cpu_all_mask); - cpumask_clear(tracing_reader_cpumask); /* TODO: make the number of buffers hot pluggable with CPUS */ global_trace.buffer = ring_buffer_alloc(ring_buf_size, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ea7e0bc..91c3d0e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -7,10 +7,10 @@ #include <linux/clocksource.h> #include <linux/ring_buffer.h> #include <linux/mmiotrace.h> +#include <linux/tracepoint.h> #include <linux/ftrace.h> #include <trace/boot.h> #include <linux/kmemtrace.h> -#include <trace/power.h> #include <linux/trace_seq.h> #include <linux/ftrace_event.h> @@ -40,164 +40,60 @@ enum trace_type { TRACE_HW_BRANCHES, TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, - TRACE_POWER, TRACE_BLK, TRACE_KSYM, __TRACE_LAST_TYPE, }; -/* - * Function trace entry - function address and parent function addres: - */ -struct ftrace_entry { - struct trace_entry ent; - unsigned long ip; - unsigned long parent_ip; -}; - -/* Function call entry */ -struct ftrace_graph_ent_entry { - struct trace_entry ent; - struct ftrace_graph_ent graph_ent; +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ }; -/* Function return entry */ -struct ftrace_graph_ret_entry { - struct trace_entry ent; - struct ftrace_graph_ret ret; -}; extern struct tracer boot_tracer; -/* - * Context switch trace entry - which task (and prio) we switched from/to: - */ -struct ctx_switch_entry { - struct trace_entry ent; - unsigned int prev_pid; - unsigned char prev_prio; - unsigned char prev_state; - unsigned int next_pid; - unsigned char next_prio; - unsigned char next_state; - unsigned int next_cpu; -}; - -/* - * Special (free-form) trace entry: - */ -struct special_entry { - struct trace_entry ent; - unsigned long arg1; - unsigned long arg2; - unsigned long arg3; -}; - -/* - * Stack-trace entry: - */ - -#define FTRACE_STACK_ENTRIES 8 - -struct stack_entry { - struct trace_entry ent; - unsigned long caller[FTRACE_STACK_ENTRIES]; -}; - -struct userstack_entry { - struct trace_entry ent; - unsigned long caller[FTRACE_STACK_ENTRIES]; -}; - -/* - * trace_printk entry: - */ -struct bprint_entry { - struct trace_entry ent; - unsigned long ip; - const char *fmt; - u32 buf[]; -}; - -struct print_entry { - struct trace_entry ent; - unsigned long ip; - char buf[]; -}; - -#define TRACE_OLD_SIZE 88 - -struct trace_field_cont { - unsigned char type; - /* Temporary till we get rid of this completely */ - char buf[TRACE_OLD_SIZE - 1]; -}; +#undef __field +#define __field(type, item) type item; -struct trace_mmiotrace_rw { - struct trace_entry ent; - struct mmiotrace_rw rw; -}; +#undef __field_struct +#define __field_struct(type, item) __field(type, item) -struct trace_mmiotrace_map { - struct trace_entry ent; - struct mmiotrace_map map; -}; +#undef __field_desc +#define __field_desc(type, container, item) -struct trace_boot_call { - struct trace_entry ent; - struct boot_trace_call boot_call; -}; +#undef __array +#define __array(type, item, size) type item[size]; -struct trace_boot_ret { - struct trace_entry ent; - struct boot_trace_ret boot_ret; -}; +#undef __array_desc +#define __array_desc(type, container, item, size) -#define TRACE_FUNC_SIZE 30 -#define TRACE_FILE_SIZE 20 -struct trace_branch { - struct trace_entry ent; - unsigned line; - char func[TRACE_FUNC_SIZE+1]; - char file[TRACE_FILE_SIZE+1]; - char correct; -}; +#undef __dynamic_array +#define __dynamic_array(type, item) type item[]; -struct hw_branch_entry { - struct trace_entry ent; - u64 from; - u64 to; -}; +#undef F_STRUCT +#define F_STRUCT(args...) args -struct trace_power { - struct trace_entry ent; - struct power_trace state_data; -}; +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ + struct struct_name { \ + struct trace_entry ent; \ + tstruct \ + } -enum kmemtrace_type_id { - KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ - KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ - KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ -}; +#undef TP_ARGS +#define TP_ARGS(args...) args -struct kmemtrace_alloc_entry { - struct trace_entry ent; - enum kmemtrace_type_id type_id; - unsigned long call_site; - const void *ptr; - size_t bytes_req; - size_t bytes_alloc; - gfp_t gfp_flags; - int node; -}; +#undef FTRACE_ENTRY_DUP +#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) -struct kmemtrace_free_entry { - struct trace_entry ent; - enum kmemtrace_type_id type_id; - unsigned long call_site; - const void *ptr; -}; +#include "trace_entries.h" +/* + * syscalls are special, and need special handling, this is why + * they are not included in trace_entries.h + */ struct syscall_trace_enter { struct trace_entry ent; int nr; @@ -210,23 +106,12 @@ struct syscall_trace_exit { unsigned long ret; }; -#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" -extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); - -struct ksym_trace_entry { - struct trace_entry ent; - unsigned long ip; - unsigned char type; - char ksym_name[KSYM_NAME_LEN]; - char cmd[TASK_COMM_LEN]; -}; - /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: * IRQS_OFF - interrupts were disabled * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags - * NEED_RESCED - reschedule is requested + * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler */ @@ -325,7 +210,6 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ - IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ @@ -406,7 +290,6 @@ struct tracer { struct tracer *next; int print_max; struct tracer_flags *flags; - struct tracer_stat *stats; }; @@ -485,6 +368,10 @@ void tracing_stop_sched_switch_record(void); void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); +int is_tracing_stopped(void); + +#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" +extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); extern unsigned long nsecs_to_usecs(unsigned long nsecs); @@ -525,20 +412,6 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags, extern cycle_t ftrace_now(int cpu); -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -typedef void -(*tracer_switch_func_t)(void *private, - void *__rq, - struct task_struct *prev, - struct task_struct *next); - -struct tracer_switch_ops { - tracer_switch_func_t func; - void *private; - struct tracer_switch_ops *next; -}; -#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ - extern void trace_find_cmdline(int pid, char comm[]); #ifdef CONFIG_DYNAMIC_FTRACE @@ -621,10 +494,6 @@ static inline int ftrace_graph_addr(unsigned long addr) return 0; } #else -static inline int ftrace_trace_addr(unsigned long addr) -{ - return 1; -} static inline int ftrace_graph_addr(unsigned long addr) { return 1; @@ -638,12 +507,12 @@ print_graph_function(struct trace_iterator *iter) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -extern struct pid *ftrace_pid_trace; +extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER static inline int ftrace_trace_task(struct task_struct *task) { - if (!ftrace_pid_trace) + if (list_empty(&ftrace_pids)) return 1; return test_tsk_trace_trace(task); @@ -656,6 +525,41 @@ static inline int ftrace_trace_task(struct task_struct *task) #endif /* + * struct trace_parser - servers for reading the user input separated by spaces + * @cont: set if the input is not complete - no final space char was found + * @buffer: holds the parsed user input + * @idx: user input lenght + * @size: buffer size + */ +struct trace_parser { + bool cont; + char *buffer; + unsigned idx; + unsigned size; +}; + +static inline bool trace_parser_loaded(struct trace_parser *parser) +{ + return (parser->idx != 0); +} + +static inline bool trace_parser_cont(struct trace_parser *parser) +{ + return parser->cont; +} + +static inline void trace_parser_clear(struct trace_parser *parser) +{ + parser->cont = false; + parser->idx = 0; +} + +extern int trace_parser_get_init(struct trace_parser *parser, int size); +extern void trace_parser_put(struct trace_parser *parser); +extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, + size_t cnt, loff_t *ppos); + +/* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. * @@ -790,7 +694,6 @@ struct event_filter { int n_preds; struct filter_pred **preds; char *filter_string; - bool no_reset; }; struct event_subsystem { @@ -802,22 +705,40 @@ struct event_subsystem { }; struct filter_pred; +struct regex; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, int val1, int val2); -struct filter_pred { - filter_pred_fn_t fn; - u64 val; - char str_val[MAX_FILTER_STR_VAL]; - int str_len; - char *field_name; - int offset; - int not; - int op; - int pop_n; +typedef int (*regex_match_func)(char *str, struct regex *r, int len); + +enum regex_type { + MATCH_FULL = 0, + MATCH_FRONT_ONLY, + MATCH_MIDDLE_ONLY, + MATCH_END_ONLY, +}; + +struct regex { + char pattern[MAX_FILTER_STR_VAL]; + int len; + int field_len; + regex_match_func match; }; +struct filter_pred { + filter_pred_fn_t fn; + u64 val; + struct regex regex; + char *field_name; + int offset; + int not; + int op; + int pop_n; +}; + +extern enum regex_type +filter_parse_regex(char *buff, int len, char **search, int *not); extern void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s); extern int apply_event_filter(struct ftrace_event_call *call, @@ -833,7 +754,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { + if (unlikely(call->filter_active) && + !filter_match_preds(call->filter, rec)) { ring_buffer_discard_commit(buffer, event); return 1; } @@ -841,58 +763,18 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } -#define DEFINE_COMPARISON_PRED(type) \ -static int filter_pred_##type(struct filter_pred *pred, void *event, \ - int val1, int val2) \ -{ \ - type *addr = (type *)(event + pred->offset); \ - type val = (type)pred->val; \ - int match = 0; \ - \ - switch (pred->op) { \ - case OP_LT: \ - match = (*addr < val); \ - break; \ - case OP_LE: \ - match = (*addr <= val); \ - break; \ - case OP_GT: \ - match = (*addr > val); \ - break; \ - case OP_GE: \ - match = (*addr >= val); \ - break; \ - default: \ - break; \ - } \ - \ - return match; \ -} - -#define DEFINE_EQUALITY_PRED(size) \ -static int filter_pred_##size(struct filter_pred *pred, void *event, \ - int val1, int val2) \ -{ \ - u##size *addr = (u##size *)(event + pred->offset); \ - u##size val = (u##size)pred->val; \ - int match; \ - \ - match = (val == *addr) ^ pred->not; \ - \ - return match; \ -} - extern struct mutex event_mutex; extern struct list_head ftrace_events; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ extern struct ftrace_event_call event_##call; -#undef TRACE_EVENT_FORMAT_NOFILTER -#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) -#include "trace_event_types.h" +#undef FTRACE_ENTRY_DUP +#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) +#include "trace_entries.h" #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 19bfc75..c21d5f3 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -129,6 +129,7 @@ struct tracer boot_tracer __read_mostly = void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { + struct ftrace_event_call *call = &event_boot_call; struct ring_buffer_event *event; struct ring_buffer *buffer; struct trace_boot_call *entry; @@ -150,13 +151,15 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) goto out; entry = ring_buffer_event_data(event); entry->boot_call = *bt; - trace_buffer_unlock_commit(buffer, event, 0, 0); + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, 0); out: preempt_enable(); } void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { + struct ftrace_event_call *call = &event_boot_ret; struct ring_buffer_event *event; struct ring_buffer *buffer; struct trace_boot_ret *entry; @@ -175,7 +178,8 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) goto out; entry = ring_buffer_event_data(event); entry->boot_ret = *bt; - trace_buffer_unlock_commit(buffer, event, 0, 0); + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, 0); out: preempt_enable(); } diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 7a7a9fd..4a194f0 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -34,6 +34,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_array *tr = branch_tracer; struct ring_buffer_event *event; struct trace_branch *entry; + struct ring_buffer *buffer; unsigned long flags; int cpu, pc; const char *p; @@ -54,7 +55,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) goto out; pc = preempt_count(); - event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, + buffer = tr->buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, sizeof(*entry), flags, pc); if (!event) goto out; @@ -74,8 +76,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, buffer, event)) + ring_buffer_unlock_commit(buffer, event); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index b588fd8..20c5f92 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -66,10 +66,14 @@ u64 notrace trace_clock(void) * Used by plugins that need globally coherent timestamps. */ -static u64 prev_trace_clock_time; - -static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; +/* keep prev_time and lock in the same cacheline. */ +static struct { + u64 prev_time; + raw_spinlock_t lock; +} trace_clock_struct ____cacheline_aligned_in_smp = + { + .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, + }; u64 notrace trace_clock_global(void) { @@ -88,19 +92,19 @@ u64 notrace trace_clock_global(void) if (unlikely(in_nmi())) goto out; - __raw_spin_lock(&trace_clock_lock); + __raw_spin_lock(&trace_clock_struct.lock); /* * TODO: if this happens often then maybe we should reset - * my_scd->clock to prev_trace_clock_time+1, to make sure + * my_scd->clock to prev_time+1, to make sure * we start ticking with the local clock from now on? */ - if ((s64)(now - prev_trace_clock_time) < 0) - now = prev_trace_clock_time + 1; + if ((s64)(now - trace_clock_struct.prev_time) < 0) + now = trace_clock_struct.prev_time + 1; - prev_trace_clock_time = now; + trace_clock_struct.prev_time = now; - __raw_spin_unlock(&trace_clock_lock); + __raw_spin_unlock(&trace_clock_struct.lock); out: raw_local_irq_restore(flags); diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h new file mode 100644 index 0000000..e19747d --- /dev/null +++ b/kernel/trace/trace_entries.h @@ -0,0 +1,382 @@ +/* + * This file defines the trace event structures that go into the ring + * buffer directly. They are created via macros so that changes for them + * appear in the format file. Using macros will automate this process. + * + * The macro used to create a ftrace data structure is: + * + * FTRACE_ENTRY( name, struct_name, id, structure, print ) + * + * @name: the name used the event name, as well as the name of + * the directory that holds the format file. + * + * @struct_name: the name of the structure that is created. + * + * @id: The event identifier that is used to detect what event + * this is from the ring buffer. + * + * @structure: the structure layout + * + * - __field( type, item ) + * This is equivalent to declaring + * type item; + * in the structure. + * - __array( type, item, size ) + * This is equivalent to declaring + * type item[size]; + * in the structure. + * + * * for structures within structures, the format of the internal + * structure is layed out. This allows the internal structure + * to be deciphered for the format file. Although these macros + * may become out of sync with the internal structure, they + * will create a compile error if it happens. Since the + * internel structures are just tracing helpers, this is not + * an issue. + * + * When an internal structure is used, it should use: + * + * __field_struct( type, item ) + * + * instead of __field. This will prevent it from being shown in + * the output file. The fields in the structure should use. + * + * __field_desc( type, container, item ) + * __array_desc( type, container, item, len ) + * + * type, item and len are the same as __field and __array, but + * container is added. This is the name of the item in + * __field_struct that this is describing. + * + * + * @print: the print format shown to users in the format file. + */ + +/* + * Function trace entry - function address and parent function addres: + */ +FTRACE_ENTRY(function, ftrace_entry, + + TRACE_FN, + + F_STRUCT( + __field( unsigned long, ip ) + __field( unsigned long, parent_ip ) + ), + + F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip) +); + +/* Function call entry */ +FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, + + TRACE_GRAPH_ENT, + + F_STRUCT( + __field_struct( struct ftrace_graph_ent, graph_ent ) + __field_desc( unsigned long, graph_ent, func ) + __field_desc( int, graph_ent, depth ) + ), + + F_printk("--> %lx (%d)", __entry->func, __entry->depth) +); + +/* Function return entry */ +FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, + + TRACE_GRAPH_RET, + + F_STRUCT( + __field_struct( struct ftrace_graph_ret, ret ) + __field_desc( unsigned long, ret, func ) + __field_desc( unsigned long long, ret, calltime) + __field_desc( unsigned long long, ret, rettime ) + __field_desc( unsigned long, ret, overrun ) + __field_desc( int, ret, depth ) + ), + + F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", + __entry->func, __entry->depth, + __entry->calltime, __entry->rettime, + __entry->depth) +); + +/* + * Context switch trace entry - which task (and prio) we switched from/to: + * + * This is used for both wakeup and context switches. We only want + * to create one structure, but we need two outputs for it. + */ +#define FTRACE_CTX_FIELDS \ + __field( unsigned int, prev_pid ) \ + __field( unsigned char, prev_prio ) \ + __field( unsigned char, prev_state ) \ + __field( unsigned int, next_pid ) \ + __field( unsigned char, next_prio ) \ + __field( unsigned char, next_state ) \ + __field( unsigned int, next_cpu ) + +FTRACE_ENTRY(context_switch, ctx_switch_entry, + + TRACE_CTX, + + F_STRUCT( + FTRACE_CTX_FIELDS + ), + + F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", + __entry->prev_pid, __entry->prev_prio, __entry->prev_state, + __entry->next_pid, __entry->next_prio, __entry->next_state, + __entry->next_cpu + ) +); + +/* + * FTRACE_ENTRY_DUP only creates the format file, it will not + * create another structure. + */ +FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, + + TRACE_WAKE, + + F_STRUCT( + FTRACE_CTX_FIELDS + ), + + F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", + __entry->prev_pid, __entry->prev_prio, __entry->prev_state, + __entry->next_pid, __entry->next_prio, __entry->next_state, + __entry->next_cpu + ) +); + +/* + * Special (free-form) trace entry: + */ +FTRACE_ENTRY(special, special_entry, + + TRACE_SPECIAL, + + F_STRUCT( + __field( unsigned long, arg1 ) + __field( unsigned long, arg2 ) + __field( unsigned long, arg3 ) + ), + + F_printk("(%08lx) (%08lx) (%08lx)", + __entry->arg1, __entry->arg2, __entry->arg3) +); + +/* + * Stack-trace entry: + */ + +#define FTRACE_STACK_ENTRIES 8 + +FTRACE_ENTRY(kernel_stack, stack_entry, + + TRACE_STACK, + + F_STRUCT( + __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) + ), + + F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", + __entry->caller[0], __entry->caller[1], __entry->caller[2], + __entry->caller[3], __entry->caller[4], __entry->caller[5], + __entry->caller[6], __entry->caller[7]) +); + +FTRACE_ENTRY(user_stack, userstack_entry, + + TRACE_USER_STACK, + + F_STRUCT( + __field( unsigned int, tgid ) + __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) + ), + + F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", + __entry->caller[0], __entry->caller[1], __entry->caller[2], + __entry->caller[3], __entry->caller[4], __entry->caller[5], + __entry->caller[6], __entry->caller[7]) +); + +/* + * trace_printk entry: + */ +FTRACE_ENTRY(bprint, bprint_entry, + + TRACE_BPRINT, + + F_STRUCT( + __field( unsigned long, ip ) + __field( const char *, fmt ) + __dynamic_array( u32, buf ) + ), + + F_printk("%08lx fmt:%p", + __entry->ip, __entry->fmt) +); + +FTRACE_ENTRY(print, print_entry, + + TRACE_PRINT, + + F_STRUCT( + __field( unsigned long, ip ) + __dynamic_array( char, buf ) + ), + + F_printk("%08lx %s", + __entry->ip, __entry->buf) +); + +FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, + + TRACE_MMIO_RW, + + F_STRUCT( + __field_struct( struct mmiotrace_rw, rw ) + __field_desc( resource_size_t, rw, phys ) + __field_desc( unsigned long, rw, value ) + __field_desc( unsigned long, rw, pc ) + __field_desc( int, rw, map_id ) + __field_desc( unsigned char, rw, opcode ) + __field_desc( unsigned char, rw, width ) + ), + + F_printk("%lx %lx %lx %d %x %x", + (unsigned long)__entry->phys, __entry->value, __entry->pc, + __entry->map_id, __entry->opcode, __entry->width) +); + +FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, + + TRACE_MMIO_MAP, + + F_STRUCT( + __field_struct( struct mmiotrace_map, map ) + __field_desc( resource_size_t, map, phys ) + __field_desc( unsigned long, map, virt ) + __field_desc( unsigned long, map, len ) + __field_desc( int, map, map_id ) + __field_desc( unsigned char, map, opcode ) + ), + + F_printk("%lx %lx %lx %d %x", + (unsigned long)__entry->phys, __entry->virt, __entry->len, + __entry->map_id, __entry->opcode) +); + +FTRACE_ENTRY(boot_call, trace_boot_call, + + TRACE_BOOT_CALL, + + F_STRUCT( + __field_struct( struct boot_trace_call, boot_call ) + __field_desc( pid_t, boot_call, caller ) + __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN) + ), + + F_printk("%d %s", __entry->caller, __entry->func) +); + +FTRACE_ENTRY(boot_ret, trace_boot_ret, + + TRACE_BOOT_RET, + + F_STRUCT( + __field_struct( struct boot_trace_ret, boot_ret ) + __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN) + __field_desc( int, boot_ret, result ) + __field_desc( unsigned long, boot_ret, duration ) + ), + + F_printk("%s %d %lx", + __entry->func, __entry->result, __entry->duration) +); + +#define TRACE_FUNC_SIZE 30 +#define TRACE_FILE_SIZE 20 + +FTRACE_ENTRY(branch, trace_branch, + + TRACE_BRANCH, + + F_STRUCT( + __field( unsigned int, line ) + __array( char, func, TRACE_FUNC_SIZE+1 ) + __array( char, file, TRACE_FILE_SIZE+1 ) + __field( char, correct ) + ), + + F_printk("%u:%s:%s (%u)", + __entry->line, + __entry->func, __entry->file, __entry->correct) +); + +FTRACE_ENTRY(hw_branch, hw_branch_entry, + + TRACE_HW_BRANCHES, + + F_STRUCT( + __field( u64, from ) + __field( u64, to ) + ), + + F_printk("from: %llx to: %llx", __entry->from, __entry->to) +); + +FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, + + TRACE_KMEM_ALLOC, + + F_STRUCT( + __field( enum kmemtrace_type_id, type_id ) + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi" + " flags:%x node:%d", + __entry->type_id, __entry->call_site, __entry->ptr, + __entry->bytes_req, __entry->bytes_alloc, + __entry->gfp_flags, __entry->node) +); + +FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, + + TRACE_KMEM_FREE, + + F_STRUCT( + __field( enum kmemtrace_type_id, type_id ) + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + F_printk("type:%u call_site:%lx ptr:%p", + __entry->type_id, __entry->call_site, __entry->ptr) +); + +FTRACE_ENTRY(ksym_trace, ksym_trace_entry, + + TRACE_KSYM, + + F_STRUCT( + __field( unsigned long, ip ) + __field( unsigned char, type ) + __array( char , ksym_name, KSYM_NAME_LEN ) + __array( char , cmd, TASK_COMM_LEN ) + ), + + F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", + (void *)__entry->ip, (unsigned int)__entry->type, + __entry->ksym_name, __entry->cmd) +); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 11ba5bb..8d5c171 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -5,8 +5,65 @@ * */ +#include <linux/module.h> #include "trace.h" +/* + * We can't use a size but a type in alloc_percpu() + * So let's create a dummy type that matches the desired size + */ +typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; + +char *trace_profile_buf; +EXPORT_SYMBOL_GPL(trace_profile_buf); + +char *trace_profile_buf_nmi; +EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); + +/* Count the events in use (per event id, not per instance) */ +static int total_profile_count; + +static int ftrace_profile_enable_event(struct ftrace_event_call *event) +{ + char *buf; + int ret = -ENOMEM; + + if (atomic_inc_return(&event->profile_count)) + return 0; + + if (!total_profile_count) { + buf = (char *)alloc_percpu(profile_buf_t); + if (!buf) + goto fail_buf; + + rcu_assign_pointer(trace_profile_buf, buf); + + buf = (char *)alloc_percpu(profile_buf_t); + if (!buf) + goto fail_buf_nmi; + + rcu_assign_pointer(trace_profile_buf_nmi, buf); + } + + ret = event->profile_enable(); + if (!ret) { + total_profile_count++; + return 0; + } + +fail_buf_nmi: + if (!total_profile_count) { + free_percpu(trace_profile_buf_nmi); + free_percpu(trace_profile_buf); + trace_profile_buf_nmi = NULL; + trace_profile_buf = NULL; + } +fail_buf: + atomic_dec(&event->profile_count); + + return ret; +} + int ftrace_profile_enable(int event_id) { struct ftrace_event_call *event; @@ -14,8 +71,9 @@ int ftrace_profile_enable(int event_id) mutex_lock(&event_mutex); list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id && event->profile_enable) { - ret = event->profile_enable(event); + if (event->id == event_id && event->profile_enable && + try_module_get(event->mod)) { + ret = ftrace_profile_enable_event(event); break; } } @@ -24,6 +82,33 @@ int ftrace_profile_enable(int event_id) return ret; } +static void ftrace_profile_disable_event(struct ftrace_event_call *event) +{ + char *buf, *nmi_buf; + + if (!atomic_add_negative(-1, &event->profile_count)) + return; + + event->profile_disable(); + + if (!--total_profile_count) { + buf = trace_profile_buf; + rcu_assign_pointer(trace_profile_buf, NULL); + + nmi_buf = trace_profile_buf_nmi; + rcu_assign_pointer(trace_profile_buf_nmi, NULL); + + /* + * Ensure every events in profiling have finished before + * releasing the buffers + */ + synchronize_sched(); + + free_percpu(buf); + free_percpu(nmi_buf); + } +} + void ftrace_profile_disable(int event_id) { struct ftrace_event_call *event; @@ -31,7 +116,8 @@ void ftrace_profile_disable(int event_id) mutex_lock(&event_mutex); list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id) { - event->profile_disable(event); + ftrace_profile_disable_event(event); + module_put(event->mod); break; } } diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h deleted file mode 100644 index 6db005e..0000000 --- a/kernel/trace/trace_event_types.h +++ /dev/null @@ -1,178 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ftrace - -/* - * We cheat and use the proto type field as the ID - * and args as the entry type (minus 'struct') - */ -TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD(unsigned long, parent_ip, parent_ip) - ), - TP_RAW_FMT(" %lx <-- %lx") -); - -TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, - ftrace_graph_ent_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, graph_ent.func, func) - TRACE_FIELD(int, graph_ent.depth, depth) - ), - TP_RAW_FMT("--> %lx (%d)") -); - -TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, - ftrace_graph_ret_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ret.func, func) - TRACE_FIELD(unsigned long long, ret.calltime, calltime) - TRACE_FIELD(unsigned long long, ret.rettime, rettime) - TRACE_FIELD(unsigned long, ret.overrun, overrun) - TRACE_FIELD(int, ret.depth, depth) - ), - TP_RAW_FMT("<-- %lx (%d)") -); - -TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned int, prev_pid, prev_pid) - TRACE_FIELD(unsigned char, prev_prio, prev_prio) - TRACE_FIELD(unsigned char, prev_state, prev_state) - TRACE_FIELD(unsigned int, next_pid, next_pid) - TRACE_FIELD(unsigned char, next_prio, next_prio) - TRACE_FIELD(unsigned char, next_state, next_state) - TRACE_FIELD(unsigned int, next_cpu, next_cpu) - ), - TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") -); - -TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned int, prev_pid, prev_pid) - TRACE_FIELD(unsigned char, prev_prio, prev_prio) - TRACE_FIELD(unsigned char, prev_state, prev_state) - TRACE_FIELD(unsigned int, next_pid, next_pid) - TRACE_FIELD(unsigned char, next_prio, next_prio) - TRACE_FIELD(unsigned char, next_state, next_state) - TRACE_FIELD(unsigned int, next_cpu, next_cpu) - ), - TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") -); - -TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, arg1, arg1) - TRACE_FIELD(unsigned long, arg2, arg2) - TRACE_FIELD(unsigned long, arg3, arg3) - ), - TP_RAW_FMT("(%08lx) (%08lx) (%08lx)") -); - -/* - * Stack-trace entry: - */ - -/* #define FTRACE_STACK_ENTRIES 8 */ - -TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, caller[0], stack0) - TRACE_FIELD(unsigned long, caller[1], stack1) - TRACE_FIELD(unsigned long, caller[2], stack2) - TRACE_FIELD(unsigned long, caller[3], stack3) - TRACE_FIELD(unsigned long, caller[4], stack4) - TRACE_FIELD(unsigned long, caller[5], stack5) - TRACE_FIELD(unsigned long, caller[6], stack6) - TRACE_FIELD(unsigned long, caller[7], stack7) - ), - TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" - "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") -); - -TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, caller[0], stack0) - TRACE_FIELD(unsigned long, caller[1], stack1) - TRACE_FIELD(unsigned long, caller[2], stack2) - TRACE_FIELD(unsigned long, caller[3], stack3) - TRACE_FIELD(unsigned long, caller[4], stack4) - TRACE_FIELD(unsigned long, caller[5], stack5) - TRACE_FIELD(unsigned long, caller[6], stack6) - TRACE_FIELD(unsigned long, caller[7], stack7) - ), - TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" - "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") -); - -TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD(char *, fmt, fmt) - TRACE_FIELD_ZERO_CHAR(buf) - ), - TP_RAW_FMT("%08lx (%d) fmt:%p %s") -); - -TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD_ZERO_CHAR(buf) - ), - TP_RAW_FMT("%08lx (%d) fmt:%p %s") -); - -TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned int, line, line) - TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, - TRACE_FUNC_SIZE+1, func) - TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, - TRACE_FUNC_SIZE+1, file) - TRACE_FIELD(char, correct, correct) - ), - TP_RAW_FMT("%u:%s:%s (%u)") -); - -TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(u64, from, from) - TRACE_FIELD(u64, to, to) - ), - TP_RAW_FMT("from: %llx to: %llx") -); - -TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, - TRACE_STRUCT( - TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1) - TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1) - TRACE_FIELD(int, state_data.type, type) - TRACE_FIELD(int, state_data.state, state) - ), - TP_RAW_FMT("%llx->%llx type:%u state:%u") -); - -TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) - TRACE_FIELD(unsigned long, call_site, call_site) - TRACE_FIELD(const void *, ptr, ptr) - TRACE_FIELD(size_t, bytes_req, bytes_req) - TRACE_FIELD(size_t, bytes_alloc, bytes_alloc) - TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) - TRACE_FIELD(int, node, node) - ), - TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" - " flags:%x node:%d") -); - -TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) - TRACE_FIELD(unsigned long, call_site, call_site) - TRACE_FIELD(const void *, ptr, ptr) - ), - TP_RAW_FMT("type:%u call_site:%lx ptr:%p") -); - -#undef TRACE_SYSTEM diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 78b1ed2..7c18d15 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -21,6 +21,7 @@ #include "trace_output.h" +#undef TRACE_SYSTEM #define TRACE_SYSTEM "TRACE_SYSTEM" DEFINE_MUTEX(event_mutex); @@ -86,7 +87,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); - __common_field(int, tgid); + __common_field(int, lock_depth); return ret; } @@ -230,73 +231,38 @@ static ssize_t ftrace_event_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { - size_t read = 0; - int i, set = 1; - ssize_t ret; - char *buf; - char ch; + struct trace_parser parser; + ssize_t read, ret; - if (!cnt || cnt < 0) + if (!cnt) return 0; ret = tracing_update_buffers(); if (ret < 0) return ret; - ret = get_user(ch, ubuf++); - if (ret) - return ret; - read++; - cnt--; - - /* skip white space */ - while (cnt && isspace(ch)) { - ret = get_user(ch, ubuf++); - if (ret) - return ret; - read++; - cnt--; - } - - /* Only white space found? */ - if (isspace(ch)) { - file->f_pos += read; - ret = read; - return ret; - } - - buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL); - if (!buf) + if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) return -ENOMEM; - if (cnt > EVENT_BUF_SIZE) - cnt = EVENT_BUF_SIZE; + read = trace_get_user(&parser, ubuf, cnt, ppos); + + if (read >= 0 && trace_parser_loaded((&parser))) { + int set = 1; - i = 0; - while (cnt && !isspace(ch)) { - if (!i && ch == '!') + if (*parser.buffer == '!') set = 0; - else - buf[i++] = ch; - ret = get_user(ch, ubuf++); + parser.buffer[parser.idx] = 0; + + ret = ftrace_set_clr_event(parser.buffer + !set, set); if (ret) - goto out_free; - read++; - cnt--; + goto out_put; } - buf[i] = 0; - - file->f_pos += read; - - ret = ftrace_set_clr_event(buf, set); - if (ret) - goto out_free; ret = read; - out_free: - kfree(buf); + out_put: + trace_parser_put(&parser); return ret; } @@ -304,42 +270,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct list_head *list = m->private; - struct ftrace_event_call *call; + struct ftrace_event_call *call = v; (*pos)++; - for (;;) { - if (list == &ftrace_events) - return NULL; - - call = list_entry(list, struct ftrace_event_call, list); - + list_for_each_entry_continue(call, &ftrace_events, list) { /* * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ if (call->regfunc) - break; - - list = list->next; + return call; } - m->private = list->next; - - return call; + return NULL; } static void *t_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_call *call = NULL; + struct ftrace_event_call *call; loff_t l; mutex_lock(&event_mutex); - m->private = ftrace_events.next; + call = list_entry(&ftrace_events, struct ftrace_event_call, list); for (l = 0; l <= *pos; ) { - call = t_next(m, NULL, &l); + call = t_next(m, call, &l); if (!call) break; } @@ -349,37 +305,28 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct list_head *list = m->private; - struct ftrace_event_call *call; + struct ftrace_event_call *call = v; (*pos)++; - retry: - if (list == &ftrace_events) - return NULL; - - call = list_entry(list, struct ftrace_event_call, list); - - if (!call->enabled) { - list = list->next; - goto retry; + list_for_each_entry_continue(call, &ftrace_events, list) { + if (call->enabled) + return call; } - m->private = list->next; - - return call; + return NULL; } static void *s_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_call *call = NULL; + struct ftrace_event_call *call; loff_t l; mutex_lock(&event_mutex); - m->private = ftrace_events.next; + call = list_entry(&ftrace_events, struct ftrace_event_call, list); for (l = 0; l <= *pos; ) { - call = s_next(m, NULL, &l); + call = s_next(m, call, &l); if (!call) break; } @@ -560,7 +507,7 @@ extern char *__bad_type_size(void); #define FIELD(type, name) \ sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name) + sizeof(field.name), is_signed_type(type) static int trace_write_header(struct trace_seq *s) { @@ -568,17 +515,17 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\n", - FIELD(unsigned short, type), - FIELD(unsigned char, flags), - FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, tgid)); + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\n", + FIELD(unsigned short, type), + FIELD(unsigned char, flags), + FIELD(unsigned char, preempt_count), + FIELD(int, pid), + FIELD(int, lock_depth)); } static ssize_t @@ -931,9 +878,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events) "'%s/filter' entry\n", name); } - entry = trace_create_file("enable", 0644, system->entry, - (void *)system->name, - &ftrace_system_enable_fops); + trace_create_file("enable", 0644, system->entry, + (void *)system->name, + &ftrace_system_enable_fops); return system->entry; } @@ -945,7 +892,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, const struct file_operations *filter, const struct file_operations *format) { - struct dentry *entry; int ret; /* @@ -963,12 +909,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, } if (call->regfunc) - entry = trace_create_file("enable", 0644, call->dir, call, - enable); + trace_create_file("enable", 0644, call->dir, call, + enable); if (call->id && call->profile_enable) - entry = trace_create_file("id", 0444, call->dir, call, - id); + trace_create_file("id", 0444, call->dir, call, + id); if (call->define_fields) { ret = call->define_fields(call); @@ -977,16 +923,16 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, " events/%s\n", call->name); return ret; } - entry = trace_create_file("filter", 0644, call->dir, call, - filter); + trace_create_file("filter", 0644, call->dir, call, + filter); } /* A trace may not want to export its format */ if (!call->show_format) return 0; - entry = trace_create_file("format", 0444, call->dir, call, - format); + trace_create_file("format", 0444, call->dir, call, + format); return 0; } @@ -1187,7 +1133,7 @@ static int trace_module_notify(struct notifier_block *self, } #endif /* CONFIG_MODULES */ -struct notifier_block trace_module_nb = { +static struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, .priority = 0, }; @@ -1359,6 +1305,18 @@ static __init void event_trace_self_tests(void) if (!call->regfunc) continue; +/* + * Testing syscall events here is pretty useless, but + * we still do it if configured. But this is time consuming. + * What we really need is a user thread to perform the + * syscalls as we test. + */ +#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS + if (call->system && + strcmp(call->system, "syscalls") == 0) + continue; +#endif + pr_info("Testing event %s: ", call->name); /* @@ -1432,7 +1390,7 @@ static __init void event_trace_self_tests(void) #ifdef CONFIG_FUNCTION_TRACER -static DEFINE_PER_CPU(atomic_t, test_event_disable); +static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); static void function_test_events_call(unsigned long ip, unsigned long parent_ip) @@ -1449,7 +1407,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) pc = preempt_count(); resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); - disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); + disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); if (disabled != 1) goto out; @@ -1468,7 +1426,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); out: - atomic_dec(&per_cpu(test_event_disable, cpu)); + atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); ftrace_preempt_enable(resched); } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 93660fb..21d3475 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -18,11 +18,10 @@ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> */ -#include <linux/debugfs.h> -#include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/mutex.h> +#include <linux/perf_event.h> #include "trace.h" #include "trace_output.h" @@ -31,6 +30,7 @@ enum filter_op_ids { OP_OR, OP_AND, + OP_GLOB, OP_NE, OP_EQ, OP_LT, @@ -48,16 +48,17 @@ struct filter_op { }; static struct filter_op filter_ops[] = { - { OP_OR, "||", 1 }, - { OP_AND, "&&", 2 }, - { OP_NE, "!=", 4 }, - { OP_EQ, "==", 4 }, - { OP_LT, "<", 5 }, - { OP_LE, "<=", 5 }, - { OP_GT, ">", 5 }, - { OP_GE, ">=", 5 }, - { OP_NONE, "OP_NONE", 0 }, - { OP_OPEN_PAREN, "(", 0 }, + { OP_OR, "||", 1 }, + { OP_AND, "&&", 2 }, + { OP_GLOB, "~", 4 }, + { OP_NE, "!=", 4 }, + { OP_EQ, "==", 4 }, + { OP_LT, "<", 5 }, + { OP_LE, "<=", 5 }, + { OP_GT, ">", 5 }, + { OP_GE, ">=", 5 }, + { OP_NONE, "OP_NONE", 0 }, + { OP_OPEN_PAREN, "(", 0 }, }; enum { @@ -121,6 +122,47 @@ struct filter_parse_state { } operand; }; +#define DEFINE_COMPARISON_PRED(type) \ +static int filter_pred_##type(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = 0; \ + \ + switch (pred->op) { \ + case OP_LT: \ + match = (*addr < val); \ + break; \ + case OP_LE: \ + match = (*addr <= val); \ + break; \ + case OP_GT: \ + match = (*addr > val); \ + break; \ + case OP_GE: \ + match = (*addr >= val); \ + break; \ + default: \ + break; \ + } \ + \ + return match; \ +} + +#define DEFINE_EQUALITY_PRED(size) \ +static int filter_pred_##size(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + u##size *addr = (u##size *)(event + pred->offset); \ + u##size val = (u##size)pred->val; \ + int match; \ + \ + match = (val == *addr) ^ pred->not; \ + \ + return match; \ +} + DEFINE_COMPARISON_PRED(s64); DEFINE_COMPARISON_PRED(u64); DEFINE_COMPARISON_PRED(s32); @@ -156,9 +198,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event, char *addr = (char *)(event + pred->offset); int cmp, match; - cmp = strncmp(addr, pred->str_val, pred->str_len); + cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -170,9 +212,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event, char **addr = (char **)(event + pred->offset); int cmp, match; - cmp = strncmp(*addr, pred->str_val, pred->str_len); + cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -196,9 +238,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event, char *addr = (char *)(event + str_loc); int cmp, match; - cmp = strncmp(addr, pred->str_val, str_len); + cmp = pred->regex.match(addr, &pred->regex, str_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -209,10 +251,121 @@ static int filter_pred_none(struct filter_pred *pred, void *event, return 0; } +/* Basic regex callbacks */ +static int regex_match_full(char *str, struct regex *r, int len) +{ + if (strncmp(str, r->pattern, len) == 0) + return 1; + return 0; +} + +static int regex_match_front(char *str, struct regex *r, int len) +{ + if (strncmp(str, r->pattern, len) == 0) + return 1; + return 0; +} + +static int regex_match_middle(char *str, struct regex *r, int len) +{ + if (strstr(str, r->pattern)) + return 1; + return 0; +} + +static int regex_match_end(char *str, struct regex *r, int len) +{ + char *ptr = strstr(str, r->pattern); + + if (ptr && (ptr[r->len] == 0)) + return 1; + return 0; +} + +/** + * filter_parse_regex - parse a basic regex + * @buff: the raw regex + * @len: length of the regex + * @search: will point to the beginning of the string to compare + * @not: tell whether the match will have to be inverted + * + * This passes in a buffer containing a regex and this function will + * set search to point to the search part of the buffer and + * return the type of search it is (see enum above). + * This does modify buff. + * + * Returns enum type. + * search returns the pointer to use for comparison. + * not returns 1 if buff started with a '!' + * 0 otherwise. + */ +enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) +{ + int type = MATCH_FULL; + int i; + + if (buff[0] == '!') { + *not = 1; + buff++; + len--; + } else + *not = 0; + + *search = buff; + + for (i = 0; i < len; i++) { + if (buff[i] == '*') { + if (!i) { + *search = buff + 1; + type = MATCH_END_ONLY; + } else { + if (type == MATCH_END_ONLY) + type = MATCH_MIDDLE_ONLY; + else + type = MATCH_FRONT_ONLY; + buff[i] = 0; + break; + } + } + } + + return type; +} + +static void filter_build_regex(struct filter_pred *pred) +{ + struct regex *r = &pred->regex; + char *search; + enum regex_type type = MATCH_FULL; + int not = 0; + + if (pred->op == OP_GLOB) { + type = filter_parse_regex(r->pattern, r->len, &search, ¬); + r->len = strlen(search); + memmove(r->pattern, search, r->len+1); + } + + switch (type) { + case MATCH_FULL: + r->match = regex_match_full; + break; + case MATCH_FRONT_ONLY: + r->match = regex_match_front; + break; + case MATCH_MIDDLE_ONLY: + r->match = regex_match_middle; + break; + case MATCH_END_ONLY: + r->match = regex_match_end; + break; + } + + pred->not ^= not; +} + /* return 1 if event matches, 0 otherwise (discard) */ -int filter_match_preds(struct ftrace_event_call *call, void *rec) +int filter_match_preds(struct event_filter *filter, void *rec) { - struct event_filter *filter = call->filter; int match, top = 0, val1 = 0, val2 = 0; int stack[MAX_FILTER_PRED]; struct filter_pred *pred; @@ -355,7 +508,7 @@ static void filter_clear_pred(struct filter_pred *pred) { kfree(pred->field_name); pred->field_name = NULL; - pred->str_len = 0; + pred->regex.len = 0; } static int filter_set_pred(struct filter_pred *dest, @@ -385,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call) filter->preds[i]->fn = filter_pred_none; } -void destroy_preds(struct ftrace_event_call *call) +static void __free_preds(struct event_filter *filter) { - struct event_filter *filter = call->filter; int i; if (!filter) @@ -400,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call) kfree(filter->preds); kfree(filter->filter_string); kfree(filter); +} + +void destroy_preds(struct ftrace_event_call *call) +{ + __free_preds(call->filter); call->filter = NULL; + call->filter_active = 0; } -static int init_preds(struct ftrace_event_call *call) +static struct event_filter *__alloc_preds(void) { struct event_filter *filter; struct filter_pred *pred; int i; - if (call->filter) - return 0; - - filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!call->filter) - return -ENOMEM; + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); filter->n_preds = 0; @@ -430,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call) filter->preds[i] = pred; } - return 0; + return filter; oom: - destroy_preds(call); + __free_preds(filter); + return ERR_PTR(-ENOMEM); +} - return -ENOMEM; +static int init_preds(struct ftrace_event_call *call) +{ + if (call->filter) + return 0; + + call->filter_active = 0; + call->filter = __alloc_preds(); + if (IS_ERR(call->filter)) + return PTR_ERR(call->filter); + + return 0; } static int init_subsystem_preds(struct event_subsystem *system) @@ -458,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system) return 0; } -enum { - FILTER_DISABLE_ALL, - FILTER_INIT_NO_RESET, - FILTER_SKIP_NO_RESET, -}; - -static void filter_free_subsystem_preds(struct event_subsystem *system, - int flag) +static void filter_free_subsystem_preds(struct event_subsystem *system) { struct ftrace_event_call *call; @@ -476,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, if (strcmp(call->system, system->name) != 0) continue; - if (flag == FILTER_INIT_NO_RESET) { - call->filter->no_reset = false; - continue; - } - - if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset) - continue; - filter_disable_preds(call); remove_filter_string(call->filter); } @@ -491,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, static int filter_add_pred_fn(struct filter_parse_state *ps, struct ftrace_event_call *call, + struct event_filter *filter, struct filter_pred *pred, filter_pred_fn_t fn) { - struct event_filter *filter = call->filter; int idx, err; if (filter->n_preds == MAX_FILTER_PRED) { @@ -509,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, return err; filter->n_preds++; - call->filter_active = 1; return 0; } @@ -534,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field) static int is_legal_op(struct ftrace_event_field *field, int op) { - if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) + if (is_string_field(field) && + (op != OP_EQ && op != OP_NE && op != OP_GLOB)) + return 0; + if (!is_string_field(field) && op == OP_GLOB) return 0; return 1; @@ -585,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, static int filter_add_pred(struct filter_parse_state *ps, struct ftrace_event_call *call, + struct event_filter *filter, struct filter_pred *pred, bool dry_run) { @@ -619,21 +774,22 @@ static int filter_add_pred(struct filter_parse_state *ps, } if (is_string_field(field)) { - pred->str_len = field->size; + filter_build_regex(pred); - if (field->filter_type == FILTER_STATIC_STRING) + if (field->filter_type == FILTER_STATIC_STRING) { fn = filter_pred_string; - else if (field->filter_type == FILTER_DYN_STRING) + pred->regex.field_len = field->size; + } else if (field->filter_type == FILTER_DYN_STRING) fn = filter_pred_strloc; else { fn = filter_pred_pchar; - pred->str_len = strlen(pred->str_val); + pred->regex.field_len = strlen(pred->regex.pattern); } } else { if (field->is_signed) - ret = strict_strtoll(pred->str_val, 0, &val); + ret = strict_strtoll(pred->regex.pattern, 0, &val); else - ret = strict_strtoull(pred->str_val, 0, &val); + ret = strict_strtoull(pred->regex.pattern, 0, &val); if (ret) { parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); return -EINVAL; @@ -653,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps, add_pred_fn: if (!dry_run) - return filter_add_pred_fn(ps, call, pred, fn); - return 0; -} - -static int filter_add_subsystem_pred(struct filter_parse_state *ps, - struct event_subsystem *system, - struct filter_pred *pred, - char *filter_string, - bool dry_run) -{ - struct ftrace_event_call *call; - int err = 0; - bool fail = true; - - list_for_each_entry(call, &ftrace_events, list) { - - if (!call->define_fields) - continue; - - if (strcmp(call->system, system->name)) - continue; - - if (call->filter->no_reset) - continue; - - err = filter_add_pred(ps, call, pred, dry_run); - if (err) - call->filter->no_reset = true; - else - fail = false; - - if (!dry_run) - replace_filter_string(call->filter, filter_string); - } - - if (fail) { - parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); - return err; - } + return filter_add_pred_fn(ps, call, filter, pred, fn); return 0; } @@ -892,8 +1010,9 @@ static void postfix_clear(struct filter_parse_state *ps) while (!list_empty(&ps->postfix)) { elt = list_first_entry(&ps->postfix, struct postfix_elt, list); - kfree(elt->operand); list_del(&elt->list); + kfree(elt->operand); + kfree(elt); } } @@ -1003,8 +1122,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2) return NULL; } - strcpy(pred->str_val, operand2); - pred->str_len = strlen(operand2); + strcpy(pred->regex.pattern, operand2); + pred->regex.len = strlen(pred->regex.pattern); pred->op = op; @@ -1048,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps) return 0; } -static int replace_preds(struct event_subsystem *system, - struct ftrace_event_call *call, +static int replace_preds(struct ftrace_event_call *call, + struct event_filter *filter, struct filter_parse_state *ps, char *filter_string, bool dry_run) @@ -1096,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system, add_pred: if (!pred) return -ENOMEM; - if (call) - err = filter_add_pred(ps, call, pred, false); - else - err = filter_add_subsystem_pred(ps, system, pred, - filter_string, dry_run); + err = filter_add_pred(ps, call, filter, pred, dry_run); filter_free_pred(pred); if (err) return err; @@ -1111,10 +1226,50 @@ add_pred: return 0; } -int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +static int replace_system_preds(struct event_subsystem *system, + struct filter_parse_state *ps, + char *filter_string) { + struct event_filter *filter = system->filter; + struct ftrace_event_call *call; + bool fail = true; int err; + list_for_each_entry(call, &ftrace_events, list) { + + if (!call->define_fields) + continue; + + if (strcmp(call->system, system->name) != 0) + continue; + + /* try to see if the filter can be applied */ + err = replace_preds(call, filter, ps, filter_string, true); + if (err) + continue; + + /* really apply the filter */ + filter_disable_preds(call); + err = replace_preds(call, filter, ps, filter_string, false); + if (err) + filter_disable_preds(call); + else { + call->filter_active = 1; + replace_filter_string(filter, filter_string); + } + fail = false; + } + + if (fail) { + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); + return -EINVAL; + } + return 0; +} + +int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +{ + int err; struct filter_parse_state *ps; mutex_lock(&event_mutex); @@ -1126,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) if (!strcmp(strstrip(filter_string), "0")) { filter_disable_preds(call); remove_filter_string(call->filter); - mutex_unlock(&event_mutex); - return 0; + goto out_unlock; } err = -ENOMEM; @@ -1145,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out; } - err = replace_preds(NULL, call, ps, filter_string, false); + err = replace_preds(call, call->filter, ps, filter_string, false); if (err) append_filter_err(ps, call->filter); - + else + call->filter_active = 1; out: filter_opstack_clear(ps); postfix_clear(ps); @@ -1163,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system, char *filter_string) { int err; - struct filter_parse_state *ps; mutex_lock(&event_mutex); @@ -1173,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out_unlock; if (!strcmp(strstrip(filter_string), "0")) { - filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); + filter_free_subsystem_preds(system); remove_filter_string(system->filter); - mutex_unlock(&event_mutex); - return 0; + goto out_unlock; } err = -ENOMEM; @@ -1193,31 +1346,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out; } - filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); - - /* try to see the filter can be applied to which events */ - err = replace_preds(system, NULL, ps, filter_string, true); - if (err) { + err = replace_system_preds(system, ps, filter_string); + if (err) append_filter_err(ps, system->filter); - goto out; + +out: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); +out_unlock: + mutex_unlock(&event_mutex); + + return err; +} + +#ifdef CONFIG_EVENT_PROFILE + +void ftrace_profile_free_filter(struct perf_event *event) +{ + struct event_filter *filter = event->filter; + + event->filter = NULL; + __free_preds(filter); +} + +int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str) +{ + int err; + struct event_filter *filter; + struct filter_parse_state *ps; + struct ftrace_event_call *call = NULL; + + mutex_lock(&event_mutex); + + list_for_each_entry(call, &ftrace_events, list) { + if (call->id == event_id) + break; } - filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); + err = -EINVAL; + if (!call) + goto out_unlock; + + err = -EEXIST; + if (event->filter) + goto out_unlock; - /* really apply the filter to the events */ - err = replace_preds(system, NULL, ps, filter_string, false); - if (err) { - append_filter_err(ps, system->filter); - filter_free_subsystem_preds(system, 2); + filter = __alloc_preds(); + if (IS_ERR(filter)) { + err = PTR_ERR(filter); + goto out_unlock; } -out: + err = -ENOMEM; + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + goto free_preds; + + parse_init(ps, filter_ops, filter_str); + err = filter_parse(ps); + if (err) + goto free_ps; + + err = replace_preds(call, filter, ps, filter_str, false); + if (!err) + event->filter = filter; + +free_ps: filter_opstack_clear(ps); postfix_clear(ps); kfree(ps); + +free_preds: + if (err) + __free_preds(filter); + out_unlock: mutex_unlock(&event_mutex); return err; } +#endif /* CONFIG_EVENT_PROFILE */ + diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index df1bf6e..31da218 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -15,146 +15,128 @@ #include "trace_output.h" +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ftrace -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +/* not needed for this file */ +#undef __field_struct +#define __field_struct(type, item) -extern void __bad_type_size(void); +#undef __field +#define __field(type, item) type item; -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - if (sizeof(type) != sizeof(field.item)) \ - __bad_type_size(); \ +#undef __field_desc +#define __field_desc(type, container, item) type item; + +#undef __array +#define __array(type, item, size) type item[size]; + +#undef __array_desc +#define __array_desc(type, container, item, size) type item[size]; + +#undef __dynamic_array +#define __dynamic_array(type, item) type item[]; + +#undef F_STRUCT +#define F_STRUCT(args...) args + +#undef F_printk +#define F_printk(fmt, args...) fmt, args + +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ +struct ____ftrace_##name { \ + tstruct \ +}; \ +static void __used ____ftrace_check_##name(void) \ +{ \ + struct ____ftrace_##name *__entry = NULL; \ + \ + /* force cmpile-time check on F_printk() */ \ + printk(print); \ +} + +#undef FTRACE_ENTRY_DUP +#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) + +#include "trace_entries.h" + + +#undef __field +#define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; +#undef __field_desc +#define __field_desc(type, container, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), container.item), \ + sizeof(field.container.item), \ + is_signed_type(type)); \ + if (!ret) \ + return 0; -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ - ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; -#undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ - "offset:%u;\tsize:0;\n", \ - (unsigned int)offsetof(typeof(field), item)); \ +#undef __array_desc +#define __array_desc(type, container, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), container.item), \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; -#undef TRACE_FIELD_SIGN -#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ - TRACE_FIELD(type, item, assign) +#undef __dynamic_array +#define __dynamic_array(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%zu;\tsize:0;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + is_signed_type(type)); \ + if (!ret) \ + return 0; -#undef TP_RAW_FMT -#define TP_RAW_FMT(args...) args +#undef F_printk +#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - struct args field; \ - int ret; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ - \ - return ret; \ -} +#undef __entry +#define __entry REC -#undef TRACE_EVENT_FORMAT_NOFILTER -#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ - tpfmt) \ +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +ftrace_format_##name(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ - struct args field; \ - int ret; \ + struct struct_name field __attribute__((unused)); \ + int ret = 0; \ \ tstruct; \ \ - trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + trace_seq_printf(s, "\nprint fmt: " print); \ \ return ret; \ } -#include "trace_event_types.h" - -#undef TRACE_ZERO_CHAR -#define TRACE_ZERO_CHAR(arg) - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TRACE_FIELD_SIGN -#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ - TRACE_FIELD(type, item, assign) - -#undef TP_CMD -#define TP_CMD(cmd...) cmd - -#undef TRACE_ENTRY -#define TRACE_ENTRY entry - -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ - cmd; - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \ -static int ftrace_raw_init_event_##call(void); \ - \ -struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .id = proto, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ -}; \ -static int ftrace_raw_init_event_##call(void) \ -{ \ - INIT_LIST_HEAD(&event_##call.fields); \ - return 0; \ -} \ - -#undef TRACE_EVENT_FORMAT_NOFILTER -#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ - tpfmt) \ - \ -struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .id = proto, \ - .system = __stringify(TRACE_SYSTEM), \ - .show_format = ftrace_format_##call, \ -}; +#include "trace_entries.h" -#include "trace_event_types.h" -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ +#undef __field +#define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ @@ -162,32 +144,45 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ if (ret) \ return ret; -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ +#undef __field_desc +#define __field_desc(type, container, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), \ + container.item), \ + sizeof(field.container.item), \ + is_signed_type(type), FILTER_OTHER); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ sizeof(field.item), 0, FILTER_OTHER); \ if (ret) \ return ret; -#undef TRACE_FIELD_SIGN -#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed, \ +#undef __array_desc +#define __array_desc(type, container, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), \ + container.item), \ + sizeof(field.container.item), 0, \ FILTER_OTHER); \ if (ret) \ return ret; -#undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) +#undef __dynamic_array +#define __dynamic_array(type, item) -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ int \ -ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ +ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ { \ - struct args field; \ + struct struct_name field; \ int ret; \ \ ret = trace_define_common_fields(event_call); \ @@ -199,8 +194,42 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ return ret; \ } -#undef TRACE_EVENT_FORMAT_NOFILTER -#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ - tpfmt) +#include "trace_entries.h" + + +#undef __field +#define __field(type, item) + +#undef __field_desc +#define __field_desc(type, container, item) + +#undef __array +#define __array(type, item, len) + +#undef __array_desc +#define __array_desc(type, container, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item) + +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ +static int ftrace_raw_init_event_##call(void); \ + \ +struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .id = type, \ + .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_raw_init_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ +}; \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + INIT_LIST_HEAD(&event_##call.fields); \ + return 0; \ +} \ -#include "trace_event_types.h" +#include "trace_entries.h" diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 5b01b94..b3f3776 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -290,7 +290,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, { long count = (long)data; - seq_printf(m, "%pf:", (void *)ip); + seq_printf(m, "%ps:", (void *)ip); if (ops == &traceon_probe_ops) seq_printf(m, "traceon"); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b3749a2..45e6c01 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -124,7 +124,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, if (unlikely(current->ret_stack[index].fp != frame_pointer)) { ftrace_graph_stop(); WARN(1, "Bad frame pointer: expected %lx, received %lx\n" - " from func %pF return to %lx\n", + " from func %ps return to %lx\n", current->ret_stack[index].fp, frame_pointer, (void *)current->ret_stack[index].func, @@ -364,6 +364,15 @@ print_graph_proc(struct trace_seq *s, pid_t pid) } +static enum print_line_t +print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) +{ + if (!trace_seq_putc(s, ' ')) + return 0; + + return trace_print_lat_fmt(s, entry); +} + /* If the pid changed since the last trace, output this event */ static enum print_line_t verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) @@ -521,6 +530,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } + /* Proc */ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, pid); @@ -659,7 +669,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } - ret = trace_seq_printf(s, "%pf();\n", (void *)call->func); + ret = trace_seq_printf(s, "%ps();\n", (void *)call->func); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -702,7 +712,7 @@ print_graph_entry_nested(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } - ret = trace_seq_printf(s, "%pf() {\n", (void *)call->func); + ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -758,6 +768,13 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, return TRACE_TYPE_PARTIAL_LINE; } + /* Latency format */ + if (trace_flags & TRACE_ITER_LATENCY_FMT) { + ret = print_graph_lat_fmt(s, ent); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + return 0; } @@ -952,28 +969,59 @@ print_graph_function(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } +static void print_lat_header(struct seq_file *s) +{ + static const char spaces[] = " " /* 16 spaces */ + " " /* 4 spaces */ + " "; /* 17 spaces */ + int size = 0; + + if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + size += 16; + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + size += 4; + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + size += 17; + + seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); + seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); + seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); + seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); + seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces); + seq_printf(s, "#%.*s|||| / \n", size, spaces); +} + static void print_graph_headers(struct seq_file *s) { + int lat = trace_flags & TRACE_ITER_LATENCY_FMT; + + if (lat) + print_lat_header(s); + /* 1st line */ - seq_printf(s, "# "); + seq_printf(s, "#"); if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " TIME "); if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) - seq_printf(s, "CPU"); + seq_printf(s, " CPU"); if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) - seq_printf(s, " TASK/PID "); + seq_printf(s, " TASK/PID "); + if (lat) + seq_printf(s, "|||||"); if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " DURATION "); seq_printf(s, " FUNCTION CALLS\n"); /* 2nd line */ - seq_printf(s, "# "); + seq_printf(s, "#"); if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " | "); if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) - seq_printf(s, "| "); + seq_printf(s, " | "); if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) - seq_printf(s, " | | "); + seq_printf(s, " | | "); + if (lat) + seq_printf(s, "|||||"); if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " | | "); seq_printf(s, " | | | |\n"); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index ca7d7c4..69543a9 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -155,7 +155,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) seq_print_ip_sym(seq, it->from, symflags) && trace_seq_printf(seq, "\n")) return TRACE_TYPE_HANDLED; - return TRACE_TYPE_PARTIAL_LINE;; + return TRACE_TYPE_PARTIAL_LINE; } return TRACE_TYPE_UNHANDLED; } @@ -165,6 +165,7 @@ void trace_hw_branch(u64 from, u64 to) struct ftrace_event_call *call = &event_hw_branch; struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; + struct ring_buffer *buf; struct hw_branch_entry *entry; unsigned long irq1; int cpu; @@ -180,7 +181,8 @@ void trace_hw_branch(u64 from, u64 to) if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; - event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES, + buf = tr->buffer; + event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES, sizeof(*entry), 0, 0); if (!event) goto out; @@ -189,8 +191,8 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.type = TRACE_HW_BRANCHES; entry->from = from; entry->to = to; - if (!filter_check_discard(call, entry, tr->buffer, event)) - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, buf, event)) + trace_buffer_unlock_commit(buf, event, 0, 0); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5555b75..3aa7eaa 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -129,15 +129,10 @@ check_critical_timing(struct trace_array *tr, unsigned long parent_ip, int cpu) { - unsigned long latency, t0, t1; cycle_t T0, T1, delta; unsigned long flags; int pc; - /* - * usecs conversion is slow so we try to delay the conversion - * as long as possible: - */ T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); delta = T1-T0; @@ -157,18 +152,15 @@ check_critical_timing(struct trace_array *tr, trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); - latency = nsecs_to_usecs(delta); - if (data->critical_sequence != max_sequence) goto out_unlock; - tracing_max_latency = delta; - t0 = nsecs_to_usecs(T0); - t1 = nsecs_to_usecs(T1); - data->critical_end = parent_ip; - update_max_tr_single(tr, current, cpu); + if (likely(!is_tracing_stopped())) { + tracing_max_latency = delta; + update_max_tr_single(tr, current, cpu); + } max_sequence++; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index c4c9bbd..0acd834 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -307,6 +307,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_rw *rw) { + struct ftrace_event_call *call = &event_mmiotrace_rw; struct ring_buffer *buffer = tr->buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; @@ -320,7 +321,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, } entry = ring_buffer_event_data(event); entry->rw = *rw; - trace_buffer_unlock_commit(buffer, event, 0, pc); + + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, pc); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -334,6 +337,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_map *map) { + struct ftrace_event_call *call = &event_mmiotrace_map; struct ring_buffer *buffer = tr->buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; @@ -347,7 +351,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr, } entry = ring_buffer_event_data(event); entry->map = *map; - trace_buffer_unlock_commit(buffer, event, 0, pc); + + if (!filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit(buffer, event, 0, pc); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index e0c2545..ed17565 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -407,7 +407,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, * since individual threads might have already quit! */ rcu_read_lock(); - task = find_task_by_vpid(entry->ent.tgid); + task = find_task_by_vpid(entry->tgid); if (task) mm = get_task_mm(task); rcu_read_unlock(); @@ -460,18 +460,23 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } -static int -lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) +/** + * trace_print_lat_fmt - print the irq, preempt and lockdep fields + * @s: trace seq struct to write to + * @entry: The trace entry field from the ring buffer + * + * Prints the generic fields of irqs off, in hard or softirq, preempt + * count and lock depth. + */ +int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { int hardirq, softirq; - char comm[TASK_COMM_LEN]; + int ret; - trace_find_cmdline(entry->pid, comm); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; - if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c", - comm, entry->pid, cpu, + if (!trace_seq_printf(s, "%c%c%c", (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.', @@ -482,8 +487,31 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) return 0; if (entry->preempt_count) - return trace_seq_printf(s, "%x", entry->preempt_count); - return trace_seq_puts(s, "."); + ret = trace_seq_printf(s, "%x", entry->preempt_count); + else + ret = trace_seq_putc(s, '.'); + + if (!ret) + return 0; + + if (entry->lock_depth < 0) + return trace_seq_putc(s, '.'); + + return trace_seq_printf(s, "%d", entry->lock_depth); +} + +static int +lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) +{ + char comm[TASK_COMM_LEN]; + + trace_find_cmdline(entry->pid, comm); + + if (!trace_seq_printf(s, "%8.8s-%-5d %3d", + comm, entry->pid, cpu)) + return 0; + + return trace_print_lat_fmt(s, entry); } static unsigned long preempt_mark_thresh = 100; @@ -857,7 +885,7 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - task_state_char(field->prev_state); + S = task_state_char(field->prev_state); T = task_state_char(field->next_state); if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", field->prev_pid, @@ -892,7 +920,7 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - task_state_char(field->prev_state); + S = task_state_char(field->prev_state); T = task_state_char(field->next_state); SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index d38bec4..9d91c72 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -26,6 +26,8 @@ extern struct trace_event *ftrace_find_event(int type); extern enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); +extern int +trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); /* used by module unregistering */ extern int __unregister_ftrace_event(struct trace_event *event); diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c deleted file mode 100644 index fe1a00f..0000000 --- a/kernel/trace/trace_power.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * ring buffer based C-state tracer - * - * Arjan van de Ven <arjan@linux.intel.com> - * Copyright (C) 2008 Intel Corporation - * - * Much is borrowed from trace_boot.c which is - * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> - * - */ - -#include <linux/init.h> -#include <linux/debugfs.h> -#include <trace/power.h> -#include <linux/kallsyms.h> -#include <linux/module.h> - -#include "trace.h" -#include "trace_output.h" - -static struct trace_array *power_trace; -static int __read_mostly trace_power_enabled; - -static void probe_power_start(struct power_trace *it, unsigned int type, - unsigned int level) -{ - if (!trace_power_enabled) - return; - - memset(it, 0, sizeof(struct power_trace)); - it->state = level; - it->type = type; - it->stamp = ktime_get(); -} - - -static void probe_power_end(struct power_trace *it) -{ - struct ftrace_event_call *call = &event_power; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - struct trace_power *entry; - struct trace_array_cpu *data; - struct trace_array *tr = power_trace; - - if (!trace_power_enabled) - return; - - buffer = tr->buffer; - - preempt_disable(); - it->end = ktime_get(); - data = tr->data[smp_processor_id()]; - - event = trace_buffer_lock_reserve(buffer, TRACE_POWER, - sizeof(*entry), 0, 0); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->state_data = *it; - if (!filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, 0); - out: - preempt_enable(); -} - -static void probe_power_mark(struct power_trace *it, unsigned int type, - unsigned int level) -{ - struct ftrace_event_call *call = &event_power; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - struct trace_power *entry; - struct trace_array_cpu *data; - struct trace_array *tr = power_trace; - - if (!trace_power_enabled) - return; - - buffer = tr->buffer; - - memset(it, 0, sizeof(struct power_trace)); - it->state = level; - it->type = type; - it->stamp = ktime_get(); - preempt_disable(); - it->end = it->stamp; - data = tr->data[smp_processor_id()]; - - event = trace_buffer_lock_reserve(buffer, TRACE_POWER, - sizeof(*entry), 0, 0); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->state_data = *it; - if (!filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, 0); - out: - preempt_enable(); -} - -static int tracing_power_register(void) -{ - int ret; - - ret = register_trace_power_start(probe_power_start); - if (ret) { - pr_info("power trace: Couldn't activate tracepoint" - " probe to trace_power_start\n"); - return ret; - } - ret = register_trace_power_end(probe_power_end); - if (ret) { - pr_info("power trace: Couldn't activate tracepoint" - " probe to trace_power_end\n"); - goto fail_start; - } - ret = register_trace_power_mark(probe_power_mark); - if (ret) { - pr_info("power trace: Couldn't activate tracepoint" - " probe to trace_power_mark\n"); - goto fail_end; - } - return ret; -fail_end: - unregister_trace_power_end(probe_power_end); -fail_start: - unregister_trace_power_start(probe_power_start); - return ret; -} - -static void start_power_trace(struct trace_array *tr) -{ - trace_power_enabled = 1; -} - -static void stop_power_trace(struct trace_array *tr) -{ - trace_power_enabled = 0; -} - -static void power_trace_reset(struct trace_array *tr) -{ - trace_power_enabled = 0; - unregister_trace_power_start(probe_power_start); - unregister_trace_power_end(probe_power_end); - unregister_trace_power_mark(probe_power_mark); -} - - -static int power_trace_init(struct trace_array *tr) -{ - power_trace = tr; - - trace_power_enabled = 1; - tracing_power_register(); - - tracing_reset_online_cpus(tr); - return 0; -} - -static enum print_line_t power_print_line(struct trace_iterator *iter) -{ - int ret = 0; - struct trace_entry *entry = iter->ent; - struct trace_power *field ; - struct power_trace *it; - struct trace_seq *s = &iter->seq; - struct timespec stamp; - struct timespec duration; - - trace_assign_type(field, entry); - it = &field->state_data; - stamp = ktime_to_timespec(it->stamp); - duration = ktime_to_timespec(ktime_sub(it->end, it->stamp)); - - if (entry->type == TRACE_POWER) { - if (it->type == POWER_CSTATE) - ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n", - stamp.tv_sec, - stamp.tv_nsec, - it->state, iter->cpu, - duration.tv_sec, - duration.tv_nsec); - if (it->type == POWER_PSTATE) - ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n", - stamp.tv_sec, - stamp.tv_nsec, - it->state, iter->cpu); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; - } - return TRACE_TYPE_UNHANDLED; -} - -static void power_print_header(struct seq_file *s) -{ - seq_puts(s, "# TIMESTAMP STATE EVENT\n"); - seq_puts(s, "# | | |\n"); -} - -static struct tracer power_tracer __read_mostly = -{ - .name = "power", - .init = power_trace_init, - .start = start_power_trace, - .stop = stop_power_trace, - .reset = power_trace_reset, - .print_line = power_print_line, - .print_header = power_print_header, -}; - -static int init_power_trace(void) -{ - return register_tracer(&power_tracer); -} -device_initcall(init_power_trace); diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d..2547d88 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -11,7 +11,6 @@ #include <linux/ftrace.h> #include <linux/string.h> #include <linux/module.h> -#include <linux/marker.h> #include <linux/mutex.h> #include <linux/ctype.h> #include <linux/list.h> diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ad69f10..26185d7 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -24,6 +24,7 @@ static int __read_mostly tracer_enabled; static struct task_struct *wakeup_task; static int wakeup_cpu; +static int wakeup_current_cpu; static unsigned wakeup_prio = -1; static int wakeup_rt; @@ -56,33 +57,23 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); + if (cpu != wakeup_current_cpu) + goto out_enable; + data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (unlikely(disabled != 1)) goto out; local_irq_save(flags); - __raw_spin_lock(&wakeup_lock); - - if (unlikely(!wakeup_task)) - goto unlock; - - /* - * The task can't disappear because it needs to - * wake up first, and we have the wakeup_lock. - */ - if (task_cpu(wakeup_task) != cpu) - goto unlock; trace_function(tr, ip, parent_ip, flags, pc); - unlock: - __raw_spin_unlock(&wakeup_lock); local_irq_restore(flags); out: atomic_dec(&data->disabled); - + out_enable: ftrace_preempt_enable(resched); } @@ -107,11 +98,18 @@ static int report_latency(cycle_t delta) return 1; } +static void probe_wakeup_migrate_task(struct task_struct *task, int cpu) +{ + if (task != wakeup_task) + return; + + wakeup_current_cpu = cpu; +} + static void notrace probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { - unsigned long latency = 0, t0 = 0, t1 = 0; struct trace_array_cpu *data; cycle_t T0, T1, delta; unsigned long flags; @@ -157,10 +155,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); - /* - * usecs conversion is slow so we try to delay the conversion - * as long as possible: - */ T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); delta = T1-T0; @@ -168,13 +162,10 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (!report_latency(delta)) goto out_unlock; - latency = nsecs_to_usecs(delta); - - tracing_max_latency = delta; - t0 = nsecs_to_usecs(T0); - t1 = nsecs_to_usecs(T1); - - update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); + if (likely(!is_tracing_stopped())) { + tracing_max_latency = delta; + update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); + } out_unlock: __wakeup_reset(wakeup_trace); @@ -244,6 +235,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) __wakeup_reset(wakeup_trace); wakeup_cpu = task_cpu(p); + wakeup_current_cpu = wakeup_cpu; wakeup_prio = p->prio; wakeup_task = p; @@ -293,6 +285,13 @@ static void start_wakeup_tracer(struct trace_array *tr) goto fail_deprobe_wake_new; } + ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task); + if (ret) { + pr_info("wakeup trace: Couldn't activate tracepoint" + " probe to kernel_sched_migrate_task\n"); + return; + } + wakeup_reset(tr); /* @@ -325,6 +324,7 @@ static void stop_wakeup_tracer(struct trace_array *tr) unregister_trace_sched_switch(probe_wakeup_sched_switch); unregister_trace_sched_wakeup_new(probe_wakeup); unregister_trace_sched_wakeup(probe_wakeup); + unregister_trace_sched_migrate_task(probe_wakeup_migrate_task); } static int __wakeup_tracer_init(struct trace_array *tr) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0f6facb..8504ac7 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = { int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; mutex_lock(&stack_sysctl_mutex); - ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_stack_tracer_enabled == !!stack_tracer_enabled)) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8712ce3..d00d1a8 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -2,7 +2,7 @@ #include <trace/events/syscalls.h> #include <linux/kernel.h> #include <linux/ftrace.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <asm/syscall.h> #include "trace_output.h" @@ -14,6 +14,69 @@ static int sys_refcount_exit; static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); +extern unsigned long __start_syscalls_metadata[]; +extern unsigned long __stop_syscalls_metadata[]; + +static struct syscall_metadata **syscalls_metadata; + +static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +{ + struct syscall_metadata *start; + struct syscall_metadata *stop; + char str[KSYM_SYMBOL_LEN]; + + + start = (struct syscall_metadata *)__start_syscalls_metadata; + stop = (struct syscall_metadata *)__stop_syscalls_metadata; + kallsyms_lookup(syscall, NULL, NULL, NULL, str); + + for ( ; start < stop; start++) { + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. + */ + if (start->name && !strcmp(start->name + 3, str + 3)) + return start; + } + return NULL; +} + +static struct syscall_metadata *syscall_nr_to_meta(int nr) +{ + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) + return NULL; + + return syscalls_metadata[nr]; +} + +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + + for (i = 0; i < NR_syscalls; i++) { + if (syscalls_metadata[i]) { + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + } + } + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -103,7 +166,8 @@ extern char *__bad_type_size(void); #define SYSCALL_FIELD(type, name) \ sizeof(type) != sizeof(trace.name) ? \ __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) + #type, #name, offsetof(typeof(trace), name), \ + sizeof(trace.name), is_signed_type(type) int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { @@ -120,7 +184,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) if (!entry) return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); if (!ret) return 0; @@ -130,8 +195,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) entry->args[i]); if (!ret) return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, - sizeof(unsigned long)); + ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" + "\tsigned:%u;\n", offset, + sizeof(unsigned long), + is_signed_type(unsigned long)); if (!ret) return 0; offset += sizeof(unsigned long); @@ -163,10 +230,12 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) struct syscall_trace_exit trace; ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr), - SYSCALL_FIELD(unsigned long, ret)); + SYSCALL_FIELD(long, ret)); if (!ret) return 0; @@ -212,7 +281,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0, + ret = trace_define_field(call, SYSCALL_FIELD(long, ret), FILTER_OTHER); return ret; @@ -375,6 +444,29 @@ struct trace_event event_syscall_exit = { .trace = print_syscall_exit, }; +int __init init_ftrace_syscalls(void) +{ + struct syscall_metadata *meta; + unsigned long addr; + int i; + + syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * + NR_syscalls, GFP_KERNEL); + if (!syscalls_metadata) { + WARN_ON(1); + return -ENOMEM; + } + + for (i = 0; i < NR_syscalls; i++) { + addr = arch_syscall_addr(i); + meta = find_syscall_meta(addr); + syscalls_metadata[i] = meta; + } + + return 0; +} +core_initcall(init_ftrace_syscalls); + #ifdef CONFIG_EVENT_PROFILE static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); @@ -384,10 +476,13 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { - struct syscall_trace_enter *rec; struct syscall_metadata *sys_data; + struct syscall_trace_enter *rec; + unsigned long flags; + char *raw_data; int syscall_nr; int size; + int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) @@ -402,20 +497,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - do { - char raw_data[size]; + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return; + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(flags); - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + cpu = smp_processor_id(); - rec = (struct syscall_trace_enter *) raw_data; - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_id; - rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, - (unsigned long *)&rec->args); - perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); - } while(0); + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; + + raw_data = per_cpu_ptr(raw_data, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + rec = (struct syscall_trace_enter *) raw_data; + tracing_generic_entry_update(&rec->ent, 0, 0); + rec->ent.type = sys_data->enter_id; + rec->nr = syscall_nr; + syscall_get_arguments(current, regs, 0, sys_data->nb_args, + (unsigned long *)&rec->args); + perf_tp_event(sys_data->enter_id, 0, 1, rec, size); + +end: + local_irq_restore(flags); } int reg_prof_syscall_enter(char *name) @@ -460,8 +573,12 @@ void unreg_prof_syscall_enter(char *name) static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; - struct syscall_trace_exit rec; + struct syscall_trace_exit *rec; + unsigned long flags; int syscall_nr; + char *raw_data; + int size; + int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) @@ -471,12 +588,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - tracing_generic_entry_update(&rec.ent, 0, 0); - rec.ent.type = sys_data->exit_id; - rec.nr = syscall_nr; - rec.ret = syscall_get_return_value(current, regs); + /* We can probably do that at build time */ + size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); + + /* + * Impossible, but be paranoid with the future + * How to put this check outside runtime? + */ + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "exit event has grown above profile buffer size")) + return; + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(flags); + cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; + + raw_data = per_cpu_ptr(raw_data, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + rec = (struct syscall_trace_exit *)raw_data; - perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); + tracing_generic_entry_update(&rec->ent, 0, 0); + rec->ent.type = sys_data->exit_id; + rec->nr = syscall_nr; + rec->ret = syscall_get_return_value(current, regs); + + perf_tp_event(sys_data->exit_id, 0, 1, rec, size); + +end: + local_irq_restore(flags); } int reg_prof_syscall_exit(char *name) |