diff options
Diffstat (limited to 'kernel')
30 files changed, 453 insertions, 404 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 0dfeca4..86e3285 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -174,10 +174,8 @@ signing_key.priv signing_key.x509: x509.genkey @echo "###" @echo "### If this takes a long time, you might wish to run rngd in the" @echo "### background to keep the supply of entropy topped up. It" - @echo "### needs to be run as root, and should use a hardware random" - @echo "### number generator if one is available, eg:" - @echo "###" - @echo "### rngd -r /dev/hwrandom" + @echo "### needs to be run as root, and uses a hardware random" + @echo "### number generator if one is available." @echo "###" openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \ -x509 -config x509.genkey \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 13774b3..f24f724 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1962,9 +1962,8 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, * trading it for newcg is protected by cgroup_mutex, we're safe to drop * it here; it will be freed under RCU. */ - put_css_set(oldcg); - set_bit(CGRP_RELEASABLE, &oldcgrp->flags); + put_css_set(oldcg); } /** @@ -4815,31 +4814,20 @@ static const struct file_operations proc_cgroupstats_operations = { * * A pointer to the shared css_set was automatically copied in * fork.c by dup_task_struct(). However, we ignore that copy, since - * it was not made under the protection of RCU, cgroup_mutex or - * threadgroup_change_begin(), so it might no longer be a valid - * cgroup pointer. cgroup_attach_task() might have already changed - * current->cgroups, allowing the previously referenced cgroup - * group to be removed and freed. 
- * - * Outside the pointer validity we also need to process the css_set - * inheritance between threadgoup_change_begin() and - * threadgoup_change_end(), this way there is no leak in any process - * wide migration performed by cgroup_attach_proc() that could otherwise - * miss a thread because it is too early or too late in the fork stage. + * it was not made under the protection of RCU or cgroup_mutex, so + * might no longer be a valid cgroup pointer. cgroup_attach_task() might + * have already changed current->cgroups, allowing the previously + * referenced cgroup group to be removed and freed. * * At the point that cgroup_fork() is called, 'current' is the parent * task, and the passed argument 'child' points to the child task. */ void cgroup_fork(struct task_struct *child) { - /* - * We don't need to task_lock() current because current->cgroups - * can't be changed concurrently here. The parent obviously hasn't - * exited and called cgroup_exit(), and we are synchronized against - * cgroup migration through threadgroup_change_begin(). - */ + task_lock(current); child->cgroups = current->cgroups; get_css_set(child->cgroups); + task_unlock(current); INIT_LIST_HEAD(&child->cg_list); } @@ -4895,19 +4883,10 @@ void cgroup_post_fork(struct task_struct *child) */ if (use_task_css_set_links) { write_lock(&css_set_lock); - if (list_empty(&child->cg_list)) { - /* - * It's safe to use child->cgroups without task_lock() - * here because we are protected through - * threadgroup_change_begin() against concurrent - * css_set change in cgroup_task_migrate(). Also - * the task can't exit at that point until - * wake_up_new_task() is called, so we are protected - * against cgroup_exit() setting child->cgroup to - * init_css_set. 
- */ + task_lock(child); + if (list_empty(&child->cg_list)) list_add(&child->cg_list, &child->cgroups->tasks); - } + task_unlock(child); write_unlock(&css_set_lock); } } diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 9a7b487..fe8a916 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -111,14 +111,16 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) * Count the number of breakpoints of the same type and same task. * The given event must be not on the list. */ -static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type) +static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) { struct task_struct *tsk = bp->hw.bp_target; struct perf_event *iter; int count = 0; list_for_each_entry(iter, &bp_task_head, hw.bp_list) { - if (iter->hw.bp_target == tsk && find_slot_idx(iter) == type) + if (iter->hw.bp_target == tsk && + find_slot_idx(iter) == type && + cpu == iter->cpu) count += hw_breakpoint_weight(iter); } @@ -141,7 +143,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, if (!tsk) slots->pinned += max_task_bp_pinned(cpu, type); else - slots->pinned += task_bp_pinned(bp, type); + slots->pinned += task_bp_pinned(cpu, bp, type); slots->flexible = per_cpu(nr_bp_flexible[type], cpu); return; @@ -154,7 +156,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, if (!tsk) nr += max_task_bp_pinned(cpu, type); else - nr += task_bp_pinned(bp, type); + nr += task_bp_pinned(cpu, bp, type); if (nr > slots->pinned) slots->pinned = nr; @@ -188,7 +190,7 @@ static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable, int old_idx = 0; int idx = 0; - old_count = task_bp_pinned(bp, type); + old_count = task_bp_pinned(cpu, bp, type); old_idx = old_count - 1; idx = old_idx + weight; diff --git a/kernel/futex.c b/kernel/futex.c index 3717e7b..19eb089 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -716,7 
+716,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, struct futex_pi_state **ps, struct task_struct *task, int set_waiters) { - int lock_taken, ret, ownerdied = 0; + int lock_taken, ret, force_take = 0; u32 uval, newval, curval, vpid = task_pid_vnr(task); retry: @@ -755,17 +755,15 @@ retry: newval = curval | FUTEX_WAITERS; /* - * There are two cases, where a futex might have no owner (the - * owner TID is 0): OWNER_DIED. We take over the futex in this - * case. We also do an unconditional take over, when the owner - * of the futex died. - * - * This is safe as we are protected by the hash bucket lock ! + * Should we force take the futex? See below. */ - if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { - /* Keep the OWNER_DIED bit */ + if (unlikely(force_take)) { + /* + * Keep the OWNER_DIED and the WAITERS bit and set the + * new TID value. + */ newval = (curval & ~FUTEX_TID_MASK) | vpid; - ownerdied = 0; + force_take = 0; lock_taken = 1; } @@ -775,7 +773,7 @@ retry: goto retry; /* - * We took the lock due to owner died take over. + * We took the lock due to forced take over. */ if (unlikely(lock_taken)) return 1; @@ -790,20 +788,25 @@ retry: switch (ret) { case -ESRCH: /* - * No owner found for this futex. Check if the - * OWNER_DIED bit is set to figure out whether - * this is a robust futex or not. + * We failed to find an owner for this + * futex. So we have no pi_state to block + * on. This can happen in two cases: + * + * 1) The owner died + * 2) A stale FUTEX_WAITERS bit + * + * Re-read the futex value. */ if (get_futex_value_locked(&curval, uaddr)) return -EFAULT; /* - * We simply start over in case of a robust - * futex. The code above will take the futex - * and return happy. + * If the owner died or we have a stale + * WAITERS bit the owner TID in the user space + * futex is 0. 
*/ - if (curval & FUTEX_OWNER_DIED) { - ownerdied = 1; + if (!(curval & FUTEX_TID_MASK)) { + force_take = 1; goto retry; } default: @@ -840,6 +843,9 @@ static void wake_futex(struct futex_q *q) { struct task_struct *p = q->task; + if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) + return; + /* * We set q->lock_ptr = NULL _before_ we wake up the task. If * a non-futex wake up happens on another CPU then the task @@ -1075,6 +1081,10 @@ retry_private: plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key1)) { + if (this->pi_state || this->rt_waiter) { + ret = -EINVAL; + goto out_unlock; + } wake_futex(this); if (++ret >= nr_wake) break; @@ -1087,6 +1097,10 @@ retry_private: op_ret = 0; plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key2)) { + if (this->pi_state || this->rt_waiter) { + ret = -EINVAL; + goto out_unlock; + } wake_futex(this); if (++op_ret >= nr_wake2) break; @@ -1095,6 +1109,7 @@ retry_private: ret += op_ret; } +out_unlock: double_unlock_hb(hb1, hb2); out_put_keys: put_futex_key(&key2); @@ -1384,9 +1399,13 @@ retry_private: /* * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always * be paired with each other and no other futex ops. + * + * We should never be requeueing a futex_q with a pi_state, + * which is awaiting a futex_unlock_pi(). 
*/ if ((requeue_pi && !this->rt_waiter) || - (!requeue_pi && this->rt_waiter)) { + (!requeue_pi && this->rt_waiter) || + this->pi_state) { ret = -EINVAL; break; } diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c index 4646eb2..767e559 100644 --- a/kernel/modsign_pubkey.c +++ b/kernel/modsign_pubkey.c @@ -21,10 +21,10 @@ struct key *modsign_keyring; extern __initdata const u8 modsign_certificate_list[]; extern __initdata const u8 modsign_certificate_list_end[]; asm(".section .init.data,\"aw\"\n" - "modsign_certificate_list:\n" + SYMBOL_PREFIX "modsign_certificate_list:\n" ".incbin \"signing_key.x509\"\n" ".incbin \"extra_certificates\"\n" - "modsign_certificate_list_end:" + SYMBOL_PREFIX "modsign_certificate_list_end:" ); /* diff --git a/kernel/module.c b/kernel/module.c index 6085f5e..6e48c3a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2293,12 +2293,17 @@ static void layout_symtab(struct module *mod, struct load_info *info) src = (void *)info->hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); + /* strtab always starts with a nul, so offset 0 is the empty string. */ + strtab_size = 1; + /* Compute total space required for the core symbols' strtab. */ - for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src) - if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { - strtab_size += strlen(&info->strtab[src->st_name]) + 1; + for (ndst = i = 0; i < nsrc; i++) { + if (i == 0 || + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + strtab_size += strlen(&info->strtab[src[i].st_name])+1; ndst++; } + } /* Append room for core symbols at end of core part. 
*/ info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); @@ -2332,15 +2337,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_symtab = dst = mod->module_core + info->symoffs; mod->core_strtab = s = mod->module_core + info->stroffs; src = mod->symtab; - *dst = *src; *s++ = 0; - for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { - if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) - continue; - - dst[ndst] = *src; - dst[ndst++].st_name = s - mod->core_strtab; - s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1; + for (ndst = i = 0; i < mod->num_symtab; i++) { + if (i == 0 || + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + dst[ndst] = src[i]; + dst[ndst++].st_name = s - mod->core_strtab; + s += strlcpy(s, &mod->strtab[src[i].st_name], + KSYM_NAME_LEN) + 1; + } } mod->core_num_syms = ndst; } diff --git a/kernel/module_signing.c b/kernel/module_signing.c index ea1b1df..f2970bd 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -27,13 +27,13 @@ * - Information block */ struct module_signature { - enum pkey_algo algo : 8; /* Public-key crypto algorithm */ - enum pkey_hash_algo hash : 8; /* Digest algorithm */ - enum pkey_id_type id_type : 8; /* Key identifier type */ - u8 signer_len; /* Length of signer's name */ - u8 key_id_len; /* Length of key identifier */ - u8 __pad[3]; - __be32 sig_len; /* Length of signature data */ + u8 algo; /* Public-key crypto algorithm [enum pkey_algo] */ + u8 hash; /* Digest algorithm [enum pkey_hash_algo] */ + u8 id_type; /* Key identifier type [enum pkey_id_type] */ + u8 signer_len; /* Length of signer's name */ + u8 key_id_len; /* Length of key identifier */ + u8 __pad[3]; + __be32 sig_len; /* Length of signature data */ }; /* diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index eb00be2..7b07cc0d 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -71,12 +71,22 @@ err_alloc: return NULL; } +/* 
MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ +#define MAX_PID_NS_LEVEL 32 + static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) { struct pid_namespace *ns; unsigned int level = parent_pid_ns->level + 1; - int i, err = -ENOMEM; + int i; + int err; + + if (level > MAX_PID_NS_LEVEL) { + err = -EINVAL; + goto out; + } + err = -ENOMEM; ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); if (ns == NULL) goto out; diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 0984a21..15f60d0 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c @@ -143,15 +143,11 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) p->signal->autogroup = autogroup_kref_get(ag); - if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled)) - goto out; - t = p; do { sched_move_task(t); } while_each_thread(p, t); -out: unlock_task_sighand(p, &flags); autogroup_kref_put(prev); } diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h index 8bd0471..443232e 100644 --- a/kernel/sched/auto_group.h +++ b/kernel/sched/auto_group.h @@ -4,11 +4,6 @@ #include <linux/rwsem.h> struct autogroup { - /* - * reference doesn't mean how many thread attach to this - * autogroup now. It just stands for the number of task - * could use this autogroup. 
- */ struct kref kref; struct task_group *tg; struct rw_semaphore lock; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 4cea4f4..5d89335 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -119,6 +119,7 @@ config TRACING select BINARY_PRINTF select EVENT_TRACING select TRACE_CLOCK + select IRQ_WORK config GENERIC_TRACER bool diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9dcf15d..4451aa3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2868,7 +2868,7 @@ static int __init ftrace_mod_cmd_init(void) { return register_ftrace_command(&ftrace_mod_cmd); } -device_initcall(ftrace_mod_cmd_init); +core_initcall(ftrace_mod_cmd_init); static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) @@ -4055,7 +4055,7 @@ static int __init ftrace_nodyn_init(void) ftrace_enabled = 1; return 0; } -device_initcall(ftrace_nodyn_init); +core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } @@ -4381,7 +4381,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, if (strlen(tmp) == 0) return 1; - ret = strict_strtol(tmp, 10, &val); + ret = kstrtol(tmp, 10, &val); if (ret < 0) return ret; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b979426..3c7834c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -460,9 +460,10 @@ struct ring_buffer_per_cpu { unsigned long lost_events; unsigned long last_overrun; local_t entries_bytes; - local_t commit_overrun; - local_t overrun; local_t entries; + local_t overrun; + local_t commit_overrun; + local_t dropped_events; local_t committing; local_t commits; unsigned long read; @@ -1820,7 +1821,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) } /** - * ring_buffer_update_event - update event type and data + * rb_update_event - update 
event type and data * @event: the even to update * @type: the type of event * @length: the size of the event field in the ring buffer @@ -2155,8 +2156,10 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * If we are not in overwrite mode, * this is easy, just stop here. */ - if (!(buffer->flags & RB_FL_OVERWRITE)) + if (!(buffer->flags & RB_FL_OVERWRITE)) { + local_inc(&cpu_buffer->dropped_events); goto out_reset; + } ret = rb_handle_head_page(cpu_buffer, tail_page, @@ -2720,8 +2723,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); * and not the length of the event which would hold the header. */ int ring_buffer_write(struct ring_buffer *buffer, - unsigned long length, - void *data) + unsigned long length, + void *data) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -2929,12 +2932,12 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) { unsigned long flags; struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; - unsigned long ret; + u64 ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; @@ -2995,7 +2998,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); /** - * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer + * ring_buffer_overrun_cpu - get the number of overruns caused by the ring + * buffer wrapping around (only if RB_FL_OVERWRITE is on). 
* @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ @@ -3015,7 +3019,9 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); /** - * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits + * ring_buffer_commit_overrun_cpu - get the number of overruns caused by + * commits failing due to the buffer wrapping around while there are uncommitted + * events, such as during an interrupt storm. * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ @@ -3036,6 +3042,28 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); /** + * ring_buffer_dropped_events_cpu - get the number of dropped events caused by + * the ring buffer filling up (only if RB_FL_OVERWRITE is off). + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of overruns from + */ +unsigned long +ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + ret = local_read(&cpu_buffer->dropped_events); + + return ret; +} +EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); + +/** * ring_buffer_entries - get the number of entries in a buffer * @buffer: The ring buffer * @@ -3864,9 +3892,10 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->reader_page->page->commit, 0); cpu_buffer->reader_page->read = 0; - local_set(&cpu_buffer->commit_overrun, 0); local_set(&cpu_buffer->entries_bytes, 0); local_set(&cpu_buffer->overrun, 0); + local_set(&cpu_buffer->commit_overrun, 0); + local_set(&cpu_buffer->dropped_events, 0); local_set(&cpu_buffer->entries, 0); local_set(&cpu_buffer->committing, 0); local_set(&cpu_buffer->commits, 0); diff --git a/kernel/trace/trace.c 
b/kernel/trace/trace.c index 31e4f55..c1434b5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -19,6 +19,7 @@ #include <linux/seq_file.h> #include <linux/notifier.h> #include <linux/irqflags.h> +#include <linux/irq_work.h> #include <linux/debugfs.h> #include <linux/pagemap.h> #include <linux/hardirq.h> @@ -78,6 +79,21 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set) } /* + * To prevent the comm cache from being overwritten when no + * tracing is active, only save the comm when a trace event + * occurred. + */ +static DEFINE_PER_CPU(bool, trace_cmdline_save); + +/* + * When a reader is waiting for data, then this variable is + * set to true. + */ +static bool trace_wakeup_needed; + +static struct irq_work trace_work_wakeup; + +/* * Kill all tracing for good (never come back). * It is initialized to 1 but will turn to zero if the initialization * of the tracer is successful. But that is the only place that sets @@ -139,6 +155,18 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); + +static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; +static char *trace_boot_options __initdata; + +static int __init set_trace_boot_options(char *str) +{ + strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); + trace_boot_options = trace_boot_options_buf; + return 0; +} +__setup("trace_options=", set_trace_boot_options); + unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; @@ -198,20 +226,9 @@ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data); -/* tracer_enabled is used to toggle activation of a tracer */ -static int tracer_enabled = 1; - -/** - * tracing_is_enabled - return tracer_enabled status - * - * This function is used by other tracers to know the status - * of the tracer_enabled flag. Tracers may use this function - * to know if it should enable their features when starting - * up. 
See irqsoff tracer for an example (start_irqsoff_tracer). - */ int tracing_is_enabled(void) { - return tracer_enabled; + return tracing_is_on(); } /* @@ -333,12 +350,18 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | static int trace_stop_count; static DEFINE_RAW_SPINLOCK(tracing_start_lock); -static void wakeup_work_handler(struct work_struct *work) +/** + * trace_wake_up - wake up tasks waiting for trace input + * + * Schedules a delayed work to wake up any task that is blocked on the + * trace_wait queue. These is used with trace_poll for tasks polling the + * trace. + */ +static void trace_wake_up(struct irq_work *work) { - wake_up(&trace_wait); -} + wake_up_all(&trace_wait); -static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); +} /** * tracing_on - enable tracing buffers @@ -393,22 +416,6 @@ int tracing_is_on(void) } EXPORT_SYMBOL_GPL(tracing_is_on); -/** - * trace_wake_up - wake up tasks waiting for trace input - * - * Schedules a delayed work to wake up any task that is blocked on the - * trace_wait queue. These is used with trace_poll for tasks polling the - * trace. 
- */ -void trace_wake_up(void) -{ - const unsigned long delay = msecs_to_jiffies(2); - - if (trace_flags & TRACE_ITER_BLOCK) - return; - schedule_delayed_work(&wakeup_work, delay); -} - static int __init set_buf_size(char *str) { unsigned long buf_size; @@ -431,7 +438,7 @@ static int __init set_tracing_thresh(char *str) if (!str) return 0; - ret = strict_strtoul(str, 0, &threshold); + ret = kstrtoul(str, 0, &threshold); if (ret < 0) return 0; tracing_thresh = threshold * 1000; @@ -757,6 +764,40 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) } #endif /* CONFIG_TRACER_MAX_TRACE */ +static void default_wait_pipe(struct trace_iterator *iter) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); + + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + trace_wakeup_needed = true; + + if (trace_empty(iter)) + schedule(); + + finish_wait(&trace_wait, &wait); +} + /** * register_tracer - register a tracer with the ftrace system. 
* @type - the plugin for the tracer @@ -875,32 +916,6 @@ int register_tracer(struct tracer *type) return ret; } -void unregister_tracer(struct tracer *type) -{ - struct tracer **t; - - mutex_lock(&trace_types_lock); - for (t = &trace_types; *t; t = &(*t)->next) { - if (*t == type) - goto found; - } - pr_info("Tracer %s not registered\n", type->name); - goto out; - - found: - *t = (*t)->next; - - if (type == current_trace && tracer_enabled) { - tracer_enabled = 0; - tracing_stop(); - if (current_trace->stop) - current_trace->stop(&global_trace); - current_trace = &nop_trace; - } -out: - mutex_unlock(&trace_types_lock); -} - void tracing_reset(struct trace_array *tr, int cpu) { struct ring_buffer *buffer = tr->buffer; @@ -1131,10 +1146,14 @@ void trace_find_cmdline(int pid, char comm[]) void tracing_record_cmdline(struct task_struct *tsk) { - if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled || - !tracing_is_on()) + if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on()) + return; + + if (!__this_cpu_read(trace_cmdline_save)) return; + __this_cpu_write(trace_cmdline_save, false); + trace_save_cmdline(tsk); } @@ -1178,27 +1197,36 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer, return event; } +void +__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +{ + __this_cpu_write(trace_cmdline_save, true); + if (trace_wakeup_needed) { + trace_wakeup_needed = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&trace_work_wakeup); + } + ring_buffer_unlock_commit(buffer, event); +} + static inline void __trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, - int wake) + unsigned long flags, int pc) { - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); ftrace_trace_userstack(buffer, flags, pc); - - if (wake) - trace_wake_up(); } void 
trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { - __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); + __trace_buffer_unlock_commit(buffer, event, flags, pc); } +EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, @@ -1215,29 +1243,21 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { - __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); + __trace_buffer_unlock_commit(buffer, event, flags, pc); } EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); -void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) -{ - __trace_buffer_unlock_commit(buffer, event, flags, pc, 0); -} -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); - -void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs) +void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc, + struct pt_regs *regs) { - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); +EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); void trace_current_buffer_discard_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) @@ -1269,7 +1289,7 @@ trace_function(struct trace_array *tr, entry->parent_ip = parent_ip; if (!filter_check_discard(call, entry, buffer, event)) - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); } void @@ -1362,7 +1382,7 @@ static void __ftrace_trace_stack(struct 
ring_buffer *buffer, entry->size = trace.nr_entries; if (!filter_check_discard(call, entry, buffer, event)) - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); out: /* Again, don't let gcc optimize things here */ @@ -1458,7 +1478,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) save_stack_trace_user(&trace); if (!filter_check_discard(call, entry, buffer, event)) - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); out_drop_count: __this_cpu_dec(user_stack_count); @@ -1559,10 +1579,10 @@ static int alloc_percpu_trace_buffer(void) return -ENOMEM; } +static int buffers_allocated; + void trace_printk_init_buffers(void) { - static int buffers_allocated; - if (buffers_allocated) return; @@ -1571,7 +1591,38 @@ void trace_printk_init_buffers(void) pr_info("ftrace: Allocated trace_printk buffers\n"); + /* Expand the buffers to set size */ + tracing_update_buffers(); + buffers_allocated = 1; + + /* + * trace_printk_init_buffers() can be called by modules. + * If that happens, then we need to start cmdline recording + * directly here. If the global_trace.buffer is already + * allocated here, then this was called by module code. 
+ */ + if (global_trace.buffer) + tracing_start_cmdline_record(); +} + +void trace_printk_start_comm(void) +{ + /* Start tracing comms if trace printk is set */ + if (!buffers_allocated) + return; + tracing_start_cmdline_record(); +} + +static void trace_printk_start_stop_comm(int enabled) +{ + if (!buffers_allocated) + return; + + if (enabled) + tracing_start_cmdline_record(); + else + tracing_stop_cmdline_record(); } /** @@ -1622,7 +1673,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) memcpy(entry->buf, tbuffer, sizeof(u32) * len); if (!filter_check_discard(call, entry, buffer, event)) { - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); } @@ -1693,7 +1744,7 @@ int trace_array_vprintk(struct trace_array *tr, memcpy(&entry->buf, tbuffer, len); entry->buf[len] = '\0'; if (!filter_check_discard(call, entry, buffer, event)) { - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); } out: @@ -2794,26 +2845,19 @@ static void set_tracer_flags(unsigned int mask, int enabled) if (mask == TRACE_ITER_OVERWRITE) ring_buffer_change_overwrite(global_trace.buffer, enabled); + + if (mask == TRACE_ITER_PRINTK) + trace_printk_start_stop_comm(enabled); } -static ssize_t -tracing_trace_options_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int trace_set_options(char *option) { - char buf[64]; char *cmp; int neg = 0; - int ret; + int ret = 0; int i; - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - cmp = strstrip(buf); + cmp = strstrip(option); if (strncmp(cmp, "no", 2) == 0) { neg = 1; @@ -2832,10 +2876,25 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); ret = set_tracer_option(current_trace, cmp, neg); mutex_unlock(&trace_types_lock); - if (ret) - 
return ret; } + return ret; +} + +static ssize_t +tracing_trace_options_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + trace_set_options(buf); + *ppos += cnt; return cnt; @@ -2940,56 +2999,6 @@ static const struct file_operations tracing_saved_cmdlines_fops = { }; static ssize_t -tracing_ctrl_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - int r; - - r = sprintf(buf, "%u\n", tracer_enabled); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -tracing_ctrl_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - struct trace_array *tr = filp->private_data; - unsigned long val; - int ret; - - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) - return ret; - - val = !!val; - - mutex_lock(&trace_types_lock); - if (tracer_enabled ^ val) { - - /* Only need to warn if this is used to change the state */ - WARN_ONCE(1, "tracing_enabled is deprecated. 
Use tracing_on"); - - if (val) { - tracer_enabled = 1; - if (current_trace->start) - current_trace->start(tr); - tracing_start(); - } else { - tracer_enabled = 0; - tracing_stop(); - if (current_trace->stop) - current_trace->stop(tr); - } - } - mutex_unlock(&trace_types_lock); - - *ppos += cnt; - - return cnt; -} - -static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -3030,6 +3039,10 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu) */ ring_buffer_expanded = 1; + /* May be called before buffers are initialized */ + if (!global_trace.buffer) + return 0; + ret = ring_buffer_resize(global_trace.buffer, size, cpu); if (ret < 0) return ret; @@ -3385,19 +3398,6 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) } } - -void default_wait_pipe(struct trace_iterator *iter) -{ - DEFINE_WAIT(wait); - - prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); - - if (trace_empty(iter)) - schedule(); - - finish_wait(&trace_wait, &wait); -} - /* * This is a make-shift waitqueue. * A tracer might use this callback on some rare cases: @@ -3438,7 +3438,7 @@ static int tracing_wait_pipe(struct file *filp) return -EINTR; /* - * We block until we read something and tracing is disabled. + * We block until we read something and tracing is enabled. * We still block if tracing is disabled, but we have never * read anything. This allows a user to cat this file, and * then enable tracing. But after we have read something, @@ -3446,7 +3446,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. 
*/ - if (!tracer_enabled && iter->pos) + if (tracing_is_enabled() && iter->pos) break; } @@ -3955,7 +3955,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, } else entry->buf[cnt] = '\0'; - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); written = cnt; @@ -4016,6 +4016,14 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, if (max_tr.buffer) ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func); + /* + * New clock may not be consistent with the previous clock. + * Reset the buffer so that it doesn't have incomparable timestamps. + */ + tracing_reset_online_cpus(&global_trace); + if (max_tr.buffer) + tracing_reset_online_cpus(&max_tr); + mutex_unlock(&trace_types_lock); *fpos += cnt; @@ -4037,13 +4045,6 @@ static const struct file_operations tracing_max_lat_fops = { .llseek = generic_file_llseek, }; -static const struct file_operations tracing_ctrl_fops = { - .open = tracing_open_generic, - .read = tracing_ctrl_read, - .write = tracing_ctrl_write, - .llseek = generic_file_llseek, -}; - static const struct file_operations set_tracer_fops = { .open = tracing_open_generic, .read = tracing_set_trace_read, @@ -4385,6 +4386,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf, usec_rem = do_div(t, USEC_PER_SEC); trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); + cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); + trace_seq_printf(s, "dropped events: %ld\n", cnt); + count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); kfree(s); @@ -4815,9 +4819,6 @@ static __init int tracer_init_debugfs(void) d_tracer = tracing_init_dentry(); - trace_create_file("tracing_enabled", 0644, d_tracer, - &global_trace, &tracing_ctrl_fops); - trace_create_file("trace_options", 0644, d_tracer, NULL, &tracing_iter_fops); @@ -5089,6 +5090,7 @@ __init static int tracer_alloc_buffers(void) /* Only allocate trace_printk buffers if a trace_printk exists */ if 
(__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt) + /* Must be called before global_trace.buffer is allocated */ trace_printk_init_buffers(); /* To save memory, keep the ring buffer size to its minimum */ @@ -5136,6 +5138,7 @@ __init static int tracer_alloc_buffers(void) #endif trace_init_cmdlines(); + init_irq_work(&trace_work_wakeup, trace_wake_up); register_tracer(&nop_trace); current_trace = &nop_trace; @@ -5147,6 +5150,13 @@ __init static int tracer_alloc_buffers(void) register_die_notifier(&trace_die_notifier); + while (trace_boot_options) { + char *option; + + option = strsep(&trace_boot_options, ","); + trace_set_options(option); + } + return 0; out_free_cpumask: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c15f528..55010ed 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -285,8 +285,8 @@ struct tracer { int (*set_flag)(u32 old_flags, u32 bit, int set); struct tracer *next; struct tracer_flags *flags; - int print_max; - int use_max_tr; + bool print_max; + bool use_max_tr; }; @@ -327,7 +327,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); -void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); void tracing_reset_current(int cpu); @@ -349,9 +348,6 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long len, unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -359,6 +355,9 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); +void __buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event 
*event); + int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); @@ -367,7 +366,6 @@ void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); -void default_wait_pipe(struct trace_iterator *iter); void poll_wait_pipe(struct trace_iterator *iter); void ftrace(struct trace_array *tr, @@ -407,7 +405,6 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr); void tracing_stop_sched_switch_record(void); void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); -void unregister_tracer(struct tracer *type); int is_tracing_stopped(void); enum trace_file_type { TRACE_FILE_LAT_FMT = 1, @@ -841,6 +838,7 @@ extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; void trace_printk_init_buffers(void); +void trace_printk_start_comm(void); #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 8d3538b..95e9684 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -77,7 +77,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->correct = val == expect; if (!filter_check_discard(call, entry, buffer, event)) - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); out: atomic_dec(&tr->data[cpu]->disabled); @@ -199,7 +199,7 @@ __init static int init_branch_tracer(void) } return register_tracer(&branch_trace); } -device_initcall(init_branch_tracer); +core_initcall(init_branch_tracer); #else static inline diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d608d09..880073d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -491,19 +491,6 @@ static void t_stop(struct seq_file *m, void *p) mutex_unlock(&event_mutex); } -static int 
-ftrace_event_seq_open(struct inode *inode, struct file *file) -{ - const struct seq_operations *seq_ops; - - if ((file->f_mode & FMODE_WRITE) && - (file->f_flags & O_TRUNC)) - ftrace_clear_events(); - - seq_ops = inode->i_private; - return seq_open(file, seq_ops); -} - static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -980,6 +967,9 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return r; } +static int ftrace_event_avail_open(struct inode *inode, struct file *file); +static int ftrace_event_set_open(struct inode *inode, struct file *file); + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -995,14 +985,14 @@ static const struct seq_operations show_set_event_seq_ops = { }; static const struct file_operations ftrace_avail_fops = { - .open = ftrace_event_seq_open, + .open = ftrace_event_avail_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static const struct file_operations ftrace_set_event_fops = { - .open = ftrace_event_seq_open, + .open = ftrace_event_set_open, .read = seq_read, .write = ftrace_event_write, .llseek = seq_lseek, @@ -1078,6 +1068,26 @@ static struct dentry *event_trace_events_dir(void) return d_events; } +static int +ftrace_event_avail_open(struct inode *inode, struct file *file) +{ + const struct seq_operations *seq_ops = &show_event_seq_ops; + + return seq_open(file, seq_ops); +} + +static int +ftrace_event_set_open(struct inode *inode, struct file *file) +{ + const struct seq_operations *seq_ops = &show_set_event_seq_ops; + + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + ftrace_clear_events(); + + return seq_open(file, seq_ops); +} + static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { @@ -1489,6 +1499,9 @@ static __init int event_trace_enable(void) if (ret) pr_warn("Failed to enable trace event: %s\n", token); } + + 
trace_printk_start_comm(); + return 0; } @@ -1505,15 +1518,13 @@ static __init int event_trace_init(void) return 0; entry = debugfs_create_file("available_events", 0444, d_tracer, - (void *)&show_event_seq_ops, - &ftrace_avail_fops); + NULL, &ftrace_avail_fops); if (!entry) pr_warning("Could not create debugfs " "'available_events' entry\n"); entry = debugfs_create_file("set_event", 0644, d_tracer, - (void *)&show_set_event_seq_ops, - &ftrace_set_event_fops); + NULL, &ftrace_set_event_fops); if (!entry) pr_warning("Could not create debugfs " "'set_event' entry\n"); @@ -1749,7 +1760,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(buffer, event, flags, pc); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c154797..e5b0ca8 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1000,9 +1000,9 @@ static int init_pred(struct filter_parse_state *ps, } } else { if (field->is_signed) - ret = strict_strtoll(pred->regex.pattern, 0, &val); + ret = kstrtoll(pred->regex.pattern, 0, &val); else - ret = strict_strtoull(pred->regex.pattern, 0, &val); + ret = kstrtoull(pred->regex.pattern, 0, &val); if (ret) { parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); return -EINVAL; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 507a7a9..bb227e3 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -366,7 +366,7 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash, * We use the callback data field (which is a pointer) * as our counter. 
*/ - ret = strict_strtoul(number, 0, (unsigned long *)&count); + ret = kstrtoul(number, 0, (unsigned long *)&count); if (ret) return ret; @@ -411,5 +411,4 @@ static __init int init_function_trace(void) init_func_cmd_traceon(); return register_tracer(&function_trace); } -device_initcall(init_function_trace); - +core_initcall(init_function_trace); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 99b4378..4edb4b7 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -223,7 +223,7 @@ int __trace_graph_entry(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->graph_ent = *trace; if (!filter_current_check_discard(buffer, call, entry, event)) - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); return 1; } @@ -327,7 +327,7 @@ void __trace_graph_return(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->ret = *trace; if (!filter_current_check_discard(buffer, call, entry, event)) - ring_buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); } void trace_graph_return(struct ftrace_graph_ret *trace) @@ -1474,4 +1474,4 @@ static __init int init_graph_trace(void) return register_tracer(&graph_trace); } -device_initcall(init_graph_trace); +core_initcall(init_graph_trace); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d98ee82..5ffce7b 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -604,7 +604,7 @@ static struct tracer irqsoff_tracer __read_mostly = .reset = irqsoff_tracer_reset, .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, - .print_max = 1, + .print_max = true, .print_header = irqsoff_print_header, .print_line = irqsoff_print_line, .flags = &tracer_flags, @@ -614,7 +614,7 @@ static struct tracer irqsoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, - .use_max_tr = 1, + .use_max_tr = 
true, }; # define register_irqsoff(trace) register_tracer(&trace) #else @@ -637,7 +637,7 @@ static struct tracer preemptoff_tracer __read_mostly = .reset = irqsoff_tracer_reset, .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, - .print_max = 1, + .print_max = true, .print_header = irqsoff_print_header, .print_line = irqsoff_print_line, .flags = &tracer_flags, @@ -647,7 +647,7 @@ static struct tracer preemptoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, - .use_max_tr = 1, + .use_max_tr = true, }; # define register_preemptoff(trace) register_tracer(&trace) #else @@ -672,7 +672,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .reset = irqsoff_tracer_reset, .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, - .print_max = 1, + .print_max = true, .print_header = irqsoff_print_header, .print_line = irqsoff_print_line, .flags = &tracer_flags, @@ -682,7 +682,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, - .use_max_tr = 1, + .use_max_tr = true, }; # define register_preemptirqsoff(trace) register_tracer(&trace) @@ -698,4 +698,4 @@ __init static int init_irqsoff_tracer(void) return 0; } -device_initcall(init_irqsoff_tracer); +core_initcall(init_irqsoff_tracer); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1a21170..1865d5f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -444,7 +444,7 @@ static int create_trace_probe(int argc, char **argv) return -EINVAL; } /* an address specified */ - ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); + ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr); if (ret) { pr_info("Failed to parse address.\n"); return ret; @@ -751,8 +751,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if 
(!filter_current_check_discard(buffer, call, entry, event)) - trace_nowake_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + trace_buffer_unlock_commit_regs(buffer, event, + irq_flags, pc, regs); } /* Kretprobe handler */ @@ -784,8 +784,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) - trace_nowake_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + trace_buffer_unlock_commit_regs(buffer, event, + irq_flags, pc, regs); } /* Event entry printers */ diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index daa9980..412e959 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -441,7 +441,7 @@ static const struct fetch_type *find_fetch_type(const char *type) goto fail; type++; - if (strict_strtoul(type, 0, &bs)) + if (kstrtoul(type, 0, &bs)) goto fail; switch (bs) { @@ -501,8 +501,8 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset) tmp = strchr(symbol, '+'); if (tmp) { - /* skip sign because strict_strtol doesn't accept '+' */ - ret = strict_strtoul(tmp + 1, 0, offset); + /* skip sign because kstrtoul doesn't accept '+' */ + ret = kstrtoul(tmp + 1, 0, offset); if (ret) return ret; @@ -533,7 +533,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, else ret = -EINVAL; } else if (isdigit(arg[5])) { - ret = strict_strtoul(arg + 5, 10, &param); + ret = kstrtoul(arg + 5, 10, &param); if (ret || param > PARAM_MAX_STACK) ret = -EINVAL; else { @@ -579,7 +579,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, case '@': /* memory or symbol */ if (isdigit(arg[1])) { - ret = strict_strtoul(arg + 1, 0, &param); + ret = kstrtoul(arg + 1, 0, &param); if (ret) break; @@ -597,14 +597,14 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, break; case '+': /* deref memory */ - arg++; /* Skip
'+', because strict_strtol() rejects it. */ + arg++; /* Skip '+', because kstrtol() rejects it. */ case '-': tmp = strchr(arg, '('); if (!tmp) break; *tmp = '\0'; - ret = strict_strtol(arg, 0, &offset); + ret = kstrtol(arg, 0, &offset); if (ret) break; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 7e62c0a..3374c79 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -102,9 +102,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!filter_check_discard(call, entry, buffer, event)) - ring_buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr->buffer, flags, 6, pc); - ftrace_trace_userstack(tr->buffer, flags, pc); + trace_buffer_unlock_commit(buffer, event, flags, pc); } static void diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 02170c0..bc64fc1 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -589,7 +589,7 @@ static struct tracer wakeup_tracer __read_mostly = .reset = wakeup_tracer_reset, .start = wakeup_tracer_start, .stop = wakeup_tracer_stop, - .print_max = 1, + .print_max = true, .print_header = wakeup_print_header, .print_line = wakeup_print_line, .flags = &tracer_flags, @@ -599,7 +599,7 @@ static struct tracer wakeup_tracer __read_mostly = #endif .open = wakeup_trace_open, .close = wakeup_trace_close, - .use_max_tr = 1, + .use_max_tr = true, }; static struct tracer wakeup_rt_tracer __read_mostly = @@ -610,7 +610,7 @@ static struct tracer wakeup_rt_tracer __read_mostly = .start = wakeup_tracer_start, .stop = wakeup_tracer_stop, .wait_pipe = poll_wait_pipe, - .print_max = 1, + .print_max = true, .print_header = wakeup_print_header, .print_line = wakeup_print_line, .flags = &tracer_flags, @@ -620,7 +620,7 @@ static struct tracer wakeup_rt_tracer __read_mostly = #endif .open = wakeup_trace_open, .close = wakeup_trace_close, - .use_max_tr = 1, + .use_max_tr 
= true, }; __init static int init_wakeup_tracer(void) @@ -637,4 +637,4 @@ __init static int init_wakeup_tracer(void) return 0; } -device_initcall(init_wakeup_tracer); +core_initcall(init_wakeup_tracer); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 2c00a69..4762316 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -320,7 +320,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, int (*func)(void)) { int save_ftrace_enabled = ftrace_enabled; - int save_tracer_enabled = tracer_enabled; unsigned long count; char *func_name; int ret; @@ -331,7 +330,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, /* enable tracing, and record the filter function */ ftrace_enabled = 1; - tracer_enabled = 1; /* passed in by parameter to fool gcc from optimizing */ func(); @@ -395,7 +393,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, out: ftrace_enabled = save_ftrace_enabled; - tracer_enabled = save_tracer_enabled; /* Enable tracing on all functions again */ ftrace_set_global_filter(NULL, 0, 1); @@ -452,7 +449,6 @@ static int trace_selftest_function_recursion(void) { int save_ftrace_enabled = ftrace_enabled; - int save_tracer_enabled = tracer_enabled; char *func_name; int len; int ret; @@ -465,7 +461,6 @@ trace_selftest_function_recursion(void) /* enable tracing, and record the filter function */ ftrace_enabled = 1; - tracer_enabled = 1; /* Handle PPC64 '.' 
name */ func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); @@ -534,7 +529,6 @@ trace_selftest_function_recursion(void) ret = 0; out: ftrace_enabled = save_ftrace_enabled; - tracer_enabled = save_tracer_enabled; return ret; } @@ -569,7 +563,6 @@ static int trace_selftest_function_regs(void) { int save_ftrace_enabled = ftrace_enabled; - int save_tracer_enabled = tracer_enabled; char *func_name; int len; int ret; @@ -586,7 +579,6 @@ trace_selftest_function_regs(void) /* enable tracing, and record the filter function */ ftrace_enabled = 1; - tracer_enabled = 1; /* Handle PPC64 '.' name */ func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); @@ -648,7 +640,6 @@ trace_selftest_function_regs(void) ret = 0; out: ftrace_enabled = save_ftrace_enabled; - tracer_enabled = save_tracer_enabled; return ret; } @@ -662,7 +653,6 @@ int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) { int save_ftrace_enabled = ftrace_enabled; - int save_tracer_enabled = tracer_enabled; unsigned long count; int ret; @@ -671,7 +661,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* start the tracing */ ftrace_enabled = 1; - tracer_enabled = 1; ret = tracer_init(trace, tr); if (ret) { @@ -708,7 +697,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ret = trace_selftest_function_regs(); out: ftrace_enabled = save_ftrace_enabled; - tracer_enabled = save_tracer_enabled; /* kill ftrace totally if we failed */ if (ret) @@ -1106,6 +1094,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); + printk("ret = %d\n", ret); if (!ret) ret = trace_test_buffer(&max_tr, &count); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 2485a7d..7609dd6 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -21,9 +21,6 @@ static int syscall_enter_register(struct 
ftrace_event_call *event, static int syscall_exit_register(struct ftrace_event_call *event, enum trace_reg type, void *data); -static int syscall_enter_define_fields(struct ftrace_event_call *call); -static int syscall_exit_define_fields(struct ftrace_event_call *call); - static struct list_head * syscall_get_enter_fields(struct ftrace_event_call *call) { @@ -32,30 +29,6 @@ syscall_get_enter_fields(struct ftrace_event_call *call) return &entry->enter_fields; } -struct trace_event_functions enter_syscall_print_funcs = { - .trace = print_syscall_enter, -}; - -struct trace_event_functions exit_syscall_print_funcs = { - .trace = print_syscall_exit, -}; - -struct ftrace_event_class event_class_syscall_enter = { - .system = "syscalls", - .reg = syscall_enter_register, - .define_fields = syscall_enter_define_fields, - .get_fields = syscall_get_enter_fields, - .raw_init = init_syscall_trace, -}; - -struct ftrace_event_class event_class_syscall_exit = { - .system = "syscalls", - .reg = syscall_exit_register, - .define_fields = syscall_exit_define_fields, - .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), - .raw_init = init_syscall_trace, -}; - extern struct syscall_metadata *__start_syscalls_metadata[]; extern struct syscall_metadata *__stop_syscalls_metadata[]; @@ -432,7 +405,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -int init_syscall_trace(struct ftrace_event_call *call) +static int init_syscall_trace(struct ftrace_event_call *call) { int id; int num; @@ -457,6 +430,30 @@ int init_syscall_trace(struct ftrace_event_call *call) return id; } +struct trace_event_functions enter_syscall_print_funcs = { + .trace = print_syscall_enter, +}; + +struct trace_event_functions exit_syscall_print_funcs = { + .trace = print_syscall_exit, +}; + +struct ftrace_event_class event_class_syscall_enter = { + .system = "syscalls", + .reg = syscall_enter_register, + .define_fields = syscall_enter_define_fields, + .get_fields 
= syscall_get_enter_fields, + .raw_init = init_syscall_trace, +}; + +struct ftrace_event_class event_class_syscall_exit = { + .system = "syscalls", + .reg = syscall_exit_register, + .define_fields = syscall_exit_define_fields, + .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), + .raw_init = init_syscall_trace, +}; + unsigned long __init __weak arch_syscall_addr(int nr) { return (unsigned long)sys_call_table[nr]; @@ -537,7 +534,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } -int perf_sysenter_enable(struct ftrace_event_call *call) +static int perf_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; @@ -558,7 +555,7 @@ int perf_sysenter_enable(struct ftrace_event_call *call) return ret; } -void perf_sysenter_disable(struct ftrace_event_call *call) +static void perf_sysenter_disable(struct ftrace_event_call *call) { int num; @@ -615,7 +612,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } -int perf_sysexit_enable(struct ftrace_event_call *call) +static int perf_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; @@ -636,7 +633,7 @@ int perf_sysexit_enable(struct ftrace_event_call *call) return ret; } -void perf_sysexit_disable(struct ftrace_event_call *call) +static void perf_sysexit_disable(struct ftrace_event_call *call) { int num; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f3c3811..9614db8 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -252,7 +252,7 @@ static int create_trace_uprobe(int argc, char **argv) if (ret) goto fail_address_parse; - ret = strict_strtoul(arg, 0, &offset); + ret = kstrtoul(arg, 0, &offset); if (ret) goto fail_address_parse; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9d4c8d5..c8c21be 100644 --- a/kernel/watchdog.c +++ 
b/kernel/watchdog.c @@ -116,7 +116,7 @@ static unsigned long get_timestamp(int this_cpu) return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ } -static unsigned long get_sample_period(void) +static u64 get_sample_period(void) { /* * convert watchdog_thresh from seconds to ns @@ -125,7 +125,7 @@ static unsigned long get_sample_period(void) * and hard thresholds) to increment before the * hardlockup detector generates a warning */ - return get_softlockup_thresh() * (NSEC_PER_SEC / 5); + return get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); } /* Commands for resetting the watchdog */ @@ -368,6 +368,9 @@ static void watchdog_disable(unsigned int cpu) { struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); + if (!watchdog_enabled) + return; + watchdog_set_prio(SCHED_NORMAL, 0); hrtimer_cancel(hrtimer); /* disable the perf event */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d951daa..1dae900 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1361,8 +1361,19 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); - BUG_ON(timer_pending(timer)); - BUG_ON(!list_empty(&work->entry)); + WARN_ON_ONCE(timer_pending(timer)); + WARN_ON_ONCE(!list_empty(&work->entry)); + + /* + * If @delay is 0, queue @dwork->work immediately. This is for + * both optimization and correctness. The earliest @timer can + * expire is on the closest next tick and delayed_work users depend + * on that there's no such delay when @delay is 0. 
+ */ + if (!delay) { + __queue_work(cpu, wq, &dwork->work); + return; + } timer_stats_timer_set_start_info(&dwork->timer); @@ -1417,9 +1428,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, bool ret = false; unsigned long flags; - if (!delay) - return queue_work_on(cpu, wq, &dwork->work); - /* read the comment in __queue_work() */ local_irq_save(flags); @@ -2407,8 +2415,10 @@ static int rescuer_thread(void *__wq) repeat: set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) + if (kthread_should_stop()) { + __set_current_state(TASK_RUNNING); return 0; + } /* * See whether any cpu is asking for help. Unbounded @@ -2982,7 +2992,7 @@ bool cancel_delayed_work(struct delayed_work *dwork) set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work)); local_irq_restore(flags); - return true; + return ret; } EXPORT_SYMBOL(cancel_delayed_work); |