summaryrefslogtreecommitdiffstats
path: root/kernel/trace/trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r--kernel/trace/trace.c408
1 files changed, 285 insertions, 123 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f..086d363 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,10 +32,11 @@
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
-#include <linux/gfp.h>
#include <linux/fs.h>
#include "trace.h"
@@ -91,20 +92,17 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
static inline void ftrace_disable_cpu(void)
{
preempt_disable();
- __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
+ __this_cpu_inc(ftrace_cpu_disabled);
}
static inline void ftrace_enable_cpu(void)
{
- __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
+ __this_cpu_dec(ftrace_cpu_disabled);
preempt_enable();
}
static cpumask_var_t __read_mostly tracing_buffer_mask;
-/* Define which cpu buffers are currently read in trace_pipe */
-static cpumask_var_t tracing_reader_cpumask;
-
#define for_each_tracing_cpu(cpu) \
for_each_cpu(cpu, tracing_buffer_mask)
@@ -119,9 +117,12 @@ static cpumask_var_t tracing_reader_cpumask;
*
* It is default off, but you can enable it with either specifying
* "ftrace_dump_on_oops" in the kernel command line, or setting
- * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ * /proc/sys/kernel/ftrace_dump_on_oops
+ * Set 1 if you want to dump buffers of all CPUs
+ * Set 2 if you want to dump the buffer of the CPU that triggered oops
*/
-int ftrace_dump_on_oops;
+
+enum ftrace_dump_mode ftrace_dump_on_oops;
static int tracing_set_tracer(const char *buf);
@@ -141,8 +142,17 @@ __setup("ftrace=", set_cmdline_ftrace);
static int __init set_ftrace_dump_on_oops(char *str)
{
- ftrace_dump_on_oops = 1;
- return 1;
+ if (*str++ != '=' || !*str) {
+ ftrace_dump_on_oops = DUMP_ALL;
+ return 1;
+ }
+
+ if (!strcmp("orig_cpu", str)) {
+ ftrace_dump_on_oops = DUMP_ORIG;
+ return 1;
+ }
+
+ return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
@@ -243,12 +253,91 @@ static struct tracer *current_trace __read_mostly;
/*
* trace_types_lock is used to protect the trace_types list.
- * This lock is also used to keep user access serialized.
- * Accesses from userspace will grab this lock while userspace
- * activities happen inside the kernel.
*/
static DEFINE_MUTEX(trace_types_lock);
+/*
+ * serialize the access of the ring buffer
+ *
+ * ring buffer serializes readers, but it is low level protection.
+ * The validity of the events (which returns by ring_buffer_peek() ..etc)
+ * are not protected by ring buffer.
+ *
+ * The content of events may become garbage if we allow other process consumes
+ * these events concurrently:
+ * A) the page of the consumed events may become a normal page
+ * (not reader page) in ring buffer, and this page will be rewrited
+ * by events producer.
+ * B) The page of the consumed events may become a page for splice_read,
+ * and this page will be returned to system.
+ *
+ * These primitives allow multi process access to different cpu ring buffer
+ * concurrently.
+ *
+ * These primitives don't distinguish read-only and read-consume access.
+ * Multi read-only access are also serialized.
+ */
+
+#ifdef CONFIG_SMP
+static DECLARE_RWSEM(all_cpu_access_lock);
+static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+ if (cpu == TRACE_PIPE_ALL_CPU) {
+ /* gain it for accessing the whole ring buffer. */
+ down_write(&all_cpu_access_lock);
+ } else {
+ /* gain it for accessing a cpu ring buffer. */
+
+ /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
+ down_read(&all_cpu_access_lock);
+
+ /* Secondly block other access to this @cpu ring buffer. */
+ mutex_lock(&per_cpu(cpu_access_lock, cpu));
+ }
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+ if (cpu == TRACE_PIPE_ALL_CPU) {
+ up_write(&all_cpu_access_lock);
+ } else {
+ mutex_unlock(&per_cpu(cpu_access_lock, cpu));
+ up_read(&all_cpu_access_lock);
+ }
+}
+
+static inline void trace_access_lock_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ mutex_init(&per_cpu(cpu_access_lock, cpu));
+}
+
+#else
+
+static DEFINE_MUTEX(access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+ (void)cpu;
+ mutex_lock(&access_lock);
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+ (void)cpu;
+ mutex_unlock(&access_lock);
+}
+
+static inline void trace_access_lock_init(void)
+{
+}
+
+#endif
+
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
@@ -297,6 +386,21 @@ static int __init set_buf_size(char *str)
}
__setup("trace_buf_size=", set_buf_size);
+static int __init set_tracing_thresh(char *str)
+{
+ unsigned long threshhold;
+ int ret;
+
+ if (!str)
+ return 0;
+ ret = strict_strtoul(str, 0, &threshhold);
+ if (ret < 0)
+ return 0;
+ tracing_thresh = threshhold * 1000;
+ return 1;
+}
+__setup("tracing_thresh=", set_tracing_thresh);
+
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
return nsecs / 1000;
@@ -502,9 +606,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
static arch_spinlock_t ftrace_max_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+unsigned long __read_mostly tracing_thresh;
+
#ifdef CONFIG_TRACER_MAX_TRACE
unsigned long __read_mostly tracing_max_latency;
-unsigned long __read_mostly tracing_thresh;
/*
* Copy the new maximum trace into the separate maximum-trace
@@ -515,7 +620,7 @@ static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
struct trace_array_cpu *data = tr->data[cpu];
- struct trace_array_cpu *max_data = tr->data[cpu];
+ struct trace_array_cpu *max_data;
max_tr.cpu = cpu;
max_tr.time_start = data->preempt_timestamp;
@@ -525,7 +630,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
max_data->critical_start = data->critical_start;
max_data->critical_end = data->critical_end;
- memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
+ memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
max_data->pid = tsk->pid;
max_data->uid = task_uid(tsk);
max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -747,10 +852,10 @@ out:
mutex_unlock(&trace_types_lock);
}
-static void __tracing_reset(struct trace_array *tr, int cpu)
+static void __tracing_reset(struct ring_buffer *buffer, int cpu)
{
ftrace_disable_cpu();
- ring_buffer_reset_cpu(tr->buffer, cpu);
+ ring_buffer_reset_cpu(buffer, cpu);
ftrace_enable_cpu();
}
@@ -762,7 +867,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
/* Make sure all commits have finished */
synchronize_sched();
- __tracing_reset(tr, cpu);
+ __tracing_reset(buffer, cpu);
ring_buffer_record_enable(buffer);
}
@@ -780,7 +885,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
- __tracing_reset(tr, cpu);
+ __tracing_reset(buffer, cpu);
ring_buffer_record_enable(buffer);
}
@@ -857,6 +962,8 @@ void tracing_start(void)
goto out;
}
+ /* Prevent the buffers from switching */
+ arch_spin_lock(&ftrace_max_lock);
buffer = global_trace.buffer;
if (buffer)
@@ -866,6 +973,8 @@ void tracing_start(void)
if (buffer)
ring_buffer_record_enable(buffer);
+ arch_spin_unlock(&ftrace_max_lock);
+
ftrace_start();
out:
spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -887,6 +996,9 @@ void tracing_stop(void)
if (trace_stop_count++)
goto out;
+ /* Prevent the buffers from switching */
+ arch_spin_lock(&ftrace_max_lock);
+
buffer = global_trace.buffer;
if (buffer)
ring_buffer_record_disable(buffer);
@@ -895,6 +1007,8 @@ void tracing_stop(void)
if (buffer)
ring_buffer_record_disable(buffer);
+ arch_spin_unlock(&ftrace_max_lock);
+
out:
spin_unlock_irqrestore(&tracing_start_lock, flags);
}
@@ -951,6 +1065,11 @@ void trace_find_cmdline(int pid, char comm[])
return;
}
+ if (WARN_ON_ONCE(pid < 0)) {
+ strcpy(comm, "<XXX>");
+ return;
+ }
+
if (pid > PID_MAX_DEFAULT) {
strcpy(comm, "<...>");
return;
@@ -1084,7 +1203,7 @@ trace_function(struct trace_array *tr,
struct ftrace_entry *entry;
/* If we are reading the ring buffer, don't trace */
- if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+ if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
return;
event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1177,6 +1296,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
+ /*
+ * NMIs can not handle page faults, even with fix ups.
+ * The save user stack can (and often does) fault.
+ */
+ if (unlikely(in_nmi()))
+ return;
+
event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
@@ -1315,8 +1441,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
entry->fmt = fmt;
memcpy(entry->buf, trace_buf, sizeof(u32) * len);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, flags, 6, pc);
+ }
out_unlock:
arch_spin_unlock(&trace_buf_lock);
@@ -1389,8 +1517,10 @@ int trace_array_vprintk(struct trace_array *tr,
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = '\0';
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, irq_flags, 6, pc);
+ }
out_unlock:
arch_spin_unlock(&trace_buf_lock);
@@ -1427,7 +1557,8 @@ static void trace_iterator_increment(struct trace_iterator *iter)
}
static struct trace_entry *
-peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
+ unsigned long *lost_events)
{
struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
@@ -1438,7 +1569,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
if (buf_iter)
event = ring_buffer_iter_peek(buf_iter, ts);
else
- event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+ event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
+ lost_events);
ftrace_enable_cpu();
@@ -1446,10 +1578,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
}
static struct trace_entry *
-__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
+ unsigned long *missing_events, u64 *ent_ts)
{
struct ring_buffer *buffer = iter->tr->buffer;
struct trace_entry *ent, *next = NULL;
+ unsigned long lost_events = 0, next_lost = 0;
int cpu_file = iter->cpu_file;
u64 next_ts = 0, ts;
int next_cpu = -1;
@@ -1462,7 +1596,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
if (cpu_file > TRACE_PIPE_ALL_CPU) {
if (ring_buffer_empty_cpu(buffer, cpu_file))
return NULL;
- ent = peek_next_entry(iter, cpu_file, ent_ts);
+ ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
if (ent_cpu)
*ent_cpu = cpu_file;
@@ -1474,7 +1608,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
if (ring_buffer_empty_cpu(buffer, cpu))
continue;
- ent = peek_next_entry(iter, cpu, &ts);
+ ent = peek_next_entry(iter, cpu, &ts, &lost_events);
/*
* Pick the entry with the smallest timestamp:
@@ -1483,6 +1617,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
next = ent;
next_cpu = cpu;
next_ts = ts;
+ next_lost = lost_events;
}
}
@@ -1492,6 +1627,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
if (ent_ts)
*ent_ts = next_ts;
+ if (missing_events)
+ *missing_events = next_lost;
+
return next;
}
@@ -1499,13 +1637,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts)
{
- return __find_next_entry(iter, ent_cpu, ent_ts);
+ return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
}
/* Find the next real entry, and increment the iterator to the next entry */
static void *find_next_entry_inc(struct trace_iterator *iter)
{
- iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
+ iter->ent = __find_next_entry(iter, &iter->cpu,
+ &iter->lost_events, &iter->ts);
if (iter->ent)
trace_iterator_increment(iter);
@@ -1517,7 +1656,8 @@ static void trace_consume(struct trace_iterator *iter)
{
/* Don't allow ftrace to trace into the ring buffers */
ftrace_disable_cpu();
- ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+ ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
+ &iter->lost_events);
ftrace_enable_cpu();
}
@@ -1580,12 +1720,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
}
/*
- * No necessary locking here. The worst thing which can
- * happen is loosing events consumed at the same time
- * by a trace_pipe reader.
- * Other than that, we don't risk to crash the ring buffer
- * because it serializes the readers.
- *
* The current tracer is copied to avoid a global locking
* all around.
*/
@@ -1623,6 +1757,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
ftrace_enable_cpu();
+ iter->leftover = 0;
for (p = iter; p && l < *pos; p = s_next(m, p, &l))
;
@@ -1640,12 +1775,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
}
trace_event_read_lock();
+ trace_access_lock(cpu_file);
return p;
}
static void s_stop(struct seq_file *m, void *p)
{
+ struct trace_iterator *iter = m->private;
+
atomic_dec(&trace_record_cmdline_disabled);
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
@@ -1669,7 +1808,7 @@ static void print_func_help_header(struct seq_file *m)
}
-static void
+void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1797,7 +1936,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
}
if (event)
- return event->trace(iter, sym_flags);
+ return event->funcs->trace(iter, sym_flags, event);
if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
goto partial;
@@ -1823,7 +1962,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
event = ftrace_find_event(entry->type);
if (event)
- return event->raw(iter, 0);
+ return event->funcs->raw(iter, 0, event);
if (!trace_seq_printf(s, "%d ?\n", entry->type))
goto partial;
@@ -1850,7 +1989,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
event = ftrace_find_event(entry->type);
if (event) {
- enum print_line_t ret = event->hex(iter, 0);
+ enum print_line_t ret = event->funcs->hex(iter, 0, event);
if (ret != TRACE_TYPE_HANDLED)
return ret;
}
@@ -1875,10 +2014,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
}
event = ftrace_find_event(entry->type);
- return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
+ return event ? event->funcs->binary(iter, 0, event) :
+ TRACE_TYPE_HANDLED;
}
-static int trace_empty(struct trace_iterator *iter)
+int trace_empty(struct trace_iterator *iter)
{
int cpu;
@@ -1913,6 +2053,10 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
{
enum print_line_t ret;
+ if (iter->lost_events)
+ trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
+ iter->cpu, iter->lost_events);
+
if (iter->trace && iter->trace->print_line) {
ret = iter->trace->print_line(iter);
if (ret != TRACE_TYPE_UNHANDLED)
@@ -1941,6 +2085,23 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
return print_trace_fmt(iter);
}
+void trace_default_header(struct seq_file *m)
+{
+ struct trace_iterator *iter = m->private;
+
+ if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+ /* print nothing if the buffers are empty */
+ if (trace_empty(iter))
+ return;
+ print_trace_header(m, iter);
+ if (!(trace_flags & TRACE_ITER_VERBOSE))
+ print_lat_help_header(m);
+ } else {
+ if (!(trace_flags & TRACE_ITER_VERBOSE))
+ print_func_help_header(m);
+ }
+}
+
static int s_show(struct seq_file *m, void *v)
{
struct trace_iterator *iter = v;
@@ -1953,17 +2114,9 @@ static int s_show(struct seq_file *m, void *v)
}
if (iter->trace && iter->trace->print_header)
iter->trace->print_header(m);
- else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
- /* print nothing if the buffers are empty */
- if (trace_empty(iter))
- return 0;
- print_trace_header(m, iter);
- if (!(trace_flags & TRACE_ITER_VERBOSE))
- print_lat_help_header(m);
- } else {
- if (!(trace_flags & TRACE_ITER_VERBOSE))
- print_func_help_header(m);
- }
+ else
+ trace_default_header(m);
+
} else if (iter->leftover) {
/*
* If we filled the seq_file buffer earlier, we
@@ -2049,15 +2202,20 @@ __tracing_open(struct inode *inode, struct file *file)
if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
for_each_tracing_cpu(cpu) {
-
iter->buffer_iter[cpu] =
- ring_buffer_read_start(iter->tr->buffer, cpu);
+ ring_buffer_read_prepare(iter->tr->buffer, cpu);
+ }
+ ring_buffer_read_prepare_sync();
+ for_each_tracing_cpu(cpu) {
+ ring_buffer_read_start(iter->buffer_iter[cpu]);
tracing_iter_reset(iter, cpu);
}
} else {
cpu = iter->cpu_file;
iter->buffer_iter[cpu] =
- ring_buffer_read_start(iter->tr->buffer, cpu);
+ ring_buffer_read_prepare(iter->tr->buffer, cpu);
+ ring_buffer_read_prepare_sync();
+ ring_buffer_read_start(iter->buffer_iter[cpu]);
tracing_iter_reset(iter, cpu);
}
@@ -2836,22 +2994,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
mutex_lock(&trace_types_lock);
- /* We only allow one reader per cpu */
- if (cpu_file == TRACE_PIPE_ALL_CPU) {
- if (!cpumask_empty(tracing_reader_cpumask)) {
- ret = -EBUSY;
- goto out;
- }
- cpumask_setall(tracing_reader_cpumask);
- } else {
- if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
- cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
- else {
- ret = -EBUSY;
- goto out;
- }
- }
-
/* create a buffer to store the information to pass to userspace */
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter) {
@@ -2907,12 +3049,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
- if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
- cpumask_clear(tracing_reader_cpumask);
- else
- cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
-
-
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
@@ -3074,6 +3210,7 @@ waitagain:
iter->pos = -1;
trace_event_read_lock();
+ trace_access_lock(iter->cpu_file);
while (find_next_entry_inc(iter) != NULL) {
enum print_line_t ret;
int len = iter->seq.len;
@@ -3090,6 +3227,7 @@ waitagain:
if (iter->seq.len >= cnt)
break;
}
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
/* Now copy what we have to the user */
@@ -3172,12 +3310,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
size_t len,
unsigned int flags)
{
- struct page *pages[PIPE_BUFFERS];
- struct partial_page partial[PIPE_BUFFERS];
+ struct page *pages_def[PIPE_DEF_BUFFERS];
+ struct partial_page partial_def[PIPE_DEF_BUFFERS];
struct trace_iterator *iter = filp->private_data;
struct splice_pipe_desc spd = {
- .pages = pages,
- .partial = partial,
+ .pages = pages_def,
+ .partial = partial_def,
.nr_pages = 0, /* This gets updated below. */
.flags = flags,
.ops = &tracing_pipe_buf_ops,
@@ -3188,6 +3326,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
size_t rem;
unsigned int i;
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
+
/* copy the tracer to avoid using a global lock all around */
mutex_lock(&trace_types_lock);
if (unlikely(old_tracer != current_trace && current_trace)) {
@@ -3215,40 +3356,44 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
}
trace_event_read_lock();
+ trace_access_lock(iter->cpu_file);
/* Fill as many pages as possible. */
- for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
- pages[i] = alloc_page(GFP_KERNEL);
- if (!pages[i])
+ for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
+ spd.pages[i] = alloc_page(GFP_KERNEL);
+ if (!spd.pages[i])
break;
rem = tracing_fill_pipe_page(rem, iter);
/* Copy the data into the page, so we can start over. */
ret = trace_seq_to_buffer(&iter->seq,
- page_address(pages[i]),
+ page_address(spd.pages[i]),
iter->seq.len);
if (ret < 0) {
- __free_page(pages[i]);
+ __free_page(spd.pages[i]);
break;
}
- partial[i].offset = 0;
- partial[i].len = iter->seq.len;
+ spd.partial[i].offset = 0;
+ spd.partial[i].len = iter->seq.len;
trace_seq_init(&iter->seq);
}
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
mutex_unlock(&iter->mutex);
spd.nr_pages = i;
- return splice_to_pipe(pipe, &spd);
+ ret = splice_to_pipe(pipe, &spd);
+out:
+ splice_shrink_spd(pipe, &spd);
+ return ret;
out_err:
mutex_unlock(&iter->mutex);
-
- return ret;
+ goto out;
}
static ssize_t
@@ -3521,7 +3666,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
struct ftrace_buffer_info *info = filp->private_data;
- unsigned int pos;
ssize_t ret;
size_t size;
@@ -3539,18 +3683,15 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
info->read = 0;
+ trace_access_lock(info->cpu);
ret = ring_buffer_read_page(info->tr->buffer,
&info->spare,
count,
info->cpu, 0);
+ trace_access_unlock(info->cpu);
if (ret < 0)
return 0;
- pos = ring_buffer_page_len(info->spare);
-
- if (pos < PAGE_SIZE)
- memset(info->spare + pos, 0, PAGE_SIZE - pos);
-
read:
size = PAGE_SIZE - info->read;
if (size > count)
@@ -3645,11 +3786,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
unsigned int flags)
{
struct ftrace_buffer_info *info = file->private_data;
- struct partial_page partial[PIPE_BUFFERS];
- struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial_def[PIPE_DEF_BUFFERS];
+ struct page *pages_def[PIPE_DEF_BUFFERS];
struct splice_pipe_desc spd = {
- .pages = pages,
- .partial = partial,
+ .pages = pages_def,
+ .partial = partial_def,
.flags = flags,
.ops = &buffer_pipe_buf_ops,
.spd_release = buffer_spd_release,
@@ -3658,21 +3799,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
int entries, size, i;
size_t ret;
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
+
if (*ppos & (PAGE_SIZE - 1)) {
WARN_ONCE(1, "Ftrace: previous read must page-align\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
if (len & (PAGE_SIZE - 1)) {
WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
- if (len < PAGE_SIZE)
- return -EINVAL;
+ if (len < PAGE_SIZE) {
+ ret = -EINVAL;
+ goto out;
+ }
len &= PAGE_MASK;
}
+ trace_access_lock(info->cpu);
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
- for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
+ for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
struct page *page;
int r;
@@ -3717,6 +3865,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
}
+ trace_access_unlock(info->cpu);
spd.nr_pages = i;
/* did we read anything? */
@@ -3726,11 +3875,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
else
ret = 0;
/* TODO: block */
- return ret;
+ goto out;
}
ret = splice_to_pipe(pipe, &spd);
-
+ splice_shrink_spd(pipe, &spd);
+out:
return ret;
}
@@ -4153,6 +4303,8 @@ static __init int tracer_init_debugfs(void)
struct dentry *d_tracer;
int cpu;
+ trace_access_lock_init();
+
d_tracer = tracing_init_dentry();
trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4176,10 +4328,10 @@ static __init int tracer_init_debugfs(void)
#ifdef CONFIG_TRACER_MAX_TRACE
trace_create_file("tracing_max_latency", 0644, d_tracer,
&tracing_max_latency, &tracing_max_lat_fops);
+#endif
trace_create_file("tracing_thresh", 0644, d_tracer,
&tracing_thresh, &tracing_max_lat_fops);
-#endif
trace_create_file("README", 0444, d_tracer,
NULL, &tracing_readme_fops);
@@ -4219,7 +4371,7 @@ static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
if (ftrace_dump_on_oops)
- ftrace_dump();
+ ftrace_dump(ftrace_dump_on_oops);
return NOTIFY_OK;
}
@@ -4236,7 +4388,7 @@ static int trace_die_handler(struct notifier_block *self,
switch (val) {
case DIE_OOPS:
if (ftrace_dump_on_oops)
- ftrace_dump();
+ ftrace_dump(ftrace_dump_on_oops);
break;
default:
break;
@@ -4277,7 +4429,8 @@ trace_printk_seq(struct trace_seq *s)
trace_seq_init(s);
}
-static void __ftrace_dump(bool disable_tracing)
+static void
+__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
{
static arch_spinlock_t ftrace_dump_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
@@ -4310,12 +4463,25 @@ static void __ftrace_dump(bool disable_tracing)
/* don't look at user memory in panic mode */
trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
- printk(KERN_TRACE "Dumping ftrace buffer:\n");
-
/* Simulate the iterator */
iter.tr = &global_trace;
iter.trace = current_trace;
- iter.cpu_file = TRACE_PIPE_ALL_CPU;
+
+ switch (oops_dump_mode) {
+ case DUMP_ALL:
+ iter.cpu_file = TRACE_PIPE_ALL_CPU;
+ break;
+ case DUMP_ORIG:
+ iter.cpu_file = raw_smp_processor_id();
+ break;
+ case DUMP_NONE:
+ goto out_enable;
+ default:
+ printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
+ iter.cpu_file = TRACE_PIPE_ALL_CPU;
+ }
+
+ printk(KERN_TRACE "Dumping ftrace buffer:\n");
/*
* We need to stop all tracing on all CPUS to read the
@@ -4354,6 +4520,7 @@ static void __ftrace_dump(bool disable_tracing)
else
printk(KERN_TRACE "---------------------------------\n");
+ out_enable:
/* Re-enable tracing if requested */
if (!disable_tracing) {
trace_flags |= old_userobj;
@@ -4370,9 +4537,9 @@ static void __ftrace_dump(bool disable_tracing)
}
/* By default: disable tracing after the dump */
-void ftrace_dump(void)
+void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
- __ftrace_dump(true);
+ __ftrace_dump(true, oops_dump_mode);
}
__init static int tracer_alloc_buffers(void)
@@ -4387,9 +4554,6 @@ __init static int tracer_alloc_buffers(void)
if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
- if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
- goto out_free_tracing_cpumask;
-
/* To save memory, keep the ring buffer size to its minimum */
if (ring_buffer_expanded)
ring_buf_size = trace_buf_size;
@@ -4447,8 +4611,6 @@ __init static int tracer_alloc_buffers(void)
return 0;
out_free_cpumask:
- free_cpumask_var(tracing_reader_cpumask);
-out_free_tracing_cpumask:
free_cpumask_var(tracing_cpumask);
out_free_buffer_mask:
free_cpumask_var(tracing_buffer_mask);
OpenPOWER on IntegriCloud