summaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorTimothy Pearson <tpearson@raptorengineering.com>2017-08-23 14:45:25 -0500
committerTimothy Pearson <tpearson@raptorengineering.com>2017-08-23 14:45:25 -0500
commitfcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 (patch)
tree22962a4387943edc841c72a4e636a068c66d58fd /kernel/trace
downloadast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.zip
ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.tar.gz
Initial import of modified Linux 2.6.28 tree
Original upstream URL: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig198
-rw-r--r--kernel/trace/Makefile28
-rw-r--r--kernel/trace/ftrace.c1451
-rw-r--r--kernel/trace/ring_buffer.c2201
-rw-r--r--kernel/trace/trace.c3235
-rw-r--r--kernel/trace/trace.h422
-rw-r--r--kernel/trace/trace_boot.c126
-rw-r--r--kernel/trace/trace_functions.c81
-rw-r--r--kernel/trace/trace_irqsoff.c493
-rw-r--r--kernel/trace/trace_mmiotrace.c375
-rw-r--r--kernel/trace/trace_nop.c64
-rw-r--r--kernel/trace/trace_sched_switch.c211
-rw-r--r--kernel/trace/trace_sched_wakeup.c393
-rw-r--r--kernel/trace/trace_selftest.c524
-rw-r--r--kernel/trace/trace_selftest_dynamic.c7
-rw-r--r--kernel/trace/trace_stack.c320
-rw-r--r--kernel/trace/trace_sysprof.c363
17 files changed, 10492 insertions, 0 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
new file mode 100644
index 0000000..33dbefd
--- /dev/null
+++ b/kernel/trace/Kconfig
@@ -0,0 +1,198 @@
+#
+# Architectures that offer an FUNCTION_TRACER implementation should
+# select HAVE_FUNCTION_TRACER:
+#
+
+config NOP_TRACER
+ bool
+
+config HAVE_FUNCTION_TRACER
+ bool
+
+config HAVE_DYNAMIC_FTRACE
+ bool
+
+config HAVE_FTRACE_MCOUNT_RECORD
+ bool
+
+config TRACER_MAX_TRACE
+ bool
+
+config RING_BUFFER
+ bool
+
+config TRACING
+ bool
+ select DEBUG_FS
+ select RING_BUFFER
+ select STACKTRACE if STACKTRACE_SUPPORT
+ select TRACEPOINTS
+ select NOP_TRACER
+
+menu "Tracers"
+
+config FUNCTION_TRACER
+ bool "Kernel Function Tracer"
+ depends on HAVE_FUNCTION_TRACER
+ depends on DEBUG_KERNEL
+ select FRAME_POINTER
+ select TRACING
+ select CONTEXT_SWITCH_TRACER
+ help
+ Enable the kernel to trace every kernel function. This is done
+ by using a compiler feature to insert a small, 5-byte No-Operation
+ instruction to the beginning of every kernel function, which NOP
+ sequence is then dynamically patched into a tracer call when
+ tracing is enabled by the administrator. If it's runtime disabled
+ (the bootup default), then the overhead of the instructions is very
+ small and not measurable even in micro-benchmarks.
+
+config IRQSOFF_TRACER
+ bool "Interrupts-off Latency Tracer"
+ default n
+ depends on TRACE_IRQFLAGS_SUPPORT
+ depends on GENERIC_TIME
+ depends on DEBUG_KERNEL
+ select TRACE_IRQFLAGS
+ select TRACING
+ select TRACER_MAX_TRACE
+ help
+ This option measures the time spent in irqs-off critical
+ sections, with microsecond accuracy.
+
+ The default measurement method is a maximum search, which is
+ disabled by default and can be runtime (re-)started
+ via:
+
+ echo 0 > /debugfs/tracing/tracing_max_latency
+
+ (Note that kernel size and overhead increases with this option
+ enabled. This option and the preempt-off timing option can be
+ used together or separately.)
+
+config PREEMPT_TRACER
+ bool "Preemption-off Latency Tracer"
+ default n
+ depends on GENERIC_TIME
+ depends on PREEMPT
+ depends on DEBUG_KERNEL
+ select TRACING
+ select TRACER_MAX_TRACE
+ help
+ This option measures the time spent in preemption off critical
+ sections, with microsecond accuracy.
+
+ The default measurement method is a maximum search, which is
+ disabled by default and can be runtime (re-)started
+ via:
+
+ echo 0 > /debugfs/tracing/tracing_max_latency
+
+ (Note that kernel size and overhead increases with this option
+ enabled. This option and the irqs-off timing option can be
+ used together or separately.)
+
+config SYSPROF_TRACER
+ bool "Sysprof Tracer"
+ depends on X86
+ select TRACING
+ help
+ This tracer provides the trace needed by the 'Sysprof' userspace
+ tool.
+
+config SCHED_TRACER
+ bool "Scheduling Latency Tracer"
+ depends on DEBUG_KERNEL
+ select TRACING
+ select CONTEXT_SWITCH_TRACER
+ select TRACER_MAX_TRACE
+ help
+ This tracer tracks the latency of the highest priority task
+ to be scheduled in, starting from the point it has woken up.
+
+config CONTEXT_SWITCH_TRACER
+ bool "Trace process context switches"
+ depends on DEBUG_KERNEL
+ select TRACING
+ select MARKERS
+ help
+ This tracer gets called from the context switch and records
+ all switching of tasks.
+
+config BOOT_TRACER
+ bool "Trace boot initcalls"
+ depends on DEBUG_KERNEL
+ select TRACING
+ select CONTEXT_SWITCH_TRACER
+ help
+ This tracer helps developers to optimize boot times: it records
+ the timings of the initcalls and traces key events and the identity
+ of tasks that can cause boot delays, such as context-switches.
+
+ Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+ produce pretty graphics about boot inefficiencies, giving a visual
+ representation of the delays during initcalls - but the raw
+ /debug/tracing/trace text output is readable too.
+
+ ( Note that tracing self tests can't be enabled if this tracer is
+ selected, because the self-tests are an initcall as well and that
+ would invalidate the boot trace. )
+
+config STACK_TRACER
+ bool "Trace max stack"
+ depends on HAVE_FUNCTION_TRACER
+ depends on DEBUG_KERNEL
+ select FUNCTION_TRACER
+ select STACKTRACE
+ help
+ This special tracer records the maximum stack footprint of the
+ kernel and displays it in debugfs/tracing/stack_trace.
+
+ This tracer works by hooking into every function call that the
+ kernel executes, and keeping a maximum stack depth value and
+ stack-trace saved. Because this logic has to execute in every
+ kernel function, all the time, this option can slow down the
+ kernel measurably and is generally intended for kernel
+ developers only.
+
+ Say N if unsure.
+
+config DYNAMIC_FTRACE
+ bool "enable/disable ftrace tracepoints dynamically"
+ depends on FUNCTION_TRACER
+ depends on HAVE_DYNAMIC_FTRACE
+ depends on DEBUG_KERNEL
+ default y
+ help
+ This option will modify all the calls to ftrace dynamically
+ (will patch them out of the binary image and replaces them
+ with a No-Op instruction) as they are called. A table is
+ created to dynamically enable them again.
+
+ This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
+ has native performance as long as no tracing is active.
+
+ The changes to the code are done by a kernel thread that
+ wakes up once a second and checks to see if any ftrace calls
+ were made. If so, it runs stop_machine (stops all CPUS)
+ and modifies the code to jump over the call to ftrace.
+
+config FTRACE_MCOUNT_RECORD
+ def_bool y
+ depends on DYNAMIC_FTRACE
+ depends on HAVE_FTRACE_MCOUNT_RECORD
+
+config FTRACE_SELFTEST
+ bool
+
+config FTRACE_STARTUP_TEST
+ bool "Perform a startup test on ftrace"
+ depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
+ select FTRACE_SELFTEST
+ help
+ This option performs a series of startup tests on ftrace. On bootup
+ a series of tests are made to verify that the tracer is
+ functioning properly. It will do tests on all the configured
+ tracers of ftrace.
+
+endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
new file mode 100644
index 0000000..c8228b1
--- /dev/null
+++ b/kernel/trace/Makefile
@@ -0,0 +1,28 @@
+
+# Do not instrument the tracer itself:
+
+ifdef CONFIG_FUNCTION_TRACER
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
+
+# selftest needs instrumentation
+CFLAGS_trace_selftest_dynamic.o = -pg
+obj-y += trace_selftest_dynamic.o
+endif
+
+obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
+
+obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
+obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
+obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
+obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
+obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
+obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
+obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
+
+libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
new file mode 100644
index 0000000..78db083
--- /dev/null
+++ b/kernel/trace/ftrace.c
@@ -0,0 +1,1451 @@
+/*
+ * Infrastructure for profiling code inserted by 'gcc -pg'.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * Originally ported from the -rt patch by:
+ * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Based on code in the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+
+#include <linux/stop_machine.h>
+#include <linux/clocksource.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/hardirq.h>
+#include <linux/kthread.h>
+#include <linux/uaccess.h>
+#include <linux/kprobes.h>
+#include <linux/ftrace.h>
+#include <linux/sysctl.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+
+#include <asm/ftrace.h>
+
+#include "trace.h"
+
+#define FTRACE_WARN_ON(cond) \
+ do { \
+ if (WARN_ON(cond)) \
+ ftrace_kill(); \
+ } while (0)
+
+#define FTRACE_WARN_ON_ONCE(cond) \
+ do { \
+ if (WARN_ON_ONCE(cond)) \
+ ftrace_kill(); \
+ } while (0)
+
+/* ftrace_enabled is a method to turn ftrace on or off */
+int ftrace_enabled __read_mostly;
+static int last_ftrace_enabled;
+
+/*
+ * ftrace_disabled is set when an anomaly is discovered.
+ * ftrace_disabled is much stronger than ftrace_enabled.
+ */
+static int ftrace_disabled __read_mostly;
+
+static DEFINE_SPINLOCK(ftrace_lock);
+static DEFINE_MUTEX(ftrace_sysctl_lock);
+
+static struct ftrace_ops ftrace_list_end __read_mostly =
+{
+ .func = ftrace_stub,
+};
+
+static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
+ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+
+static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
+{
+ struct ftrace_ops *op = ftrace_list;
+
+ /* in case someone actually ports this to alpha! */
+ read_barrier_depends();
+
+ while (op != &ftrace_list_end) {
+ /* silly alpha */
+ read_barrier_depends();
+ op->func(ip, parent_ip);
+ op = op->next;
+ };
+}
+
+/**
+ * clear_ftrace_function - reset the ftrace function
+ *
+ * This NULLs the ftrace function and in essence stops
+ * tracing. There may be lag
+ */
+void clear_ftrace_function(void)
+{
+ ftrace_trace_function = ftrace_stub;
+}
+
+static int __register_ftrace_function(struct ftrace_ops *ops)
+{
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+
+ ops->next = ftrace_list;
+ /*
+ * We are entering ops into the ftrace_list but another
+ * CPU might be walking that list. We need to make sure
+ * the ops->next pointer is valid before another CPU sees
+ * the ops pointer included into the ftrace_list.
+ */
+ smp_wmb();
+ ftrace_list = ops;
+
+ if (ftrace_enabled) {
+ /*
+ * For one func, simply call it directly.
+ * For more than one func, call the chain.
+ */
+ if (ops->next == &ftrace_list_end)
+ ftrace_trace_function = ops->func;
+ else
+ ftrace_trace_function = ftrace_list_func;
+ }
+
+ spin_unlock(&ftrace_lock);
+
+ return 0;
+}
+
+static int __unregister_ftrace_function(struct ftrace_ops *ops)
+{
+ struct ftrace_ops **p;
+ int ret = 0;
+
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+
+ /*
+ * If we are removing the last function, then simply point
+ * to the ftrace_stub.
+ */
+ if (ftrace_list == ops && ops->next == &ftrace_list_end) {
+ ftrace_trace_function = ftrace_stub;
+ ftrace_list = &ftrace_list_end;
+ goto out;
+ }
+
+ for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
+ if (*p == ops)
+ break;
+
+ if (*p != ops) {
+ ret = -1;
+ goto out;
+ }
+
+ *p = (*p)->next;
+
+ if (ftrace_enabled) {
+ /* If we only have one func left, then call that directly */
+ if (ftrace_list == &ftrace_list_end ||
+ ftrace_list->next == &ftrace_list_end)
+ ftrace_trace_function = ftrace_list->func;
+ }
+
+ out:
+ spin_unlock(&ftrace_lock);
+
+ return ret;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+# error Dynamic ftrace depends on MCOUNT_RECORD
+#endif
+
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to get it confused by reading a reference in the code as we
+ * are parsing on objcopy output of text. Use a variable for
+ * it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
+enum {
+ FTRACE_ENABLE_CALLS = (1 << 0),
+ FTRACE_DISABLE_CALLS = (1 << 1),
+ FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
+ FTRACE_ENABLE_MCOUNT = (1 << 3),
+ FTRACE_DISABLE_MCOUNT = (1 << 4),
+};
+
+static int ftrace_filtered;
+
+static LIST_HEAD(ftrace_new_addrs);
+
+static DEFINE_MUTEX(ftrace_regex_lock);
+
+struct ftrace_page {
+ struct ftrace_page *next;
+ unsigned long index;
+ struct dyn_ftrace records[];
+};
+
+#define ENTRIES_PER_PAGE \
+ ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+
+/* estimate from running different kernels */
+#define NR_TO_INIT 10000
+
+static struct ftrace_page *ftrace_pages_start;
+static struct ftrace_page *ftrace_pages;
+
+static struct dyn_ftrace *ftrace_free_records;
+
+
+#ifdef CONFIG_KPROBES
+
+static int frozen_record_count;
+
+static inline void freeze_record(struct dyn_ftrace *rec)
+{
+ if (!(rec->flags & FTRACE_FL_FROZEN)) {
+ rec->flags |= FTRACE_FL_FROZEN;
+ frozen_record_count++;
+ }
+}
+
+static inline void unfreeze_record(struct dyn_ftrace *rec)
+{
+ if (rec->flags & FTRACE_FL_FROZEN) {
+ rec->flags &= ~FTRACE_FL_FROZEN;
+ frozen_record_count--;
+ }
+}
+
+static inline int record_frozen(struct dyn_ftrace *rec)
+{
+ return rec->flags & FTRACE_FL_FROZEN;
+}
+#else
+# define freeze_record(rec) ({ 0; })
+# define unfreeze_record(rec) ({ 0; })
+# define record_frozen(rec) ({ 0; })
+#endif /* CONFIG_KPROBES */
+
+static void ftrace_free_rec(struct dyn_ftrace *rec)
+{
+ rec->ip = (unsigned long)ftrace_free_records;
+ ftrace_free_records = rec;
+ rec->flags |= FTRACE_FL_FREE;
+}
+
+void ftrace_release(void *start, unsigned long size)
+{
+ struct dyn_ftrace *rec;
+ struct ftrace_page *pg;
+ unsigned long s = (unsigned long)start;
+ unsigned long e = s + size;
+ int i;
+
+ if (ftrace_disabled || !start)
+ return;
+
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ for (i = 0; i < pg->index; i++) {
+ rec = &pg->records[i];
+
+ if ((rec->ip >= s) && (rec->ip < e))
+ ftrace_free_rec(rec);
+ }
+ }
+ spin_unlock(&ftrace_lock);
+}
+
+static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
+{
+ struct dyn_ftrace *rec;
+
+ /* First check for freed records */
+ if (ftrace_free_records) {
+ rec = ftrace_free_records;
+
+ if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
+ FTRACE_WARN_ON_ONCE(1);
+ ftrace_free_records = NULL;
+ return NULL;
+ }
+
+ ftrace_free_records = (void *)rec->ip;
+ memset(rec, 0, sizeof(*rec));
+ return rec;
+ }
+
+ if (ftrace_pages->index == ENTRIES_PER_PAGE) {
+ if (!ftrace_pages->next) {
+ /* allocate another page */
+ ftrace_pages->next =
+ (void *)get_zeroed_page(GFP_KERNEL);
+ if (!ftrace_pages->next)
+ return NULL;
+ }
+ ftrace_pages = ftrace_pages->next;
+ }
+
+ return &ftrace_pages->records[ftrace_pages->index++];
+}
+
+static struct dyn_ftrace *
+ftrace_record_ip(unsigned long ip)
+{
+ struct dyn_ftrace *rec;
+
+ if (!ftrace_enabled || ftrace_disabled)
+ return NULL;
+
+ rec = ftrace_alloc_dyn_node(ip);
+ if (!rec)
+ return NULL;
+
+ rec->ip = ip;
+
+ list_add(&rec->list, &ftrace_new_addrs);
+
+ return rec;
+}
+
+#define FTRACE_ADDR ((long)(ftrace_caller))
+
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec,
+ unsigned char *nop, int enable)
+{
+ unsigned long ip, fl;
+ unsigned char *call, *old, *new;
+
+ ip = rec->ip;
+
+ /*
+ * If this record is not to be traced and
+ * it is not enabled then do nothing.
+ *
+ * If this record is not to be traced and
+ * it is enabled then disabled it.
+ *
+ */
+ if (rec->flags & FTRACE_FL_NOTRACE) {
+ if (rec->flags & FTRACE_FL_ENABLED)
+ rec->flags &= ~FTRACE_FL_ENABLED;
+ else
+ return 0;
+
+ } else if (ftrace_filtered && enable) {
+ /*
+ * Filtering is on:
+ */
+
+ fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
+
+ /* Record is filtered and enabled, do nothing */
+ if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
+ return 0;
+
+ /* Record is not filtered and is not enabled do nothing */
+ if (!fl)
+ return 0;
+
+ /* Record is not filtered but enabled, disable it */
+ if (fl == FTRACE_FL_ENABLED)
+ rec->flags &= ~FTRACE_FL_ENABLED;
+ else
+ /* Otherwise record is filtered but not enabled, enable it */
+ rec->flags |= FTRACE_FL_ENABLED;
+ } else {
+ /* Disable or not filtered */
+
+ if (enable) {
+ /* if record is enabled, do nothing */
+ if (rec->flags & FTRACE_FL_ENABLED)
+ return 0;
+
+ rec->flags |= FTRACE_FL_ENABLED;
+
+ } else {
+
+ /* if record is not enabled do nothing */
+ if (!(rec->flags & FTRACE_FL_ENABLED))
+ return 0;
+
+ rec->flags &= ~FTRACE_FL_ENABLED;
+ }
+ }
+
+ call = ftrace_call_replace(ip, FTRACE_ADDR);
+
+ if (rec->flags & FTRACE_FL_ENABLED) {
+ old = nop;
+ new = call;
+ } else {
+ old = call;
+ new = nop;
+ }
+
+ return ftrace_modify_code(ip, old, new);
+}
+
+static void ftrace_replace_code(int enable)
+{
+ int i, failed;
+ unsigned char *nop = NULL;
+ struct dyn_ftrace *rec;
+ struct ftrace_page *pg;
+
+ nop = ftrace_nop_replace();
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ for (i = 0; i < pg->index; i++) {
+ rec = &pg->records[i];
+
+ /* don't modify code that has already faulted */
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+
+ /* ignore updates to this record's mcount site */
+ if (get_kprobe((void *)rec->ip)) {
+ freeze_record(rec);
+ continue;
+ } else {
+ unfreeze_record(rec);
+ }
+
+ failed = __ftrace_replace_code(rec, nop, enable);
+ if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
+ rec->flags |= FTRACE_FL_FAILED;
+ if ((system_state == SYSTEM_BOOTING) ||
+ !core_kernel_text(rec->ip)) {
+ ftrace_free_rec(rec);
+ }
+ }
+ }
+ }
+}
+
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+ int i;
+
+ printk(KERN_CONT "%s", fmt);
+
+ for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+ printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
+static int
+ftrace_code_disable(struct dyn_ftrace *rec)
+{
+ unsigned long ip;
+ unsigned char *nop, *call;
+ int ret;
+
+ ip = rec->ip;
+
+ nop = ftrace_nop_replace();
+ call = ftrace_call_replace(ip, mcount_addr);
+
+ ret = ftrace_modify_code(ip, call, nop);
+ if (ret) {
+ switch (ret) {
+ case -EFAULT:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on modifying ");
+ print_ip_sym(ip);
+ break;
+ case -EINVAL:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace failed to modify ");
+ print_ip_sym(ip);
+ print_ip_ins(" expected: ", call);
+ print_ip_ins(" actual: ", (unsigned char *)ip);
+ print_ip_ins(" replace: ", nop);
+ printk(KERN_CONT "\n");
+ break;
+ case -EPERM:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on writing ");
+ print_ip_sym(ip);
+ break;
+ default:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on unknown error ");
+ print_ip_sym(ip);
+ }
+
+ rec->flags |= FTRACE_FL_FAILED;
+ return 0;
+ }
+ return 1;
+}
+
+static int __ftrace_modify_code(void *data)
+{
+ int *command = data;
+
+ if (*command & FTRACE_ENABLE_CALLS)
+ ftrace_replace_code(1);
+ else if (*command & FTRACE_DISABLE_CALLS)
+ ftrace_replace_code(0);
+
+ if (*command & FTRACE_UPDATE_TRACE_FUNC)
+ ftrace_update_ftrace_func(ftrace_trace_function);
+
+ return 0;
+}
+
+static void ftrace_run_update_code(int command)
+{
+ stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+static ftrace_func_t saved_ftrace_func;
+static int ftrace_start;
+static DEFINE_MUTEX(ftrace_start_lock);
+
+static void ftrace_startup(void)
+{
+ int command = 0;
+
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftrace_start_lock);
+ ftrace_start++;
+ command |= FTRACE_ENABLE_CALLS;
+
+ if (saved_ftrace_func != ftrace_trace_function) {
+ saved_ftrace_func = ftrace_trace_function;
+ command |= FTRACE_UPDATE_TRACE_FUNC;
+ }
+
+ if (!command || !ftrace_enabled)
+ goto out;
+
+ ftrace_run_update_code(command);
+ out:
+ mutex_unlock(&ftrace_start_lock);
+}
+
+static void ftrace_shutdown(void)
+{
+ int command = 0;
+
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftrace_start_lock);
+ ftrace_start--;
+ if (!ftrace_start)
+ command |= FTRACE_DISABLE_CALLS;
+
+ if (saved_ftrace_func != ftrace_trace_function) {
+ saved_ftrace_func = ftrace_trace_function;
+ command |= FTRACE_UPDATE_TRACE_FUNC;
+ }
+
+ if (!command || !ftrace_enabled)
+ goto out;
+
+ ftrace_run_update_code(command);
+ out:
+ mutex_unlock(&ftrace_start_lock);
+}
+
+static void ftrace_startup_sysctl(void)
+{
+ int command = FTRACE_ENABLE_MCOUNT;
+
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftrace_start_lock);
+ /* Force update next time */
+ saved_ftrace_func = NULL;
+ /* ftrace_start is true if we want ftrace running */
+ if (ftrace_start)
+ command |= FTRACE_ENABLE_CALLS;
+
+ ftrace_run_update_code(command);
+ mutex_unlock(&ftrace_start_lock);
+}
+
+static void ftrace_shutdown_sysctl(void)
+{
+ int command = FTRACE_DISABLE_MCOUNT;
+
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftrace_start_lock);
+ /* ftrace_start is true if ftrace is running */
+ if (ftrace_start)
+ command |= FTRACE_DISABLE_CALLS;
+
+ ftrace_run_update_code(command);
+ mutex_unlock(&ftrace_start_lock);
+}
+
+static cycle_t ftrace_update_time;
+static unsigned long ftrace_update_cnt;
+unsigned long ftrace_update_tot_cnt;
+
+static int ftrace_update_code(void)
+{
+ struct dyn_ftrace *p, *t;
+ cycle_t start, stop;
+
+ start = ftrace_now(raw_smp_processor_id());
+ ftrace_update_cnt = 0;
+
+ list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
+
+ /* If something went wrong, bail without enabling anything */
+ if (unlikely(ftrace_disabled))
+ return -1;
+
+ list_del_init(&p->list);
+
+ /* convert record (i.e, patch mcount-call with NOP) */
+ if (ftrace_code_disable(p)) {
+ p->flags |= FTRACE_FL_CONVERTED;
+ ftrace_update_cnt++;
+ } else
+ ftrace_free_rec(p);
+ }
+
+ stop = ftrace_now(raw_smp_processor_id());
+ ftrace_update_time = stop - start;
+ ftrace_update_tot_cnt += ftrace_update_cnt;
+
+ return 0;
+}
+
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+{
+ struct ftrace_page *pg;
+ int cnt;
+ int i;
+
+ /* allocate a few pages */
+ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!ftrace_pages_start)
+ return -1;
+
+ /*
+ * Allocate a few more pages.
+ *
+ * TODO: have some parser search vmlinux before
+ * final linking to find all calls to ftrace.
+ * Then we can:
+ * a) know how many pages to allocate.
+ * and/or
+ * b) set up the table then.
+ *
+ * The dynamic code is still necessary for
+ * modules.
+ */
+
+ pg = ftrace_pages = ftrace_pages_start;
+
+ cnt = num_to_init / ENTRIES_PER_PAGE;
+ pr_info("ftrace: allocating %ld entries in %d pages\n",
+ num_to_init, cnt + 1);
+
+ for (i = 0; i < cnt; i++) {
+ pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+
+ /* If we fail, we'll try later anyway */
+ if (!pg->next)
+ break;
+
+ pg = pg->next;
+ }
+
+ return 0;
+}
+
+enum {
+ FTRACE_ITER_FILTER = (1 << 0),
+ FTRACE_ITER_CONT = (1 << 1),
+ FTRACE_ITER_NOTRACE = (1 << 2),
+ FTRACE_ITER_FAILURES = (1 << 3),
+};
+
+#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
+
+struct ftrace_iterator {
+ loff_t pos;
+ struct ftrace_page *pg;
+ unsigned idx;
+ unsigned flags;
+ unsigned char buffer[FTRACE_BUFF_MAX+1];
+ unsigned buffer_idx;
+ unsigned filtered;
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct ftrace_iterator *iter = m->private;
+ struct dyn_ftrace *rec = NULL;
+
+ (*pos)++;
+
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+ retry:
+ if (iter->idx >= iter->pg->index) {
+ if (iter->pg->next) {
+ iter->pg = iter->pg->next;
+ iter->idx = 0;
+ goto retry;
+ }
+ } else {
+ rec = &iter->pg->records[iter->idx++];
+ if ((rec->flags & FTRACE_FL_FREE) ||
+
+ (!(iter->flags & FTRACE_ITER_FAILURES) &&
+ (rec->flags & FTRACE_FL_FAILED)) ||
+
+ ((iter->flags & FTRACE_ITER_FAILURES) &&
+ !(rec->flags & FTRACE_FL_FAILED)) ||
+
+ ((iter->flags & FTRACE_ITER_FILTER) &&
+ !(rec->flags & FTRACE_FL_FILTER)) ||
+
+ ((iter->flags & FTRACE_ITER_NOTRACE) &&
+ !(rec->flags & FTRACE_FL_NOTRACE))) {
+ rec = NULL;
+ goto retry;
+ }
+ }
+ spin_unlock(&ftrace_lock);
+
+ iter->pos = *pos;
+
+ return rec;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ struct ftrace_iterator *iter = m->private;
+ void *p = NULL;
+ loff_t l = -1;
+
+ if (*pos > iter->pos)
+ *pos = iter->pos;
+
+ l = *pos;
+ p = t_next(m, p, &l);
+
+ return p;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ struct ftrace_iterator *iter = m->private;
+ struct dyn_ftrace *rec = v;
+ char str[KSYM_SYMBOL_LEN];
+ int ret = 0;
+
+ if (!rec)
+ return 0;
+
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+
+ ret = seq_printf(m, "%s\n", str);
+ if (ret < 0) {
+ iter->pos--;
+ iter->idx--;
+ }
+
+ return 0;
+}
+
+static struct seq_operations show_ftrace_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int
+ftrace_avail_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_iterator *iter;
+ int ret;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ iter->pg = ftrace_pages_start;
+ iter->pos = 0;
+
+ ret = seq_open(file, &show_ftrace_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+
+ m->private = iter;
+ } else {
+ kfree(iter);
+ }
+
+ return ret;
+}
+
+int ftrace_avail_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ struct ftrace_iterator *iter = m->private;
+
+ seq_release(inode, file);
+ kfree(iter);
+
+ return 0;
+}
+
+static int
+ftrace_failures_open(struct inode *inode, struct file *file)
+{
+ int ret;
+ struct seq_file *m;
+ struct ftrace_iterator *iter;
+
+ ret = ftrace_avail_open(inode, file);
+ if (!ret) {
+ m = (struct seq_file *)file->private_data;
+ iter = (struct ftrace_iterator *)m->private;
+ iter->flags = FTRACE_ITER_FAILURES;
+ }
+
+ return ret;
+}
+
+
+static void ftrace_filter_reset(int enable)
+{
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
+ unsigned i;
+
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+ if (enable)
+ ftrace_filtered = 0;
+ pg = ftrace_pages_start;
+ while (pg) {
+ for (i = 0; i < pg->index; i++) {
+ rec = &pg->records[i];
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+ rec->flags &= ~type;
+ }
+ pg = pg->next;
+ }
+ spin_unlock(&ftrace_lock);
+}
+
+static int
+ftrace_regex_open(struct inode *inode, struct file *file, int enable)
+{
+ struct ftrace_iterator *iter;
+ int ret = 0;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ mutex_lock(&ftrace_regex_lock);
+ if ((file->f_mode & FMODE_WRITE) &&
+ !(file->f_flags & O_APPEND))
+ ftrace_filter_reset(enable);
+
+ if (file->f_mode & FMODE_READ) {
+ iter->pg = ftrace_pages_start;
+ iter->pos = 0;
+ iter->flags = enable ? FTRACE_ITER_FILTER :
+ FTRACE_ITER_NOTRACE;
+
+ ret = seq_open(file, &show_ftrace_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = iter;
+ } else
+ kfree(iter);
+ } else
+ file->private_data = iter;
+ mutex_unlock(&ftrace_regex_lock);
+
+ return ret;
+}
+
+static int
+ftrace_filter_open(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_open(inode, file, 1);
+}
+
+static int
+ftrace_notrace_open(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_open(inode, file, 0);
+}
+
+static ssize_t
+ftrace_regex_read(struct file *file, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ if (file->f_mode & FMODE_READ)
+ return seq_read(file, ubuf, cnt, ppos);
+ else
+ return -EPERM;
+}
+
+static loff_t
+ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+{
+ loff_t ret;
+
+ if (file->f_mode & FMODE_READ)
+ ret = seq_lseek(file, offset, origin);
+ else
+ file->f_pos = ret = 1;
+
+ return ret;
+}
+
+enum {
+ MATCH_FULL,
+ MATCH_FRONT_ONLY,
+ MATCH_MIDDLE_ONLY,
+ MATCH_END_ONLY,
+};
+
+static void
+ftrace_match(unsigned char *buff, int len, int enable)
+{
+ char str[KSYM_SYMBOL_LEN];
+ char *search = NULL;
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ int type = MATCH_FULL;
+ unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
+ unsigned i, match = 0, search_len = 0;
+
+ for (i = 0; i < len; i++) {
+ if (buff[i] == '*') {
+ if (!i) {
+ search = buff + i + 1;
+ type = MATCH_END_ONLY;
+ search_len = len - (i + 1);
+ } else {
+ if (type == MATCH_END_ONLY) {
+ type = MATCH_MIDDLE_ONLY;
+ } else {
+ match = i;
+ type = MATCH_FRONT_ONLY;
+ }
+ buff[i] = 0;
+ break;
+ }
+ }
+ }
+
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+ if (enable)
+ ftrace_filtered = 1;
+ pg = ftrace_pages_start;
+ while (pg) {
+ for (i = 0; i < pg->index; i++) {
+ int matched = 0;
+ char *ptr;
+
+ rec = &pg->records[i];
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ switch (type) {
+ case MATCH_FULL:
+ if (strcmp(str, buff) == 0)
+ matched = 1;
+ break;
+ case MATCH_FRONT_ONLY:
+ if (memcmp(str, buff, match) == 0)
+ matched = 1;
+ break;
+ case MATCH_MIDDLE_ONLY:
+ if (strstr(str, search))
+ matched = 1;
+ break;
+ case MATCH_END_ONLY:
+ ptr = strstr(str, search);
+ if (ptr && (ptr[search_len] == 0))
+ matched = 1;
+ break;
+ }
+ if (matched)
+ rec->flags |= flag;
+ }
+ pg = pg->next;
+ }
+ spin_unlock(&ftrace_lock);
+}
+
+static ssize_t
+ftrace_regex_write(struct file *file, const char __user *ubuf,
+ size_t cnt, loff_t *ppos, int enable)
+{
+ struct ftrace_iterator *iter;
+ char ch;
+ size_t read = 0;
+ ssize_t ret;
+
+ if (!cnt || cnt < 0)
+ return 0;
+
+ mutex_lock(&ftrace_regex_lock);
+
+ if (file->f_mode & FMODE_READ) {
+ struct seq_file *m = file->private_data;
+ iter = m->private;
+ } else
+ iter = file->private_data;
+
+ if (!*ppos) {
+ iter->flags &= ~FTRACE_ITER_CONT;
+ iter->buffer_idx = 0;
+ }
+
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+
+ if (!(iter->flags & ~FTRACE_ITER_CONT)) {
+ /* skip white space */
+ while (cnt && isspace(ch)) {
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+ }
+
+ if (isspace(ch)) {
+ file->f_pos += read;
+ ret = read;
+ goto out;
+ }
+
+ iter->buffer_idx = 0;
+ }
+
+ while (cnt && !isspace(ch)) {
+ if (iter->buffer_idx < FTRACE_BUFF_MAX)
+ iter->buffer[iter->buffer_idx++] = ch;
+ else {
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+ }
+
+ if (isspace(ch)) {
+ iter->filtered++;
+ iter->buffer[iter->buffer_idx] = 0;
+ ftrace_match(iter->buffer, iter->buffer_idx, enable);
+ iter->buffer_idx = 0;
+ } else
+ iter->flags |= FTRACE_ITER_CONT;
+
+
+ file->f_pos += read;
+
+ ret = read;
+ out:
+ mutex_unlock(&ftrace_regex_lock);
+
+ return ret;
+}
+
+static ssize_t
+ftrace_filter_write(struct file *file, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t
+ftrace_notrace_write(struct file *file, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
+}
+
+static void
+ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
+{
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftrace_regex_lock);
+ if (reset)
+ ftrace_filter_reset(enable);
+ if (buf)
+ ftrace_match(buf, len, enable);
+ mutex_unlock(&ftrace_regex_lock);
+}
+
+/**
+ * ftrace_set_filter - set a function to filter on in ftrace
+ * @buf - the string that holds the function filter text.
+ * @len - the length of the string.
+ * @reset - non zero to reset all filters before applying this filter.
+ *
+ * Filters denote which functions should be enabled when tracing is enabled.
+ * If @buf is NULL and reset is set, all functions will be enabled for tracing.
+ */
+void ftrace_set_filter(unsigned char *buf, int len, int reset)
+{
+ ftrace_set_regex(buf, len, reset, 1);
+}
+
+/**
+ * ftrace_set_notrace - set a function to not trace in ftrace
+ * @buf - the string that holds the function notrace text.
+ * @len - the length of the string.
+ * @reset - non zero to reset all filters before applying this filter.
+ *
+ * Notrace Filters denote which functions should not be enabled when tracing
+ * is enabled. If @buf is NULL and reset is set, all functions will be enabled
+ * for tracing.
+ */
+void ftrace_set_notrace(unsigned char *buf, int len, int reset)
+{
+ ftrace_set_regex(buf, len, reset, 0);
+}
+
+static int
+ftrace_regex_release(struct inode *inode, struct file *file, int enable)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ struct ftrace_iterator *iter;
+
+ mutex_lock(&ftrace_regex_lock);
+ if (file->f_mode & FMODE_READ) {
+ iter = m->private;
+
+ seq_release(inode, file);
+ } else
+ iter = file->private_data;
+
+ if (iter->buffer_idx) {
+ iter->filtered++;
+ iter->buffer[iter->buffer_idx] = 0;
+ ftrace_match(iter->buffer, iter->buffer_idx, enable);
+ }
+
+ mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftrace_start_lock);
+ if (ftrace_start && ftrace_enabled)
+ ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+ mutex_unlock(&ftrace_start_lock);
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ kfree(iter);
+ mutex_unlock(&ftrace_regex_lock);
+ return 0;
+}
+
+static int
+ftrace_filter_release(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_release(inode, file, 1);
+}
+
+static int
+ftrace_notrace_release(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_release(inode, file, 0);
+}
+
+static struct file_operations ftrace_avail_fops = {
+ .open = ftrace_avail_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = ftrace_avail_release,
+};
+
+static struct file_operations ftrace_failures_fops = {
+ .open = ftrace_failures_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = ftrace_avail_release,
+};
+
+static struct file_operations ftrace_filter_fops = {
+ .open = ftrace_filter_open,
+ .read = ftrace_regex_read,
+ .write = ftrace_filter_write,
+ .llseek = ftrace_regex_lseek,
+ .release = ftrace_filter_release,
+};
+
+static struct file_operations ftrace_notrace_fops = {
+ .open = ftrace_notrace_open,
+ .read = ftrace_regex_read,
+ .write = ftrace_notrace_write,
+ .llseek = ftrace_regex_lseek,
+ .release = ftrace_notrace_release,
+};
+
+static __init int ftrace_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("available_filter_functions", 0444,
+ d_tracer, NULL, &ftrace_avail_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'available_filter_functions' entry\n");
+
+ entry = debugfs_create_file("failures", 0444,
+ d_tracer, NULL, &ftrace_failures_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'failures' entry\n");
+
+ entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
+ NULL, &ftrace_filter_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'set_ftrace_filter' entry\n");
+
+ entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
+ NULL, &ftrace_notrace_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'set_ftrace_notrace' entry\n");
+
+ return 0;
+}
+
+fs_initcall(ftrace_init_debugfs);
+
+static int ftrace_convert_nops(unsigned long *start,
+ unsigned long *end)
+{
+ unsigned long *p;
+ unsigned long addr;
+ unsigned long flags;
+
+ mutex_lock(&ftrace_start_lock);
+ p = start;
+ while (p < end) {
+ addr = ftrace_call_adjust(*p++);
+ ftrace_record_ip(addr);
+ }
+
+ /* disable interrupts to prevent kstop machine */
+ local_irq_save(flags);
+ ftrace_update_code();
+ local_irq_restore(flags);
+ mutex_unlock(&ftrace_start_lock);
+
+ return 0;
+}
+
+void ftrace_init_module(unsigned long *start, unsigned long *end)
+{
+ if (ftrace_disabled || start == end)
+ return;
+ ftrace_convert_nops(start, end);
+}
+
+extern unsigned long __start_mcount_loc[];
+extern unsigned long __stop_mcount_loc[];
+
+void __init ftrace_init(void)
+{
+ unsigned long count, addr, flags;
+ int ret;
+
+ /* Keep the ftrace pointer to the stub */
+ addr = (unsigned long)ftrace_stub;
+
+ local_irq_save(flags);
+ ftrace_dyn_arch_init(&addr);
+ local_irq_restore(flags);
+
+ /* ftrace_dyn_arch_init places the return code in addr */
+ if (addr)
+ goto failed;
+
+ count = __stop_mcount_loc - __start_mcount_loc;
+
+ ret = ftrace_dyn_table_alloc(count);
+ if (ret)
+ goto failed;
+
+ last_ftrace_enabled = ftrace_enabled = 1;
+
+ ret = ftrace_convert_nops(__start_mcount_loc,
+ __stop_mcount_loc);
+
+ return;
+ failed:
+ ftrace_disabled = 1;
+}
+
+#else
+
+static int __init ftrace_nodyn_init(void)
+{
+ ftrace_enabled = 1;
+ return 0;
+}
+device_initcall(ftrace_nodyn_init);
+
+# define ftrace_startup() do { } while (0)
+# define ftrace_shutdown() do { } while (0)
+# define ftrace_startup_sysctl() do { } while (0)
+# define ftrace_shutdown_sysctl() do { } while (0)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+/**
+ * ftrace_kill - kill ftrace
+ *
+ * This function should be used by panic code. It stops ftrace
+ * but in a not so nice way. If you need to simply kill ftrace
+ * from a non-atomic section, use ftrace_kill.
+ */
+void ftrace_kill(void)
+{
+ ftrace_disabled = 1;
+ ftrace_enabled = 0;
+ clear_ftrace_function();
+}
+
+/**
+ * register_ftrace_function - register a function for profiling
+ * @ops - ops structure that holds the function for profiling.
+ *
+ * Register a function to be called by all functions in the
+ * kernel.
+ *
+ * Note: @ops->func and all the functions it calls must be labeled
+ * with "notrace", otherwise it will go into a
+ * recursive loop.
+ */
+int register_ftrace_function(struct ftrace_ops *ops)
+{
+ int ret;
+
+ if (unlikely(ftrace_disabled))
+ return -1;
+
+ mutex_lock(&ftrace_sysctl_lock);
+ ret = __register_ftrace_function(ops);
+ ftrace_startup();
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ return ret;
+}
+
+/**
+ * unregister_ftrace_function - unresgister a function for profiling.
+ * @ops - ops structure that holds the function to unregister
+ *
+ * Unregister a function that was added to be called by ftrace profiling.
+ */
+int unregister_ftrace_function(struct ftrace_ops *ops)
+{
+ int ret;
+
+ mutex_lock(&ftrace_sysctl_lock);
+ ret = __unregister_ftrace_function(ops);
+ ftrace_shutdown();
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ return ret;
+}
+
+int
+ftrace_enable_sysctl(struct ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ mutex_lock(&ftrace_sysctl_lock);
+
+ ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+
+ if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+ goto out;
+
+ last_ftrace_enabled = ftrace_enabled;
+
+ if (ftrace_enabled) {
+
+ ftrace_startup_sysctl();
+
+ /* we are starting ftrace again */
+ if (ftrace_list != &ftrace_list_end) {
+ if (ftrace_list->next == &ftrace_list_end)
+ ftrace_trace_function = ftrace_list->func;
+ else
+ ftrace_trace_function = ftrace_list_func;
+ }
+
+ } else {
+ /* stopping ftrace calls (just send to ftrace_stub) */
+ ftrace_trace_function = ftrace_stub;
+
+ ftrace_shutdown_sysctl();
+ }
+
+ out:
+ mutex_unlock(&ftrace_sysctl_lock);
+ return ret;
+}
+
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 0000000..71202ac
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,2201 @@
+/*
+ * Generic ring buffer
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h> /* used for sched_clock() (for now) */
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+ ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+ ring_buffers_off = 1;
+}
+
+/* Up this if you want to test the TIME_EXTENTS and normalization */
+#define DEBUG_SHIFT 0
+
+/* FIXME!!! */
+u64 ring_buffer_time_stamp(int cpu)
+{
+ u64 time;
+
+ preempt_disable_notrace();
+ /* shift to debug/test normalization and TIME_EXTENTS */
+ time = sched_clock() << DEBUG_SHIFT;
+ preempt_enable_notrace();
+
+ return time;
+}
+
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
+{
+ /* Just stupid testing the normalize function and deltas */
+ *ts >>= DEBUG_SHIFT;
+}
+
+#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
+#define RB_ALIGNMENT_SHIFT 2
+#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
+#define RB_MAX_SMALL_DATA 28
+
+enum {
+ RB_LEN_TIME_EXTEND = 8,
+ RB_LEN_TIME_STAMP = 16,
+};
+
+/* inline for ring buffer fast paths */
+static inline unsigned
+rb_event_length(struct ring_buffer_event *event)
+{
+ unsigned length;
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ /* undefined */
+ return -1;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ return RB_LEN_TIME_EXTEND;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ return RB_LEN_TIME_STAMP;
+
+ case RINGBUF_TYPE_DATA:
+ if (event->len)
+ length = event->len << RB_ALIGNMENT_SHIFT;
+ else
+ length = event->array[0];
+ return length + RB_EVNT_HDR_SIZE;
+ default:
+ BUG();
+ }
+ /* not hit */
+ return 0;
+}
+
+/**
+ * ring_buffer_event_length - return the length of the event
+ * @event: the event to get the length of
+ */
+unsigned ring_buffer_event_length(struct ring_buffer_event *event)
+{
+ return rb_event_length(event);
+}
+
+/* inline for ring buffer fast paths */
+static inline void *
+rb_event_data(struct ring_buffer_event *event)
+{
+ BUG_ON(event->type != RINGBUF_TYPE_DATA);
+ /* If length is in len field, then array[0] has the data */
+ if (event->len)
+ return (void *)&event->array[0];
+ /* Otherwise length is in array[0] and array[1] has the data */
+ return (void *)&event->array[1];
+}
+
+/**
+ * ring_buffer_event_data - return the data of the event
+ * @event: the event to get the data from
+ */
+void *ring_buffer_event_data(struct ring_buffer_event *event)
+{
+ return rb_event_data(event);
+}
+
+#define for_each_buffer_cpu(buffer, cpu) \
+ for_each_cpu_mask(cpu, buffer->cpumask)
+
+#define TS_SHIFT 27
+#define TS_MASK ((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST (~TS_MASK)
+
+/*
+ * This hack stolen from mm/slob.c.
+ * We can store per page timing information in the page frame of the page.
+ * Thanks to Peter Zijlstra for suggesting this idea.
+ */
+struct buffer_page {
+ u64 time_stamp; /* page time stamp */
+ local_t write; /* index for next write */
+ local_t commit; /* write commited index */
+ unsigned read; /* index for next read */
+ struct list_head list; /* list of free pages */
+ void *page; /* Actual data page */
+};
+
+/*
+ * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
+ * this issue out.
+ */
+static inline void free_buffer_page(struct buffer_page *bpage)
+{
+ if (bpage->page)
+ free_page((unsigned long)bpage->page);
+ kfree(bpage);
+}
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline int test_time_stamp(u64 delta)
+{
+ if (delta & TS_DELTA_TEST)
+ return 1;
+ return 0;
+}
+
+#define BUF_PAGE_SIZE PAGE_SIZE
+
+/*
+ * head_page == tail_page && head == tail then buffer is empty.
+ */
+struct ring_buffer_per_cpu {
+ int cpu;
+ struct ring_buffer *buffer;
+ spinlock_t lock;
+ struct lock_class_key lock_key;
+ struct list_head pages;
+ struct buffer_page *head_page; /* read from head */
+ struct buffer_page *tail_page; /* write to tail */
+ struct buffer_page *commit_page; /* commited pages */
+ struct buffer_page *reader_page;
+ unsigned long overrun;
+ unsigned long entries;
+ u64 write_stamp;
+ u64 read_stamp;
+ atomic_t record_disabled;
+};
+
+struct ring_buffer {
+ unsigned long size;
+ unsigned pages;
+ unsigned flags;
+ int cpus;
+ cpumask_t cpumask;
+ atomic_t record_disabled;
+
+ struct mutex mutex;
+
+ struct ring_buffer_per_cpu **buffers;
+};
+
+struct ring_buffer_iter {
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long head;
+ struct buffer_page *head_page;
+ u64 read_stamp;
+};
+
+#define RB_WARN_ON(buffer, cond) \
+ do { \
+ if (unlikely(cond)) { \
+ atomic_inc(&buffer->record_disabled); \
+ WARN_ON(1); \
+ } \
+ } while (0)
+
+#define RB_WARN_ON_RET(buffer, cond) \
+ do { \
+ if (unlikely(cond)) { \
+ atomic_inc(&buffer->record_disabled); \
+ WARN_ON(1); \
+ return -1; \
+ } \
+ } while (0)
+
+#define RB_WARN_ON_ONCE(buffer, cond) \
+ do { \
+ static int once; \
+ if (unlikely(cond) && !once) { \
+ once++; \
+ atomic_inc(&buffer->record_disabled); \
+ WARN_ON(1); \
+ } \
+ } while (0)
+
+/**
+ * check_pages - integrity check of buffer pages
+ * @cpu_buffer: CPU buffer with pages to test
+ *
+ * As a safty measure we check to make sure the data pages have not
+ * been corrupted.
+ */
+static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct list_head *head = &cpu_buffer->pages;
+ struct buffer_page *page, *tmp;
+
+ RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
+ RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
+
+ list_for_each_entry_safe(page, tmp, head, list) {
+ RB_WARN_ON_RET(cpu_buffer,
+ page->list.next->prev != &page->list);
+ RB_WARN_ON_RET(cpu_buffer,
+ page->list.prev->next != &page->list);
+ }
+
+ return 0;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned nr_pages)
+{
+ struct list_head *head = &cpu_buffer->pages;
+ struct buffer_page *page, *tmp;
+ unsigned long addr;
+ LIST_HEAD(pages);
+ unsigned i;
+
+ for (i = 0; i < nr_pages; i++) {
+ page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+ GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
+ if (!page)
+ goto free_pages;
+ list_add(&page->list, &pages);
+
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ goto free_pages;
+ page->page = (void *)addr;
+ }
+
+ list_splice(&pages, head);
+
+ rb_check_pages(cpu_buffer);
+
+ return 0;
+
+ free_pages:
+ list_for_each_entry_safe(page, tmp, &pages, list) {
+ list_del_init(&page->list);
+ free_buffer_page(page);
+ }
+ return -ENOMEM;
+}
+
+static struct ring_buffer_per_cpu *
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *page;
+ unsigned long addr;
+ int ret;
+
+ cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!cpu_buffer)
+ return NULL;
+
+ cpu_buffer->cpu = cpu;
+ cpu_buffer->buffer = buffer;
+ spin_lock_init(&cpu_buffer->lock);
+ INIT_LIST_HEAD(&cpu_buffer->pages);
+
+ page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!page)
+ goto fail_free_buffer;
+
+ cpu_buffer->reader_page = page;
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ goto fail_free_reader;
+ page->page = (void *)addr;
+
+ INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+
+ ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+ if (ret < 0)
+ goto fail_free_reader;
+
+ cpu_buffer->head_page
+ = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+ cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
+
+ return cpu_buffer;
+
+ fail_free_reader:
+ free_buffer_page(cpu_buffer->reader_page);
+
+ fail_free_buffer:
+ kfree(cpu_buffer);
+ return NULL;
+}
+
+static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct list_head *head = &cpu_buffer->pages;
+ struct buffer_page *page, *tmp;
+
+ list_del_init(&cpu_buffer->reader_page->list);
+ free_buffer_page(cpu_buffer->reader_page);
+
+ list_for_each_entry_safe(page, tmp, head, list) {
+ list_del_init(&page->list);
+ free_buffer_page(page);
+ }
+ kfree(cpu_buffer);
+}
+
+/*
+ * Causes compile errors if the struct buffer_page gets bigger
+ * than the struct page.
+ */
+extern int ring_buffer_page_too_big(void);
+
+/**
+ * ring_buffer_alloc - allocate a new ring_buffer
+ * @size: the size in bytes that is needed.
+ * @flags: attributes to set for the ring buffer.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ */
+struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+{
+ struct ring_buffer *buffer;
+ int bsize;
+ int cpu;
+
+ /* Paranoid! Optimizes out when all is well */
+ if (sizeof(struct buffer_page) > sizeof(struct page))
+ ring_buffer_page_too_big();
+
+
+ /* keep it in its own cache line */
+ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+ GFP_KERNEL);
+ if (!buffer)
+ return NULL;
+
+ buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+ buffer->flags = flags;
+
+ /* need at least two pages */
+ if (buffer->pages == 1)
+ buffer->pages++;
+
+ buffer->cpumask = cpu_possible_map;
+ buffer->cpus = nr_cpu_ids;
+
+ bsize = sizeof(void *) * nr_cpu_ids;
+ buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
+ GFP_KERNEL);
+ if (!buffer->buffers)
+ goto fail_free_buffer;
+
+ for_each_buffer_cpu(buffer, cpu) {
+ buffer->buffers[cpu] =
+ rb_allocate_cpu_buffer(buffer, cpu);
+ if (!buffer->buffers[cpu])
+ goto fail_free_buffers;
+ }
+
+ mutex_init(&buffer->mutex);
+
+ return buffer;
+
+ fail_free_buffers:
+ for_each_buffer_cpu(buffer, cpu) {
+ if (buffer->buffers[cpu])
+ rb_free_cpu_buffer(buffer->buffers[cpu]);
+ }
+ kfree(buffer->buffers);
+
+ fail_free_buffer:
+ kfree(buffer);
+ return NULL;
+}
+
+/**
+ * ring_buffer_free - free a ring buffer.
+ * @buffer: the buffer to free.
+ */
+void
+ring_buffer_free(struct ring_buffer *buffer)
+{
+ int cpu;
+
+ for_each_buffer_cpu(buffer, cpu)
+ rb_free_cpu_buffer(buffer->buffers[cpu]);
+
+ kfree(buffer);
+}
+
+static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
+
+static void
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+{
+ struct buffer_page *page;
+ struct list_head *p;
+ unsigned i;
+
+ atomic_inc(&cpu_buffer->record_disabled);
+ synchronize_sched();
+
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(list_empty(&cpu_buffer->pages));
+ p = cpu_buffer->pages.next;
+ page = list_entry(p, struct buffer_page, list);
+ list_del_init(&page->list);
+ free_buffer_page(page);
+ }
+ BUG_ON(list_empty(&cpu_buffer->pages));
+
+ rb_reset_cpu(cpu_buffer);
+
+ rb_check_pages(cpu_buffer);
+
+ atomic_dec(&cpu_buffer->record_disabled);
+
+}
+
+static void
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ struct list_head *pages, unsigned nr_pages)
+{
+ struct buffer_page *page;
+ struct list_head *p;
+ unsigned i;
+
+ atomic_inc(&cpu_buffer->record_disabled);
+ synchronize_sched();
+
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(list_empty(pages));
+ p = pages->next;
+ page = list_entry(p, struct buffer_page, list);
+ list_del_init(&page->list);
+ list_add_tail(&page->list, &cpu_buffer->pages);
+ }
+ rb_reset_cpu(cpu_buffer);
+
+ rb_check_pages(cpu_buffer);
+
+ atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_resize - resize the ring buffer
+ * @buffer: the buffer to resize.
+ * @size: the new size.
+ *
+ * The tracer is responsible for making sure that the buffer is
+ * not being used while changing the size.
+ * Note: We may be able to change the above requirement by using
+ * RCU synchronizations.
+ *
+ * Minimum size is 2 * BUF_PAGE_SIZE.
+ *
+ * Returns -1 on failure.
+ */
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned nr_pages, rm_pages, new_pages;
+ struct buffer_page *page, *tmp;
+ unsigned long buffer_size;
+ unsigned long addr;
+ LIST_HEAD(pages);
+ int i, cpu;
+
+ /*
+ * Always succeed at resizing a non-existent buffer:
+ */
+ if (!buffer)
+ return size;
+
+ size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+ size *= BUF_PAGE_SIZE;
+ buffer_size = buffer->pages * BUF_PAGE_SIZE;
+
+ /* we need a minimum of two pages */
+ if (size < BUF_PAGE_SIZE * 2)
+ size = BUF_PAGE_SIZE * 2;
+
+ if (size == buffer_size)
+ return size;
+
+ mutex_lock(&buffer->mutex);
+
+ nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+
+ if (size < buffer_size) {
+
+ /* easy case, just free pages */
+ BUG_ON(nr_pages >= buffer->pages);
+
+ rm_pages = buffer->pages - nr_pages;
+
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ rb_remove_pages(cpu_buffer, rm_pages);
+ }
+ goto out;
+ }
+
+ /*
+ * This is a bit more difficult. We only want to add pages
+ * when we can allocate enough for all CPUs. We do this
+ * by allocating all the pages and storing them on a local
+ * link list. If we succeed in our allocation, then we
+ * add these pages to the cpu_buffers. Otherwise we just free
+ * them all and return -ENOMEM;
+ */
+ BUG_ON(nr_pages <= buffer->pages);
+ new_pages = nr_pages - buffer->pages;
+
+ for_each_buffer_cpu(buffer, cpu) {
+ for (i = 0; i < new_pages; i++) {
+ page = kzalloc_node(ALIGN(sizeof(*page),
+ cache_line_size()),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!page)
+ goto free_pages;
+ list_add(&page->list, &pages);
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ goto free_pages;
+ page->page = (void *)addr;
+ }
+ }
+
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ rb_insert_pages(cpu_buffer, &pages, new_pages);
+ }
+
+ BUG_ON(!list_empty(&pages));
+
+ out:
+ buffer->pages = nr_pages;
+ mutex_unlock(&buffer->mutex);
+
+ return size;
+
+ free_pages:
+ list_for_each_entry_safe(page, tmp, &pages, list) {
+ list_del_init(&page->list);
+ free_buffer_page(page);
+ }
+ mutex_unlock(&buffer->mutex);
+ return -ENOMEM;
+}
+
+static inline int rb_null_event(struct ring_buffer_event *event)
+{
+ return event->type == RINGBUF_TYPE_PADDING;
+}
+
+static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
+{
+ return page->page + index;
+}
+
+static inline struct ring_buffer_event *
+rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return __rb_page_index(cpu_buffer->reader_page,
+ cpu_buffer->reader_page->read);
+}
+
+static inline struct ring_buffer_event *
+rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return __rb_page_index(cpu_buffer->head_page,
+ cpu_buffer->head_page->read);
+}
+
+static inline struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
+{
+ return __rb_page_index(iter->head_page, iter->head);
+}
+
+static inline unsigned rb_page_write(struct buffer_page *bpage)
+{
+ return local_read(&bpage->write);
+}
+
+static inline unsigned rb_page_commit(struct buffer_page *bpage)
+{
+ return local_read(&bpage->commit);
+}
+
+/* Size is determined by what has been commited */
+static inline unsigned rb_page_size(struct buffer_page *bpage)
+{
+ return rb_page_commit(bpage);
+}
+
+static inline unsigned
+rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return rb_page_commit(cpu_buffer->commit_page);
+}
+
+static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return rb_page_commit(cpu_buffer->head_page);
+}
+
+/*
+ * When the tail hits the head and the buffer is in overwrite mode,
+ * the head jumps to the next page and all content on the previous
+ * page is discarded. But before doing so, we update the overrun
+ * variable of the buffer.
+ */
+static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct ring_buffer_event *event;
+ unsigned long head;
+
+ for (head = 0; head < rb_head_size(cpu_buffer);
+ head += rb_event_length(event)) {
+
+ event = __rb_page_index(cpu_buffer->head_page, head);
+ BUG_ON(rb_null_event(event));
+ /* Only count data entries */
+ if (event->type != RINGBUF_TYPE_DATA)
+ continue;
+ cpu_buffer->overrun++;
+ cpu_buffer->entries--;
+ }
+}
+
+static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page **page)
+{
+ struct list_head *p = (*page)->list.next;
+
+ if (p == &cpu_buffer->pages)
+ p = p->next;
+
+ *page = list_entry(p, struct buffer_page, list);
+}
+
+static inline unsigned
+rb_event_index(struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+
+ return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
+}
+
+static inline int
+rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+ unsigned long index;
+
+ index = rb_event_index(event);
+ addr &= PAGE_MASK;
+
+ return cpu_buffer->commit_page->page == (void *)addr &&
+ rb_commit_index(cpu_buffer) == index;
+}
+
+static inline void
+rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+ unsigned long index;
+
+ index = rb_event_index(event);
+ addr &= PAGE_MASK;
+
+ while (cpu_buffer->commit_page->page != (void *)addr) {
+ RB_WARN_ON(cpu_buffer,
+ cpu_buffer->commit_page == cpu_buffer->tail_page);
+ cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->write;
+ rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+ cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+ }
+
+ /* Now set the commit to the event's index */
+ local_set(&cpu_buffer->commit_page->commit, index);
+}
+
+static inline void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ /*
+ * We only race with interrupts and NMIs on this CPU.
+ * If we own the commit event, then we can commit
+ * all others that interrupted us, since the interruptions
+ * are in stack format (they finish before they come
+ * back to us). This allows us to do a simple loop to
+ * assign the commit to the tail.
+ */
+ again:
+ while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
+ cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->write;
+ rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+ cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+ /* add barrier to keep gcc from optimizing too much */
+ barrier();
+ }
+ while (rb_commit_index(cpu_buffer) !=
+ rb_page_write(cpu_buffer->commit_page)) {
+ cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->write;
+ barrier();
+ }
+
+ /* again, keep gcc from optimizing */
+ barrier();
+
+ /*
+ * If an interrupt came in just after the first while loop
+ * and pushed the tail page forward, we will be left with
+ * a dangling commit that will never go forward.
+ */
+ if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+ goto again;
+}
+
+static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+ cpu_buffer->reader_page->read = 0;
+}
+
+static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+ /*
+ * The iterator could be on the reader page (it starts there).
+ * But the head could have moved, since the reader was
+ * found. Check for this case and assign the iterator
+ * to the head page instead of next.
+ */
+ if (iter->head_page == cpu_buffer->reader_page)
+ iter->head_page = cpu_buffer->head_page;
+ else
+ rb_inc_page(cpu_buffer, &iter->head_page);
+
+ iter->read_stamp = iter->head_page->time_stamp;
+ iter->head = 0;
+}
+
+/**
+ * ring_buffer_update_event - update event type and data
+ * @event: the even to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static inline void
+rb_update_event(struct ring_buffer_event *event,
+ unsigned type, unsigned length)
+{
+ event->type = type;
+
+ switch (type) {
+
+ case RINGBUF_TYPE_PADDING:
+ break;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ event->len =
+ (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
+ >> RB_ALIGNMENT_SHIFT;
+ break;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ event->len =
+ (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
+ >> RB_ALIGNMENT_SHIFT;
+ break;
+
+ case RINGBUF_TYPE_DATA:
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA) {
+ event->len = 0;
+ event->array[0] = length;
+ } else
+ event->len =
+ (length + (RB_ALIGNMENT-1))
+ >> RB_ALIGNMENT_SHIFT;
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline unsigned rb_calculate_event_length(unsigned length)
+{
+ struct ring_buffer_event event; /* Used only for sizeof array */
+
+ /* zero length can cause confusions */
+ if (!length)
+ length = 1;
+
+ if (length > RB_MAX_SMALL_DATA)
+ length += sizeof(event.array[0]);
+
+ length += RB_EVNT_HDR_SIZE;
+ length = ALIGN(length, RB_ALIGNMENT);
+
+ return length;
+}
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned type, unsigned long length, u64 *ts)
+{
+ struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
+ unsigned long tail, write;
+ struct ring_buffer *buffer = cpu_buffer->buffer;
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ commit_page = cpu_buffer->commit_page;
+ /* we just need to protect against interrupts */
+ barrier();
+ tail_page = cpu_buffer->tail_page;
+ write = local_add_return(length, &tail_page->write);
+ tail = write - length;
+
+ /* See if we shot pass the end of this buffer page */
+ if (write > BUF_PAGE_SIZE) {
+ struct buffer_page *next_page = tail_page;
+
+ spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ rb_inc_page(cpu_buffer, &next_page);
+
+ head_page = cpu_buffer->head_page;
+ reader_page = cpu_buffer->reader_page;
+
+ /* we grabbed the lock before incrementing */
+ RB_WARN_ON(cpu_buffer, next_page == reader_page);
+
+ /*
+ * If for some reason, we had an interrupt storm that made
+ * it all the way around the buffer, bail, and warn
+ * about it.
+ */
+ if (unlikely(next_page == commit_page)) {
+ WARN_ON_ONCE(1);
+ goto out_unlock;
+ }
+
+ if (next_page == head_page) {
+ if (!(buffer->flags & RB_FL_OVERWRITE)) {
+ /* reset write */
+ if (tail <= BUF_PAGE_SIZE)
+ local_set(&tail_page->write, tail);
+ goto out_unlock;
+ }
+
+ /* tail_page has not moved yet? */
+ if (tail_page == cpu_buffer->tail_page) {
+ /* count overflows */
+ rb_update_overflow(cpu_buffer);
+
+ rb_inc_page(cpu_buffer, &head_page);
+ cpu_buffer->head_page = head_page;
+ cpu_buffer->head_page->read = 0;
+ }
+ }
+
+ /*
+ * If the tail page is still the same as what we think
+ * it is, then it is up to us to update the tail
+ * pointer.
+ */
+ if (tail_page == cpu_buffer->tail_page) {
+ local_set(&next_page->write, 0);
+ local_set(&next_page->commit, 0);
+ cpu_buffer->tail_page = next_page;
+
+ /* reread the time stamp */
+ *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+ cpu_buffer->tail_page->time_stamp = *ts;
+ }
+
+ /*
+ * The actual tail page has moved forward.
+ */
+ if (tail < BUF_PAGE_SIZE) {
+ /* Mark the rest of the page with padding */
+ event = __rb_page_index(tail_page, tail);
+ event->type = RINGBUF_TYPE_PADDING;
+ }
+
+ if (tail <= BUF_PAGE_SIZE)
+ /* Set the write back to the previous setting */
+ local_set(&tail_page->write, tail);
+
+ /*
+ * If this was a commit entry that failed,
+ * increment that too
+ */
+ if (tail_page == cpu_buffer->commit_page &&
+ tail == rb_commit_index(cpu_buffer)) {
+ rb_set_commit_to_write(cpu_buffer);
+ }
+
+ spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+ /* fail and let the caller try again */
+ return ERR_PTR(-EAGAIN);
+ }
+
+ /* We reserved something on the buffer */
+
+ BUG_ON(write > BUF_PAGE_SIZE);
+
+ event = __rb_page_index(tail_page, tail);
+ rb_update_event(event, type, length);
+
+ /*
+ * If this is a commit and the tail is zero, then update
+ * this page's time stamp.
+ */
+ if (!tail && rb_is_commit(cpu_buffer, event))
+ cpu_buffer->commit_page->time_stamp = *ts;
+
+ return event;
+
+ out_unlock:
+ spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ return NULL;
+}
+
+static int
+rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+ u64 *ts, u64 *delta)
+{
+ struct ring_buffer_event *event;
+ static int once;
+ int ret;
+
+ if (unlikely(*delta > (1ULL << 59) && !once++)) {
+ printk(KERN_WARNING "Delta way too big! %llu"
+ " ts=%llu write stamp = %llu\n",
+ (unsigned long long)*delta,
+ (unsigned long long)*ts,
+ (unsigned long long)cpu_buffer->write_stamp);
+ WARN_ON(1);
+ }
+
+ /*
+ * The delta is too big, we to add a
+ * new timestamp.
+ */
+ event = __rb_reserve_next(cpu_buffer,
+ RINGBUF_TYPE_TIME_EXTEND,
+ RB_LEN_TIME_EXTEND,
+ ts);
+ if (!event)
+ return -EBUSY;
+
+ if (PTR_ERR(event) == -EAGAIN)
+ return -EAGAIN;
+
+ /* Only a commited time event can update the write stamp */
+ if (rb_is_commit(cpu_buffer, event)) {
+ /*
+ * If this is the first on the page, then we need to
+ * update the page itself, and just put in a zero.
+ */
+ if (rb_event_index(event)) {
+ event->time_delta = *delta & TS_MASK;
+ event->array[0] = *delta >> TS_SHIFT;
+ } else {
+ cpu_buffer->commit_page->time_stamp = *ts;
+ event->time_delta = 0;
+ event->array[0] = 0;
+ }
+ cpu_buffer->write_stamp = *ts;
+ /* let the caller know this was the commit */
+ ret = 1;
+ } else {
+ /* Darn, this is just wasted space */
+ event->time_delta = 0;
+ event->array[0] = 0;
+ ret = 0;
+ }
+
+ *delta = 0;
+
+ return ret;
+}
+
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned type, unsigned long length)
+{
+ struct ring_buffer_event *event;
+ u64 ts, delta;
+ int commit = 0;
+ int nr_loops = 0;
+
+ again:
+ /*
+ * We allow for interrupts to reenter here and do a trace.
+ * If one does, it will cause this original code to loop
+ * back here. Even with heavy interrupts happening, this
+ * should only happen a few times in a row. If this happens
+ * 1000 times in a row, there must be either an interrupt
+ * storm or we have something buggy.
+ * Bail!
+ */
+ if (unlikely(++nr_loops > 1000)) {
+ RB_WARN_ON(cpu_buffer, 1);
+ return NULL;
+ }
+
+ ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+
+ /*
+ * Only the first commit can update the timestamp.
+ * Yes there is a race here. If an interrupt comes in
+ * just after the conditional and it traces too, then it
+ * will also check the deltas. More than one timestamp may
+ * also be made. But only the entry that did the actual
+ * commit will be something other than zero.
+ */
+ if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
+ rb_page_write(cpu_buffer->tail_page) ==
+ rb_commit_index(cpu_buffer)) {
+
+ delta = ts - cpu_buffer->write_stamp;
+
+ /* make sure this delta is calculated here */
+ barrier();
+
+ /* Did the write stamp get updated already? */
+ if (unlikely(ts < cpu_buffer->write_stamp))
+ delta = 0;
+
+ if (test_time_stamp(delta)) {
+
+ commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
+
+ if (commit == -EBUSY)
+ return NULL;
+
+ if (commit == -EAGAIN)
+ goto again;
+
+ RB_WARN_ON(cpu_buffer, commit < 0);
+ }
+ } else
+ /* Non commits have zero deltas */
+ delta = 0;
+
+ event = __rb_reserve_next(cpu_buffer, type, length, &ts);
+ if (PTR_ERR(event) == -EAGAIN)
+ goto again;
+
+ if (!event) {
+ if (unlikely(commit))
+ /*
+ * Ouch! We needed a timestamp and it was commited. But
+ * we didn't get our event reserved.
+ */
+ rb_set_commit_to_write(cpu_buffer);
+ return NULL;
+ }
+
+ /*
+ * If the timestamp was commited, make the commit our entry
+ * now so that we will update it when needed.
+ */
+ if (commit)
+ rb_set_commit_event(cpu_buffer, event);
+ else if (!rb_is_commit(cpu_buffer, event))
+ delta = 0;
+
+ event->time_delta = delta;
+
+ return event;
+}
+
+static DEFINE_PER_CPU(int, rb_need_resched);
+
+/**
+ * ring_buffer_lock_reserve - reserve a part of the buffer
+ * @buffer: the ring buffer to reserve from
+ * @length: the length of the data to reserve (excluding event header)
+ * @flags: a pointer to save the interrupt flags
+ *
+ * Returns a reseverd event on the ring buffer to copy directly to.
+ * The user of this interface will need to get the body to write into
+ * and can use the ring_buffer_event_data() interface.
+ *
+ * The length is the length of the data needed, not the event length
+ * which also includes the event header.
+ *
+ * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
+ * If NULL is returned, then nothing has been allocated or locked.
+ */
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+ unsigned long length,
+ unsigned long *flags)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ int cpu, resched;
+
+ if (ring_buffers_off)
+ return NULL;
+
+ if (atomic_read(&buffer->record_disabled))
+ return NULL;
+
+ /* If we are tracing schedule, we don't want to recurse */
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ cpu = raw_smp_processor_id();
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ goto out;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ if (atomic_read(&cpu_buffer->record_disabled))
+ goto out;
+
+ length = rb_calculate_event_length(length);
+ if (length > BUF_PAGE_SIZE)
+ goto out;
+
+ event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+ if (!event)
+ goto out;
+
+ /*
+ * Need to store resched state on this cpu.
+ * Only the first needs to.
+ */
+
+ if (preempt_count() == 1)
+ per_cpu(rb_need_resched, cpu) = resched;
+
+ return event;
+
+ out:
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+ return NULL;
+}
+
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ cpu_buffer->entries++;
+
+ /* Only process further if we own the commit */
+ if (!rb_is_commit(cpu_buffer, event))
+ return;
+
+ cpu_buffer->write_stamp += event->time_delta;
+
+ rb_set_commit_to_write(cpu_buffer);
+}
+
+/**
+ * ring_buffer_unlock_commit - commit a reserved
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ * @flags: the interrupt flags received from ring_buffer_lock_reserve.
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ unsigned long flags)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu = raw_smp_processor_id();
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ rb_commit(cpu_buffer, event);
+
+ /*
+ * Only the last preempt count needs to restore preemption.
+ */
+ if (preempt_count() == 1) {
+ if (per_cpu(rb_need_resched, cpu))
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+ } else
+ preempt_enable_no_resched_notrace();
+
+ return 0;
+}
+
+/**
+ * ring_buffer_write - write data to the buffer without reserving
+ * @buffer: The ring buffer to write to.
+ * @length: The length of the data being written (excluding the event header)
+ * @data: The data to write to the buffer.
+ *
+ * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
+ * one function. If you already have the data to write to the buffer, it
+ * may be easier to simply call this function.
+ *
+ * Note, like ring_buffer_lock_reserve, the length is the length of the data
+ * and not the length of the event which would hold the header.
+ */
+int ring_buffer_write(struct ring_buffer *buffer,
+ unsigned long length,
+ void *data)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ unsigned long event_length;
+ void *body;
+ int ret = -EBUSY;
+ int cpu, resched;
+
+ if (ring_buffers_off)
+ return -EBUSY;
+
+ if (atomic_read(&buffer->record_disabled))
+ return -EBUSY;
+
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ cpu = raw_smp_processor_id();
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ goto out;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ if (atomic_read(&cpu_buffer->record_disabled))
+ goto out;
+
+ event_length = rb_calculate_event_length(length);
+ event = rb_reserve_next_event(cpu_buffer,
+ RINGBUF_TYPE_DATA, event_length);
+ if (!event)
+ goto out;
+
+ body = rb_event_data(event);
+
+ memcpy(body, data, length);
+
+ rb_commit(cpu_buffer, event);
+
+ ret = 0;
+ out:
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+
+ return ret;
+}
+
+static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct buffer_page *reader = cpu_buffer->reader_page;
+ struct buffer_page *head = cpu_buffer->head_page;
+ struct buffer_page *commit = cpu_buffer->commit_page;
+
+ return reader->read == rb_page_commit(reader) &&
+ (commit == reader ||
+ (commit == head &&
+ head->read == rb_page_commit(commit)));
+}
+
+/**
+ * ring_buffer_record_disable - stop all writes into the buffer
+ * @buffer: The ring buffer to stop writes to.
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable(struct ring_buffer *buffer)
+{
+ atomic_inc(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truely enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable(struct ring_buffer *buffer)
+{
+ atomic_dec(&buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
+ * @buffer: The ring buffer to stop writes to.
+ * @cpu: The CPU buffer to stop
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return;
+
+ cpu_buffer = buffer->buffers[cpu];
+ atomic_inc(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_record_enable_cpu - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ * @cpu: The CPU to enable.
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truely enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return;
+
+ cpu_buffer = buffer->buffers[cpu];
+ atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the entries from.
+ */
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ return cpu_buffer->entries;
+}
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ return cpu_buffer->overrun;
+}
+
+/**
+ * ring_buffer_entries - get the number of entries in a buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of entries in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_entries(struct ring_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long entries = 0;
+ int cpu;
+
+ /* if you care about this being correct, lock the buffer */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ entries += cpu_buffer->entries;
+ }
+
+ return entries;
+}
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of overruns in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long overruns = 0;
+ int cpu;
+
+ /* if you care about this being correct, lock the buffer */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ overruns += cpu_buffer->overrun;
+ }
+
+ return overruns;
+}
+
+/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+ /* Iterator usage is expected to have record disabled */
+ if (list_empty(&cpu_buffer->reader_page->list)) {
+ iter->head_page = cpu_buffer->head_page;
+ iter->head = cpu_buffer->head_page->read;
+ } else {
+ iter->head_page = cpu_buffer->reader_page;
+ iter->head = cpu_buffer->reader_page->read;
+ }
+ if (iter->head)
+ iter->read_stamp = cpu_buffer->read_stamp;
+ else
+ iter->read_stamp = iter->head_page->time_stamp;
+}
+
+/**
+ * ring_buffer_iter_empty - check if an iterator has no more to read
+ * @iter: The iterator to check
+ */
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ cpu_buffer = iter->cpu_buffer;
+
+ return iter->head_page == cpu_buffer->commit_page &&
+ iter->head == rb_commit_index(cpu_buffer);
+}
+
+static void
+rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ u64 delta;
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ return;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ cpu_buffer->read_stamp += delta;
+ return;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ /* FIXME: not implemented */
+ return;
+
+ case RINGBUF_TYPE_DATA:
+ cpu_buffer->read_stamp += event->time_delta;
+ return;
+
+ default:
+ BUG();
+ }
+ return;
+}
+
+static void
+rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
+ struct ring_buffer_event *event)
+{
+ u64 delta;
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ return;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ iter->read_stamp += delta;
+ return;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ /* FIXME: not implemented */
+ return;
+
+ case RINGBUF_TYPE_DATA:
+ iter->read_stamp += event->time_delta;
+ return;
+
+ default:
+ BUG();
+ }
+ return;
+}
+
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct buffer_page *reader = NULL;
+ unsigned long flags;
+ int nr_loops = 0;
+
+ spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ again:
+ /*
+ * This should normally only loop twice. But because the
+ * start of the reader inserts an empty page, it causes
+ * a case where we will loop three times. There should be no
+ * reason to loop four times (that I know of).
+ */
+ if (unlikely(++nr_loops > 3)) {
+ RB_WARN_ON(cpu_buffer, 1);
+ reader = NULL;
+ goto out;
+ }
+
+ reader = cpu_buffer->reader_page;
+
+ /* If there's more to read, return this page */
+ if (cpu_buffer->reader_page->read < rb_page_size(reader))
+ goto out;
+
+ /* Never should we have an index greater than the size */
+ RB_WARN_ON(cpu_buffer,
+ cpu_buffer->reader_page->read > rb_page_size(reader));
+
+ /* check if we caught up to the tail */
+ reader = NULL;
+ if (cpu_buffer->commit_page == cpu_buffer->reader_page)
+ goto out;
+
+ /*
+ * Splice the empty reader page into the list around the head.
+ * Reset the reader page to size zero.
+ */
+
+ reader = cpu_buffer->head_page;
+ cpu_buffer->reader_page->list.next = reader->list.next;
+ cpu_buffer->reader_page->list.prev = reader->list.prev;
+
+ local_set(&cpu_buffer->reader_page->write, 0);
+ local_set(&cpu_buffer->reader_page->commit, 0);
+
+ /* Make the reader page now replace the head */
+ reader->list.prev->next = &cpu_buffer->reader_page->list;
+ reader->list.next->prev = &cpu_buffer->reader_page->list;
+
+ /*
+ * If the tail is on the reader, then we must set the head
+ * to the inserted page, otherwise we set it one before.
+ */
+ cpu_buffer->head_page = cpu_buffer->reader_page;
+
+ if (cpu_buffer->commit_page != reader)
+ rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+
+ /* Finally update the reader page to the new head */
+ cpu_buffer->reader_page = reader;
+ rb_reset_reader_page(cpu_buffer);
+
+ goto again;
+
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+ return reader;
+}
+
+static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct ring_buffer_event *event;
+ struct buffer_page *reader;
+ unsigned length;
+
+ reader = rb_get_reader_page(cpu_buffer);
+
+ /* This function should not be called when buffer is empty */
+ BUG_ON(!reader);
+
+ event = rb_reader_event(cpu_buffer);
+
+ if (event->type == RINGBUF_TYPE_DATA)
+ cpu_buffer->entries--;
+
+ rb_update_read_stamp(cpu_buffer, event);
+
+ length = rb_event_length(event);
+ cpu_buffer->reader_page->read += length;
+}
+
+static void rb_advance_iter(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer *buffer;
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ unsigned length;
+
+ cpu_buffer = iter->cpu_buffer;
+ buffer = cpu_buffer->buffer;
+
+ /*
+ * Check if we are at the end of the buffer.
+ */
+ if (iter->head >= rb_page_size(iter->head_page)) {
+ BUG_ON(iter->head_page == cpu_buffer->commit_page);
+ rb_inc_iter(iter);
+ return;
+ }
+
+ event = rb_iter_head_event(iter);
+
+ length = rb_event_length(event);
+
+ /*
+ * This should not be called to advance the header if we are
+ * at the tail of the buffer.
+ */
+ BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
+ (iter->head + length > rb_commit_index(cpu_buffer)));
+
+ rb_update_iter_read_stamp(iter, event);
+
+ iter->head += length;
+
+ /* check for end of page padding */
+ if ((iter->head >= rb_page_size(iter->head_page)) &&
+ (iter->head_page != cpu_buffer->commit_page))
+ rb_advance_iter(iter);
+}
+
+/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peak at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ struct buffer_page *reader;
+ int nr_loops = 0;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return NULL;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ again:
+ /*
+ * We repeat when a timestamp is encountered. It is possible
+ * to get multiple timestamps from an interrupt entering just
+ * as one timestamp is about to be written. The max times
+ * that this can happen is the number of nested interrupts we
+ * can have. Nesting 10 deep of interrupts is clearly
+ * an anomaly.
+ */
+ if (unlikely(++nr_loops > 10)) {
+ RB_WARN_ON(cpu_buffer, 1);
+ return NULL;
+ }
+
+ reader = rb_get_reader_page(cpu_buffer);
+ if (!reader)
+ return NULL;
+
+ event = rb_reader_event(cpu_buffer);
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ RB_WARN_ON(cpu_buffer, 1);
+ rb_advance_reader(cpu_buffer);
+ return NULL;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ /* Internal data, OK to advance */
+ rb_advance_reader(cpu_buffer);
+ goto again;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ /* FIXME: not implemented */
+ rb_advance_reader(cpu_buffer);
+ goto again;
+
+ case RINGBUF_TYPE_DATA:
+ if (ts) {
+ *ts = cpu_buffer->read_stamp + event->time_delta;
+ ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+ }
+ return event;
+
+ default:
+ BUG();
+ }
+
+ return NULL;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+ struct ring_buffer *buffer;
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ int nr_loops = 0;
+
+ if (ring_buffer_iter_empty(iter))
+ return NULL;
+
+ cpu_buffer = iter->cpu_buffer;
+ buffer = cpu_buffer->buffer;
+
+ again:
+ /*
+ * We repeat when a timestamp is encountered. It is possible
+ * to get multiple timestamps from an interrupt entering just
+ * as one timestamp is about to be written. The max times
+ * that this can happen is the number of nested interrupts we
+ * can have. Nesting 10 deep of interrupts is clearly
+ * an anomaly.
+ */
+ if (unlikely(++nr_loops > 10)) {
+ RB_WARN_ON(cpu_buffer, 1);
+ return NULL;
+ }
+
+ if (rb_per_cpu_empty(cpu_buffer))
+ return NULL;
+
+ event = rb_iter_head_event(iter);
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ rb_inc_iter(iter);
+ goto again;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ /* Internal data, OK to advance */
+ rb_advance_iter(iter);
+ goto again;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ /* FIXME: not implemented */
+ rb_advance_iter(iter);
+ goto again;
+
+ case RINGBUF_TYPE_DATA:
+ if (ts) {
+ *ts = iter->read_stamp + event->time_delta;
+ ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+ }
+ return event;
+
+ default:
+ BUG();
+ }
+
+ return NULL;
+}
+
+/**
+ * ring_buffer_consume - return an event and consume it
+ * @buffer: The ring buffer to get the next event from
+ *
+ * Returns the next event in the ring buffer, and that event is consumed.
+ * Meaning, that sequential reads will keep returning a different event,
+ * and eventually empty the ring buffer if the producer is slower.
+ */
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return NULL;
+
+ event = ring_buffer_peek(buffer, cpu, ts);
+ if (!event)
+ return NULL;
+
+ cpu_buffer = buffer->buffers[cpu];
+ rb_advance_reader(cpu_buffer);
+
+ return event;
+}
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @buffer: The ring buffer to read from
+ * @cpu: The cpu buffer to iterate over
+ *
+ * This starts up an iteration through the buffer. It also disables
+ * the recording to the buffer until the reading is finished.
+ * This prevents the reading from being corrupted. This is not
+ * a consuming read, so a producer is not expected.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_iter *iter;
+ unsigned long flags;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return NULL;
+
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ iter->cpu_buffer = cpu_buffer;
+
+ atomic_inc(&cpu_buffer->record_disabled);
+ synchronize_sched();
+
+ spin_lock_irqsave(&cpu_buffer->lock, flags);
+ ring_buffer_iter_reset(iter);
+ spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+ return iter;
+}
+
+/**
+ * ring_buffer_finish - finish reading the iterator of the buffer
+ * @iter: The iterator retrieved by ring_buffer_start
+ *
+ * This re-enables the recording to the buffer, and frees the
+ * iterator.
+ */
+void
+ring_buffer_read_finish(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+ atomic_dec(&cpu_buffer->record_disabled);
+ kfree(iter);
+}
+
+/**
+ * ring_buffer_read - read the next item in the ring buffer by the iterator
+ * @iter: The ring buffer iterator
+ * @ts: The time stamp of the event read.
+ *
+ * This reads the next event in the ring buffer and increments the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
+{
+ struct ring_buffer_event *event;
+
+ event = ring_buffer_iter_peek(iter, ts);
+ if (!event)
+ return NULL;
+
+ rb_advance_iter(iter);
+
+ return event;
+}
+
+/**
+ * ring_buffer_size - return the size of the ring buffer (in bytes)
+ * @buffer: The ring buffer.
+ */
+unsigned long ring_buffer_size(struct ring_buffer *buffer)
+{
+ return BUF_PAGE_SIZE * buffer->pages;
+}
+
+static void
+rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ cpu_buffer->head_page
+ = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+ local_set(&cpu_buffer->head_page->write, 0);
+ local_set(&cpu_buffer->head_page->commit, 0);
+
+ cpu_buffer->head_page->read = 0;
+
+ cpu_buffer->tail_page = cpu_buffer->head_page;
+ cpu_buffer->commit_page = cpu_buffer->head_page;
+
+ INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+ local_set(&cpu_buffer->reader_page->write, 0);
+ local_set(&cpu_buffer->reader_page->commit, 0);
+ cpu_buffer->reader_page->read = 0;
+
+ cpu_buffer->overrun = 0;
+ cpu_buffer->entries = 0;
+}
+
+/**
+ * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
+ * @buffer: The ring buffer to reset a per cpu buffer of
+ * @cpu: The CPU buffer to be reset
+ */
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ unsigned long flags;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return;
+
+ spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ rb_reset_cpu(cpu_buffer);
+
+ spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+}
+
+/**
+ * ring_buffer_reset - reset a ring buffer
+ * @buffer: The ring buffer to reset all cpu buffers
+ */
+void ring_buffer_reset(struct ring_buffer *buffer)
+{
+ int cpu;
+
+ for_each_buffer_cpu(buffer, cpu)
+ ring_buffer_reset_cpu(buffer, cpu);
+}
+
+/**
+ * rind_buffer_empty - is the ring buffer empty?
+ * @buffer: The ring buffer to test
+ */
+int ring_buffer_empty(struct ring_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu;
+
+ /* yes this is racy, but if you don't like the race, lock the buffer */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ if (!rb_per_cpu_empty(cpu_buffer))
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
+ * @buffer: The ring buffer
+ * @cpu: The CPU buffer to test
+ */
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return 1;
+
+ cpu_buffer = buffer->buffers[cpu];
+ return rb_per_cpu_empty(cpu_buffer);
+}
+
+/**
+ * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
+ * @buffer_a: One buffer to swap with
+ * @buffer_b: The other buffer to swap with
+ *
+ * This function is useful for tracers that want to take a "snapshot"
+ * of a CPU buffer and has another back up buffer lying around.
+ * it is expected that the tracer handles the cpu buffer not being
+ * used at the moment.
+ */
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+ struct ring_buffer *buffer_b, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer_a;
+ struct ring_buffer_per_cpu *cpu_buffer_b;
+
+ if (!cpu_isset(cpu, buffer_a->cpumask) ||
+ !cpu_isset(cpu, buffer_b->cpumask))
+ return -EINVAL;
+
+ /* At least make sure the two buffers are somewhat the same */
+ if (buffer_a->size != buffer_b->size ||
+ buffer_a->pages != buffer_b->pages)
+ return -EINVAL;
+
+ cpu_buffer_a = buffer_a->buffers[cpu];
+ cpu_buffer_b = buffer_b->buffers[cpu];
+
+ /*
+ * We can't do a synchronize_sched here because this
+ * function can be called in atomic context.
+ * Normally this will be called from the same CPU as cpu.
+ * If not it's up to the caller to protect this.
+ */
+ atomic_inc(&cpu_buffer_a->record_disabled);
+ atomic_inc(&cpu_buffer_b->record_disabled);
+
+ buffer_a->buffers[cpu] = cpu_buffer_b;
+ buffer_b->buffers[cpu] = cpu_buffer_a;
+
+ cpu_buffer_b->buffer = buffer_a;
+ cpu_buffer_a->buffer = buffer_b;
+
+ atomic_dec(&cpu_buffer_a->record_disabled);
+ atomic_dec(&cpu_buffer_b->record_disabled);
+
+ return 0;
+}
+
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ int *p = filp->private_data;
+ char buf[64];
+ int r;
+
+ /* !ring_buffers_off == tracing_on */
+ r = sprintf(buf, "%d\n", !*p);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ int *p = filp->private_data;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ /* !ring_buffers_off == tracing_on */
+ *p = !val;
+
+ (*ppos)++;
+
+ return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+ .open = tracing_open_generic,
+ .read = rb_simple_read,
+ .write = rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+ &ring_buffers_off, &rb_simple_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+ return 0;
+}
+
+fs_initcall(rb_init_debugfs);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
new file mode 100644
index 0000000..d86e325
--- /dev/null
+++ b/kernel/trace/trace.c
@@ -0,0 +1,3235 @@
+/*
+ * ring buffer based function tracer
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * Originally taken from the RT patch by:
+ * Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Based on code from the latency_tracer, that is:
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/utsrelease.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/notifier.h>
+#include <linux/debugfs.h>
+#include <linux/pagemap.h>
+#include <linux/hardirq.h>
+#include <linux/linkage.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/kprobes.h>
+#include <linux/writeback.h>
+
+#include <linux/stacktrace.h>
+#include <linux/ring_buffer.h>
+#include <linux/irqflags.h>
+
+#include "trace.h"
+
+#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
+
+unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
+unsigned long __read_mostly tracing_thresh;
+
+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+ preempt_disable();
+ local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+ local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+ preempt_enable();
+}
+
+static cpumask_t __read_mostly tracing_buffer_mask;
+
+#define for_each_tracing_cpu(cpu) \
+ for_each_cpu_mask(cpu, tracing_buffer_mask)
+
+static int tracing_disabled = 1;
+
+long
+ns2usecs(cycle_t nsec)
+{
+ nsec += 500;
+ do_div(nsec, 1000);
+ return nsec;
+}
+
+cycle_t ftrace_now(int cpu)
+{
+ u64 ts = ring_buffer_time_stamp(cpu);
+ ring_buffer_normalize_time_stamp(cpu, &ts);
+ return ts;
+}
+
+/*
+ * The global_trace is the descriptor that holds the tracing
+ * buffers for the live tracing. For each CPU, it contains
+ * a link list of pages that will store trace entries. The
+ * page descriptor of the pages in the memory is used to hold
+ * the link list by linking the lru item in the page descriptor
+ * to each of the pages in the buffer per CPU.
+ *
+ * For each active CPU there is a data field that holds the
+ * pages for the buffer for that CPU. Each CPU has the same number
+ * of pages allocated for its buffer.
+ */
+static struct trace_array global_trace;
+
+static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
+
+/*
+ * The max_tr is used to snapshot the global_trace when a maximum
+ * latency is reached. Some tracers will use this to store a maximum
+ * trace while it continues examining live traces.
+ *
+ * The buffers for the max_tr are set up the same as the global_trace.
+ * When a snapshot is taken, the link list of the max_tr is swapped
+ * with the link list of the global_trace and the buffers are reset for
+ * the global_trace so the tracing can continue.
+ */
+static struct trace_array max_tr;
+
+static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
+
+/* tracer_enabled is used to toggle activation of a tracer */
+static int tracer_enabled = 1;
+
+/* function tracing enabled */
+int ftrace_function_enabled;
+
+/*
+ * trace_buf_size is the size in bytes that is allocated
+ * for a buffer. Note, the number of bytes is always rounded
+ * to page size.
+ *
+ * This number is purposely set to a low number of 16384.
+ * If the dump on oops happens, it will be much appreciated
+ * to not have to wait for all that output. Anyway this can be
+ * boot time and run time configurable.
+ */
+#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
+
+static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
+
+/* trace_types holds a link list of available tracers. */
+static struct tracer *trace_types __read_mostly;
+
+/* current_trace points to the tracer that is currently active */
+static struct tracer *current_trace __read_mostly;
+
+/*
+ * max_tracer_type_len is used to simplify the allocating of
+ * buffers to read userspace tracer names. We keep track of
+ * the longest tracer name registered.
+ */
+static int max_tracer_type_len;
+
+/*
+ * trace_types_lock is used to protect the trace_types list.
+ * This lock is also used to keep user access serialized.
+ * Accesses from userspace will grab this lock while userspace
+ * activities happen inside the kernel.
+ */
+static DEFINE_MUTEX(trace_types_lock);
+
+/* trace_wait is a waitqueue for tasks blocked on trace_poll */
+static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
+
+/* trace_flags holds iter_ctrl options */
+unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
+
+/**
+ * trace_wake_up - wake up tasks waiting for trace input
+ *
+ * Simply wakes up any task that is blocked on the trace_wait
+ * queue. These is used with trace_poll for tasks polling the trace.
+ */
+void trace_wake_up(void)
+{
+ /*
+ * The runqueue_is_locked() can fail, but this is the best we
+ * have for now:
+ */
+ if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
+ wake_up(&trace_wait);
+}
+
+static int __init set_buf_size(char *str)
+{
+ unsigned long buf_size;
+ int ret;
+
+ if (!str)
+ return 0;
+ ret = strict_strtoul(str, 0, &buf_size);
+ /* nr_entries can not be zero */
+ if (ret < 0 || buf_size == 0)
+ return 0;
+ trace_buf_size = buf_size;
+ return 1;
+}
+__setup("trace_buf_size=", set_buf_size);
+
+unsigned long nsecs_to_usecs(unsigned long nsecs)
+{
+ return nsecs / 1000;
+}
+
+/*
+ * TRACE_ITER_SYM_MASK masks the options in trace_flags that
+ * control the output of kernel symbols.
+ */
+#define TRACE_ITER_SYM_MASK \
+ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
+
+/* These must match the bit postions in trace_iterator_flags */
+static const char *trace_options[] = {
+ "print-parent",
+ "sym-offset",
+ "sym-addr",
+ "verbose",
+ "raw",
+ "hex",
+ "bin",
+ "block",
+ "stacktrace",
+ "sched-tree",
+ "ftrace_printk",
+ NULL
+};
+
+/*
+ * ftrace_max_lock is used to protect the swapping of buffers
+ * when taking a max snapshot. The buffers themselves are
+ * protected by per_cpu spinlocks. But the action of the swap
+ * needs its own lock.
+ *
+ * This is defined as a raw_spinlock_t in order to help
+ * with performance when lockdep debugging is enabled.
+ */
+static raw_spinlock_t ftrace_max_lock =
+ (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+/*
+ * Copy the new maximum trace into the separate maximum-trace
+ * structure. (this way the maximum trace is permanently saved,
+ * for later retrieval via /debugfs/tracing/latency_trace)
+ */
+static void
+__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+ struct trace_array_cpu *data = tr->data[cpu];
+
+ max_tr.cpu = cpu;
+ max_tr.time_start = data->preempt_timestamp;
+
+ data = max_tr.data[cpu];
+ data->saved_latency = tracing_max_latency;
+
+ memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
+ data->pid = tsk->pid;
+ data->uid = tsk->uid;
+ data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
+ data->policy = tsk->policy;
+ data->rt_priority = tsk->rt_priority;
+
+ /* record this tasks comm */
+ tracing_record_cmdline(current);
+}
+
+/**
+ * trace_seq_printf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+ int len = (PAGE_SIZE - 1) - s->len;
+ va_list ap;
+ int ret;
+
+ if (!len)
+ return 0;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
+ va_end(ap);
+
+ /* If we can't write it all, don't bother writing anything */
+ if (ret >= len)
+ return 0;
+
+ s->len += ret;
+
+ return len;
+}
+
+/**
+ * trace_seq_puts - trace sequence printing of simple string
+ * @s: trace sequence descriptor
+ * @str: simple string to record
+ *
+ * The tracer may use either the sequence operations or its own
+ * copy to user routines. This function records a simple string
+ * into a special buffer (@s) for later retrieval by a sequencer
+ * or other mechanism.
+ */
+static int
+trace_seq_puts(struct trace_seq *s, const char *str)
+{
+ int len = strlen(str);
+
+ if (len > ((PAGE_SIZE - 1) - s->len))
+ return 0;
+
+ memcpy(s->buffer + s->len, str, len);
+ s->len += len;
+
+ return len;
+}
+
+static int
+trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+ if (s->len >= (PAGE_SIZE - 1))
+ return 0;
+
+ s->buffer[s->len++] = c;
+
+ return 1;
+}
+
+static int
+trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
+{
+ if (len > ((PAGE_SIZE - 1) - s->len))
+ return 0;
+
+ memcpy(s->buffer + s->len, mem, len);
+ s->len += len;
+
+ return len;
+}
+
+#define MAX_MEMHEX_BYTES 8
+#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
+
+static int
+trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
+{
+ unsigned char hex[HEX_CHARS];
+ unsigned char *data = mem;
+ int i, j;
+
+#ifdef __BIG_ENDIAN
+ for (i = 0, j = 0; i < len; i++) {
+#else
+ for (i = len-1, j = 0; i >= 0; i--) {
+#endif
+ hex[j++] = hex_asc_hi(data[i]);
+ hex[j++] = hex_asc_lo(data[i]);
+ }
+ hex[j++] = ' ';
+
+ return trace_seq_putmem(s, hex, j);
+}
+
+static void
+trace_seq_reset(struct trace_seq *s)
+{
+ s->len = 0;
+ s->readpos = 0;
+}
+
+ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
+{
+ int len;
+ int ret;
+
+ if (s->len <= s->readpos)
+ return -EBUSY;
+
+ len = s->len - s->readpos;
+ if (cnt > len)
+ cnt = len;
+ ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
+ if (ret)
+ return -EFAULT;
+
+ s->readpos += len;
+ return cnt;
+}
+
+static void
+trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+ int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+
+ s->buffer[len] = 0;
+ seq_puts(m, s->buffer);
+
+ trace_seq_reset(s);
+}
+
+/**
+ * update_max_tr - snapshot all trace buffers from global_trace to max_tr
+ * @tr: tracer
+ * @tsk: the task with the latency
+ * @cpu: The cpu that initiated the trace.
+ *
+ * Flip the buffers between the @tr and the max_tr and record information
+ * about which task was the cause of this latency.
+ */
+void
+update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+ struct ring_buffer *buf = tr->buffer;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ __raw_spin_lock(&ftrace_max_lock);
+
+ tr->buffer = max_tr.buffer;
+ max_tr.buffer = buf;
+
+ ftrace_disable_cpu();
+ ring_buffer_reset(tr->buffer);
+ ftrace_enable_cpu();
+
+ __update_max_tr(tr, tsk, cpu);
+ __raw_spin_unlock(&ftrace_max_lock);
+}
+
+/**
+ * update_max_tr_single - only copy one trace over, and reset the rest
+ * @tr - tracer
+ * @tsk - task with the latency
+ * @cpu - the cpu of the buffer to copy.
+ *
+ * Flip the trace of a single CPU buffer between the @tr and the max_tr.
+ */
+void
+update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+ int ret;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ __raw_spin_lock(&ftrace_max_lock);
+
+ ftrace_disable_cpu();
+
+ ring_buffer_reset(max_tr.buffer);
+ ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+
+ ftrace_enable_cpu();
+
+ WARN_ON_ONCE(ret);
+
+ __update_max_tr(tr, tsk, cpu);
+ __raw_spin_unlock(&ftrace_max_lock);
+}
+
+/**
+ * register_tracer - register a tracer with the ftrace system.
+ * @type - the plugin for the tracer
+ *
+ * Register a new plugin tracer.
+ */
+int register_tracer(struct tracer *type)
+{
+ struct tracer *t;
+ int len;
+ int ret = 0;
+
+ if (!type->name) {
+ pr_info("Tracer must have a name\n");
+ return -1;
+ }
+
+ mutex_lock(&trace_types_lock);
+ for (t = trace_types; t; t = t->next) {
+ if (strcmp(type->name, t->name) == 0) {
+ /* already found */
+ pr_info("Trace %s already registered\n",
+ type->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ if (type->selftest) {
+ struct tracer *saved_tracer = current_trace;
+ struct trace_array *tr = &global_trace;
+ int saved_ctrl = tr->ctrl;
+ int i;
+ /*
+ * Run a selftest on this tracer.
+ * Here we reset the trace buffer, and set the current
+ * tracer to be this tracer. The tracer can then run some
+ * internal tracing to verify that everything is in order.
+ * If we fail, we do not register this tracer.
+ */
+ for_each_tracing_cpu(i) {
+ tracing_reset(tr, i);
+ }
+ current_trace = type;
+ tr->ctrl = 0;
+ /* the test is responsible for initializing and enabling */
+ pr_info("Testing tracer %s: ", type->name);
+ ret = type->selftest(type, tr);
+ /* the test is responsible for resetting too */
+ current_trace = saved_tracer;
+ tr->ctrl = saved_ctrl;
+ if (ret) {
+ printk(KERN_CONT "FAILED!\n");
+ goto out;
+ }
+ /* Only reset on passing, to avoid touching corrupted buffers */
+ for_each_tracing_cpu(i) {
+ tracing_reset(tr, i);
+ }
+ printk(KERN_CONT "PASSED\n");
+ }
+#endif
+
+ type->next = trace_types;
+ trace_types = type;
+ len = strlen(type->name);
+ if (len > max_tracer_type_len)
+ max_tracer_type_len = len;
+
+ out:
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
+
+void unregister_tracer(struct tracer *type)
+{
+ struct tracer **t;
+ int len;
+
+ mutex_lock(&trace_types_lock);
+ for (t = &trace_types; *t; t = &(*t)->next) {
+ if (*t == type)
+ goto found;
+ }
+ pr_info("Trace %s not registered\n", type->name);
+ goto out;
+
+ found:
+ *t = (*t)->next;
+ if (strlen(type->name) != max_tracer_type_len)
+ goto out;
+
+ max_tracer_type_len = 0;
+ for (t = &trace_types; *t; t = &(*t)->next) {
+ len = strlen((*t)->name);
+ if (len > max_tracer_type_len)
+ max_tracer_type_len = len;
+ }
+ out:
+ mutex_unlock(&trace_types_lock);
+}
+
+void tracing_reset(struct trace_array *tr, int cpu)
+{
+ ftrace_disable_cpu();
+ ring_buffer_reset_cpu(tr->buffer, cpu);
+ ftrace_enable_cpu();
+}
+
+#define SAVED_CMDLINES 128
+static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
+static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
+static int cmdline_idx;
+static DEFINE_SPINLOCK(trace_cmdline_lock);
+
+/* temporary disable recording */
+atomic_t trace_record_cmdline_disabled __read_mostly;
+
+static void trace_init_cmdlines(void)
+{
+ memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
+ memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
+ cmdline_idx = 0;
+}
+
+void trace_stop_cmdline_recording(void);
+
+static void trace_save_cmdline(struct task_struct *tsk)
+{
+ unsigned map;
+ unsigned idx;
+
+ if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ return;
+
+ /*
+ * It's not the end of the world if we don't get
+ * the lock, but we also don't want to spin
+ * nor do we want to disable interrupts,
+ * so if we miss here, then better luck next time.
+ */
+ if (!spin_trylock(&trace_cmdline_lock))
+ return;
+
+ idx = map_pid_to_cmdline[tsk->pid];
+ if (idx >= SAVED_CMDLINES) {
+ idx = (cmdline_idx + 1) % SAVED_CMDLINES;
+
+ map = map_cmdline_to_pid[idx];
+ if (map <= PID_MAX_DEFAULT)
+ map_pid_to_cmdline[map] = (unsigned)-1;
+
+ map_pid_to_cmdline[tsk->pid] = idx;
+
+ cmdline_idx = idx;
+ }
+
+ memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+
+ spin_unlock(&trace_cmdline_lock);
+}
+
+static char *trace_find_cmdline(int pid)
+{
+ char *cmdline = "<...>";
+ unsigned map;
+
+ if (!pid)
+ return "<idle>";
+
+ if (pid > PID_MAX_DEFAULT)
+ goto out;
+
+ map = map_pid_to_cmdline[pid];
+ if (map >= SAVED_CMDLINES)
+ goto out;
+
+ cmdline = saved_cmdlines[map];
+
+ out:
+ return cmdline;
+}
+
+void tracing_record_cmdline(struct task_struct *tsk)
+{
+ if (atomic_read(&trace_record_cmdline_disabled))
+ return;
+
+ trace_save_cmdline(tsk);
+}
+
+void
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
+ int pc)
+{
+ struct task_struct *tsk = current;
+
+ entry->preempt_count = pc & 0xff;
+ entry->pid = (tsk) ? tsk->pid : 0;
+ entry->flags =
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+ (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+#else
+ TRACE_FLAG_IRQS_NOSUPPORT |
+#endif
+ ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
+ ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
+ (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
+}
+
+void
+trace_function(struct trace_array *tr, struct trace_array_cpu *data,
+ unsigned long ip, unsigned long parent_ip, unsigned long flags,
+ int pc)
+{
+ struct ring_buffer_event *event;
+ struct ftrace_entry *entry;
+ unsigned long irq_flags;
+
+ /* If we are reading the ring buffer, don't trace */
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_FN;
+ entry->ip = ip;
+ entry->parent_ip = parent_ip;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+}
+
+void
+ftrace(struct trace_array *tr, struct trace_array_cpu *data,
+ unsigned long ip, unsigned long parent_ip, unsigned long flags,
+ int pc)
+{
+ if (likely(!atomic_read(&data->disabled)))
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+}
+
+static void ftrace_trace_stack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags,
+ int skip, int pc)
+{
+#ifdef CONFIG_STACKTRACE
+ struct ring_buffer_event *event;
+ struct stack_entry *entry;
+ struct stack_trace trace;
+ unsigned long irq_flags;
+
+ if (!(trace_flags & TRACE_ITER_STACKTRACE))
+ return;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_STACK;
+
+ memset(&entry->caller, 0, sizeof(entry->caller));
+
+ trace.nr_entries = 0;
+ trace.max_entries = FTRACE_STACK_ENTRIES;
+ trace.skip = skip;
+ trace.entries = entry->caller;
+
+ save_stack_trace(&trace);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+#endif
+}
+
+void __trace_stack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags,
+ int skip)
+{
+ ftrace_trace_stack(tr, data, flags, skip, preempt_count());
+}
+
+static void
+ftrace_trace_special(void *__tr, void *__data,
+ unsigned long arg1, unsigned long arg2, unsigned long arg3,
+ int pc)
+{
+ struct ring_buffer_event *event;
+ struct trace_array_cpu *data = __data;
+ struct trace_array *tr = __tr;
+ struct special_entry *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, pc);
+ entry->ent.type = TRACE_SPECIAL;
+ entry->arg1 = arg1;
+ entry->arg2 = arg2;
+ entry->arg3 = arg3;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+
+ trace_wake_up();
+}
+
+void
+__trace_special(void *__tr, void *__data,
+ unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+ ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
+}
+
+void
+tracing_sched_switch_trace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long flags, int pc)
+{
+ struct ring_buffer_event *event;
+ struct ctx_switch_entry *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_CTX;
+ entry->prev_pid = prev->pid;
+ entry->prev_prio = prev->prio;
+ entry->prev_state = prev->state;
+ entry->next_pid = next->pid;
+ entry->next_prio = next->prio;
+ entry->next_state = next->state;
+ entry->next_cpu = task_cpu(next);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ftrace_trace_stack(tr, data, flags, 5, pc);
+}
+
+void
+tracing_sched_wakeup_trace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct task_struct *wakee,
+ struct task_struct *curr,
+ unsigned long flags, int pc)
+{
+ struct ring_buffer_event *event;
+ struct ctx_switch_entry *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_WAKE;
+ entry->prev_pid = curr->pid;
+ entry->prev_prio = curr->prio;
+ entry->prev_state = curr->state;
+ entry->next_pid = wakee->pid;
+ entry->next_prio = wakee->prio;
+ entry->next_state = wakee->state;
+ entry->next_cpu = task_cpu(wakee);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ftrace_trace_stack(tr, data, flags, 6, pc);
+
+ trace_wake_up();
+}
+
+void
+ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ int cpu;
+ int pc;
+
+ if (tracing_disabled || !tr->ctrl)
+ return;
+
+ pc = preempt_count();
+ preempt_disable_notrace();
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+
+ if (likely(!atomic_read(&data->disabled)))
+ ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
+
+ preempt_enable_notrace();
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+static void
+function_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu, resched;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ pc = preempt_count();
+ resched = need_resched();
+ preempt_disable_notrace();
+ local_save_flags(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+
+ atomic_dec(&data->disabled);
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = function_trace_call,
+};
+
+void tracing_start_function_trace(void)
+{
+ ftrace_function_enabled = 0;
+ register_ftrace_function(&trace_ops);
+ if (tracer_enabled)
+ ftrace_function_enabled = 1;
+}
+
+void tracing_stop_function_trace(void)
+{
+ ftrace_function_enabled = 0;
+ unregister_ftrace_function(&trace_ops);
+}
+#endif
+
+enum trace_file_type {
+ TRACE_FILE_LAT_FMT = 1,
+};
+
+static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+{
+ /* Don't allow ftrace to trace into the ring buffers */
+ ftrace_disable_cpu();
+
+ iter->idx++;
+ if (iter->buffer_iter[iter->cpu])
+ ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+ ftrace_enable_cpu();
+}
+
+static struct trace_entry *
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
+{
+ struct ring_buffer_event *event;
+ struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
+
+ /* Don't allow ftrace to trace into the ring buffers */
+ ftrace_disable_cpu();
+
+ if (buf_iter)
+ event = ring_buffer_iter_peek(buf_iter, ts);
+ else
+ event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+
+ ftrace_enable_cpu();
+
+ return event ? ring_buffer_event_data(event) : NULL;
+}
+
+static struct trace_entry *
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
+{
+ struct ring_buffer *buffer = iter->tr->buffer;
+ struct trace_entry *ent, *next = NULL;
+ u64 next_ts = 0, ts;
+ int next_cpu = -1;
+ int cpu;
+
+ for_each_tracing_cpu(cpu) {
+
+ if (ring_buffer_empty_cpu(buffer, cpu))
+ continue;
+
+ ent = peek_next_entry(iter, cpu, &ts);
+
+ /*
+ * Pick the entry with the smallest timestamp:
+ */
+ if (ent && (!next || ts < next_ts)) {
+ next = ent;
+ next_cpu = cpu;
+ next_ts = ts;
+ }
+ }
+
+ if (ent_cpu)
+ *ent_cpu = next_cpu;
+
+ if (ent_ts)
+ *ent_ts = next_ts;
+
+ return next;
+}
+
+/* Find the next real entry, without updating the iterator itself */
+static struct trace_entry *
+find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
+{
+ return __find_next_entry(iter, ent_cpu, ent_ts);
+}
+
+/* Find the next real entry, and increment the iterator to the next entry */
+static void *find_next_entry_inc(struct trace_iterator *iter)
+{
+ iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
+
+ if (iter->ent)
+ trace_iterator_increment(iter, iter->cpu);
+
+ return iter->ent ? iter : NULL;
+}
+
+static void trace_consume(struct trace_iterator *iter)
+{
+ /* Don't allow ftrace to trace into the ring buffers */
+ ftrace_disable_cpu();
+ ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+ ftrace_enable_cpu();
+}
+
+static void *s_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct trace_iterator *iter = m->private;
+ int i = (int)*pos;
+ void *ent;
+
+ (*pos)++;
+
+ /* can't go backwards */
+ if (iter->idx > i)
+ return NULL;
+
+ if (iter->idx < 0)
+ ent = find_next_entry_inc(iter);
+ else
+ ent = iter;
+
+ while (ent && iter->idx < i)
+ ent = find_next_entry_inc(iter);
+
+ iter->pos = *pos;
+
+ return ent;
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+ struct trace_iterator *iter = m->private;
+ void *p = NULL;
+ loff_t l = 0;
+ int cpu;
+
+ mutex_lock(&trace_types_lock);
+
+ if (!current_trace || current_trace != iter->trace) {
+ mutex_unlock(&trace_types_lock);
+ return NULL;
+ }
+
+ atomic_inc(&trace_record_cmdline_disabled);
+
+ /* let the tracer grab locks here if needed */
+ if (current_trace->start)
+ current_trace->start(iter);
+
+ if (*pos != iter->pos) {
+ iter->ent = NULL;
+ iter->cpu = 0;
+ iter->idx = -1;
+
+ ftrace_disable_cpu();
+
+ for_each_tracing_cpu(cpu) {
+ ring_buffer_iter_reset(iter->buffer_iter[cpu]);
+ }
+
+ ftrace_enable_cpu();
+
+ for (p = iter; p && l < *pos; p = s_next(m, p, &l))
+ ;
+
+ } else {
+ l = *pos - 1;
+ p = s_next(m, p, &l);
+ }
+
+ return p;
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+ struct trace_iterator *iter = m->private;
+
+ atomic_dec(&trace_record_cmdline_disabled);
+
+ /* let the tracer release locks here if needed */
+ if (current_trace && current_trace == iter->trace && iter->trace->stop)
+ iter->trace->stop(iter);
+
+ mutex_unlock(&trace_types_lock);
+}
+
+#ifdef CONFIG_KRETPROBES
+static inline const char *kretprobed(const char *name)
+{
+ static const char tramp_name[] = "kretprobe_trampoline";
+ int size = sizeof(tramp_name);
+
+ if (strncmp(tramp_name, name, size) == 0)
+ return "[unknown/kretprobe'd]";
+ return name;
+}
+#else
+static inline const char *kretprobed(const char *name)
+{
+ return name;
+}
+#endif /* CONFIG_KRETPROBES */
+
+static int
+seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+ const char *name;
+
+ kallsyms_lookup(address, NULL, NULL, NULL, str);
+
+ name = kretprobed(str);
+
+ return trace_seq_printf(s, fmt, name);
+#endif
+ return 1;
+}
+
+static int
+seq_print_sym_offset(struct trace_seq *s, const char *fmt,
+ unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+ const char *name;
+
+ sprint_symbol(str, address);
+ name = kretprobed(str);
+
+ return trace_seq_printf(s, fmt, name);
+#endif
+ return 1;
+}
+
+#ifndef CONFIG_64BIT
+# define IP_FMT "%08lx"
+#else
+# define IP_FMT "%016lx"
+#endif
+
+static int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
+{
+ int ret;
+
+ if (!ip)
+ return trace_seq_printf(s, "0");
+
+ if (sym_flags & TRACE_ITER_SYM_OFFSET)
+ ret = seq_print_sym_offset(s, "%s", ip);
+ else
+ ret = seq_print_sym_short(s, "%s", ip);
+
+ if (!ret)
+ return 0;
+
+ if (sym_flags & TRACE_ITER_SYM_ADDR)
+ ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+ return ret;
+}
+
+static void print_lat_help_header(struct seq_file *m)
+{
+ seq_puts(m, "# _------=> CPU# \n");
+ seq_puts(m, "# / _-----=> irqs-off \n");
+ seq_puts(m, "# | / _----=> need-resched \n");
+ seq_puts(m, "# || / _---=> hardirq/softirq \n");
+ seq_puts(m, "# ||| / _--=> preempt-depth \n");
+ seq_puts(m, "# |||| / \n");
+ seq_puts(m, "# ||||| delay \n");
+ seq_puts(m, "# cmd pid ||||| time | caller \n");
+ seq_puts(m, "# \\ / ||||| \\ | / \n");
+}
+
+static void print_func_help_header(struct seq_file *m)
+{
+ seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | |\n");
+}
+
+
+static void
+print_trace_header(struct seq_file *m, struct trace_iterator *iter)
+{
+ unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
+ struct trace_array *tr = iter->tr;
+ struct trace_array_cpu *data = tr->data[tr->cpu];
+ struct tracer *type = current_trace;
+ unsigned long total;
+ unsigned long entries;
+ const char *name = "preemption";
+
+ if (type)
+ name = type->name;
+
+ entries = ring_buffer_entries(iter->tr->buffer);
+ total = entries +
+ ring_buffer_overruns(iter->tr->buffer);
+
+ seq_printf(m, "%s latency trace v1.1.5 on %s\n",
+ name, UTS_RELEASE);
+ seq_puts(m, "-----------------------------------"
+ "---------------------------------\n");
+ seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
+ " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
+ nsecs_to_usecs(data->saved_latency),
+ entries,
+ total,
+ tr->cpu,
+#if defined(CONFIG_PREEMPT_NONE)
+ "server",
+#elif defined(CONFIG_PREEMPT_VOLUNTARY)
+ "desktop",
+#elif defined(CONFIG_PREEMPT)
+ "preempt",
+#else
+ "unknown",
+#endif
+ /* These are reserved for later use */
+ 0, 0, 0, 0);
+#ifdef CONFIG_SMP
+ seq_printf(m, " #P:%d)\n", num_online_cpus());
+#else
+ seq_puts(m, ")\n");
+#endif
+ seq_puts(m, " -----------------\n");
+ seq_printf(m, " | task: %.16s-%d "
+ "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
+ data->comm, data->pid, data->uid, data->nice,
+ data->policy, data->rt_priority);
+ seq_puts(m, " -----------------\n");
+
+ if (data->critical_start) {
+ seq_puts(m, " => started at: ");
+ seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
+ trace_print_seq(m, &iter->seq);
+ seq_puts(m, "\n => ended at: ");
+ seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
+ trace_print_seq(m, &iter->seq);
+ seq_puts(m, "\n");
+ }
+
+ seq_puts(m, "\n");
+}
+
+static void
+lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
+{
+ int hardirq, softirq;
+ char *comm;
+
+ comm = trace_find_cmdline(entry->pid);
+
+ trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
+ trace_seq_printf(s, "%3d", cpu);
+ trace_seq_printf(s, "%c%c",
+ (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
+ ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
+
+ hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
+ softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+ if (hardirq && softirq) {
+ trace_seq_putc(s, 'H');
+ } else {
+ if (hardirq) {
+ trace_seq_putc(s, 'h');
+ } else {
+ if (softirq)
+ trace_seq_putc(s, 's');
+ else
+ trace_seq_putc(s, '.');
+ }
+ }
+
+ if (entry->preempt_count)
+ trace_seq_printf(s, "%x", entry->preempt_count);
+ else
+ trace_seq_puts(s, ".");
+}
+
+unsigned long preempt_mark_thresh = 100;
+
+static void
+lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
+ unsigned long rel_usecs)
+{
+ trace_seq_printf(s, " %4lldus", abs_usecs);
+ if (rel_usecs > preempt_mark_thresh)
+ trace_seq_puts(s, "!: ");
+ else if (rel_usecs > 1)
+ trace_seq_puts(s, "+: ");
+ else
+ trace_seq_puts(s, " : ");
+}
+
+static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+
+/*
+ * The message is supposed to contain an ending newline.
+ * If the printing stops prematurely, try to add a newline of our own.
+ */
+void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
+{
+ struct trace_entry *ent;
+ struct trace_field_cont *cont;
+ bool ok = true;
+
+ ent = peek_next_entry(iter, iter->cpu, NULL);
+ if (!ent || ent->type != TRACE_CONT) {
+ trace_seq_putc(s, '\n');
+ return;
+ }
+
+ do {
+ cont = (struct trace_field_cont *)ent;
+ if (ok)
+ ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
+
+ ftrace_disable_cpu();
+
+ if (iter->buffer_iter[iter->cpu])
+ ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+ else
+ ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+ ftrace_enable_cpu();
+
+ ent = peek_next_entry(iter, iter->cpu, NULL);
+ } while (ent && ent->type == TRACE_CONT);
+
+ if (!ok)
+ trace_seq_putc(s, '\n');
+}
+
+static enum print_line_t
+print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
+{
+ struct trace_seq *s = &iter->seq;
+ unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
+ struct trace_entry *next_entry;
+ unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
+ struct trace_entry *entry = iter->ent;
+ unsigned long abs_usecs;
+ unsigned long rel_usecs;
+ u64 next_ts;
+ char *comm;
+ int S, T;
+ int i;
+ unsigned state;
+
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
+ next_entry = find_next_entry(iter, NULL, &next_ts);
+ if (!next_entry)
+ next_ts = iter->ts;
+ rel_usecs = ns2usecs(next_ts - iter->ts);
+ abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
+
+ if (verbose) {
+ comm = trace_find_cmdline(entry->pid);
+ trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
+ " %ld.%03ldms (+%ld.%03ldms): ",
+ comm,
+ entry->pid, cpu, entry->flags,
+ entry->preempt_count, trace_idx,
+ ns2usecs(iter->ts),
+ abs_usecs/1000,
+ abs_usecs % 1000, rel_usecs/1000,
+ rel_usecs % 1000);
+ } else {
+ lat_print_generic(s, entry, cpu);
+ lat_print_timestamp(s, abs_usecs, rel_usecs);
+ }
+ switch (entry->type) {
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_ip_sym(s, field->ip, sym_flags);
+ trace_seq_puts(s, " (");
+ seq_print_ip_sym(s, field->parent_ip, sym_flags);
+ trace_seq_puts(s, ")\n");
+ break;
+ }
+ case TRACE_CTX:
+ case TRACE_WAKE: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ T = field->next_state < sizeof(state_to_char) ?
+ state_to_char[field->next_state] : 'X';
+
+ state = field->prev_state ?
+ __ffs(field->prev_state) + 1 : 0;
+ S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
+ comm = trace_find_cmdline(field->next_pid);
+ trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+ field->prev_pid,
+ field->prev_prio,
+ S, entry->type == TRACE_CTX ? "==>" : " +",
+ field->next_cpu,
+ field->next_pid,
+ field->next_prio,
+ T, comm);
+ break;
+ }
+ case TRACE_SPECIAL: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "# %ld %ld %ld\n",
+ field->arg1,
+ field->arg2,
+ field->arg3);
+ break;
+ }
+ case TRACE_STACK: {
+ struct stack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ if (i)
+ trace_seq_puts(s, " <= ");
+ seq_print_ip_sym(s, field->caller[i], sym_flags);
+ }
+ trace_seq_puts(s, "\n");
+ break;
+ }
+ case TRACE_PRINT: {
+ struct print_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_ip_sym(s, field->ip, sym_flags);
+ trace_seq_printf(s, ": %s", field->buf);
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+ break;
+ }
+ default:
+ trace_seq_printf(s, "Unknown type %d\n", entry->type);
+ }
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
+ struct trace_entry *entry;
+ unsigned long usec_rem;
+ unsigned long long t;
+ unsigned long secs;
+ char *comm;
+ int ret;
+ int S, T;
+ int i;
+
+ entry = iter->ent;
+
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
+ comm = trace_find_cmdline(iter->ent->pid);
+
+ t = ns2usecs(iter->ts);
+ usec_rem = do_div(t, 1000000ULL);
+ secs = (unsigned long)t;
+
+ ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (entry->type) {
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = seq_print_ip_sym(s, field->ip, sym_flags);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
+ field->parent_ip) {
+ ret = trace_seq_printf(s, " <-");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = seq_print_ip_sym(s,
+ field->parent_ip,
+ sym_flags);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ ret = trace_seq_printf(s, "\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
+ case TRACE_CTX:
+ case TRACE_WAKE: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ S = field->prev_state < sizeof(state_to_char) ?
+ state_to_char[field->prev_state] : 'X';
+ T = field->next_state < sizeof(state_to_char) ?
+ state_to_char[field->next_state] : 'X';
+ ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
+ field->prev_pid,
+ field->prev_prio,
+ S,
+ entry->type == TRACE_CTX ? "==>" : " +",
+ field->next_cpu,
+ field->next_pid,
+ field->next_prio,
+ T);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
+ case TRACE_SPECIAL: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "# %ld %ld %ld\n",
+ field->arg1,
+ field->arg2,
+ field->arg3);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
+ case TRACE_STACK: {
+ struct stack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ if (i) {
+ ret = trace_seq_puts(s, " <= ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ ret = seq_print_ip_sym(s, field->caller[i],
+ sym_flags);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ ret = trace_seq_puts(s, "\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
+ case TRACE_PRINT: {
+ struct print_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_ip_sym(s, field->ip, sym_flags);
+ trace_seq_printf(s, ": %s", field->buf);
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+ break;
+ }
+ }
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry;
+ int ret;
+ int S, T;
+
+ entry = iter->ent;
+
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
+ ret = trace_seq_printf(s, "%d %d %llu ",
+ entry->pid, iter->cpu, iter->ts);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (entry->type) {
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%x %x\n",
+ field->ip,
+ field->parent_ip);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
+ case TRACE_CTX:
+ case TRACE_WAKE: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ S = field->prev_state < sizeof(state_to_char) ?
+ state_to_char[field->prev_state] : 'X';
+ T = field->next_state < sizeof(state_to_char) ?
+ state_to_char[field->next_state] : 'X';
+ if (entry->type == TRACE_WAKE)
+ S = '+';
+ ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
+ field->prev_pid,
+ field->prev_prio,
+ S,
+ field->next_cpu,
+ field->next_pid,
+ field->next_prio,
+ T);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
+ case TRACE_SPECIAL:
+ case TRACE_STACK: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "# %ld %ld %ld\n",
+ field->arg1,
+ field->arg2,
+ field->arg3);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
+ case TRACE_PRINT: {
+ struct print_entry *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+ break;
+ }
+ }
+ return TRACE_TYPE_HANDLED;
+}
+
+#define SEQ_PUT_FIELD_RET(s, x) \
+do { \
+ if (!trace_seq_putmem(s, &(x), sizeof(x))) \
+ return 0; \
+} while (0)
+
+#define SEQ_PUT_HEX_FIELD_RET(s, x) \
+do { \
+ BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
+ if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
+ return 0; \
+} while (0)
+
+static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ unsigned char newline = '\n';
+ struct trace_entry *entry;
+ int S, T;
+
+ entry = iter->ent;
+
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
+ SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
+ SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
+ SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
+
+ switch (entry->type) {
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+ SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
+ break;
+ }
+ case TRACE_CTX:
+ case TRACE_WAKE: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ S = field->prev_state < sizeof(state_to_char) ?
+ state_to_char[field->prev_state] : 'X';
+ T = field->next_state < sizeof(state_to_char) ?
+ state_to_char[field->next_state] : 'X';
+ if (entry->type == TRACE_WAKE)
+ S = '+';
+ SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+ SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
+ SEQ_PUT_HEX_FIELD_RET(s, S);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
+ SEQ_PUT_HEX_FIELD_RET(s, T);
+ break;
+ }
+ case TRACE_SPECIAL:
+ case TRACE_STACK: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
+ break;
+ }
+ }
+ SEQ_PUT_FIELD_RET(s, newline);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry;
+
+ entry = iter->ent;
+
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
+ SEQ_PUT_FIELD_RET(s, entry->pid);
+ SEQ_PUT_FIELD_RET(s, entry->cpu);
+ SEQ_PUT_FIELD_RET(s, iter->ts);
+
+ switch (entry->type) {
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_FIELD_RET(s, field->ip);
+ SEQ_PUT_FIELD_RET(s, field->parent_ip);
+ break;
+ }
+ case TRACE_CTX: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_FIELD_RET(s, field->prev_pid);
+ SEQ_PUT_FIELD_RET(s, field->prev_prio);
+ SEQ_PUT_FIELD_RET(s, field->prev_state);
+ SEQ_PUT_FIELD_RET(s, field->next_pid);
+ SEQ_PUT_FIELD_RET(s, field->next_prio);
+ SEQ_PUT_FIELD_RET(s, field->next_state);
+ break;
+ }
+ case TRACE_SPECIAL:
+ case TRACE_STACK: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_FIELD_RET(s, field->arg1);
+ SEQ_PUT_FIELD_RET(s, field->arg2);
+ SEQ_PUT_FIELD_RET(s, field->arg3);
+ break;
+ }
+ }
+ return 1;
+}
+
+static int trace_empty(struct trace_iterator *iter)
+{
+ int cpu;
+
+ for_each_tracing_cpu(cpu) {
+ if (iter->buffer_iter[cpu]) {
+ if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+ return 0;
+ } else {
+ if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static enum print_line_t print_trace_line(struct trace_iterator *iter)
+{
+ enum print_line_t ret;
+
+ if (iter->trace && iter->trace->print_line) {
+ ret = iter->trace->print_line(iter);
+ if (ret != TRACE_TYPE_UNHANDLED)
+ return ret;
+ }
+
+ if (trace_flags & TRACE_ITER_BIN)
+ return print_bin_fmt(iter);
+
+ if (trace_flags & TRACE_ITER_HEX)
+ return print_hex_fmt(iter);
+
+ if (trace_flags & TRACE_ITER_RAW)
+ return print_raw_fmt(iter);
+
+ if (iter->iter_flags & TRACE_FILE_LAT_FMT)
+ return print_lat_fmt(iter, iter->idx, iter->cpu);
+
+ return print_trace_fmt(iter);
+}
+
+static int s_show(struct seq_file *m, void *v)
+{
+ struct trace_iterator *iter = v;
+
+ if (iter->ent == NULL) {
+ if (iter->tr) {
+ seq_printf(m, "# tracer: %s\n", iter->trace->name);
+ seq_puts(m, "#\n");
+ }
+ if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+ /* print nothing if the buffers are empty */
+ if (trace_empty(iter))
+ return 0;
+ print_trace_header(m, iter);
+ if (!(trace_flags & TRACE_ITER_VERBOSE))
+ print_lat_help_header(m);
+ } else {
+ if (!(trace_flags & TRACE_ITER_VERBOSE))
+ print_func_help_header(m);
+ }
+ } else {
+ print_trace_line(iter);
+ trace_print_seq(m, &iter->seq);
+ }
+
+ return 0;
+}
+
+static struct seq_operations tracer_seq_ops = {
+ .start = s_start,
+ .next = s_next,
+ .stop = s_stop,
+ .show = s_show,
+};
+
+static struct trace_iterator *
+__tracing_open(struct inode *inode, struct file *file, int *ret)
+{
+ struct trace_iterator *iter;
+ struct seq_file *m;
+ int cpu;
+
+ if (tracing_disabled) {
+ *ret = -ENODEV;
+ return NULL;
+ }
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter) {
+ *ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock(&trace_types_lock);
+ if (current_trace && current_trace->print_max)
+ iter->tr = &max_tr;
+ else
+ iter->tr = inode->i_private;
+ iter->trace = current_trace;
+ iter->pos = -1;
+
+ for_each_tracing_cpu(cpu) {
+
+ iter->buffer_iter[cpu] =
+ ring_buffer_read_start(iter->tr->buffer, cpu);
+
+ if (!iter->buffer_iter[cpu])
+ goto fail_buffer;
+ }
+
+ /* TODO stop tracer */
+ *ret = seq_open(file, &tracer_seq_ops);
+ if (*ret)
+ goto fail_buffer;
+
+ m = file->private_data;
+ m->private = iter;
+
+ /* stop the trace while dumping */
+ if (iter->tr->ctrl) {
+ tracer_enabled = 0;
+ ftrace_function_enabled = 0;
+ }
+
+ if (iter->trace && iter->trace->open)
+ iter->trace->open(iter);
+
+ mutex_unlock(&trace_types_lock);
+
+ out:
+ return iter;
+
+ fail_buffer:
+ for_each_tracing_cpu(cpu) {
+ if (iter->buffer_iter[cpu])
+ ring_buffer_read_finish(iter->buffer_iter[cpu]);
+ }
+ mutex_unlock(&trace_types_lock);
+ kfree(iter);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+int tracing_open_generic(struct inode *inode, struct file *filp)
+{
+ if (tracing_disabled)
+ return -ENODEV;
+
+ filp->private_data = inode->i_private;
+ return 0;
+}
+
+int tracing_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ struct trace_iterator *iter = m->private;
+ int cpu;
+
+ mutex_lock(&trace_types_lock);
+ for_each_tracing_cpu(cpu) {
+ if (iter->buffer_iter[cpu])
+ ring_buffer_read_finish(iter->buffer_iter[cpu]);
+ }
+
+ if (iter->trace && iter->trace->close)
+ iter->trace->close(iter);
+
+ /* reenable tracing if it was previously enabled */
+ if (iter->tr->ctrl) {
+ tracer_enabled = 1;
+ /*
+ * It is safe to enable function tracing even if it
+ * isn't used
+ */
+ ftrace_function_enabled = 1;
+ }
+ mutex_unlock(&trace_types_lock);
+
+ seq_release(inode, file);
+ kfree(iter);
+ return 0;
+}
+
+static int tracing_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ __tracing_open(inode, file, &ret);
+
+ return ret;
+}
+
+static int tracing_lt_open(struct inode *inode, struct file *file)
+{
+ struct trace_iterator *iter;
+ int ret;
+
+ iter = __tracing_open(inode, file, &ret);
+
+ if (!ret)
+ iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
+ return ret;
+}
+
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct tracer *t = m->private;
+
+ (*pos)++;
+
+ if (t)
+ t = t->next;
+
+ m->private = t;
+
+ return t;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ struct tracer *t = m->private;
+ loff_t l = 0;
+
+ mutex_lock(&trace_types_lock);
+ for (; t && l < *pos; t = t_next(m, t, &l))
+ ;
+
+ return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+ mutex_unlock(&trace_types_lock);
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ struct tracer *t = v;
+
+ if (!t)
+ return 0;
+
+ seq_printf(m, "%s", t->name);
+ if (t->next)
+ seq_putc(m, ' ');
+ else
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+static struct seq_operations show_traces_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int show_traces_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ ret = seq_open(file, &show_traces_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = trace_types;
+ }
+
+ return ret;
+}
+
+static struct file_operations tracing_fops = {
+ .open = tracing_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_release,
+};
+
+static struct file_operations tracing_lt_fops = {
+ .open = tracing_lt_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_release,
+};
+
+static struct file_operations show_traces_fops = {
+ .open = show_traces_open,
+ .read = seq_read,
+ .release = seq_release,
+};
+
+/*
+ * Only trace on a CPU if the bitmask is set:
+ */
+static cpumask_t tracing_cpumask = CPU_MASK_ALL;
+
+/*
+ * When tracing/tracing_cpu_mask is modified then this holds
+ * the new bitmask we are about to install:
+ */
+static cpumask_t tracing_cpumask_new;
+
+/*
+ * The tracer itself will not take this lock, but still we want
+ * to provide a consistent cpumask to user-space:
+ */
+static DEFINE_MUTEX(tracing_cpumask_update_lock);
+
+/*
+ * Temporary storage for the character representation of the
+ * CPU bitmask (and one more byte for the newline):
+ */
+static char mask_str[NR_CPUS + 1];
+
+static ssize_t
+tracing_cpumask_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ int len;
+
+ mutex_lock(&tracing_cpumask_update_lock);
+
+ len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+ if (count - len < 2) {
+ count = -EINVAL;
+ goto out_err;
+ }
+ len += sprintf(mask_str + len, "\n");
+ count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
+
+out_err:
+ mutex_unlock(&tracing_cpumask_update_lock);
+
+ return count;
+}
+
+static ssize_t
+tracing_cpumask_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ int err, cpu;
+
+ mutex_lock(&tracing_cpumask_update_lock);
+ err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+ if (err)
+ goto err_unlock;
+
+ raw_local_irq_disable();
+ __raw_spin_lock(&ftrace_max_lock);
+ for_each_tracing_cpu(cpu) {
+ /*
+ * Increase/decrease the disabled counter if we are
+ * about to flip a bit in the cpumask:
+ */
+ if (cpu_isset(cpu, tracing_cpumask) &&
+ !cpu_isset(cpu, tracing_cpumask_new)) {
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ }
+ if (!cpu_isset(cpu, tracing_cpumask) &&
+ cpu_isset(cpu, tracing_cpumask_new)) {
+ atomic_dec(&global_trace.data[cpu]->disabled);
+ }
+ }
+ __raw_spin_unlock(&ftrace_max_lock);
+ raw_local_irq_enable();
+
+ tracing_cpumask = tracing_cpumask_new;
+
+ mutex_unlock(&tracing_cpumask_update_lock);
+
+ return count;
+
+err_unlock:
+ mutex_unlock(&tracing_cpumask_update_lock);
+
+ return err;
+}
+
+static struct file_operations tracing_cpumask_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_cpumask_read,
+ .write = tracing_cpumask_write,
+};
+
+static ssize_t
+tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char *buf;
+ int r = 0;
+ int len = 0;
+ int i;
+
+ /* calulate max size */
+ for (i = 0; trace_options[i]; i++) {
+ len += strlen(trace_options[i]);
+ len += 3; /* "no" and space */
+ }
+
+ /* +2 for \n and \0 */
+ buf = kmalloc(len + 2, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; trace_options[i]; i++) {
+ if (trace_flags & (1 << i))
+ r += sprintf(buf + r, "%s ", trace_options[i]);
+ else
+ r += sprintf(buf + r, "no%s ", trace_options[i]);
+ }
+
+ r += sprintf(buf + r, "\n");
+ WARN_ON(r >= len + 2);
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ kfree(buf);
+
+ return r;
+}
+
+static ssize_t
+tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ char *cmp = buf;
+ int neg = 0;
+ int i;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ if (strncmp(buf, "no", 2) == 0) {
+ neg = 1;
+ cmp += 2;
+ }
+
+ for (i = 0; trace_options[i]; i++) {
+ int len = strlen(trace_options[i]);
+
+ if (strncmp(cmp, trace_options[i], len) == 0) {
+ if (neg)
+ trace_flags &= ~(1 << i);
+ else
+ trace_flags |= (1 << i);
+ break;
+ }
+ }
+ /*
+ * If no option could be set, return an error:
+ */
+ if (!trace_options[i])
+ return -EINVAL;
+
+ filp->f_pos += cnt;
+
+ return cnt;
+}
+
+static struct file_operations tracing_iter_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_iter_ctrl_read,
+ .write = tracing_iter_ctrl_write,
+};
+
+static const char readme_msg[] =
+ "tracing mini-HOWTO:\n\n"
+ "# mkdir /debug\n"
+ "# mount -t debugfs nodev /debug\n\n"
+ "# cat /debug/tracing/available_tracers\n"
+ "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+ "# cat /debug/tracing/current_tracer\n"
+ "none\n"
+ "# echo sched_switch > /debug/tracing/current_tracer\n"
+ "# cat /debug/tracing/current_tracer\n"
+ "sched_switch\n"
+ "# cat /debug/tracing/iter_ctrl\n"
+ "noprint-parent nosym-offset nosym-addr noverbose\n"
+ "# echo print-parent > /debug/tracing/iter_ctrl\n"
+ "# echo 1 > /debug/tracing/tracing_enabled\n"
+ "# cat /debug/tracing/trace > /tmp/trace.txt\n"
+ "echo 0 > /debug/tracing/tracing_enabled\n"
+;
+
+static ssize_t
+tracing_readme_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return simple_read_from_buffer(ubuf, cnt, ppos,
+ readme_msg, strlen(readme_msg));
+}
+
+static struct file_operations tracing_readme_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_readme_read,
+};
+
+static ssize_t
+tracing_ctrl_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = sprintf(buf, "%ld\n", tr->ctrl);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_ctrl_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ val = !!val;
+
+ mutex_lock(&trace_types_lock);
+ if (tr->ctrl ^ val) {
+ if (val)
+ tracer_enabled = 1;
+ else
+ tracer_enabled = 0;
+
+ tr->ctrl = val;
+
+ if (current_trace && current_trace->ctrl_update)
+ current_trace->ctrl_update(tr);
+ }
+ mutex_unlock(&trace_types_lock);
+
+ filp->f_pos += cnt;
+
+ return cnt;
+}
+
+static ssize_t
+tracing_set_trace_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[max_tracer_type_len+2];
+ int r;
+
+ mutex_lock(&trace_types_lock);
+ if (current_trace)
+ r = sprintf(buf, "%s\n", current_trace->name);
+ else
+ r = sprintf(buf, "\n");
+ mutex_unlock(&trace_types_lock);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = &global_trace;
+ struct tracer *t;
+ char buf[max_tracer_type_len+1];
+ int i;
+ size_t ret;
+
+ ret = cnt;
+
+ if (cnt > max_tracer_type_len)
+ cnt = max_tracer_type_len;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ /* strip ending whitespace. */
+ for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+ buf[i] = 0;
+
+ mutex_lock(&trace_types_lock);
+ for (t = trace_types; t; t = t->next) {
+ if (strcmp(t->name, buf) == 0)
+ break;
+ }
+ if (!t) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (t == current_trace)
+ goto out;
+
+ if (current_trace && current_trace->reset)
+ current_trace->reset(tr);
+
+ current_trace = t;
+ if (t->init)
+ t->init(tr);
+
+ out:
+ mutex_unlock(&trace_types_lock);
+
+ if (ret > 0)
+ filp->f_pos += ret;
+
+ return ret;
+}
+
+static ssize_t
+tracing_max_lat_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long *ptr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = snprintf(buf, sizeof(buf), "%ld\n",
+ *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
+ if (r > sizeof(buf))
+ r = sizeof(buf);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_max_lat_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ long *ptr = filp->private_data;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ *ptr = val * 1000;
+
+ return cnt;
+}
+
+static atomic_t tracing_reader;
+
+static int tracing_open_pipe(struct inode *inode, struct file *filp)
+{
+ struct trace_iterator *iter;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ /* We only allow for reader of the pipe */
+ if (atomic_inc_return(&tracing_reader) != 1) {
+ atomic_dec(&tracing_reader);
+ return -EBUSY;
+ }
+
+ /* create a buffer to store the information to pass to userspace */
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ mutex_lock(&trace_types_lock);
+ iter->tr = &global_trace;
+ iter->trace = current_trace;
+ filp->private_data = iter;
+
+ if (iter->trace->pipe_open)
+ iter->trace->pipe_open(iter);
+ mutex_unlock(&trace_types_lock);
+
+ return 0;
+}
+
+static int tracing_release_pipe(struct inode *inode, struct file *file)
+{
+ struct trace_iterator *iter = file->private_data;
+
+ kfree(iter);
+ atomic_dec(&tracing_reader);
+
+ return 0;
+}
+
+static unsigned int
+tracing_poll_pipe(struct file *filp, poll_table *poll_table)
+{
+ struct trace_iterator *iter = filp->private_data;
+
+ if (trace_flags & TRACE_ITER_BLOCK) {
+ /*
+ * Always select as readable when in blocking mode
+ */
+ return POLLIN | POLLRDNORM;
+ } else {
+ if (!trace_empty(iter))
+ return POLLIN | POLLRDNORM;
+ poll_wait(filp, &trace_wait, poll_table);
+ if (!trace_empty(iter))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+ }
+}
+
+/*
+ * Consumer reader.
+ */
+static ssize_t
+tracing_read_pipe(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_iterator *iter = filp->private_data;
+ ssize_t sret;
+
+ /* return any leftover data */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (sret != -EBUSY)
+ return sret;
+
+ trace_seq_reset(&iter->seq);
+
+ mutex_lock(&trace_types_lock);
+ if (iter->trace->read) {
+ sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
+ if (sret)
+ goto out;
+ }
+
+waitagain:
+ sret = 0;
+ while (trace_empty(iter)) {
+
+ if ((filp->f_flags & O_NONBLOCK)) {
+ sret = -EAGAIN;
+ goto out;
+ }
+
+ /*
+ * This is a make-shift waitqueue. The reason we don't use
+ * an actual wait queue is because:
+ * 1) we only ever have one waiter
+ * 2) the tracing, traces all functions, we don't want
+ * the overhead of calling wake_up and friends
+ * (and tracing them too)
+ * Anyway, this is really very primitive wakeup.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ iter->tr->waiter = current;
+
+ mutex_unlock(&trace_types_lock);
+
+ /* sleep for 100 msecs, and try again. */
+ schedule_timeout(HZ/10);
+
+ mutex_lock(&trace_types_lock);
+
+ iter->tr->waiter = NULL;
+
+ if (signal_pending(current)) {
+ sret = -EINTR;
+ goto out;
+ }
+
+ if (iter->trace != current_trace)
+ goto out;
+
+ /*
+ * We block until we read something and tracing is disabled.
+ * We still block if tracing is disabled, but we have never
+ * read anything. This allows a user to cat this file, and
+ * then enable tracing. But after we have read something,
+ * we give an EOF when tracing is again disabled.
+ *
+ * iter->pos will be 0 if we haven't read anything.
+ */
+ if (!tracer_enabled && iter->pos)
+ break;
+
+ continue;
+ }
+
+ /* stop when tracing is finished */
+ if (trace_empty(iter))
+ goto out;
+
+ if (cnt >= PAGE_SIZE)
+ cnt = PAGE_SIZE - 1;
+
+ /* reset all but tr, trace, and overruns */
+ memset(&iter->seq, 0,
+ sizeof(struct trace_iterator) -
+ offsetof(struct trace_iterator, seq));
+ iter->pos = -1;
+
+ while (find_next_entry_inc(iter) != NULL) {
+ enum print_line_t ret;
+ int len = iter->seq.len;
+
+ ret = print_trace_line(iter);
+ if (ret == TRACE_TYPE_PARTIAL_LINE) {
+ /* don't print partial lines */
+ iter->seq.len = len;
+ break;
+ }
+
+ trace_consume(iter);
+
+ if (iter->seq.len >= cnt)
+ break;
+ }
+
+ /* Now copy what we have to the user */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (iter->seq.readpos >= iter->seq.len)
+ trace_seq_reset(&iter->seq);
+
+ /*
+ * If there was nothing to send to user, inspite of consuming trace
+ * entries, go back to wait for more entries.
+ */
+ if (sret == -EBUSY)
+ goto waitagain;
+
+out:
+ mutex_unlock(&trace_types_lock);
+
+ return sret;
+}
+
+static ssize_t
+tracing_entries_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = sprintf(buf, "%lu\n", tr->entries);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_entries_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long val;
+ char buf[64];
+ int ret, cpu;
+ struct trace_array *tr = filp->private_data;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ /* must have at least 1 entry */
+ if (!val)
+ return -EINVAL;
+
+ mutex_lock(&trace_types_lock);
+
+ if (tr->ctrl) {
+ cnt = -EBUSY;
+ pr_info("ftrace: please disable tracing"
+ " before modifying buffer size\n");
+ goto out;
+ }
+
+ /* disable all cpu buffers */
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_inc(&max_tr.data[cpu]->disabled);
+ }
+
+ if (val != global_trace.entries) {
+ ret = ring_buffer_resize(global_trace.buffer, val);
+ if (ret < 0) {
+ cnt = ret;
+ goto out;
+ }
+
+ ret = ring_buffer_resize(max_tr.buffer, val);
+ if (ret < 0) {
+ int r;
+ cnt = ret;
+ r = ring_buffer_resize(global_trace.buffer,
+ global_trace.entries);
+ if (r < 0) {
+ /* AARGH! We are left with different
+ * size max buffer!!!! */
+ WARN_ON(1);
+ tracing_disabled = 1;
+ }
+ goto out;
+ }
+
+ global_trace.entries = val;
+ }
+
+ filp->f_pos += cnt;
+
+ /* If check pages failed, return ENOMEM */
+ if (tracing_disabled)
+ cnt = -ENOMEM;
+ out:
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_dec(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_dec(&max_tr.data[cpu]->disabled);
+ }
+
+ max_tr.entries = global_trace.entries;
+ mutex_unlock(&trace_types_lock);
+
+ return cnt;
+}
+
+static int mark_printk(const char *fmt, ...)
+{
+ int ret;
+ va_list args;
+ va_start(args, fmt);
+ ret = trace_vprintk(0, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static ssize_t
+tracing_mark_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *fpos)
+{
+ char *buf;
+ char *end;
+ struct trace_array *tr = &global_trace;
+
+ if (!tr->ctrl || tracing_disabled)
+ return -EINVAL;
+
+ if (cnt > TRACE_BUF_SIZE)
+ cnt = TRACE_BUF_SIZE;
+
+ buf = kmalloc(cnt + 1, GFP_KERNEL);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, ubuf, cnt)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+
+ /* Cut from the first nil or newline. */
+ buf[cnt] = '\0';
+ end = strchr(buf, '\n');
+ if (end)
+ *end = '\0';
+
+ cnt = mark_printk("%s\n", buf);
+ kfree(buf);
+ *fpos += cnt;
+
+ return cnt;
+}
+
+static struct file_operations tracing_max_lat_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_max_lat_read,
+ .write = tracing_max_lat_write,
+};
+
+static struct file_operations tracing_ctrl_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_ctrl_read,
+ .write = tracing_ctrl_write,
+};
+
+static struct file_operations set_tracer_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_set_trace_read,
+ .write = tracing_set_trace_write,
+};
+
+static struct file_operations tracing_pipe_fops = {
+ .open = tracing_open_pipe,
+ .poll = tracing_poll_pipe,
+ .read = tracing_read_pipe,
+ .release = tracing_release_pipe,
+};
+
+static struct file_operations tracing_entries_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_entries_read,
+ .write = tracing_entries_write,
+};
+
+static struct file_operations tracing_mark_fops = {
+ .open = tracing_open_generic,
+ .write = tracing_mark_write,
+};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static ssize_t
+tracing_read_long(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long *p = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = sprintf(buf, "%ld\n", *p);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static struct file_operations tracing_read_long_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_read_long,
+};
+#endif
+
+static struct dentry *d_tracer;
+
+struct dentry *tracing_init_dentry(void)
+{
+ static int once;
+
+ if (d_tracer)
+ return d_tracer;
+
+ d_tracer = debugfs_create_dir("tracing", NULL);
+
+ if (!d_tracer && !once) {
+ once = 1;
+ pr_warning("Could not create debugfs directory 'tracing'\n");
+ return NULL;
+ }
+
+ return d_tracer;
+}
+
+#ifdef CONFIG_FTRACE_SELFTEST
+/* Let selftest have access to static functions in this file */
+#include "trace_selftest.c"
+#endif
+
+static __init int tracer_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
+ &global_trace, &tracing_ctrl_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
+
+ entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
+ NULL, &tracing_iter_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
+
+ entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
+ NULL, &tracing_cpumask_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
+
+ entry = debugfs_create_file("latency_trace", 0444, d_tracer,
+ &global_trace, &tracing_lt_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'latency_trace' entry\n");
+
+ entry = debugfs_create_file("trace", 0444, d_tracer,
+ &global_trace, &tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'trace' entry\n");
+
+ entry = debugfs_create_file("available_tracers", 0444, d_tracer,
+ &global_trace, &show_traces_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'available_tracers' entry\n");
+
+ entry = debugfs_create_file("current_tracer", 0444, d_tracer,
+ &global_trace, &set_tracer_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'current_tracer' entry\n");
+
+ entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
+ &tracing_max_latency,
+ &tracing_max_lat_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'tracing_max_latency' entry\n");
+
+ entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
+ &tracing_thresh, &tracing_max_lat_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'tracing_thresh' entry\n");
+ entry = debugfs_create_file("README", 0644, d_tracer,
+ NULL, &tracing_readme_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'README' entry\n");
+
+ entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
+ NULL, &tracing_pipe_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'trace_pipe' entry\n");
+
+ entry = debugfs_create_file("trace_entries", 0644, d_tracer,
+ &global_trace, &tracing_entries_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'trace_entries' entry\n");
+
+ entry = debugfs_create_file("trace_marker", 0220, d_tracer,
+ NULL, &tracing_mark_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'trace_marker' entry\n");
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+ &ftrace_update_tot_cnt,
+ &tracing_read_long_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'dyn_ftrace_total_info' entry\n");
+#endif
+#ifdef CONFIG_SYSPROF_TRACER
+ init_tracer_sysprof_debugfs(d_tracer);
+#endif
+ return 0;
+}
+
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+{
+ static DEFINE_SPINLOCK(trace_buf_lock);
+ static char trace_buf[TRACE_BUF_SIZE];
+
+ struct ring_buffer_event *event;
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ struct print_entry *entry;
+ unsigned long flags, irq_flags;
+ int cpu, len = 0, size, pc;
+
+ if (!tr->ctrl || tracing_disabled)
+ return 0;
+
+ pc = preempt_count();
+ preempt_disable_notrace();
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+
+ if (unlikely(atomic_read(&data->disabled)))
+ goto out;
+
+ spin_lock_irqsave(&trace_buf_lock, flags);
+ len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+
+ len = min(len, TRACE_BUF_SIZE-1);
+ trace_buf[len] = 0;
+
+ size = sizeof(*entry) + len + 1;
+ event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
+ if (!event)
+ goto out_unlock;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_PRINT;
+ entry->ip = ip;
+
+ memcpy(&entry->buf, trace_buf, len);
+ entry->buf[len] = 0;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ out_unlock:
+ spin_unlock_irqrestore(&trace_buf_lock, flags);
+
+ out:
+ preempt_enable_notrace();
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(trace_vprintk);
+
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+{
+ int ret;
+ va_list ap;
+
+ if (!(trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ va_start(ap, fmt);
+ ret = trace_vprintk(ip, fmt, ap);
+ va_end(ap);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__ftrace_printk);
+
+static int trace_panic_handler(struct notifier_block *this,
+ unsigned long event, void *unused)
+{
+ ftrace_dump();
+ return NOTIFY_OK;
+}
+
+static struct notifier_block trace_panic_notifier = {
+ .notifier_call = trace_panic_handler,
+ .next = NULL,
+ .priority = 150 /* priority: INT_MAX >= x >= 0 */
+};
+
+static int trace_die_handler(struct notifier_block *self,
+ unsigned long val,
+ void *data)
+{
+ switch (val) {
+ case DIE_OOPS:
+ ftrace_dump();
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block trace_die_notifier = {
+ .notifier_call = trace_die_handler,
+ .priority = 200
+};
+
+/*
+ * printk is set to max of 1024, we really don't need it that big.
+ * Nothing should be printing 1000 characters anyway.
+ */
+#define TRACE_MAX_PRINT 1000
+
+/*
+ * Define here KERN_TRACE so that we have one place to modify
+ * it if we decide to change what log level the ftrace dump
+ * should be at.
+ */
+#define KERN_TRACE KERN_INFO
+
+static void
+trace_printk_seq(struct trace_seq *s)
+{
+ /* Probably should print a warning here. */
+ if (s->len >= 1000)
+ s->len = 1000;
+
+ /* should be zero ended, but we are paranoid. */
+ s->buffer[s->len] = 0;
+
+ printk(KERN_TRACE "%s", s->buffer);
+
+ trace_seq_reset(s);
+}
+
+
+void ftrace_dump(void)
+{
+ static DEFINE_SPINLOCK(ftrace_dump_lock);
+ /* use static because iter can be a bit big for the stack */
+ static struct trace_iterator iter;
+ static cpumask_t mask;
+ static int dump_ran;
+ unsigned long flags;
+ int cnt = 0, cpu;
+
+ /* only one dump */
+ spin_lock_irqsave(&ftrace_dump_lock, flags);
+ if (dump_ran)
+ goto out;
+
+ dump_ran = 1;
+
+ /* No turning back! */
+ ftrace_kill();
+
+ for_each_tracing_cpu(cpu) {
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ }
+
+ printk(KERN_TRACE "Dumping ftrace buffer:\n");
+
+ iter.tr = &global_trace;
+ iter.trace = current_trace;
+
+ /*
+ * We need to stop all tracing on all CPUS to read the
+ * the next buffer. This is a bit expensive, but is
+ * not done often. We fill all what we can read,
+ * and then release the locks again.
+ */
+
+ cpus_clear(mask);
+
+ while (!trace_empty(&iter)) {
+
+ if (!cnt)
+ printk(KERN_TRACE "---------------------------------\n");
+
+ cnt++;
+
+ /* reset all but tr, trace, and overruns */
+ memset(&iter.seq, 0,
+ sizeof(struct trace_iterator) -
+ offsetof(struct trace_iterator, seq));
+ iter.iter_flags |= TRACE_FILE_LAT_FMT;
+ iter.pos = -1;
+
+ if (find_next_entry_inc(&iter) != NULL) {
+ print_trace_line(&iter);
+ trace_consume(&iter);
+ }
+
+ trace_printk_seq(&iter.seq);
+ }
+
+ if (!cnt)
+ printk(KERN_TRACE " (ftrace buffer empty)\n");
+ else
+ printk(KERN_TRACE "---------------------------------\n");
+
+ out:
+ spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+}
+
+__init static int tracer_alloc_buffers(void)
+{
+ struct trace_array_cpu *data;
+ int i;
+
+ /* TODO: make the number of buffers hot pluggable with CPUS */
+ tracing_buffer_mask = cpu_possible_map;
+
+ global_trace.buffer = ring_buffer_alloc(trace_buf_size,
+ TRACE_BUFFER_FLAGS);
+ if (!global_trace.buffer) {
+ printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
+ WARN_ON(1);
+ return 0;
+ }
+ global_trace.entries = ring_buffer_size(global_trace.buffer);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ max_tr.buffer = ring_buffer_alloc(trace_buf_size,
+ TRACE_BUFFER_FLAGS);
+ if (!max_tr.buffer) {
+ printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
+ WARN_ON(1);
+ ring_buffer_free(global_trace.buffer);
+ return 0;
+ }
+ max_tr.entries = ring_buffer_size(max_tr.buffer);
+ WARN_ON(max_tr.entries != global_trace.entries);
+#endif
+
+ /* Allocate the first page for all buffers */
+ for_each_tracing_cpu(i) {
+ data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
+ max_tr.data[i] = &per_cpu(max_data, i);
+ }
+
+ trace_init_cmdlines();
+
+ register_tracer(&nop_trace);
+#ifdef CONFIG_BOOT_TRACER
+ register_tracer(&boot_tracer);
+ current_trace = &boot_tracer;
+ current_trace->init(&global_trace);
+#else
+ current_trace = &nop_trace;
+#endif
+
+ /* All seems OK, enable tracing */
+ global_trace.ctrl = tracer_enabled;
+ tracing_disabled = 0;
+
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &trace_panic_notifier);
+
+ register_die_notifier(&trace_die_notifier);
+
+ return 0;
+}
+early_initcall(tracer_alloc_buffers);
+fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
new file mode 100644
index 0000000..8465ad0
--- /dev/null
+++ b/kernel/trace/trace.h
@@ -0,0 +1,422 @@
+#ifndef _LINUX_KERNEL_TRACE_H
+#define _LINUX_KERNEL_TRACE_H
+
+#include <linux/fs.h>
+#include <asm/atomic.h>
+#include <linux/sched.h>
+#include <linux/clocksource.h>
+#include <linux/ring_buffer.h>
+#include <linux/mmiotrace.h>
+#include <linux/ftrace.h>
+
+enum trace_type {
+ __TRACE_FIRST_TYPE = 0,
+
+ TRACE_FN,
+ TRACE_CTX,
+ TRACE_WAKE,
+ TRACE_CONT,
+ TRACE_STACK,
+ TRACE_PRINT,
+ TRACE_SPECIAL,
+ TRACE_MMIO_RW,
+ TRACE_MMIO_MAP,
+ TRACE_BOOT,
+
+ __TRACE_LAST_TYPE
+};
+
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+ unsigned char type;
+ unsigned char cpu;
+ unsigned char flags;
+ unsigned char preempt_count;
+ int pid;
+};
+
+/*
+ * Function trace entry - function address and parent function addres:
+ */
+struct ftrace_entry {
+ struct trace_entry ent;
+ unsigned long ip;
+ unsigned long parent_ip;
+};
+extern struct tracer boot_tracer;
+
+/*
+ * Context switch trace entry - which task (and prio) we switched from/to:
+ */
+struct ctx_switch_entry {
+ struct trace_entry ent;
+ unsigned int prev_pid;
+ unsigned char prev_prio;
+ unsigned char prev_state;
+ unsigned int next_pid;
+ unsigned char next_prio;
+ unsigned char next_state;
+ unsigned int next_cpu;
+};
+
+/*
+ * Special (free-form) trace entry:
+ */
+struct special_entry {
+ struct trace_entry ent;
+ unsigned long arg1;
+ unsigned long arg2;
+ unsigned long arg3;
+};
+
+/*
+ * Stack-trace entry:
+ */
+
+#define FTRACE_STACK_ENTRIES 8
+
+struct stack_entry {
+ struct trace_entry ent;
+ unsigned long caller[FTRACE_STACK_ENTRIES];
+};
+
+/*
+ * ftrace_printk entry:
+ */
+struct print_entry {
+ struct trace_entry ent;
+ unsigned long ip;
+ char buf[];
+};
+
+#define TRACE_OLD_SIZE 88
+
+struct trace_field_cont {
+ unsigned char type;
+ /* Temporary till we get rid of this completely */
+ char buf[TRACE_OLD_SIZE - 1];
+};
+
+struct trace_mmiotrace_rw {
+ struct trace_entry ent;
+ struct mmiotrace_rw rw;
+};
+
+struct trace_mmiotrace_map {
+ struct trace_entry ent;
+ struct mmiotrace_map map;
+};
+
+struct trace_boot {
+ struct trace_entry ent;
+ struct boot_trace initcall;
+};
+
+/*
+ * trace_flag_type is an enumeration that holds different
+ * states when a trace occurs. These are:
+ * IRQS_OFF - interrupts were disabled
+ * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
+ * NEED_RESCED - reschedule is requested
+ * HARDIRQ - inside an interrupt handler
+ * SOFTIRQ - inside a softirq handler
+ * CONT - multiple entries hold the trace item
+ */
+enum trace_flag_type {
+ TRACE_FLAG_IRQS_OFF = 0x01,
+ TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
+ TRACE_FLAG_NEED_RESCHED = 0x04,
+ TRACE_FLAG_HARDIRQ = 0x08,
+ TRACE_FLAG_SOFTIRQ = 0x10,
+ TRACE_FLAG_CONT = 0x20,
+};
+
+#define TRACE_BUF_SIZE 1024
+
+/*
+ * The CPU trace array - it consists of thousands of trace entries
+ * plus some other descriptor data: (for example which task started
+ * the trace, etc.)
+ */
+struct trace_array_cpu {
+ atomic_t disabled;
+
+ /* these fields get copied into max-trace: */
+ unsigned long trace_idx;
+ unsigned long overrun;
+ unsigned long saved_latency;
+ unsigned long critical_start;
+ unsigned long critical_end;
+ unsigned long critical_sequence;
+ unsigned long nice;
+ unsigned long policy;
+ unsigned long rt_priority;
+ cycle_t preempt_timestamp;
+ pid_t pid;
+ uid_t uid;
+ char comm[TASK_COMM_LEN];
+};
+
+struct trace_iterator;
+
+/*
+ * The trace array - an array of per-CPU trace arrays. This is the
+ * highest level data structure that individual tracers deal with.
+ * They have on/off state as well:
+ */
+struct trace_array {
+ struct ring_buffer *buffer;
+ unsigned long entries;
+ long ctrl;
+ int cpu;
+ cycle_t time_start;
+ struct task_struct *waiter;
+ struct trace_array_cpu *data[NR_CPUS];
+};
+
+#define FTRACE_CMP_TYPE(var, type) \
+ __builtin_types_compatible_p(typeof(var), type *)
+
+#undef IF_ASSIGN
+#define IF_ASSIGN(var, entry, etype, id) \
+ if (FTRACE_CMP_TYPE(var, etype)) { \
+ var = (typeof(var))(entry); \
+ WARN_ON(id && (entry)->type != id); \
+ break; \
+ }
+
+/* Will cause compile errors if type is not found. */
+extern void __ftrace_bad_type(void);
+
+/*
+ * The trace_assign_type is a verifier that the entry type is
+ * the same as the type being assigned. To add new types simply
+ * add a line with the following format:
+ *
+ * IF_ASSIGN(var, ent, type, id);
+ *
+ * Where "type" is the trace type that includes the trace_entry
+ * as the "ent" item. And "id" is the trace identifier that is
+ * used in the trace_type enum.
+ *
+ * If the type can have more than one id, then use zero.
+ */
+#define trace_assign_type(var, ent) \
+ do { \
+ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
+ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
+ IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
+ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
+ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
+ IF_ASSIGN(var, ent, struct special_entry, 0); \
+ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
+ TRACE_MMIO_RW); \
+ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
+ TRACE_MMIO_MAP); \
+ IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \
+ __ftrace_bad_type(); \
+ } while (0)
+
+/* Return values for print_line callback */
+enum print_line_t {
+ TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
+ TRACE_TYPE_HANDLED = 1,
+ TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
+};
+
+/*
+ * A specific tracer, represented by methods that operate on a trace array:
+ */
+struct tracer {
+ const char *name;
+ void (*init)(struct trace_array *tr);
+ void (*reset)(struct trace_array *tr);
+ void (*open)(struct trace_iterator *iter);
+ void (*pipe_open)(struct trace_iterator *iter);
+ void (*close)(struct trace_iterator *iter);
+ void (*start)(struct trace_iterator *iter);
+ void (*stop)(struct trace_iterator *iter);
+ ssize_t (*read)(struct trace_iterator *iter,
+ struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos);
+ void (*ctrl_update)(struct trace_array *tr);
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ int (*selftest)(struct tracer *trace,
+ struct trace_array *tr);
+#endif
+ enum print_line_t (*print_line)(struct trace_iterator *iter);
+ struct tracer *next;
+ int print_max;
+};
+
+struct trace_seq {
+ unsigned char buffer[PAGE_SIZE];
+ unsigned int len;
+ unsigned int readpos;
+};
+
+/*
+ * Trace iterator - used by printout routines who present trace
+ * results to users and which routines might sleep, etc:
+ */
+struct trace_iterator {
+ struct trace_array *tr;
+ struct tracer *trace;
+ void *private;
+ struct ring_buffer_iter *buffer_iter[NR_CPUS];
+
+ /* The below is zeroed out in pipe_read */
+ struct trace_seq seq;
+ struct trace_entry *ent;
+ int cpu;
+ u64 ts;
+
+ unsigned long iter_flags;
+ loff_t pos;
+ long idx;
+};
+
+void trace_wake_up(void);
+void tracing_reset(struct trace_array *tr, int cpu);
+int tracing_open_generic(struct inode *inode, struct file *filp);
+struct dentry *tracing_init_dentry(void);
+void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
+
+struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
+ struct trace_array_cpu *data);
+void tracing_generic_entry_update(struct trace_entry *entry,
+ unsigned long flags,
+ int pc);
+
+void ftrace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long ip,
+ unsigned long parent_ip,
+ unsigned long flags, int pc);
+void tracing_sched_switch_trace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long flags, int pc);
+void tracing_record_cmdline(struct task_struct *tsk);
+
+void tracing_sched_wakeup_trace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct task_struct *wakee,
+ struct task_struct *cur,
+ unsigned long flags, int pc);
+void trace_special(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long arg1,
+ unsigned long arg2,
+ unsigned long arg3, int pc);
+void trace_function(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long ip,
+ unsigned long parent_ip,
+ unsigned long flags, int pc);
+
+void tracing_start_cmdline_record(void);
+void tracing_stop_cmdline_record(void);
+int register_tracer(struct tracer *type);
+void unregister_tracer(struct tracer *type);
+
+extern unsigned long nsecs_to_usecs(unsigned long nsecs);
+
+extern unsigned long tracing_max_latency;
+extern unsigned long tracing_thresh;
+
+void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
+void update_max_tr_single(struct trace_array *tr,
+ struct task_struct *tsk, int cpu);
+
+extern cycle_t ftrace_now(int cpu);
+
+#ifdef CONFIG_FUNCTION_TRACER
+void tracing_start_function_trace(void);
+void tracing_stop_function_trace(void);
+#else
+# define tracing_start_function_trace() do { } while (0)
+# define tracing_stop_function_trace() do { } while (0)
+#endif
+
+#ifdef CONFIG_CONTEXT_SWITCH_TRACER
+typedef void
+(*tracer_switch_func_t)(void *private,
+ void *__rq,
+ struct task_struct *prev,
+ struct task_struct *next);
+
+struct tracer_switch_ops {
+ tracer_switch_func_t func;
+ void *private;
+ struct tracer_switch_ops *next;
+};
+
+#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_update_tot_cnt;
+#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
+extern int DYN_FTRACE_TEST_NAME(void);
+#endif
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+extern int trace_selftest_startup_function(struct tracer *trace,
+ struct trace_array *tr);
+extern int trace_selftest_startup_irqsoff(struct tracer *trace,
+ struct trace_array *tr);
+extern int trace_selftest_startup_preemptoff(struct tracer *trace,
+ struct trace_array *tr);
+extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
+ struct trace_array *tr);
+extern int trace_selftest_startup_wakeup(struct tracer *trace,
+ struct trace_array *tr);
+extern int trace_selftest_startup_nop(struct tracer *trace,
+ struct trace_array *tr);
+extern int trace_selftest_startup_sched_switch(struct tracer *trace,
+ struct trace_array *tr);
+extern int trace_selftest_startup_sysprof(struct tracer *trace,
+ struct trace_array *tr);
+#endif /* CONFIG_FTRACE_STARTUP_TEST */
+
+extern void *head_page(struct trace_array_cpu *data);
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
+extern void trace_seq_print_cont(struct trace_seq *s,
+ struct trace_iterator *iter);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+ size_t cnt);
+extern long ns2usecs(cycle_t nsec);
+extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
+
+extern unsigned long trace_flags;
+
+/*
+ * trace_iterator_flags is an enumeration that defines bit
+ * positions into trace_flags that controls the output.
+ *
+ * NOTE: These bits must match the trace_options array in
+ * trace.c.
+ */
+enum trace_iterator_flags {
+ TRACE_ITER_PRINT_PARENT = 0x01,
+ TRACE_ITER_SYM_OFFSET = 0x02,
+ TRACE_ITER_SYM_ADDR = 0x04,
+ TRACE_ITER_VERBOSE = 0x08,
+ TRACE_ITER_RAW = 0x10,
+ TRACE_ITER_HEX = 0x20,
+ TRACE_ITER_BIN = 0x40,
+ TRACE_ITER_BLOCK = 0x80,
+ TRACE_ITER_STACKTRACE = 0x100,
+ TRACE_ITER_SCHED_TREE = 0x200,
+ TRACE_ITER_PRINTK = 0x400,
+};
+
+extern struct tracer nop_trace;
+
+#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 0000000..d0a5e50
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,126 @@
+/*
+ * ring buffer based initcalls tracer
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+
+#include "trace.h"
+
+static struct trace_array *boot_trace;
+static int trace_boot_enabled;
+
+
+/* Should be started after do_pre_smp_initcalls() in init/main.c */
+void start_boot_trace(void)
+{
+ trace_boot_enabled = 1;
+}
+
+void stop_boot_trace(void)
+{
+ trace_boot_enabled = 0;
+}
+
+void reset_boot_trace(struct trace_array *tr)
+{
+ stop_boot_trace();
+}
+
+static void boot_trace_init(struct trace_array *tr)
+{
+ int cpu;
+ boot_trace = tr;
+
+ trace_boot_enabled = 0;
+
+ for_each_cpu_mask(cpu, cpu_possible_map)
+ tracing_reset(tr, cpu);
+}
+
+static void boot_trace_ctrl_update(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_boot_trace();
+ else
+ stop_boot_trace();
+}
+
+static enum print_line_t initcall_print_line(struct trace_iterator *iter)
+{
+ int ret;
+ struct trace_entry *entry = iter->ent;
+ struct trace_boot *field = (struct trace_boot *)entry;
+ struct boot_trace *it = &field->initcall;
+ struct trace_seq *s = &iter->seq;
+ struct timespec calltime = ktime_to_timespec(it->calltime);
+ struct timespec rettime = ktime_to_timespec(it->rettime);
+
+ if (entry->type == TRACE_BOOT) {
+ ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
+ calltime.tv_sec,
+ calltime.tv_nsec,
+ it->func, it->caller);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
+ "returned %d after %lld msecs\n",
+ rettime.tv_sec,
+ rettime.tv_nsec,
+ it->func, it->result, it->duration);
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_HANDLED;
+ }
+ return TRACE_TYPE_UNHANDLED;
+}
+
+struct tracer boot_tracer __read_mostly =
+{
+ .name = "initcall",
+ .init = boot_trace_init,
+ .reset = reset_boot_trace,
+ .ctrl_update = boot_trace_ctrl_update,
+ .print_line = initcall_print_line,
+};
+
+void trace_boot(struct boot_trace *it, initcall_t fn)
+{
+ struct ring_buffer_event *event;
+ struct trace_boot *entry;
+ struct trace_array_cpu *data;
+ unsigned long irq_flags;
+ struct trace_array *tr = boot_trace;
+
+ if (!trace_boot_enabled)
+ return;
+
+ /* Get its name now since this function could
+ * disappear because it is in the .init section.
+ */
+ sprint_symbol(it->func, (unsigned long)fn);
+ preempt_disable();
+ data = tr->data[smp_processor_id()];
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, 0);
+ entry->ent.type = TRACE_BOOT;
+ entry->initcall = *it;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+
+ out:
+ preempt_enable();
+}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
new file mode 100644
index 0000000..0f85a64
--- /dev/null
+++ b/kernel/trace/trace_functions.c
@@ -0,0 +1,81 @@
+/*
+ * ring buffer based function tracer
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * Based on code from the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+static void function_reset(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+}
+
+static void start_function_trace(struct trace_array *tr)
+{
+ tr->cpu = get_cpu();
+ function_reset(tr);
+ put_cpu();
+
+ tracing_start_cmdline_record();
+ tracing_start_function_trace();
+}
+
+static void stop_function_trace(struct trace_array *tr)
+{
+ tracing_stop_function_trace();
+ tracing_stop_cmdline_record();
+}
+
+static void function_trace_init(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_function_trace(tr);
+}
+
+static void function_trace_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_function_trace(tr);
+}
+
+static void function_trace_ctrl_update(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_function_trace(tr);
+ else
+ stop_function_trace(tr);
+}
+
+static struct tracer function_trace __read_mostly =
+{
+ .name = "function",
+ .init = function_trace_init,
+ .reset = function_trace_reset,
+ .ctrl_update = function_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_function,
+#endif
+};
+
+static __init int init_function_trace(void)
+{
+ return register_tracer(&function_trace);
+}
+
+device_initcall(init_function_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
new file mode 100644
index 0000000..9c74071
--- /dev/null
+++ b/kernel/trace/trace_irqsoff.c
@@ -0,0 +1,493 @@
+/*
+ * trace irqs off criticall timings
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * From code in the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/kallsyms.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+static struct trace_array *irqsoff_trace __read_mostly;
+static int tracer_enabled __read_mostly;
+
+static DEFINE_PER_CPU(int, tracing_cpu);
+
+static DEFINE_SPINLOCK(max_trace_lock);
+
+enum {
+ TRACER_IRQS_OFF = (1 << 1),
+ TRACER_PREEMPT_OFF = (1 << 2),
+};
+
+static int trace_type __read_mostly;
+
+#ifdef CONFIG_PREEMPT_TRACER
+static inline int
+preempt_trace(void)
+{
+ return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
+}
+#else
+# define preempt_trace() (0)
+#endif
+
+#ifdef CONFIG_IRQSOFF_TRACER
+static inline int
+irq_trace(void)
+{
+ return ((trace_type & TRACER_IRQS_OFF) &&
+ irqs_disabled());
+}
+#else
+# define irq_trace() (0)
+#endif
+
+/*
+ * Sequence count - we record it when starting a measurement and
+ * skip the latency if the sequence has changed - some other section
+ * did a maximum and could disturb our measurement with serial console
+ * printouts, etc. Truly coinciding maximum latencies should be rare
+ * and what happens together happens separately as well, so this doesnt
+ * decrease the validity of the maximum found:
+ */
+static __cacheline_aligned_in_smp unsigned long max_sequence;
+
+#ifdef CONFIG_FUNCTION_TRACER
+/*
+ * irqsoff uses its own tracer function to keep the overhead down:
+ */
+static void
+irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = irqsoff_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+
+ /*
+ * Does not matter if we preempt. We test the flags
+ * afterward, to see if irqs are disabled or not.
+ * If we preempt and get a false positive, the flags
+ * test will fail.
+ */
+ cpu = raw_smp_processor_id();
+ if (likely(!per_cpu(tracing_cpu, cpu)))
+ return;
+
+ local_save_flags(flags);
+ /* slight chance to get a false positive on tracing_cpu */
+ if (!irqs_disabled_flags(flags))
+ return;
+
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ trace_function(tr, data, ip, parent_ip, flags, preempt_count());
+
+ atomic_dec(&data->disabled);
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = irqsoff_tracer_call,
+};
+#endif /* CONFIG_FUNCTION_TRACER */
+
+/*
+ * Should this new latency be reported/recorded?
+ */
+static int report_latency(cycle_t delta)
+{
+ if (tracing_thresh) {
+ if (delta < tracing_thresh)
+ return 0;
+ } else {
+ if (delta <= tracing_max_latency)
+ return 0;
+ }
+ return 1;
+}
+
+static void
+check_critical_timing(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long parent_ip,
+ int cpu)
+{
+ unsigned long latency, t0, t1;
+ cycle_t T0, T1, delta;
+ unsigned long flags;
+ int pc;
+
+ /*
+ * usecs conversion is slow so we try to delay the conversion
+ * as long as possible:
+ */
+ T0 = data->preempt_timestamp;
+ T1 = ftrace_now(cpu);
+ delta = T1-T0;
+
+ local_save_flags(flags);
+
+ pc = preempt_count();
+
+ if (!report_latency(delta))
+ goto out;
+
+ spin_lock_irqsave(&max_trace_lock, flags);
+
+ /* check if we are still the max latency */
+ if (!report_latency(delta))
+ goto out_unlock;
+
+ trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
+
+ latency = nsecs_to_usecs(delta);
+
+ if (data->critical_sequence != max_sequence)
+ goto out_unlock;
+
+ tracing_max_latency = delta;
+ t0 = nsecs_to_usecs(T0);
+ t1 = nsecs_to_usecs(T1);
+
+ data->critical_end = parent_ip;
+
+ update_max_tr_single(tr, current, cpu);
+
+ max_sequence++;
+
+out_unlock:
+ spin_unlock_irqrestore(&max_trace_lock, flags);
+
+out:
+ data->critical_sequence = max_sequence;
+ data->preempt_timestamp = ftrace_now(cpu);
+ tracing_reset(tr, cpu);
+ trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
+}
+
+static inline void
+start_critical_timing(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu;
+ struct trace_array *tr = irqsoff_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+
+ if (likely(!tracer_enabled))
+ return;
+
+ cpu = raw_smp_processor_id();
+
+ if (per_cpu(tracing_cpu, cpu))
+ return;
+
+ data = tr->data[cpu];
+
+ if (unlikely(!data) || atomic_read(&data->disabled))
+ return;
+
+ atomic_inc(&data->disabled);
+
+ data->critical_sequence = max_sequence;
+ data->preempt_timestamp = ftrace_now(cpu);
+ data->critical_start = parent_ip ? : ip;
+ tracing_reset(tr, cpu);
+
+ local_save_flags(flags);
+
+ trace_function(tr, data, ip, parent_ip, flags, preempt_count());
+
+ per_cpu(tracing_cpu, cpu) = 1;
+
+ atomic_dec(&data->disabled);
+}
+
+static inline void
+stop_critical_timing(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu;
+ struct trace_array *tr = irqsoff_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+
+ cpu = raw_smp_processor_id();
+ /* Always clear the tracing cpu on stopping the trace */
+ if (unlikely(per_cpu(tracing_cpu, cpu)))
+ per_cpu(tracing_cpu, cpu) = 0;
+ else
+ return;
+
+ if (!tracer_enabled)
+ return;
+
+ data = tr->data[cpu];
+
+ if (unlikely(!data) ||
+ !data->critical_start || atomic_read(&data->disabled))
+ return;
+
+ atomic_inc(&data->disabled);
+
+ local_save_flags(flags);
+ trace_function(tr, data, ip, parent_ip, flags, preempt_count());
+ check_critical_timing(tr, data, parent_ip ? : ip, cpu);
+ data->critical_start = 0;
+ atomic_dec(&data->disabled);
+}
+
+/* start and stop critical timings used to for stoppage (in idle) */
+void start_critical_timings(void)
+{
+ if (preempt_trace() || irq_trace())
+ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL_GPL(start_critical_timings);
+
+void stop_critical_timings(void)
+{
+ if (preempt_trace() || irq_trace())
+ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL_GPL(stop_critical_timings);
+
+#ifdef CONFIG_IRQSOFF_TRACER
+#ifdef CONFIG_PROVE_LOCKING
+void time_hardirqs_on(unsigned long a0, unsigned long a1)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(a0, a1);
+}
+
+void time_hardirqs_off(unsigned long a0, unsigned long a1)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(a0, a1);
+}
+
+#else /* !CONFIG_PROVE_LOCKING */
+
+/*
+ * Stubs:
+ */
+
+void early_boot_irqs_off(void)
+{
+}
+
+void early_boot_irqs_on(void)
+{
+}
+
+void trace_softirqs_on(unsigned long ip)
+{
+}
+
+void trace_softirqs_off(unsigned long ip)
+{
+}
+
+inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+
+/*
+ * We are only interested in hardirq on/off events:
+ */
+void trace_hardirqs_on(void)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+void trace_hardirqs_off(void)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+void trace_hardirqs_on_caller(unsigned long caller_addr)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
+
+void trace_hardirqs_off_caller(unsigned long caller_addr)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
+
+#endif /* CONFIG_PROVE_LOCKING */
+#endif /* CONFIG_IRQSOFF_TRACER */
+
+#ifdef CONFIG_PREEMPT_TRACER
+void trace_preempt_on(unsigned long a0, unsigned long a1)
+{
+ if (preempt_trace())
+ stop_critical_timing(a0, a1);
+}
+
+void trace_preempt_off(unsigned long a0, unsigned long a1)
+{
+ if (preempt_trace())
+ start_critical_timing(a0, a1);
+}
+#endif /* CONFIG_PREEMPT_TRACER */
+
+static void start_irqsoff_tracer(struct trace_array *tr)
+{
+ register_ftrace_function(&trace_ops);
+ tracer_enabled = 1;
+}
+
+static void stop_irqsoff_tracer(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ unregister_ftrace_function(&trace_ops);
+}
+
+static void __irqsoff_tracer_init(struct trace_array *tr)
+{
+ irqsoff_trace = tr;
+ /* make sure that the tracer is visible */
+ smp_wmb();
+
+ if (tr->ctrl)
+ start_irqsoff_tracer(tr);
+}
+
+static void irqsoff_tracer_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_irqsoff_tracer(tr);
+}
+
+static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_irqsoff_tracer(tr);
+ else
+ stop_irqsoff_tracer(tr);
+}
+
+static void irqsoff_tracer_open(struct trace_iterator *iter)
+{
+ /* stop the trace while dumping */
+ if (iter->tr->ctrl)
+ stop_irqsoff_tracer(iter->tr);
+}
+
+static void irqsoff_tracer_close(struct trace_iterator *iter)
+{
+ if (iter->tr->ctrl)
+ start_irqsoff_tracer(iter->tr);
+}
+
+#ifdef CONFIG_IRQSOFF_TRACER
+static void irqsoff_tracer_init(struct trace_array *tr)
+{
+ trace_type = TRACER_IRQS_OFF;
+
+ __irqsoff_tracer_init(tr);
+}
+static struct tracer irqsoff_tracer __read_mostly =
+{
+ .name = "irqsoff",
+ .init = irqsoff_tracer_init,
+ .reset = irqsoff_tracer_reset,
+ .open = irqsoff_tracer_open,
+ .close = irqsoff_tracer_close,
+ .ctrl_update = irqsoff_tracer_ctrl_update,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_irqsoff,
+#endif
+};
+# define register_irqsoff(trace) register_tracer(&trace)
+#else
+# define register_irqsoff(trace) do { } while (0)
+#endif
+
+#ifdef CONFIG_PREEMPT_TRACER
+static void preemptoff_tracer_init(struct trace_array *tr)
+{
+ trace_type = TRACER_PREEMPT_OFF;
+
+ __irqsoff_tracer_init(tr);
+}
+
+static struct tracer preemptoff_tracer __read_mostly =
+{
+ .name = "preemptoff",
+ .init = preemptoff_tracer_init,
+ .reset = irqsoff_tracer_reset,
+ .open = irqsoff_tracer_open,
+ .close = irqsoff_tracer_close,
+ .ctrl_update = irqsoff_tracer_ctrl_update,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_preemptoff,
+#endif
+};
+# define register_preemptoff(trace) register_tracer(&trace)
+#else
+# define register_preemptoff(trace) do { } while (0)
+#endif
+
+#if defined(CONFIG_IRQSOFF_TRACER) && \
+ defined(CONFIG_PREEMPT_TRACER)
+
+static void preemptirqsoff_tracer_init(struct trace_array *tr)
+{
+ trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
+
+ __irqsoff_tracer_init(tr);
+}
+
+static struct tracer preemptirqsoff_tracer __read_mostly =
+{
+ .name = "preemptirqsoff",
+ .init = preemptirqsoff_tracer_init,
+ .reset = irqsoff_tracer_reset,
+ .open = irqsoff_tracer_open,
+ .close = irqsoff_tracer_close,
+ .ctrl_update = irqsoff_tracer_ctrl_update,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_preemptirqsoff,
+#endif
+};
+
+# define register_preemptirqsoff(trace) register_tracer(&trace)
+#else
+# define register_preemptirqsoff(trace) do { } while (0)
+#endif
+
+__init static int init_irqsoff_tracer(void)
+{
+ register_irqsoff(irqsoff_tracer);
+ register_preemptoff(preemptoff_tracer);
+ register_preemptirqsoff(preemptirqsoff_tracer);
+
+ return 0;
+}
+device_initcall(init_irqsoff_tracer);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
new file mode 100644
index 0000000..e62cbf7
--- /dev/null
+++ b/kernel/trace/trace_mmiotrace.c
@@ -0,0 +1,375 @@
+/*
+ * Memory mapped I/O tracing
+ *
+ * Copyright (C) 2008 Pekka Paalanen <pq@iki.fi>
+ */
+
+#define DEBUG 1
+
+#include <linux/kernel.h>
+#include <linux/mmiotrace.h>
+#include <linux/pci.h>
+
+#include "trace.h"
+
+struct header_iter {
+ struct pci_dev *dev;
+};
+
+static struct trace_array *mmio_trace_array;
+static bool overrun_detected;
+static unsigned long prev_overruns;
+
+static void mmio_reset_data(struct trace_array *tr)
+{
+ int cpu;
+
+ overrun_detected = false;
+ prev_overruns = 0;
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+}
+
+static void mmio_trace_init(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ mmio_trace_array = tr;
+ if (tr->ctrl) {
+ mmio_reset_data(tr);
+ enable_mmiotrace();
+ }
+}
+
+static void mmio_trace_reset(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ if (tr->ctrl)
+ disable_mmiotrace();
+ mmio_reset_data(tr);
+ mmio_trace_array = NULL;
+}
+
+static void mmio_trace_ctrl_update(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ if (tr->ctrl) {
+ mmio_reset_data(tr);
+ enable_mmiotrace();
+ } else {
+ disable_mmiotrace();
+ }
+}
+
+static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
+{
+ int ret = 0;
+ int i;
+ resource_size_t start, end;
+ const struct pci_driver *drv = pci_dev_driver(dev);
+
+ /* XXX: incomplete checks for trace_seq_printf() return value */
+ ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
+ dev->bus->number, dev->devfn,
+ dev->vendor, dev->device, dev->irq);
+ /*
+ * XXX: is pci_resource_to_user() appropriate, since we are
+ * supposed to interpret the __ioremap() phys_addr argument based on
+ * these printed values?
+ */
+ for (i = 0; i < 7; i++) {
+ pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+ ret += trace_seq_printf(s, " %llx",
+ (unsigned long long)(start |
+ (dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
+ }
+ for (i = 0; i < 7; i++) {
+ pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+ ret += trace_seq_printf(s, " %llx",
+ dev->resource[i].start < dev->resource[i].end ?
+ (unsigned long long)(end - start) + 1 : 0);
+ }
+ if (drv)
+ ret += trace_seq_printf(s, " %s\n", drv->name);
+ else
+ ret += trace_seq_printf(s, " \n");
+ return ret;
+}
+
+static void destroy_header_iter(struct header_iter *hiter)
+{
+ if (!hiter)
+ return;
+ pci_dev_put(hiter->dev);
+ kfree(hiter);
+}
+
+static void mmio_pipe_open(struct trace_iterator *iter)
+{
+ struct header_iter *hiter;
+ struct trace_seq *s = &iter->seq;
+
+ trace_seq_printf(s, "VERSION 20070824\n");
+
+ hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
+ if (!hiter)
+ return;
+
+ hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
+ iter->private = hiter;
+}
+
+/* XXX: This is not called when the pipe is closed! */
+static void mmio_close(struct trace_iterator *iter)
+{
+ struct header_iter *hiter = iter->private;
+ destroy_header_iter(hiter);
+ iter->private = NULL;
+}
+
+static unsigned long count_overruns(struct trace_iterator *iter)
+{
+ unsigned long cnt = 0;
+ unsigned long over = ring_buffer_overruns(iter->tr->buffer);
+
+ if (over > prev_overruns)
+ cnt = over - prev_overruns;
+ prev_overruns = over;
+ return cnt;
+}
+
+static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp,
+ char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ ssize_t ret;
+ struct header_iter *hiter = iter->private;
+ struct trace_seq *s = &iter->seq;
+ unsigned long n;
+
+ n = count_overruns(iter);
+ if (n) {
+ /* XXX: This is later than where events were lost. */
+ trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n);
+ if (!overrun_detected)
+ pr_warning("mmiotrace has lost events.\n");
+ overrun_detected = true;
+ goto print_out;
+ }
+
+ if (!hiter)
+ return 0;
+
+ mmio_print_pcidev(s, hiter->dev);
+ hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, hiter->dev);
+
+ if (!hiter->dev) {
+ destroy_header_iter(hiter);
+ iter->private = NULL;
+ }
+
+print_out:
+ ret = trace_seq_to_user(s, ubuf, cnt);
+ return (ret == -EBUSY) ? 0 : ret;
+}
+
+static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_mmiotrace_rw *field;
+ struct mmiotrace_rw *rw;
+ struct trace_seq *s = &iter->seq;
+ unsigned long long t = ns2usecs(iter->ts);
+ unsigned long usec_rem = do_div(t, 1000000ULL);
+ unsigned secs = (unsigned long)t;
+ int ret = 1;
+
+ trace_assign_type(field, entry);
+ rw = &field->rw;
+
+ switch (rw->opcode) {
+ case MMIO_READ:
+ ret = trace_seq_printf(s,
+ "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
+ rw->width, secs, usec_rem, rw->map_id,
+ (unsigned long long)rw->phys,
+ rw->value, rw->pc, 0);
+ break;
+ case MMIO_WRITE:
+ ret = trace_seq_printf(s,
+ "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
+ rw->width, secs, usec_rem, rw->map_id,
+ (unsigned long long)rw->phys,
+ rw->value, rw->pc, 0);
+ break;
+ case MMIO_UNKNOWN_OP:
+ ret = trace_seq_printf(s,
+ "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n",
+ secs, usec_rem, rw->map_id,
+ (unsigned long long)rw->phys,
+ (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
+ (rw->value >> 0) & 0xff, rw->pc, 0);
+ break;
+ default:
+ ret = trace_seq_printf(s, "rw what?\n");
+ break;
+ }
+ if (ret)
+ return TRACE_TYPE_HANDLED;
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t mmio_print_map(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_mmiotrace_map *field;
+ struct mmiotrace_map *m;
+ struct trace_seq *s = &iter->seq;
+ unsigned long long t = ns2usecs(iter->ts);
+ unsigned long usec_rem = do_div(t, 1000000ULL);
+ unsigned secs = (unsigned long)t;
+ int ret;
+
+ trace_assign_type(field, entry);
+ m = &field->map;
+
+ switch (m->opcode) {
+ case MMIO_PROBE:
+ ret = trace_seq_printf(s,
+ "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
+ secs, usec_rem, m->map_id,
+ (unsigned long long)m->phys, m->virt, m->len,
+ 0UL, 0);
+ break;
+ case MMIO_UNPROBE:
+ ret = trace_seq_printf(s,
+ "UNMAP %lu.%06lu %d 0x%lx %d\n",
+ secs, usec_rem, m->map_id, 0UL, 0);
+ break;
+ default:
+ ret = trace_seq_printf(s, "map what?\n");
+ break;
+ }
+ if (ret)
+ return TRACE_TYPE_HANDLED;
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct print_entry *print = (struct print_entry *)entry;
+ const char *msg = print->buf;
+ struct trace_seq *s = &iter->seq;
+ unsigned long long t = ns2usecs(iter->ts);
+ unsigned long usec_rem = do_div(t, 1000000ULL);
+ unsigned secs = (unsigned long)t;
+ int ret;
+
+ /* The trailing newline must be in the message. */
+ ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t mmio_print_line(struct trace_iterator *iter)
+{
+ switch (iter->ent->type) {
+ case TRACE_MMIO_RW:
+ return mmio_print_rw(iter);
+ case TRACE_MMIO_MAP:
+ return mmio_print_map(iter);
+ case TRACE_PRINT:
+ return mmio_print_mark(iter);
+ default:
+ return TRACE_TYPE_HANDLED; /* ignore unknown entries */
+ }
+}
+
+static struct tracer mmio_tracer __read_mostly =
+{
+ .name = "mmiotrace",
+ .init = mmio_trace_init,
+ .reset = mmio_trace_reset,
+ .pipe_open = mmio_pipe_open,
+ .close = mmio_close,
+ .read = mmio_read,
+ .ctrl_update = mmio_trace_ctrl_update,
+ .print_line = mmio_print_line,
+};
+
+__init static int init_mmio_trace(void)
+{
+ return register_tracer(&mmio_tracer);
+}
+device_initcall(init_mmio_trace);
+
+static void __trace_mmiotrace_rw(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct mmiotrace_rw *rw)
+{
+ struct ring_buffer_event *event;
+ struct trace_mmiotrace_rw *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+ entry->ent.type = TRACE_MMIO_RW;
+ entry->rw = *rw;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+}
+
+void mmio_trace_rw(struct mmiotrace_rw *rw)
+{
+ struct trace_array *tr = mmio_trace_array;
+ struct trace_array_cpu *data = tr->data[smp_processor_id()];
+ __trace_mmiotrace_rw(tr, data, rw);
+}
+
+static void __trace_mmiotrace_map(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct mmiotrace_map *map)
+{
+ struct ring_buffer_event *event;
+ struct trace_mmiotrace_map *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+ entry->ent.type = TRACE_MMIO_MAP;
+ entry->map = *map;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+}
+
+void mmio_trace_mapping(struct mmiotrace_map *map)
+{
+ struct trace_array *tr = mmio_trace_array;
+ struct trace_array_cpu *data;
+
+ preempt_disable();
+ data = tr->data[smp_processor_id()];
+ __trace_mmiotrace_map(tr, data, map);
+ preempt_enable();
+}
+
+int mmio_trace_printk(const char *fmt, va_list args)
+{
+ return trace_vprintk(0, fmt, args);
+}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 0000000..4592b48
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,64 @@
+/*
+ * nop tracer
+ *
+ * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array *ctx_trace;
+
+static void start_nop_trace(struct trace_array *tr)
+{
+ /* Nothing to do! */
+}
+
+static void stop_nop_trace(struct trace_array *tr)
+{
+ /* Nothing to do! */
+}
+
+static void nop_trace_init(struct trace_array *tr)
+{
+ int cpu;
+ ctx_trace = tr;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ if (tr->ctrl)
+ start_nop_trace(tr);
+}
+
+static void nop_trace_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_nop_trace(tr);
+}
+
+static void nop_trace_ctrl_update(struct trace_array *tr)
+{
+ /* When starting a new trace, reset the buffers */
+ if (tr->ctrl)
+ start_nop_trace(tr);
+ else
+ stop_nop_trace(tr);
+}
+
+struct tracer nop_trace __read_mostly =
+{
+ .name = "nop",
+ .init = nop_trace_init,
+ .reset = nop_trace_reset,
+ .ctrl_update = nop_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_nop,
+#endif
+};
+
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
new file mode 100644
index 0000000..b8f56be
--- /dev/null
+++ b/kernel/trace/trace_sched_switch.c
@@ -0,0 +1,211 @@
+/*
+ * trace context switch
+ *
+ * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <trace/sched.h>
+
+#include "trace.h"
+
+static struct trace_array *ctx_trace;
+static int __read_mostly tracer_enabled;
+static atomic_t sched_ref;
+
+static void
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
+ struct task_struct *next)
+{
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int cpu;
+ int pc;
+
+ if (!atomic_read(&sched_ref))
+ return;
+
+ tracing_record_cmdline(prev);
+ tracing_record_cmdline(next);
+
+ if (!tracer_enabled)
+ return;
+
+ pc = preempt_count();
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = ctx_trace->data[cpu];
+
+ if (likely(!atomic_read(&data->disabled)))
+ tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
+
+ local_irq_restore(flags);
+}
+
+static void
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
+{
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int cpu, pc;
+
+ if (!likely(tracer_enabled))
+ return;
+
+ pc = preempt_count();
+ tracing_record_cmdline(current);
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = ctx_trace->data[cpu];
+
+ if (likely(!atomic_read(&data->disabled)))
+ tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+ flags, pc);
+
+ local_irq_restore(flags);
+}
+
+static void sched_switch_reset(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+}
+
+static int tracing_sched_register(void)
+{
+ int ret;
+
+ ret = register_trace_sched_wakeup(probe_sched_wakeup);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't activate tracepoint"
+ " probe to kernel_sched_wakeup\n");
+ return ret;
+ }
+
+ ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't activate tracepoint"
+ " probe to kernel_sched_wakeup_new\n");
+ goto fail_deprobe;
+ }
+
+ ret = register_trace_sched_switch(probe_sched_switch);
+ if (ret) {
+ pr_info("sched trace: Couldn't activate tracepoint"
+ " probe to kernel_sched_schedule\n");
+ goto fail_deprobe_wake_new;
+ }
+
+ return ret;
+fail_deprobe_wake_new:
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+fail_deprobe:
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
+ return ret;
+}
+
+static void tracing_sched_unregister(void)
+{
+ unregister_trace_sched_switch(probe_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
+}
+
+static void tracing_start_sched_switch(void)
+{
+ long ref;
+
+ ref = atomic_inc_return(&sched_ref);
+ if (ref == 1)
+ tracing_sched_register();
+}
+
+static void tracing_stop_sched_switch(void)
+{
+ long ref;
+
+ ref = atomic_dec_and_test(&sched_ref);
+ if (ref)
+ tracing_sched_unregister();
+}
+
+void tracing_start_cmdline_record(void)
+{
+ tracing_start_sched_switch();
+}
+
+void tracing_stop_cmdline_record(void)
+{
+ tracing_stop_sched_switch();
+}
+
+static void start_sched_trace(struct trace_array *tr)
+{
+ sched_switch_reset(tr);
+ tracing_start_cmdline_record();
+ tracer_enabled = 1;
+}
+
+static void stop_sched_trace(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ tracing_stop_cmdline_record();
+}
+
+static void sched_switch_trace_init(struct trace_array *tr)
+{
+ ctx_trace = tr;
+
+ if (tr->ctrl)
+ start_sched_trace(tr);
+}
+
+static void sched_switch_trace_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_sched_trace(tr);
+}
+
+static void sched_switch_trace_ctrl_update(struct trace_array *tr)
+{
+ /* When starting a new trace, reset the buffers */
+ if (tr->ctrl)
+ start_sched_trace(tr);
+ else
+ stop_sched_trace(tr);
+}
+
+static struct tracer sched_switch_trace __read_mostly =
+{
+ .name = "sched_switch",
+ .init = sched_switch_trace_init,
+ .reset = sched_switch_trace_reset,
+ .ctrl_update = sched_switch_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_sched_switch,
+#endif
+};
+
+__init static int init_sched_switch_trace(void)
+{
+ int ret = 0;
+
+ if (atomic_read(&sched_ref))
+ ret = tracing_sched_register();
+ if (ret) {
+ pr_info("error registering scheduler trace\n");
+ return ret;
+ }
+ return register_tracer(&sched_switch_trace);
+}
+device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
new file mode 100644
index 0000000..3ae93f1
--- /dev/null
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -0,0 +1,393 @@
+/*
+ * trace task wakeup timings
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * Based on code from the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <trace/sched.h>
+
+#include "trace.h"
+
+static struct trace_array *wakeup_trace;
+static int __read_mostly tracer_enabled;
+
+static struct task_struct *wakeup_task;
+static int wakeup_cpu;
+static unsigned wakeup_prio = -1;
+
+static raw_spinlock_t wakeup_lock =
+ (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static void __wakeup_reset(struct trace_array *tr);
+
+#ifdef CONFIG_FUNCTION_TRACER
+/*
+ * irqsoff uses its own tracer function to keep the overhead down:
+ */
+static void
+wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int resched;
+ int cpu;
+ int pc;
+
+ if (likely(!wakeup_task))
+ return;
+
+ pc = preempt_count();
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (unlikely(disabled != 1))
+ goto out;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&wakeup_lock);
+
+ if (unlikely(!wakeup_task))
+ goto unlock;
+
+ /*
+ * The task can't disappear because it needs to
+ * wake up first, and we have the wakeup_lock.
+ */
+ if (task_cpu(wakeup_task) != cpu)
+ goto unlock;
+
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+
+ unlock:
+ __raw_spin_unlock(&wakeup_lock);
+ local_irq_restore(flags);
+
+ out:
+ atomic_dec(&data->disabled);
+
+ /*
+ * To prevent recursion from the scheduler, if the
+ * resched flag was set before we entered, then
+ * don't reschedule.
+ */
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = wakeup_tracer_call,
+};
+#endif /* CONFIG_FUNCTION_TRACER */
+
+/*
+ * Should this new latency be reported/recorded?
+ */
+static int report_latency(cycle_t delta)
+{
+ if (tracing_thresh) {
+ if (delta < tracing_thresh)
+ return 0;
+ } else {
+ if (delta <= tracing_max_latency)
+ return 0;
+ }
+ return 1;
+}
+
+static void notrace
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
+{
+ unsigned long latency = 0, t0 = 0, t1 = 0;
+ struct trace_array_cpu *data;
+ cycle_t T0, T1, delta;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ tracing_record_cmdline(prev);
+
+ if (unlikely(!tracer_enabled))
+ return;
+
+ /*
+ * When we start a new trace, we set wakeup_task to NULL
+ * and then set tracer_enabled = 1. We want to make sure
+ * that another CPU does not see the tracer_enabled = 1
+ * and the wakeup_task with an older task, that might
+ * actually be the same as next.
+ */
+ smp_rmb();
+
+ if (next != wakeup_task)
+ return;
+
+ pc = preempt_count();
+
+ /* The task we are waiting for is waking up */
+ data = wakeup_trace->data[wakeup_cpu];
+
+ /* disable local data, not wakeup_cpu data */
+ cpu = raw_smp_processor_id();
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
+ if (likely(disabled != 1))
+ goto out;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&wakeup_lock);
+
+ /* We could race with grabbing wakeup_lock */
+ if (unlikely(!tracer_enabled || next != wakeup_task))
+ goto out_unlock;
+
+ trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+
+ /*
+ * usecs conversion is slow so we try to delay the conversion
+ * as long as possible:
+ */
+ T0 = data->preempt_timestamp;
+ T1 = ftrace_now(cpu);
+ delta = T1-T0;
+
+ if (!report_latency(delta))
+ goto out_unlock;
+
+ latency = nsecs_to_usecs(delta);
+
+ tracing_max_latency = delta;
+ t0 = nsecs_to_usecs(T0);
+ t1 = nsecs_to_usecs(T1);
+
+ update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
+
+out_unlock:
+ __wakeup_reset(wakeup_trace);
+ __raw_spin_unlock(&wakeup_lock);
+ local_irq_restore(flags);
+out:
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
+}
+
+static void __wakeup_reset(struct trace_array *tr)
+{
+ struct trace_array_cpu *data;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ data = tr->data[cpu];
+ tracing_reset(tr, cpu);
+ }
+
+ wakeup_cpu = -1;
+ wakeup_prio = -1;
+
+ if (wakeup_task)
+ put_task_struct(wakeup_task);
+
+ wakeup_task = NULL;
+}
+
+static void wakeup_reset(struct trace_array *tr)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&wakeup_lock);
+ __wakeup_reset(tr);
+ __raw_spin_unlock(&wakeup_lock);
+ local_irq_restore(flags);
+}
+
+static void
+probe_wakeup(struct rq *rq, struct task_struct *p)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ long disabled;
+ int pc;
+
+ if (likely(!tracer_enabled))
+ return;
+
+ tracing_record_cmdline(p);
+ tracing_record_cmdline(current);
+
+ if (likely(!rt_task(p)) ||
+ p->prio >= wakeup_prio ||
+ p->prio >= current->prio)
+ return;
+
+ pc = preempt_count();
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
+ if (unlikely(disabled != 1))
+ goto out;
+
+ /* interrupts should be off from try_to_wake_up */
+ __raw_spin_lock(&wakeup_lock);
+
+ /* check for races. */
+ if (!tracer_enabled || p->prio >= wakeup_prio)
+ goto out_locked;
+
+ /* reset the trace */
+ __wakeup_reset(wakeup_trace);
+
+ wakeup_cpu = task_cpu(p);
+ wakeup_prio = p->prio;
+
+ wakeup_task = p;
+ get_task_struct(wakeup_task);
+
+ local_save_flags(flags);
+
+ wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+ trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
+ CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+
+out_locked:
+ __raw_spin_unlock(&wakeup_lock);
+out:
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
+}
+
+static void start_wakeup_tracer(struct trace_array *tr)
+{
+ int ret;
+
+ ret = register_trace_sched_wakeup(probe_wakeup);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't activate tracepoint"
+ " probe to kernel_sched_wakeup\n");
+ return;
+ }
+
+ ret = register_trace_sched_wakeup_new(probe_wakeup);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't activate tracepoint"
+ " probe to kernel_sched_wakeup_new\n");
+ goto fail_deprobe;
+ }
+
+ ret = register_trace_sched_switch(probe_wakeup_sched_switch);
+ if (ret) {
+ pr_info("sched trace: Couldn't activate tracepoint"
+ " probe to kernel_sched_schedule\n");
+ goto fail_deprobe_wake_new;
+ }
+
+ wakeup_reset(tr);
+
+ /*
+ * Don't let the tracer_enabled = 1 show up before
+ * the wakeup_task is reset. This may be overkill since
+ * wakeup_reset does a spin_unlock after setting the
+ * wakeup_task to NULL, but I want to be safe.
+ * This is a slow path anyway.
+ */
+ smp_wmb();
+
+ register_ftrace_function(&trace_ops);
+
+ tracer_enabled = 1;
+
+ return;
+fail_deprobe_wake_new:
+ unregister_trace_sched_wakeup_new(probe_wakeup);
+fail_deprobe:
+ unregister_trace_sched_wakeup(probe_wakeup);
+}
+
+static void stop_wakeup_tracer(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ unregister_ftrace_function(&trace_ops);
+ unregister_trace_sched_switch(probe_wakeup_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
+ unregister_trace_sched_wakeup(probe_wakeup);
+}
+
+static void wakeup_tracer_init(struct trace_array *tr)
+{
+ wakeup_trace = tr;
+
+ if (tr->ctrl)
+ start_wakeup_tracer(tr);
+}
+
+static void wakeup_tracer_reset(struct trace_array *tr)
+{
+ if (tr->ctrl) {
+ stop_wakeup_tracer(tr);
+ /* make sure we put back any tasks we are tracing */
+ wakeup_reset(tr);
+ }
+}
+
+static void wakeup_tracer_ctrl_update(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_wakeup_tracer(tr);
+ else
+ stop_wakeup_tracer(tr);
+}
+
+static void wakeup_tracer_open(struct trace_iterator *iter)
+{
+ /* stop the trace while dumping */
+ if (iter->tr->ctrl)
+ stop_wakeup_tracer(iter->tr);
+}
+
+static void wakeup_tracer_close(struct trace_iterator *iter)
+{
+ /* forget about any processes we were recording */
+ if (iter->tr->ctrl)
+ start_wakeup_tracer(iter->tr);
+}
+
+static struct tracer wakeup_tracer __read_mostly =
+{
+ .name = "wakeup",
+ .init = wakeup_tracer_init,
+ .reset = wakeup_tracer_reset,
+ .open = wakeup_tracer_open,
+ .close = wakeup_tracer_close,
+ .ctrl_update = wakeup_tracer_ctrl_update,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_wakeup,
+#endif
+};
+
+__init static int init_wakeup_tracer(void)
+{
+ int ret;
+
+ ret = register_tracer(&wakeup_tracer);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
new file mode 100644
index 0000000..90bc752
--- /dev/null
+++ b/kernel/trace/trace_selftest.c
@@ -0,0 +1,524 @@
+/* Include in trace.c */
+
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+static inline int trace_valid_entry(struct trace_entry *entry)
+{
+ switch (entry->type) {
+ case TRACE_FN:
+ case TRACE_CTX:
+ case TRACE_WAKE:
+ case TRACE_CONT:
+ case TRACE_STACK:
+ case TRACE_PRINT:
+ case TRACE_SPECIAL:
+ return 1;
+ }
+ return 0;
+}
+
+static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
+{
+ struct ring_buffer_event *event;
+ struct trace_entry *entry;
+
+ while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
+ entry = ring_buffer_event_data(event);
+
+ if (!trace_valid_entry(entry)) {
+ printk(KERN_CONT ".. invalid entry %d ",
+ entry->type);
+ goto failed;
+ }
+ }
+ return 0;
+
+ failed:
+ /* disable tracing */
+ tracing_disabled = 1;
+ printk(KERN_CONT ".. corrupted trace buffer .. ");
+ return -1;
+}
+
+/*
+ * Test the trace buffer to see if all the elements
+ * are still sane.
+ */
+static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
+{
+ unsigned long flags, cnt = 0;
+ int cpu, ret = 0;
+
+ /* Don't allow flipping of max traces now */
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&ftrace_max_lock);
+
+ cnt = ring_buffer_entries(tr->buffer);
+
+ for_each_possible_cpu(cpu) {
+ ret = trace_test_buffer_cpu(tr, cpu);
+ if (ret)
+ break;
+ }
+ __raw_spin_unlock(&ftrace_max_lock);
+ raw_local_irq_restore(flags);
+
+ if (count)
+ *count = cnt;
+
+ return ret;
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+/* Test dynamic code modification and ftrace filters */
+int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
+ struct trace_array *tr,
+ int (*func)(void))
+{
+ int save_ftrace_enabled = ftrace_enabled;
+ int save_tracer_enabled = tracer_enabled;
+ unsigned long count;
+ char *func_name;
+ int ret;
+
+ /* The ftrace test PASSED */
+ printk(KERN_CONT "PASSED\n");
+ pr_info("Testing dynamic ftrace: ");
+
+ /* enable tracing, and record the filter function */
+ ftrace_enabled = 1;
+ tracer_enabled = 1;
+
+ /* passed in by parameter to fool gcc from optimizing */
+ func();
+
+ /*
+ * Some archs *cough*PowerPC*cough* add charachters to the
+ * start of the function names. We simply put a '*' to
+ * accomodate them.
+ */
+ func_name = "*" STR(DYN_FTRACE_TEST_NAME);
+
+ /* filter only on our function */
+ ftrace_set_filter(func_name, strlen(func_name), 1);
+
+ /* enable tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+
+ /* we should have nothing in the buffer */
+ ret = trace_test_buffer(tr, &count);
+ if (ret)
+ goto out;
+
+ if (count) {
+ ret = -1;
+ printk(KERN_CONT ".. filter did not filter .. ");
+ goto out;
+ }
+
+ /* call our function again */
+ func();
+
+ /* sleep again */
+ msleep(100);
+
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ ftrace_enabled = 0;
+
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+
+ /* we should only have one item */
+ if (!ret && count != 1) {
+ printk(KERN_CONT ".. filter failed count=%ld ..", count);
+ ret = -1;
+ goto out;
+ }
+ out:
+ ftrace_enabled = save_ftrace_enabled;
+ tracer_enabled = save_tracer_enabled;
+
+ /* Enable tracing on all functions again */
+ ftrace_set_filter(NULL, 0, 1);
+
+ return ret;
+}
+#else
+# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
+#endif /* CONFIG_DYNAMIC_FTRACE */
+/*
+ * Simple verification test of ftrace function tracer.
+ * Enable ftrace, sleep 1/10 second, and then read the trace
+ * buffer to see if all is in order.
+ */
+int
+trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
+{
+ int save_ftrace_enabled = ftrace_enabled;
+ int save_tracer_enabled = tracer_enabled;
+ unsigned long count;
+ int ret;
+
+ /* make sure msleep has been recorded */
+ msleep(1);
+
+ /* start the tracing */
+ ftrace_enabled = 1;
+ tracer_enabled = 1;
+
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ ftrace_enabled = 0;
+
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ goto out;
+ }
+
+ ret = trace_selftest_startup_dynamic_tracing(trace, tr,
+ DYN_FTRACE_TEST_NAME);
+
+ out:
+ ftrace_enabled = save_ftrace_enabled;
+ tracer_enabled = save_tracer_enabled;
+
+ /* kill ftrace totally if we failed */
+ if (ret)
+ ftrace_kill();
+
+ return ret;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_IRQSOFF_TRACER
+int
+trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long save_max = tracing_max_latency;
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* reset the max latency */
+ tracing_max_latency = 0;
+ /* disable interrupts for a bit */
+ local_irq_disable();
+ udelay(100);
+ local_irq_enable();
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (!ret)
+ ret = trace_test_buffer(&max_tr, &count);
+ trace->reset(tr);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
+ tracing_max_latency = save_max;
+
+ return ret;
+}
+#endif /* CONFIG_IRQSOFF_TRACER */
+
+#ifdef CONFIG_PREEMPT_TRACER
+int
+trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long save_max = tracing_max_latency;
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* reset the max latency */
+ tracing_max_latency = 0;
+ /* disable preemption for a bit */
+ preempt_disable();
+ udelay(100);
+ preempt_enable();
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (!ret)
+ ret = trace_test_buffer(&max_tr, &count);
+ trace->reset(tr);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
+ tracing_max_latency = save_max;
+
+ return ret;
+}
+#endif /* CONFIG_PREEMPT_TRACER */
+
+#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
+int
+trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long save_max = tracing_max_latency;
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+
+ /* reset the max latency */
+ tracing_max_latency = 0;
+
+ /* disable preemption and interrupts for a bit */
+ preempt_disable();
+ local_irq_disable();
+ udelay(100);
+ preempt_enable();
+ /* reverse the order of preempt vs irqs */
+ local_irq_enable();
+
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (ret)
+ goto out;
+
+ ret = trace_test_buffer(&max_tr, &count);
+ if (ret)
+ goto out;
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ goto out;
+ }
+
+ /* do the test by disabling interrupts first this time */
+ tracing_max_latency = 0;
+ tr->ctrl = 1;
+ trace->ctrl_update(tr);
+ preempt_disable();
+ local_irq_disable();
+ udelay(100);
+ preempt_enable();
+ /* reverse the order of preempt vs irqs */
+ local_irq_enable();
+
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (ret)
+ goto out;
+
+ ret = trace_test_buffer(&max_tr, &count);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ goto out;
+ }
+
+ out:
+ trace->reset(tr);
+ tracing_max_latency = save_max;
+
+ return ret;
+}
+#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
+
+#ifdef CONFIG_NOP_TRACER
+int
+trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
+{
+ /* What could possibly go wrong? */
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_SCHED_TRACER
+static int trace_wakeup_test_thread(void *data)
+{
+ /* Make this a RT thread, doesn't need to be too high */
+ struct sched_param param = { .sched_priority = 5 };
+ struct completion *x = data;
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
+
+ /* Make it know we have a new prio */
+ complete(x);
+
+ /* now go to sleep and let the test wake us up */
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+
+ /* we are awake, now wait to disappear */
+ while (!kthread_should_stop()) {
+ /*
+ * This is an RT task, do short sleeps to let
+ * others run.
+ */
+ msleep(100);
+ }
+
+ return 0;
+}
+
+int
+trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long save_max = tracing_max_latency;
+ struct task_struct *p;
+ struct completion isrt;
+ unsigned long count;
+ int ret;
+
+ init_completion(&isrt);
+
+ /* create a high prio thread */
+ p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
+ if (IS_ERR(p)) {
+ printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
+ return -1;
+ }
+
+ /* make sure the thread is running at an RT prio */
+ wait_for_completion(&isrt);
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* reset the max latency */
+ tracing_max_latency = 0;
+
+ /* sleep to let the RT thread sleep too */
+ msleep(100);
+
+ /*
+ * Yes this is slightly racy. It is possible that for some
+ * strange reason that the RT thread we created, did not
+ * call schedule for 100ms after doing the completion,
+ * and we do a wakeup on a task that already is awake.
+ * But that is extremely unlikely, and the worst thing that
+ * happens in such a case, is that we disable tracing.
+ * Honestly, if this race does happen something is horrible
+ * wrong with the system.
+ */
+
+ wake_up_process(p);
+
+ /* give a little time to let the thread wake up */
+ msleep(100);
+
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (!ret)
+ ret = trace_test_buffer(&max_tr, &count);
+
+
+ trace->reset(tr);
+
+ tracing_max_latency = save_max;
+
+ /* kill the thread */
+ kthread_stop(p);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_SCHED_TRACER */
+
+#ifdef CONFIG_CONTEXT_SWITCH_TRACER
+int
+trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
+
+#ifdef CONFIG_SYSPROF_TRACER
+int
+trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+
+ return ret;
+}
+#endif /* CONFIG_SYSPROF_TRACER */
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
new file mode 100644
index 0000000..54dd77c
--- /dev/null
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -0,0 +1,7 @@
+#include "trace.h"
+
+int DYN_FTRACE_TEST_NAME(void)
+{
+ /* used to call mcount */
+ return 0;
+}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 0000000..3bdb44b
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include "trace.h"
+
+#define STACK_TRACE_ENTRIES 500
+
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
+ { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
+static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
+
+static struct stack_trace max_stack_trace = {
+ .max_entries = STACK_TRACE_ENTRIES,
+ .entries = stack_dump_trace,
+};
+
+static unsigned long max_stack_size;
+static raw_spinlock_t max_stack_lock =
+ (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int stack_trace_disabled __read_mostly;
+static DEFINE_PER_CPU(int, trace_active);
+
+static inline void check_stack(void)
+{
+ unsigned long this_size, flags;
+ unsigned long *p, *top, *start;
+ int i;
+
+ this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+ this_size = THREAD_SIZE - this_size;
+
+ if (this_size <= max_stack_size)
+ return;
+
+ /* we do not handle interrupt stacks yet */
+ if (!object_is_on_stack(&this_size))
+ return;
+
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&max_stack_lock);
+
+ /* a race could have already updated it */
+ if (this_size <= max_stack_size)
+ goto out;
+
+ max_stack_size = this_size;
+
+ max_stack_trace.nr_entries = 0;
+ max_stack_trace.skip = 3;
+
+ save_stack_trace(&max_stack_trace);
+
+ /*
+ * Now find where in the stack these are.
+ */
+ i = 0;
+ start = &this_size;
+ top = (unsigned long *)
+ (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
+
+ /*
+ * Loop through all the entries. One of the entries may
+ * for some reason be missed on the stack, so we may
+ * have to account for them. If they are all there, this
+ * loop will only happen once. This code only takes place
+ * on a new max, so it is far from a fast path.
+ */
+ while (i < max_stack_trace.nr_entries) {
+
+ stack_dump_index[i] = this_size;
+ p = start;
+
+ for (; p < top && i < max_stack_trace.nr_entries; p++) {
+ if (*p == stack_dump_trace[i]) {
+ this_size = stack_dump_index[i++] =
+ (top - p) * sizeof(unsigned long);
+ /* Start the search from here */
+ start = p + 1;
+ }
+ }
+
+ i++;
+ }
+
+ out:
+ __raw_spin_unlock(&max_stack_lock);
+ raw_local_irq_restore(flags);
+}
+
+static void
+stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu, resched;
+
+ if (unlikely(!ftrace_enabled || stack_trace_disabled))
+ return;
+
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ cpu = raw_smp_processor_id();
+ /* no atomic needed, we only modify this variable by this cpu */
+ if (per_cpu(trace_active, cpu)++ != 0)
+ goto out;
+
+ check_stack();
+
+ out:
+ per_cpu(trace_active, cpu)--;
+ /* prevent recursion in schedule */
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = stack_trace_call,
+};
+
+static ssize_t
+stack_max_size_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long *ptr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
+ if (r > sizeof(buf))
+ r = sizeof(buf);
+ return simple_read_from_buffer(ubuf, count, ppos, buf, r);
+}
+
+static ssize_t
+stack_max_size_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ long *ptr = filp->private_data;
+ unsigned long val, flags;
+ char buf[64];
+ int ret;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&max_stack_lock);
+ *ptr = val;
+ __raw_spin_unlock(&max_stack_lock);
+ raw_local_irq_restore(flags);
+
+ return count;
+}
+
+static struct file_operations stack_max_size_fops = {
+ .open = tracing_open_generic,
+ .read = stack_max_size_read,
+ .write = stack_max_size_write,
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ long i;
+
+ (*pos)++;
+
+ if (v == SEQ_START_TOKEN)
+ i = 0;
+ else {
+ i = *(long *)v;
+ i++;
+ }
+
+ if (i >= max_stack_trace.nr_entries ||
+ stack_dump_trace[i] == ULONG_MAX)
+ return NULL;
+
+ m->private = (void *)i;
+
+ return &m->private;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ void *t = SEQ_START_TOKEN;
+ loff_t l = 0;
+
+ local_irq_disable();
+ __raw_spin_lock(&max_stack_lock);
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (; t && l < *pos; t = t_next(m, t, &l))
+ ;
+
+ return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+ __raw_spin_unlock(&max_stack_lock);
+ local_irq_enable();
+}
+
+static int trace_lookup_stack(struct seq_file *m, long i)
+{
+ unsigned long addr = stack_dump_trace[i];
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+
+ sprint_symbol(str, addr);
+
+ return seq_printf(m, "%s\n", str);
+#else
+ return seq_printf(m, "%p\n", (void*)addr);
+#endif
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ long i;
+ int size;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(m, " Depth Size Location"
+ " (%d entries)\n"
+ " ----- ---- --------\n",
+ max_stack_trace.nr_entries);
+ return 0;
+ }
+
+ i = *(long *)v;
+
+ if (i >= max_stack_trace.nr_entries ||
+ stack_dump_trace[i] == ULONG_MAX)
+ return 0;
+
+ if (i+1 == max_stack_trace.nr_entries ||
+ stack_dump_trace[i+1] == ULONG_MAX)
+ size = stack_dump_index[i];
+ else
+ size = stack_dump_index[i] - stack_dump_index[i+1];
+
+ seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size);
+
+ trace_lookup_stack(m, i);
+
+ return 0;
+}
+
+static struct seq_operations stack_trace_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int stack_trace_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ ret = seq_open(file, &stack_trace_seq_ops);
+
+ return ret;
+}
+
+static struct file_operations stack_trace_fops = {
+ .open = stack_trace_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+static __init int stack_trace_init(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
+ &max_stack_size, &stack_max_size_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+
+ entry = debugfs_create_file("stack_trace", 0444, d_tracer,
+ NULL, &stack_trace_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'stack_trace' entry\n");
+
+ register_ftrace_function(&trace_ops);
+
+ return 0;
+}
+
+device_initcall(stack_trace_init);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
new file mode 100644
index 0000000..9587d3b
--- /dev/null
+++ b/kernel/trace/trace_sysprof.c
@@ -0,0 +1,363 @@
+/*
+ * trace stack traces
+ *
+ * Copyright (C) 2004-2008, Soeren Sandmann
+ * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/kallsyms.h>
+#include <linux/debugfs.h>
+#include <linux/hrtimer.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/fs.h>
+
+#include <asm/stacktrace.h>
+
+#include "trace.h"
+
+static struct trace_array *sysprof_trace;
+static int __read_mostly tracer_enabled;
+
+/*
+ * 1 msec sample interval by default:
+ */
+static unsigned long sample_period = 1000000;
+static const unsigned int sample_max_depth = 512;
+
+static DEFINE_MUTEX(sample_timer_lock);
+/*
+ * Per CPU hrtimers that do the profiling:
+ */
+static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
+
+struct stack_frame {
+ const void __user *next_fp;
+ unsigned long return_address;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+ int ret;
+
+ if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+ return 0;
+
+ ret = 1;
+ pagefault_disable();
+ if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+ ret = 0;
+ pagefault_enable();
+
+ return ret;
+}
+
+struct backtrace_info {
+ struct trace_array_cpu *data;
+ struct trace_array *tr;
+ int pos;
+};
+
+static void
+backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+ /* Ignore warnings */
+}
+
+static void backtrace_warning(void *data, char *msg)
+{
+ /* Ignore warnings */
+}
+
+static int backtrace_stack(void *data, char *name)
+{
+ /* Don't bother with IRQ stacks for now */
+ return -1;
+}
+
+static void backtrace_address(void *data, unsigned long addr, int reliable)
+{
+ struct backtrace_info *info = data;
+
+ if (info->pos < sample_max_depth && reliable) {
+ __trace_special(info->tr, info->data, 1, addr, 0);
+
+ info->pos++;
+ }
+}
+
+const static struct stacktrace_ops backtrace_ops = {
+ .warning = backtrace_warning,
+ .warning_symbol = backtrace_warning_symbol,
+ .stack = backtrace_stack,
+ .address = backtrace_address,
+};
+
+static int
+trace_kernel(struct pt_regs *regs, struct trace_array *tr,
+ struct trace_array_cpu *data)
+{
+ struct backtrace_info info;
+ unsigned long bp;
+ char *stack;
+
+ info.tr = tr;
+ info.data = data;
+ info.pos = 1;
+
+ __trace_special(info.tr, info.data, 1, regs->ip, 0);
+
+ stack = ((char *)regs + sizeof(struct pt_regs));
+#ifdef CONFIG_FRAME_POINTER
+ bp = regs->bp;
+#else
+ bp = 0;
+#endif
+
+ dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
+
+ return info.pos;
+}
+
+static void timer_notify(struct pt_regs *regs, int cpu)
+{
+ struct trace_array_cpu *data;
+ struct stack_frame frame;
+ struct trace_array *tr;
+ const void __user *fp;
+ int is_user;
+ int i;
+
+ if (!regs)
+ return;
+
+ tr = sysprof_trace;
+ data = tr->data[cpu];
+ is_user = user_mode(regs);
+
+ if (!current || current->pid == 0)
+ return;
+
+ if (is_user && current->state != TASK_RUNNING)
+ return;
+
+ __trace_special(tr, data, 0, 0, current->pid);
+
+ if (!is_user)
+ i = trace_kernel(regs, tr, data);
+ else
+ i = 0;
+
+ /*
+ * Trace user stack if we are not a kernel thread
+ */
+ if (current->mm && i < sample_max_depth) {
+ regs = (struct pt_regs *)current->thread.sp0 - 1;
+
+ fp = (void __user *)regs->bp;
+
+ __trace_special(tr, data, 2, regs->ip, 0);
+
+ while (i < sample_max_depth) {
+ frame.next_fp = NULL;
+ frame.return_address = 0;
+ if (!copy_stack_frame(fp, &frame))
+ break;
+ if ((unsigned long)fp < regs->sp)
+ break;
+
+ __trace_special(tr, data, 2, frame.return_address,
+ (unsigned long)fp);
+ fp = frame.next_fp;
+
+ i++;
+ }
+
+ }
+
+ /*
+ * Special trace entry if we overflow the max depth:
+ */
+ if (i == sample_max_depth)
+ __trace_special(tr, data, -1, -1, -1);
+
+ __trace_special(tr, data, 3, current->pid, i);
+}
+
+static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
+{
+ /* trace here */
+ timer_notify(get_irq_regs(), smp_processor_id());
+
+ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
+
+ return HRTIMER_RESTART;
+}
+
+static void start_stack_timer(int cpu)
+{
+ struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
+
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer->function = stack_trace_timer_fn;
+ hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
+
+ hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
+}
+
+static void start_stack_timers(void)
+{
+ cpumask_t saved_mask = current->cpus_allowed;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ start_stack_timer(cpu);
+ }
+ set_cpus_allowed_ptr(current, &saved_mask);
+}
+
+static void stop_stack_timer(int cpu)
+{
+ struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
+
+ hrtimer_cancel(hrtimer);
+}
+
+static void stop_stack_timers(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ stop_stack_timer(cpu);
+}
+
+static void stack_reset(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+}
+
+static void start_stack_trace(struct trace_array *tr)
+{
+ mutex_lock(&sample_timer_lock);
+ stack_reset(tr);
+ start_stack_timers();
+ tracer_enabled = 1;
+ mutex_unlock(&sample_timer_lock);
+}
+
+static void stop_stack_trace(struct trace_array *tr)
+{
+ mutex_lock(&sample_timer_lock);
+ stop_stack_timers();
+ tracer_enabled = 0;
+ mutex_unlock(&sample_timer_lock);
+}
+
+static void stack_trace_init(struct trace_array *tr)
+{
+ sysprof_trace = tr;
+
+ if (tr->ctrl)
+ start_stack_trace(tr);
+}
+
+static void stack_trace_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_stack_trace(tr);
+}
+
+static void stack_trace_ctrl_update(struct trace_array *tr)
+{
+ /* When starting a new trace, reset the buffers */
+ if (tr->ctrl)
+ start_stack_trace(tr);
+ else
+ stop_stack_trace(tr);
+}
+
+static struct tracer stack_trace __read_mostly =
+{
+ .name = "sysprof",
+ .init = stack_trace_init,
+ .reset = stack_trace_reset,
+ .ctrl_update = stack_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_sysprof,
+#endif
+};
+
+__init static int init_stack_trace(void)
+{
+ return register_tracer(&stack_trace);
+}
+device_initcall(init_stack_trace);
+
+#define MAX_LONG_DIGITS 22
+
+static ssize_t
+sysprof_sample_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[MAX_LONG_DIGITS];
+ int r;
+
+ r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+sysprof_sample_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[MAX_LONG_DIGITS];
+ unsigned long val;
+
+ if (cnt > MAX_LONG_DIGITS-1)
+ cnt = MAX_LONG_DIGITS-1;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ val = simple_strtoul(buf, NULL, 10);
+ /*
+ * Enforce a minimum sample period of 100 usecs:
+ */
+ if (val < 100)
+ val = 100;
+
+ mutex_lock(&sample_timer_lock);
+ stop_stack_timers();
+ sample_period = val * 1000;
+ start_stack_timers();
+ mutex_unlock(&sample_timer_lock);
+
+ return cnt;
+}
+
+static struct file_operations sysprof_sample_fops = {
+ .read = sysprof_sample_read,
+ .write = sysprof_sample_write,
+};
+
+void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
+{
+ struct dentry *entry;
+
+ entry = debugfs_create_file("sysprof_sample_period", 0644,
+ d_tracer, NULL, &sysprof_sample_fops);
+ if (entry)
+ return;
+ pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n");
+}
OpenPOWER on IntegriCloud