summaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig194
-rw-r--r--kernel/trace/Makefile19
-rw-r--r--kernel/trace/ftrace.c1541
-rw-r--r--kernel/trace/ring_buffer.c2517
-rw-r--r--kernel/trace/trace.c2721
-rw-r--r--kernel/trace/trace.h470
-rw-r--r--kernel/trace/trace_boot.c186
-rw-r--r--kernel/trace/trace_branch.c342
-rw-r--r--kernel/trace/trace_functions.c32
-rw-r--r--kernel/trace/trace_functions_graph.c669
-rw-r--r--kernel/trace/trace_hw_branches.c195
-rw-r--r--kernel/trace/trace_irqsoff.c84
-rw-r--r--kernel/trace/trace_mmiotrace.c153
-rw-r--r--kernel/trace/trace_nop.c105
-rw-r--r--kernel/trace/trace_power.c179
-rw-r--r--kernel/trace/trace_sched_switch.c252
-rw-r--r--kernel/trace/trace_sched_wakeup.c220
-rw-r--r--kernel/trace/trace_selftest.c274
-rw-r--r--kernel/trace/trace_stack.c360
-rw-r--r--kernel/trace/trace_sysprof.c32
20 files changed, 8366 insertions, 2179 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6..e2a4ff6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,23 +1,56 @@
#
-# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
+# Architectures that offer an FUNCTION_TRACER implementation should
+# select HAVE_FUNCTION_TRACER:
#
-config HAVE_FTRACE
+
+config USER_STACKTRACE_SUPPORT
+ bool
+
+config NOP_TRACER
+ bool
+
+config HAVE_FUNCTION_TRACER
+ bool
+
+config HAVE_FUNCTION_GRAPH_TRACER
+ bool
+
+config HAVE_FUNCTION_TRACE_MCOUNT_TEST
bool
+ help
+ This gets selected when the arch tests the function_trace_stop
+ variable at the mcount call site. Otherwise, this variable
+ is tested by the called function.
config HAVE_DYNAMIC_FTRACE
bool
+config HAVE_FTRACE_MCOUNT_RECORD
+ bool
+
+config HAVE_HW_BRANCH_TRACER
+ bool
+
config TRACER_MAX_TRACE
bool
+config RING_BUFFER
+ bool
+
config TRACING
bool
select DEBUG_FS
- select STACKTRACE
+ select RING_BUFFER
+ select STACKTRACE if STACKTRACE_SUPPORT
+ select TRACEPOINTS
+ select NOP_TRACER
+
+menu "Tracers"
-config FTRACE
+config FUNCTION_TRACER
bool "Kernel Function Tracer"
- depends on HAVE_FTRACE
+ depends on HAVE_FUNCTION_TRACER
+ depends on DEBUG_KERNEL
select FRAME_POINTER
select TRACING
select CONTEXT_SWITCH_TRACER
@@ -30,12 +63,26 @@ config FTRACE
(the bootup default), then the overhead of the instructions is very
small and not measurable even in micro-benchmarks.
+config FUNCTION_GRAPH_TRACER
+ bool "Kernel Function Graph Tracer"
+ depends on HAVE_FUNCTION_GRAPH_TRACER
+ depends on FUNCTION_TRACER
+ default y
+ help
+ Enable the kernel to trace a function at both its return
+ and its entry.
+ It's first purpose is to trace the duration of functions and
+ draw a call graph for each thread with some informations like
+ the return value.
+ This is done by setting the current return address on the current
+ task structure into a stack of calls.
+
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
depends on TRACE_IRQFLAGS_SUPPORT
depends on GENERIC_TIME
- depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select TRACE_IRQFLAGS
select TRACING
select TRACER_MAX_TRACE
@@ -58,7 +105,7 @@ config PREEMPT_TRACER
default n
depends on GENERIC_TIME
depends on PREEMPT
- depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select TRACING
select TRACER_MAX_TRACE
help
@@ -85,7 +132,7 @@ config SYSPROF_TRACER
config SCHED_TRACER
bool "Scheduling Latency Tracer"
- depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select TRACING
select CONTEXT_SWITCH_TRACER
select TRACER_MAX_TRACE
@@ -95,17 +142,133 @@ config SCHED_TRACER
config CONTEXT_SWITCH_TRACER
bool "Trace process context switches"
- depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select TRACING
select MARKERS
help
This tracer gets called from the context switch and records
all switching of tasks.
+config BOOT_TRACER
+ bool "Trace boot initcalls"
+ depends on DEBUG_KERNEL
+ select TRACING
+ select CONTEXT_SWITCH_TRACER
+ help
+ This tracer helps developers to optimize boot times: it records
+ the timings of the initcalls and traces key events and the identity
+ of tasks that can cause boot delays, such as context-switches.
+
+ Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+ produce pretty graphics about boot inefficiencies, giving a visual
+ representation of the delays during initcalls - but the raw
+ /debug/tracing/trace text output is readable too.
+
+ ( Note that tracing self tests can't be enabled if this tracer is
+ selected, because the self-tests are an initcall as well and that
+ would invalidate the boot trace. )
+
+config TRACE_BRANCH_PROFILING
+ bool "Trace likely/unlikely profiler"
+ depends on DEBUG_KERNEL
+ select TRACING
+ help
+ This tracer profiles all the the likely and unlikely macros
+ in the kernel. It will display the results in:
+
+ /debugfs/tracing/profile_annotated_branch
+
+ Note: this will add a significant overhead, only turn this
+ on if you need to profile the system's use of these macros.
+
+ Say N if unsure.
+
+config PROFILE_ALL_BRANCHES
+ bool "Profile all if conditionals"
+ depends on TRACE_BRANCH_PROFILING
+ help
+ This tracer profiles all branch conditions. Every if ()
+ taken in the kernel is recorded whether it hit or miss.
+ The results will be displayed in:
+
+ /debugfs/tracing/profile_branch
+
+ This configuration, when enabled, will impose a great overhead
+ on the system. This should only be enabled when the system
+ is to be analyzed
+
+ Say N if unsure.
+
+config TRACING_BRANCHES
+ bool
+ help
+ Selected by tracers that will trace the likely and unlikely
+ conditions. This prevents the tracers themselves from being
+ profiled. Profiling the tracing infrastructure can only happen
+ when the likelys and unlikelys are not being traced.
+
+config BRANCH_TRACER
+ bool "Trace likely/unlikely instances"
+ depends on TRACE_BRANCH_PROFILING
+ select TRACING_BRANCHES
+ help
+ This traces the events of likely and unlikely condition
+ calls in the kernel. The difference between this and the
+ "Trace likely/unlikely profiler" is that this is not a
+ histogram of the callers, but actually places the calling
+ events into a running trace buffer to see when and where the
+ events happened, as well as their results.
+
+ Say N if unsure.
+
+config POWER_TRACER
+ bool "Trace power consumption behavior"
+ depends on DEBUG_KERNEL
+ depends on X86
+ select TRACING
+ help
+ This tracer helps developers to analyze and optimize the kernels
+ power management decisions, specifically the C-state and P-state
+ behavior.
+
+
+config STACK_TRACER
+ bool "Trace max stack"
+ depends on HAVE_FUNCTION_TRACER
+ depends on DEBUG_KERNEL
+ select FUNCTION_TRACER
+ select STACKTRACE
+ help
+ This special tracer records the maximum stack footprint of the
+ kernel and displays it in debugfs/tracing/stack_trace.
+
+ This tracer works by hooking into every function call that the
+ kernel executes, and keeping a maximum stack depth value and
+ stack-trace saved. If this is configured with DYNAMIC_FTRACE
+ then it will not have any overhead while the stack tracer
+ is disabled.
+
+ To enable the stack tracer on bootup, pass in 'stacktrace'
+ on the kernel command line.
+
+ The stack tracer can also be enabled or disabled via the
+ sysctl kernel.stack_tracer_enabled
+
+ Say N if unsure.
+
+config HW_BRANCH_TRACER
+ depends on HAVE_HW_BRANCH_TRACER
+ bool "Trace hw branches"
+ select TRACING
+ help
+ This tracer records all branches on the system in a circular
+ buffer giving access to the last N branches for each cpu.
+
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
- depends on FTRACE
+ depends on FUNCTION_TRACER
depends on HAVE_DYNAMIC_FTRACE
+ depends on DEBUG_KERNEL
default y
help
This option will modify all the calls to ftrace dynamically
@@ -113,7 +276,7 @@ config DYNAMIC_FTRACE
with a No-Op instruction) as they are called. A table is
created to dynamically enable them again.
- This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
+ This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
has native performance as long as no tracing is active.
The changes to the code are done by a kernel thread that
@@ -121,15 +284,22 @@ config DYNAMIC_FTRACE
were made. If so, it runs stop_machine (stops all CPUS)
and modifies the code to jump over the call to ftrace.
+config FTRACE_MCOUNT_RECORD
+ def_bool y
+ depends on DYNAMIC_FTRACE
+ depends on HAVE_FTRACE_MCOUNT_RECORD
+
config FTRACE_SELFTEST
bool
config FTRACE_STARTUP_TEST
bool "Perform a startup test on ftrace"
- depends on TRACING
+ depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
select FTRACE_SELFTEST
help
This option performs a series of startup tests on ftrace. On bootup
a series of tests are made to verify that the tracer is
functioning properly. It will do tests on all the configured
tracers of ftrace.
+
+endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de..349d5a9 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
# Do not instrument the tracer itself:
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
@@ -10,15 +10,28 @@ CFLAGS_trace_selftest_dynamic.o = -pg
obj-y += trace_selftest_dynamic.o
endif
-obj-$(CONFIG_FTRACE) += libftrace.o
+# If unlikely tracing is enabled, do not trace these files
+ifdef CONFIG_TRACING_BRANCHES
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
-obj-$(CONFIG_FTRACE) += trace_functions.o
+obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
+obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
+obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
+obj-$(CONFIG_POWER_TRACER) += trace_power.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af3..2f32969 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -25,17 +25,35 @@
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
-#include <linux/hash.h>
#include <linux/list.h>
#include <asm/ftrace.h>
#include "trace.h"
+#define FTRACE_WARN_ON(cond) \
+ do { \
+ if (WARN_ON(cond)) \
+ ftrace_kill(); \
+ } while (0)
+
+#define FTRACE_WARN_ON_ONCE(cond) \
+ do { \
+ if (WARN_ON_ONCE(cond)) \
+ ftrace_kill(); \
+ } while (0)
+
/* ftrace_enabled is a method to turn ftrace on or off */
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;
+/* set when tracing only a pid */
+struct pid *ftrace_pid_trace;
+static struct pid * const ftrace_swapper_pid = &init_struct_pid;
+
+/* Quick disabling of function tracer. */
+int function_trace_stop;
+
/*
* ftrace_disabled is set when an anomaly is discovered.
* ftrace_disabled is much stronger than ftrace_enabled.
@@ -44,6 +62,7 @@ static int ftrace_disabled __read_mostly;
static DEFINE_SPINLOCK(ftrace_lock);
static DEFINE_MUTEX(ftrace_sysctl_lock);
+static DEFINE_MUTEX(ftrace_start_lock);
static struct ftrace_ops ftrace_list_end __read_mostly =
{
@@ -52,6 +71,8 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
+ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
{
@@ -68,6 +89,21 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
};
}
+static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
+{
+ if (!test_tsk_trace_trace(current))
+ return;
+
+ ftrace_pid_function(ip, parent_ip);
+}
+
+static void set_ftrace_pid_function(ftrace_func_t func)
+{
+ /* do not set ftrace_pid_function to itself! */
+ if (func != ftrace_pid_func)
+ ftrace_pid_function = func;
+}
+
/**
* clear_ftrace_function - reset the ftrace function
*
@@ -77,11 +113,27 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
void clear_ftrace_function(void)
{
ftrace_trace_function = ftrace_stub;
+ __ftrace_trace_function = ftrace_stub;
+ ftrace_pid_function = ftrace_stub;
+}
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+/*
+ * For those archs that do not test ftrace_trace_stop in their
+ * mcount call site, we need to do it from C.
+ */
+static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
+{
+ if (function_trace_stop)
+ return;
+
+ __ftrace_trace_function(ip, parent_ip);
}
+#endif
static int __register_ftrace_function(struct ftrace_ops *ops)
{
- /* Should never be called by interrupts */
+ /* should not be called from interrupt context */
spin_lock(&ftrace_lock);
ops->next = ftrace_list;
@@ -95,14 +147,28 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
ftrace_list = ops;
if (ftrace_enabled) {
+ ftrace_func_t func;
+
+ if (ops->next == &ftrace_list_end)
+ func = ops->func;
+ else
+ func = ftrace_list_func;
+
+ if (ftrace_pid_trace) {
+ set_ftrace_pid_function(func);
+ func = ftrace_pid_func;
+ }
+
/*
* For one func, simply call it directly.
* For more than one func, call the chain.
*/
- if (ops->next == &ftrace_list_end)
- ftrace_trace_function = ops->func;
- else
- ftrace_trace_function = ftrace_list_func;
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ ftrace_trace_function = func;
+#else
+ __ftrace_trace_function = func;
+ ftrace_trace_function = ftrace_test_stop_func;
+#endif
}
spin_unlock(&ftrace_lock);
@@ -115,6 +181,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
struct ftrace_ops **p;
int ret = 0;
+ /* should not be called from interrupt context */
spin_lock(&ftrace_lock);
/*
@@ -140,9 +207,19 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
if (ftrace_enabled) {
/* If we only have one func left, then call that directly */
- if (ftrace_list == &ftrace_list_end ||
- ftrace_list->next == &ftrace_list_end)
- ftrace_trace_function = ftrace_list->func;
+ if (ftrace_list->next == &ftrace_list_end) {
+ ftrace_func_t func = ftrace_list->func;
+
+ if (ftrace_pid_trace) {
+ set_ftrace_pid_function(func);
+ func = ftrace_pid_func;
+ }
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ ftrace_trace_function = func;
+#else
+ __ftrace_trace_function = func;
+#endif
+ }
}
out:
@@ -151,9 +228,48 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
return ret;
}
+static void ftrace_update_pid_func(void)
+{
+ ftrace_func_t func;
+
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+
+ if (ftrace_trace_function == ftrace_stub)
+ goto out;
+
+ func = ftrace_trace_function;
+
+ if (ftrace_pid_trace) {
+ set_ftrace_pid_function(func);
+ func = ftrace_pid_func;
+ } else {
+ if (func == ftrace_pid_func)
+ func = ftrace_pid_function;
+ }
+
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ ftrace_trace_function = func;
+#else
+ __ftrace_trace_function = func;
+#endif
+
+ out:
+ spin_unlock(&ftrace_lock);
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+# error Dynamic ftrace depends on MCOUNT_RECORD
+#endif
-static struct task_struct *ftraced_task;
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to get it confused by reading a reference in the code as we
+ * are parsing on objcopy output of text. Use a variable for
+ * it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
enum {
FTRACE_ENABLE_CALLS = (1 << 0),
@@ -161,18 +277,14 @@ enum {
FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
FTRACE_ENABLE_MCOUNT = (1 << 3),
FTRACE_DISABLE_MCOUNT = (1 << 4),
+ FTRACE_START_FUNC_RET = (1 << 5),
+ FTRACE_STOP_FUNC_RET = (1 << 6),
};
static int ftrace_filtered;
-static int tracing_on;
-static int frozen_record_count;
-
-static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
-static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
+static LIST_HEAD(ftrace_new_addrs);
-static DEFINE_SPINLOCK(ftrace_shutdown_lock);
-static DEFINE_MUTEX(ftraced_lock);
static DEFINE_MUTEX(ftrace_regex_lock);
struct ftrace_page {
@@ -190,16 +302,13 @@ struct ftrace_page {
static struct ftrace_page *ftrace_pages_start;
static struct ftrace_page *ftrace_pages;
-static int ftraced_trigger;
-static int ftraced_suspend;
-static int ftraced_stop;
-
-static int ftrace_record_suspend;
-
static struct dyn_ftrace *ftrace_free_records;
#ifdef CONFIG_KPROBES
+
+static int frozen_record_count;
+
static inline void freeze_record(struct dyn_ftrace *rec)
{
if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -226,79 +335,36 @@ static inline int record_frozen(struct dyn_ftrace *rec)
# define record_frozen(rec) ({ 0; })
#endif /* CONFIG_KPROBES */
-int skip_trace(unsigned long ip)
+static void ftrace_free_rec(struct dyn_ftrace *rec)
{
- unsigned long fl;
- struct dyn_ftrace *rec;
- struct hlist_node *t;
- struct hlist_head *head;
-
- if (frozen_record_count == 0)
- return 0;
-
- head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
- hlist_for_each_entry_rcu(rec, t, head, node) {
- if (rec->ip == ip) {
- if (record_frozen(rec)) {
- if (rec->flags & FTRACE_FL_FAILED)
- return 1;
-
- if (!(rec->flags & FTRACE_FL_CONVERTED))
- return 1;
-
- if (!tracing_on || !ftrace_enabled)
- return 1;
-
- if (ftrace_filtered) {
- fl = rec->flags & (FTRACE_FL_FILTER |
- FTRACE_FL_NOTRACE);
- if (!fl || (fl & FTRACE_FL_NOTRACE))
- return 1;
- }
- }
- break;
- }
- }
-
- return 0;
+ rec->ip = (unsigned long)ftrace_free_records;
+ ftrace_free_records = rec;
+ rec->flags |= FTRACE_FL_FREE;
}
-static inline int
-ftrace_ip_in_hash(unsigned long ip, unsigned long key)
+void ftrace_release(void *start, unsigned long size)
{
- struct dyn_ftrace *p;
- struct hlist_node *t;
- int found = 0;
-
- hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
- if (p->ip == ip) {
- found = 1;
- break;
- }
- }
-
- return found;
-}
+ struct dyn_ftrace *rec;
+ struct ftrace_page *pg;
+ unsigned long s = (unsigned long)start;
+ unsigned long e = s + size;
+ int i;
-static inline void
-ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
-{
- hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
-}
+ if (ftrace_disabled || !start)
+ return;
-/* called from kstop_machine */
-static inline void ftrace_del_hash(struct dyn_ftrace *node)
-{
- hlist_del(&node->node);
-}
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
-static void ftrace_free_rec(struct dyn_ftrace *rec)
-{
- /* no locking, only called from kstop_machine */
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ for (i = 0; i < pg->index; i++) {
+ rec = &pg->records[i];
- rec->ip = (unsigned long)ftrace_free_records;
- ftrace_free_records = rec;
- rec->flags |= FTRACE_FL_FREE;
+ if ((rec->ip >= s) && (rec->ip < e))
+ ftrace_free_rec(rec);
+ }
+ }
+ spin_unlock(&ftrace_lock);
}
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -310,10 +376,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
rec = ftrace_free_records;
if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
- WARN_ON_ONCE(1);
+ FTRACE_WARN_ON_ONCE(1);
ftrace_free_records = NULL;
- ftrace_disabled = 1;
- ftrace_enabled = 0;
return NULL;
}
@@ -323,182 +387,163 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
}
if (ftrace_pages->index == ENTRIES_PER_PAGE) {
- if (!ftrace_pages->next)
- return NULL;
+ if (!ftrace_pages->next) {
+ /* allocate another page */
+ ftrace_pages->next =
+ (void *)get_zeroed_page(GFP_KERNEL);
+ if (!ftrace_pages->next)
+ return NULL;
+ }
ftrace_pages = ftrace_pages->next;
}
return &ftrace_pages->records[ftrace_pages->index++];
}
-static void
+static struct dyn_ftrace *
ftrace_record_ip(unsigned long ip)
{
- struct dyn_ftrace *node;
- unsigned long flags;
- unsigned long key;
- int resched;
- int atomic;
- int cpu;
-
- if (!ftrace_enabled || ftrace_disabled)
- return;
-
- resched = need_resched();
- preempt_disable_notrace();
-
- /*
- * We simply need to protect against recursion.
- * Use the the raw version of smp_processor_id and not
- * __get_cpu_var which can call debug hooks that can
- * cause a recursive crash here.
- */
- cpu = raw_smp_processor_id();
- per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
- if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
- goto out;
-
- if (unlikely(ftrace_record_suspend))
- goto out;
-
- key = hash_long(ip, FTRACE_HASHBITS);
-
- WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
-
- if (ftrace_ip_in_hash(ip, key))
- goto out;
-
- atomic = irqs_disabled();
-
- spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+ struct dyn_ftrace *rec;
- /* This ip may have hit the hash before the lock */
- if (ftrace_ip_in_hash(ip, key))
- goto out_unlock;
+ if (ftrace_disabled)
+ return NULL;
- node = ftrace_alloc_dyn_node(ip);
- if (!node)
- goto out_unlock;
+ rec = ftrace_alloc_dyn_node(ip);
+ if (!rec)
+ return NULL;
- node->ip = ip;
+ rec->ip = ip;
- ftrace_add_hash(node, key);
+ list_add(&rec->list, &ftrace_new_addrs);
- ftraced_trigger = 1;
+ return rec;
+}
- out_unlock:
- spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
- out:
- per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+ int i;
- /* prevent recursion with scheduler */
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ printk(KERN_CONT "%s", fmt);
+
+ for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+ printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
+static void ftrace_bug(int failed, unsigned long ip)
+{
+ switch (failed) {
+ case -EFAULT:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on modifying ");
+ print_ip_sym(ip);
+ break;
+ case -EINVAL:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace failed to modify ");
+ print_ip_sym(ip);
+ print_ip_ins(" actual: ", (unsigned char *)ip);
+ printk(KERN_CONT "\n");
+ break;
+ case -EPERM:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on writing ");
+ print_ip_sym(ip);
+ break;
+ default:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on unknown error ");
+ print_ip_sym(ip);
+ }
}
-#define FTRACE_ADDR ((long)(ftrace_caller))
static int
-__ftrace_replace_code(struct dyn_ftrace *rec,
- unsigned char *old, unsigned char *new, int enable)
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
unsigned long ip, fl;
+ unsigned long ftrace_addr;
+
+ ftrace_addr = (unsigned long)ftrace_caller;
ip = rec->ip;
- if (ftrace_filtered && enable) {
+ /*
+ * If this record is not to be traced and
+ * it is not enabled then do nothing.
+ *
+ * If this record is not to be traced and
+ * it is enabled then disabled it.
+ *
+ */
+ if (rec->flags & FTRACE_FL_NOTRACE) {
+ if (rec->flags & FTRACE_FL_ENABLED)
+ rec->flags &= ~FTRACE_FL_ENABLED;
+ else
+ return 0;
+
+ } else if (ftrace_filtered && enable) {
/*
- * If filtering is on:
- *
- * If this record is set to be filtered and
- * is enabled then do nothing.
- *
- * If this record is set to be filtered and
- * it is not enabled, enable it.
- *
- * If this record is not set to be filtered
- * and it is not enabled do nothing.
- *
- * If this record is set not to trace then
- * do nothing.
- *
- * If this record is set not to trace and
- * it is enabled then disable it.
- *
- * If this record is not set to be filtered and
- * it is enabled, disable it.
+ * Filtering is on:
*/
- fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
- FTRACE_FL_ENABLED);
+ fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
- if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
- (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
- !fl || (fl == FTRACE_FL_NOTRACE))
+ /* Record is filtered and enabled, do nothing */
+ if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
return 0;
- /*
- * If it is enabled disable it,
- * otherwise enable it!
- */
- if (fl & FTRACE_FL_ENABLED) {
- /* swap new and old */
- new = old;
- old = ftrace_call_replace(ip, FTRACE_ADDR);
+ /* Record is not filtered and is not enabled do nothing */
+ if (!fl)
+ return 0;
+
+ /* Record is not filtered but enabled, disable it */
+ if (fl == FTRACE_FL_ENABLED)
rec->flags &= ~FTRACE_FL_ENABLED;
- } else {
- new = ftrace_call_replace(ip, FTRACE_ADDR);
+ else
+ /* Otherwise record is filtered but not enabled, enable it */
rec->flags |= FTRACE_FL_ENABLED;
- }
} else {
+ /* Disable or not filtered */
if (enable) {
- /*
- * If this record is set not to trace and is
- * not enabled, do nothing.
- */
- fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
- if (fl == FTRACE_FL_NOTRACE)
- return 0;
-
- new = ftrace_call_replace(ip, FTRACE_ADDR);
- } else
- old = ftrace_call_replace(ip, FTRACE_ADDR);
-
- if (enable) {
+ /* if record is enabled, do nothing */
if (rec->flags & FTRACE_FL_ENABLED)
return 0;
+
rec->flags |= FTRACE_FL_ENABLED;
+
} else {
+
+ /* if record is not enabled do nothing */
if (!(rec->flags & FTRACE_FL_ENABLED))
return 0;
+
rec->flags &= ~FTRACE_FL_ENABLED;
}
}
- return ftrace_modify_code(ip, old, new);
+ if (rec->flags & FTRACE_FL_ENABLED)
+ return ftrace_make_call(rec, ftrace_addr);
+ else
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
}
static void ftrace_replace_code(int enable)
{
int i, failed;
- unsigned char *new = NULL, *old = NULL;
struct dyn_ftrace *rec;
struct ftrace_page *pg;
- if (enable)
- old = ftrace_nop_replace();
- else
- new = ftrace_nop_replace();
-
for (pg = ftrace_pages_start; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
rec = &pg->records[i];
- /* don't modify code that has already faulted */
- if (rec->flags & FTRACE_FL_FAILED)
+ /*
+ * Skip over free records and records that have
+ * failed.
+ */
+ if (rec->flags & FTRACE_FL_FREE ||
+ rec->flags & FTRACE_FL_FAILED)
continue;
/* ignore updates to this record's mcount site */
@@ -509,78 +554,52 @@ static void ftrace_replace_code(int enable)
unfreeze_record(rec);
}
- failed = __ftrace_replace_code(rec, old, new, enable);
+ failed = __ftrace_replace_code(rec, enable);
if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
rec->flags |= FTRACE_FL_FAILED;
if ((system_state == SYSTEM_BOOTING) ||
!core_kernel_text(rec->ip)) {
- ftrace_del_hash(rec);
ftrace_free_rec(rec);
- }
+ } else
+ ftrace_bug(failed, rec->ip);
}
}
}
}
-static void ftrace_shutdown_replenish(void)
-{
- if (ftrace_pages->next)
- return;
-
- /* allocate another page */
- ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
-}
-
static int
-ftrace_code_disable(struct dyn_ftrace *rec)
+ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
{
unsigned long ip;
- unsigned char *nop, *call;
- int failed;
+ int ret;
ip = rec->ip;
- nop = ftrace_nop_replace();
- call = ftrace_call_replace(ip, MCOUNT_ADDR);
-
- failed = ftrace_modify_code(ip, call, nop);
- if (failed) {
+ ret = ftrace_make_nop(mod, rec, mcount_addr);
+ if (ret) {
+ ftrace_bug(ret, ip);
rec->flags |= FTRACE_FL_FAILED;
return 0;
}
return 1;
}
-static int __ftrace_update_code(void *ignore);
-
static int __ftrace_modify_code(void *data)
{
- unsigned long addr;
int *command = data;
- if (*command & FTRACE_ENABLE_CALLS) {
- /*
- * Update any recorded ips now that we have the
- * machine stopped
- */
- __ftrace_update_code(NULL);
+ if (*command & FTRACE_ENABLE_CALLS)
ftrace_replace_code(1);
- tracing_on = 1;
- } else if (*command & FTRACE_DISABLE_CALLS) {
+ else if (*command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
- tracing_on = 0;
- }
if (*command & FTRACE_UPDATE_TRACE_FUNC)
ftrace_update_ftrace_func(ftrace_trace_function);
- if (*command & FTRACE_ENABLE_MCOUNT) {
- addr = (unsigned long)ftrace_record_ip;
- ftrace_mcount_set(&addr);
- } else if (*command & FTRACE_DISABLE_MCOUNT) {
- addr = (unsigned long)ftrace_stub;
- ftrace_mcount_set(&addr);
- }
+ if (*command & FTRACE_START_FUNC_RET)
+ ftrace_enable_ftrace_graph_caller();
+ else if (*command & FTRACE_STOP_FUNC_RET)
+ ftrace_disable_ftrace_graph_caller();
return 0;
}
@@ -590,62 +609,44 @@ static void ftrace_run_update_code(int command)
stop_machine(__ftrace_modify_code, &command, NULL);
}
-void ftrace_disable_daemon(void)
-{
- /* Stop the daemon from calling kstop_machine */
- mutex_lock(&ftraced_lock);
- ftraced_stop = 1;
- mutex_unlock(&ftraced_lock);
-
- ftrace_force_update();
-}
-
-void ftrace_enable_daemon(void)
-{
- mutex_lock(&ftraced_lock);
- ftraced_stop = 0;
- mutex_unlock(&ftraced_lock);
-
- ftrace_force_update();
-}
-
static ftrace_func_t saved_ftrace_func;
+static int ftrace_start_up;
-static void ftrace_startup(void)
+static void ftrace_startup_enable(int command)
{
- int command = 0;
-
- if (unlikely(ftrace_disabled))
- return;
-
- mutex_lock(&ftraced_lock);
- ftraced_suspend++;
- if (ftraced_suspend == 1)
- command |= FTRACE_ENABLE_CALLS;
-
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
command |= FTRACE_UPDATE_TRACE_FUNC;
}
if (!command || !ftrace_enabled)
- goto out;
+ return;
ftrace_run_update_code(command);
- out:
- mutex_unlock(&ftraced_lock);
}
-static void ftrace_shutdown(void)
+static void ftrace_startup(int command)
{
- int command = 0;
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftrace_start_lock);
+ ftrace_start_up++;
+ command |= FTRACE_ENABLE_CALLS;
+ ftrace_startup_enable(command);
+
+ mutex_unlock(&ftrace_start_lock);
+}
+
+static void ftrace_shutdown(int command)
+{
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftraced_lock);
- ftraced_suspend--;
- if (!ftraced_suspend)
+ mutex_lock(&ftrace_start_lock);
+ ftrace_start_up--;
+ if (!ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -658,7 +659,7 @@ static void ftrace_shutdown(void)
ftrace_run_update_code(command);
out:
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
}
static void ftrace_startup_sysctl(void)
@@ -668,15 +669,15 @@ static void ftrace_startup_sysctl(void)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftraced_lock);
+ mutex_lock(&ftrace_start_lock);
/* Force update next time */
saved_ftrace_func = NULL;
- /* ftraced_suspend is true if we want ftrace running */
- if (ftraced_suspend)
+ /* ftrace_start_up is true if we want ftrace running */
+ if (ftrace_start_up)
command |= FTRACE_ENABLE_CALLS;
ftrace_run_update_code(command);
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
}
static void ftrace_shutdown_sysctl(void)
@@ -686,153 +687,51 @@ static void ftrace_shutdown_sysctl(void)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftraced_lock);
- /* ftraced_suspend is true if ftrace is running */
- if (ftraced_suspend)
+ mutex_lock(&ftrace_start_lock);
+ /* ftrace_start_up is true if ftrace is running */
+ if (ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
ftrace_run_update_code(command);
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
}
static cycle_t ftrace_update_time;
static unsigned long ftrace_update_cnt;
unsigned long ftrace_update_tot_cnt;
-static int __ftrace_update_code(void *ignore)
+static int ftrace_update_code(struct module *mod)
{
- int i, save_ftrace_enabled;
+ struct dyn_ftrace *p, *t;
cycle_t start, stop;
- struct dyn_ftrace *p;
- struct hlist_node *t, *n;
- struct hlist_head *head, temp_list;
-
- /* Don't be recording funcs now */
- ftrace_record_suspend++;
- save_ftrace_enabled = ftrace_enabled;
- ftrace_enabled = 0;
start = ftrace_now(raw_smp_processor_id());
ftrace_update_cnt = 0;
- /* No locks needed, the machine is stopped! */
- for (i = 0; i < FTRACE_HASHSIZE; i++) {
- INIT_HLIST_HEAD(&temp_list);
- head = &ftrace_hash[i];
+ list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
- /* all CPUS are stopped, we are safe to modify code */
- hlist_for_each_entry_safe(p, t, n, head, node) {
- /* Skip over failed records which have not been
- * freed. */
- if (p->flags & FTRACE_FL_FAILED)
- continue;
-
- /* Unconverted records are always at the head of the
- * hash bucket. Once we encounter a converted record,
- * simply skip over to the next bucket. Saves ftraced
- * some processor cycles (ftrace does its bid for
- * global warming :-p ). */
- if (p->flags & (FTRACE_FL_CONVERTED))
- break;
-
- /* Ignore updates to this record's mcount site.
- * Reintroduce this record at the head of this
- * bucket to attempt to "convert" it again if
- * the kprobe on it is unregistered before the
- * next run. */
- if (get_kprobe((void *)p->ip)) {
- ftrace_del_hash(p);
- INIT_HLIST_NODE(&p->node);
- hlist_add_head(&p->node, &temp_list);
- freeze_record(p);
- continue;
- } else {
- unfreeze_record(p);
- }
+ /* If something went wrong, bail without enabling anything */
+ if (unlikely(ftrace_disabled))
+ return -1;
- /* convert record (i.e, patch mcount-call with NOP) */
- if (ftrace_code_disable(p)) {
- p->flags |= FTRACE_FL_CONVERTED;
- ftrace_update_cnt++;
- } else {
- if ((system_state == SYSTEM_BOOTING) ||
- !core_kernel_text(p->ip)) {
- ftrace_del_hash(p);
- ftrace_free_rec(p);
- }
- }
- }
+ list_del_init(&p->list);
- hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
- hlist_del(&p->node);
- INIT_HLIST_NODE(&p->node);
- hlist_add_head(&p->node, head);
- }
+ /* convert record (i.e, patch mcount-call with NOP) */
+ if (ftrace_code_disable(mod, p)) {
+ p->flags |= FTRACE_FL_CONVERTED;
+ ftrace_update_cnt++;
+ } else
+ ftrace_free_rec(p);
}
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
ftrace_update_tot_cnt += ftrace_update_cnt;
- ftraced_trigger = 0;
-
- ftrace_enabled = save_ftrace_enabled;
- ftrace_record_suspend--;
return 0;
}
-static int ftrace_update_code(void)
-{
- if (unlikely(ftrace_disabled) ||
- !ftrace_enabled || !ftraced_trigger)
- return 0;
-
- stop_machine(__ftrace_update_code, NULL, NULL);
-
- return 1;
-}
-
-static int ftraced(void *ignore)
-{
- unsigned long usecs;
-
- while (!kthread_should_stop()) {
-
- set_current_state(TASK_INTERRUPTIBLE);
-
- /* check once a second */
- schedule_timeout(HZ);
-
- if (unlikely(ftrace_disabled))
- continue;
-
- mutex_lock(&ftrace_sysctl_lock);
- mutex_lock(&ftraced_lock);
- if (!ftraced_suspend && !ftraced_stop &&
- ftrace_update_code()) {
- usecs = nsecs_to_usecs(ftrace_update_time);
- if (ftrace_update_tot_cnt > 100000) {
- ftrace_update_tot_cnt = 0;
- pr_info("hm, dftrace overflow: %lu change%s"
- " (%lu total) in %lu usec%s\n",
- ftrace_update_cnt,
- ftrace_update_cnt != 1 ? "s" : "",
- ftrace_update_tot_cnt,
- usecs, usecs != 1 ? "s" : "");
- ftrace_disabled = 1;
- WARN_ON_ONCE(1);
- }
- }
- mutex_unlock(&ftraced_lock);
- mutex_unlock(&ftrace_sysctl_lock);
-
- ftrace_shutdown_replenish();
- }
- __set_current_state(TASK_RUNNING);
- return 0;
-}
-
-static int __init ftrace_dyn_table_alloc(void)
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
{
struct ftrace_page *pg;
int cnt;
@@ -859,7 +758,9 @@ static int __init ftrace_dyn_table_alloc(void)
pg = ftrace_pages = ftrace_pages_start;
- cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+ cnt = num_to_init / ENTRIES_PER_PAGE;
+ pr_info("ftrace: allocating %ld entries in %d pages\n",
+ num_to_init, cnt + 1);
for (i = 0; i < cnt; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -884,7 +785,6 @@ enum {
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
struct ftrace_iterator {
- loff_t pos;
struct ftrace_page *pg;
unsigned idx;
unsigned flags;
@@ -901,21 +801,26 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
(*pos)++;
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
retry:
if (iter->idx >= iter->pg->index) {
if (iter->pg->next) {
iter->pg = iter->pg->next;
iter->idx = 0;
goto retry;
+ } else {
+ iter->idx = -1;
}
} else {
rec = &iter->pg->records[iter->idx++];
- if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
+ if ((rec->flags & FTRACE_FL_FREE) ||
+
+ (!(iter->flags & FTRACE_ITER_FAILURES) &&
(rec->flags & FTRACE_FL_FAILED)) ||
((iter->flags & FTRACE_ITER_FAILURES) &&
- (!(rec->flags & FTRACE_FL_FAILED) ||
- (rec->flags & FTRACE_FL_FREE))) ||
+ !(rec->flags & FTRACE_FL_FAILED)) ||
((iter->flags & FTRACE_ITER_FILTER) &&
!(rec->flags & FTRACE_FL_FILTER)) ||
@@ -926,8 +831,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
goto retry;
}
}
-
- iter->pos = *pos;
+ spin_unlock(&ftrace_lock);
return rec;
}
@@ -936,16 +840,16 @@ static void *t_start(struct seq_file *m, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
void *p = NULL;
- loff_t l = -1;
- if (*pos != iter->pos) {
- for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
- ;
- } else {
- l = *pos;
- p = t_next(m, p, &l);
+ if (*pos > 0) {
+ if (iter->idx < 0)
+ return p;
+ (*pos)--;
+ iter->idx--;
}
+ p = t_next(m, p, pos);
+
return p;
}
@@ -989,7 +893,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
return -ENOMEM;
iter->pg = ftrace_pages_start;
- iter->pos = -1;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -1039,8 +942,8 @@ static void ftrace_filter_reset(int enable)
unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
unsigned i;
- /* keep kstop machine from running */
- preempt_disable();
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
if (enable)
ftrace_filtered = 0;
pg = ftrace_pages_start;
@@ -1053,7 +956,7 @@ static void ftrace_filter_reset(int enable)
}
pg = pg->next;
}
- preempt_enable();
+ spin_unlock(&ftrace_lock);
}
static int
@@ -1076,7 +979,6 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
if (file->f_mode & FMODE_READ) {
iter->pg = ftrace_pages_start;
- iter->pos = -1;
iter->flags = enable ? FTRACE_ITER_FILTER :
FTRACE_ITER_NOTRACE;
@@ -1145,6 +1047,13 @@ ftrace_match(unsigned char *buff, int len, int enable)
int type = MATCH_FULL;
unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
unsigned i, match = 0, search_len = 0;
+ int not = 0;
+
+ if (buff[0] == '!') {
+ not = 1;
+ buff++;
+ len--;
+ }
for (i = 0; i < len; i++) {
if (buff[i] == '*') {
@@ -1165,8 +1074,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
}
}
- /* keep kstop machine from running */
- preempt_disable();
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
if (enable)
ftrace_filtered = 1;
pg = ftrace_pages_start;
@@ -1198,12 +1107,16 @@ ftrace_match(unsigned char *buff, int len, int enable)
matched = 1;
break;
}
- if (matched)
- rec->flags |= flag;
+ if (matched) {
+ if (not)
+ rec->flags &= ~flag;
+ else
+ rec->flags |= flag;
+ }
}
pg = pg->next;
}
- preempt_enable();
+ spin_unlock(&ftrace_lock);
}
static ssize_t
@@ -1366,10 +1279,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
}
mutex_lock(&ftrace_sysctl_lock);
- mutex_lock(&ftraced_lock);
- if (iter->filtered && ftraced_suspend && ftrace_enabled)
+ mutex_lock(&ftrace_start_lock);
+ if (ftrace_start_up && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
mutex_unlock(&ftrace_sysctl_lock);
kfree(iter);
@@ -1389,55 +1302,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
return ftrace_regex_release(inode, file, 0);
}
-static ssize_t
-ftraced_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- /* don't worry about races */
- char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
- int r = strlen(buf);
-
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t
-ftraced_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- char buf[64];
- long val;
- int ret;
-
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- if (strncmp(buf, "enable", 6) == 0)
- val = 1;
- else if (strncmp(buf, "disable", 7) == 0)
- val = 0;
- else {
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
- return ret;
-
- val = !!val;
- }
-
- if (val)
- ftrace_enable_daemon();
- else
- ftrace_disable_daemon();
-
- filp->f_pos += cnt;
-
- return cnt;
-}
-
static struct file_operations ftrace_avail_fops = {
.open = ftrace_avail_open,
.read = seq_read,
@@ -1468,60 +1332,233 @@ static struct file_operations ftrace_notrace_fops = {
.release = ftrace_notrace_release,
};
-static struct file_operations ftraced_fops = {
- .open = tracing_open_generic,
- .read = ftraced_read,
- .write = ftraced_write,
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+static DEFINE_MUTEX(graph_lock);
+
+int ftrace_graph_count;
+unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+
+static void *
+g_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ unsigned long *array = m->private;
+ int index = *pos;
+
+ (*pos)++;
+
+ if (index >= ftrace_graph_count)
+ return NULL;
+
+ return &array[index];
+}
+
+static void *g_start(struct seq_file *m, loff_t *pos)
+{
+ void *p = NULL;
+
+ mutex_lock(&graph_lock);
+
+ p = g_next(m, p, pos);
+
+ return p;
+}
+
+static void g_stop(struct seq_file *m, void *p)
+{
+ mutex_unlock(&graph_lock);
+}
+
+static int g_show(struct seq_file *m, void *v)
+{
+ unsigned long *ptr = v;
+ char str[KSYM_SYMBOL_LEN];
+
+ if (!ptr)
+ return 0;
+
+ kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
+
+ seq_printf(m, "%s\n", str);
+
+ return 0;
+}
+
+static struct seq_operations ftrace_graph_seq_ops = {
+ .start = g_start,
+ .next = g_next,
+ .stop = g_stop,
+ .show = g_show,
};
-/**
- * ftrace_force_update - force an update to all recording ftrace functions
- */
-int ftrace_force_update(void)
+static int
+ftrace_graph_open(struct inode *inode, struct file *file)
{
int ret = 0;
if (unlikely(ftrace_disabled))
return -ENODEV;
- mutex_lock(&ftrace_sysctl_lock);
- mutex_lock(&ftraced_lock);
-
- /*
- * If ftraced_trigger is not set, then there is nothing
- * to update.
- */
- if (ftraced_trigger && !ftrace_update_code())
- ret = -EBUSY;
+ mutex_lock(&graph_lock);
+ if ((file->f_mode & FMODE_WRITE) &&
+ !(file->f_flags & O_APPEND)) {
+ ftrace_graph_count = 0;
+ memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
+ }
- mutex_unlock(&ftraced_lock);
- mutex_unlock(&ftrace_sysctl_lock);
+ if (file->f_mode & FMODE_READ) {
+ ret = seq_open(file, &ftrace_graph_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = ftrace_graph_funcs;
+ }
+ } else
+ file->private_data = ftrace_graph_funcs;
+ mutex_unlock(&graph_lock);
return ret;
}
-static void ftrace_force_shutdown(void)
+static ssize_t
+ftrace_graph_read(struct file *file, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
{
- struct task_struct *task;
- int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
+ if (file->f_mode & FMODE_READ)
+ return seq_read(file, ubuf, cnt, ppos);
+ else
+ return -EPERM;
+}
- mutex_lock(&ftraced_lock);
- task = ftraced_task;
- ftraced_task = NULL;
- ftraced_suspend = -1;
- ftrace_run_update_code(command);
- mutex_unlock(&ftraced_lock);
+static int
+ftrace_set_func(unsigned long *array, int idx, char *buffer)
+{
+ char str[KSYM_SYMBOL_LEN];
+ struct dyn_ftrace *rec;
+ struct ftrace_page *pg;
+ int found = 0;
+ int i, j;
+
+ if (ftrace_disabled)
+ return -ENODEV;
+
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ for (i = 0; i < pg->index; i++) {
+ rec = &pg->records[i];
+
+ if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
+ continue;
+
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ if (strcmp(str, buffer) == 0) {
+ found = 1;
+ for (j = 0; j < idx; j++)
+ if (array[j] == rec->ip) {
+ found = 0;
+ break;
+ }
+ if (found)
+ array[idx] = rec->ip;
+ break;
+ }
+ }
+ }
+ spin_unlock(&ftrace_lock);
- if (task)
- kthread_stop(task);
+ return found ? 0 : -EINVAL;
}
-static __init int ftrace_init_debugfs(void)
+static ssize_t
+ftrace_graph_write(struct file *file, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
{
- struct dentry *d_tracer;
- struct dentry *entry;
+ unsigned char buffer[FTRACE_BUFF_MAX+1];
+ unsigned long *array;
+ size_t read = 0;
+ ssize_t ret;
+ int index = 0;
+ char ch;
- d_tracer = tracing_init_dentry();
+ if (!cnt || cnt < 0)
+ return 0;
+
+ mutex_lock(&graph_lock);
+
+ if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (file->f_mode & FMODE_READ) {
+ struct seq_file *m = file->private_data;
+ array = m->private;
+ } else
+ array = file->private_data;
+
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+
+ /* skip white space */
+ while (cnt && isspace(ch)) {
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+ }
+
+ if (isspace(ch)) {
+ *ppos += read;
+ ret = read;
+ goto out;
+ }
+
+ while (cnt && !isspace(ch)) {
+ if (index < FTRACE_BUFF_MAX)
+ buffer[index++] = ch;
+ else {
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+ }
+ buffer[index] = 0;
+
+ /* we allow only one at a time */
+ ret = ftrace_set_func(array, ftrace_graph_count, buffer);
+ if (ret)
+ goto out;
+
+ ftrace_graph_count++;
+
+ file->f_pos += read;
+
+ ret = read;
+ out:
+ mutex_unlock(&graph_lock);
+
+ return ret;
+}
+
+static const struct file_operations ftrace_graph_fops = {
+ .open = ftrace_graph_open,
+ .read = ftrace_graph_read,
+ .write = ftrace_graph_write,
+};
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
+{
+ struct dentry *entry;
entry = debugfs_create_file("available_filter_functions", 0444,
d_tracer, NULL, &ftrace_avail_fops);
@@ -1546,97 +1583,295 @@ static __init int ftrace_init_debugfs(void)
pr_warning("Could not create debugfs "
"'set_ftrace_notrace' entry\n");
- entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
- NULL, &ftraced_fops);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
+ NULL,
+ &ftrace_graph_fops);
if (!entry)
pr_warning("Could not create debugfs "
- "'ftraced_enabled' entry\n");
+ "'set_graph_function' entry\n");
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
return 0;
}
-fs_initcall(ftrace_init_debugfs);
-
-static int __init ftrace_dynamic_init(void)
+static int ftrace_convert_nops(struct module *mod,
+ unsigned long *start,
+ unsigned long *end)
{
- struct task_struct *p;
+ unsigned long *p;
unsigned long addr;
+ unsigned long flags;
+
+ mutex_lock(&ftrace_start_lock);
+ p = start;
+ while (p < end) {
+ addr = ftrace_call_adjust(*p++);
+ /*
+ * Some architecture linkers will pad between
+ * the different mcount_loc sections of different
+ * object files to satisfy alignments.
+ * Skip any NULL pointers.
+ */
+ if (!addr)
+ continue;
+ ftrace_record_ip(addr);
+ }
+
+ /* disable interrupts to prevent kstop machine */
+ local_irq_save(flags);
+ ftrace_update_code(mod);
+ local_irq_restore(flags);
+ mutex_unlock(&ftrace_start_lock);
+
+ return 0;
+}
+
+void ftrace_init_module(struct module *mod,
+ unsigned long *start, unsigned long *end)
+{
+ if (ftrace_disabled || start == end)
+ return;
+ ftrace_convert_nops(mod, start, end);
+}
+
+extern unsigned long __start_mcount_loc[];
+extern unsigned long __stop_mcount_loc[];
+
+void __init ftrace_init(void)
+{
+ unsigned long count, addr, flags;
int ret;
- addr = (unsigned long)ftrace_record_ip;
+ /* Keep the ftrace pointer to the stub */
+ addr = (unsigned long)ftrace_stub;
- stop_machine(ftrace_dyn_arch_init, &addr, NULL);
+ local_irq_save(flags);
+ ftrace_dyn_arch_init(&addr);
+ local_irq_restore(flags);
/* ftrace_dyn_arch_init places the return code in addr */
- if (addr) {
- ret = (int)addr;
+ if (addr)
goto failed;
- }
- ret = ftrace_dyn_table_alloc();
- if (ret)
- goto failed;
+ count = __stop_mcount_loc - __start_mcount_loc;
- p = kthread_run(ftraced, NULL, "ftraced");
- if (IS_ERR(p)) {
- ret = -1;
+ ret = ftrace_dyn_table_alloc(count);
+ if (ret)
goto failed;
- }
last_ftrace_enabled = ftrace_enabled = 1;
- ftraced_task = p;
- return 0;
+ ret = ftrace_convert_nops(NULL,
+ __start_mcount_loc,
+ __stop_mcount_loc);
+ return;
failed:
ftrace_disabled = 1;
- return ret;
}
-core_initcall(ftrace_dynamic_init);
#else
-# define ftrace_startup() do { } while (0)
-# define ftrace_shutdown() do { } while (0)
+
+static int __init ftrace_nodyn_init(void)
+{
+ ftrace_enabled = 1;
+ return 0;
+}
+device_initcall(ftrace_nodyn_init);
+
+static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
+static inline void ftrace_startup_enable(int command) { }
+/* Keep as macros so we do not need to define the commands */
+# define ftrace_startup(command) do { } while (0)
+# define ftrace_shutdown(command) do { } while (0)
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
-# define ftrace_force_shutdown() do { } while (0)
#endif /* CONFIG_DYNAMIC_FTRACE */
-/**
- * ftrace_kill_atomic - kill ftrace from critical sections
- *
- * This function should be used by panic code. It stops ftrace
- * but in a not so nice way. If you need to simply kill ftrace
- * from a non-atomic section, use ftrace_kill.
- */
-void ftrace_kill_atomic(void)
+static ssize_t
+ftrace_pid_read(struct file *file, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
{
- ftrace_disabled = 1;
- ftrace_enabled = 0;
-#ifdef CONFIG_DYNAMIC_FTRACE
- ftraced_suspend = -1;
-#endif
- clear_ftrace_function();
+ char buf[64];
+ int r;
+
+ if (ftrace_pid_trace == ftrace_swapper_pid)
+ r = sprintf(buf, "swapper tasks\n");
+ else if (ftrace_pid_trace)
+ r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace));
+ else
+ r = sprintf(buf, "no pid\n");
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static void clear_ftrace_swapper(void)
+{
+ struct task_struct *p;
+ int cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ p = idle_task(cpu);
+ clear_tsk_trace_trace(p);
+ }
+ put_online_cpus();
+}
+
+static void set_ftrace_swapper(void)
+{
+ struct task_struct *p;
+ int cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ p = idle_task(cpu);
+ set_tsk_trace_trace(p);
+ }
+ put_online_cpus();
+}
+
+static void clear_ftrace_pid(struct pid *pid)
+{
+ struct task_struct *p;
+
+ do_each_pid_task(pid, PIDTYPE_PID, p) {
+ clear_tsk_trace_trace(p);
+ } while_each_pid_task(pid, PIDTYPE_PID, p);
+ put_pid(pid);
+}
+
+static void set_ftrace_pid(struct pid *pid)
+{
+ struct task_struct *p;
+
+ do_each_pid_task(pid, PIDTYPE_PID, p) {
+ set_tsk_trace_trace(p);
+ } while_each_pid_task(pid, PIDTYPE_PID, p);
+}
+
+static void clear_ftrace_pid_task(struct pid **pid)
+{
+ if (*pid == ftrace_swapper_pid)
+ clear_ftrace_swapper();
+ else
+ clear_ftrace_pid(*pid);
+
+ *pid = NULL;
+}
+
+static void set_ftrace_pid_task(struct pid *pid)
+{
+ if (pid == ftrace_swapper_pid)
+ set_ftrace_swapper();
+ else
+ set_ftrace_pid(pid);
+}
+
+static ssize_t
+ftrace_pid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct pid *pid;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtol(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&ftrace_start_lock);
+ if (val < 0) {
+ /* disable pid tracing */
+ if (!ftrace_pid_trace)
+ goto out;
+
+ clear_ftrace_pid_task(&ftrace_pid_trace);
+
+ } else {
+ /* swapper task is special */
+ if (!val) {
+ pid = ftrace_swapper_pid;
+ if (pid == ftrace_pid_trace)
+ goto out;
+ } else {
+ pid = find_get_pid(val);
+
+ if (pid == ftrace_pid_trace) {
+ put_pid(pid);
+ goto out;
+ }
+ }
+
+ if (ftrace_pid_trace)
+ clear_ftrace_pid_task(&ftrace_pid_trace);
+
+ if (!pid)
+ goto out;
+
+ ftrace_pid_trace = pid;
+
+ set_ftrace_pid_task(ftrace_pid_trace);
+ }
+
+ /* update the function call */
+ ftrace_update_pid_func();
+ ftrace_startup_enable(0);
+
+ out:
+ mutex_unlock(&ftrace_start_lock);
+
+ return cnt;
}
+static struct file_operations ftrace_pid_fops = {
+ .read = ftrace_pid_read,
+ .write = ftrace_pid_write,
+};
+
+static __init int ftrace_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ if (!d_tracer)
+ return 0;
+
+ ftrace_init_dyn_debugfs(d_tracer);
+
+ entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
+ NULL, &ftrace_pid_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'set_ftrace_pid' entry\n");
+ return 0;
+}
+
+fs_initcall(ftrace_init_debugfs);
+
/**
- * ftrace_kill - totally shutdown ftrace
+ * ftrace_kill - kill ftrace
*
- * This is a safety measure. If something was detected that seems
- * wrong, calling this function will keep ftrace from doing
- * any more modifications, and updates.
- * used when something went wrong.
+ * This function should be used by panic code. It stops ftrace
+ * but in a not so nice way. If you need to simply kill ftrace
+ * from a non-atomic section, use ftrace_kill.
*/
void ftrace_kill(void)
{
- mutex_lock(&ftrace_sysctl_lock);
ftrace_disabled = 1;
ftrace_enabled = 0;
-
clear_ftrace_function();
- mutex_unlock(&ftrace_sysctl_lock);
-
- /* Try to totally disable ftrace */
- ftrace_force_shutdown();
}
/**
@@ -1658,10 +1893,11 @@ int register_ftrace_function(struct ftrace_ops *ops)
return -1;
mutex_lock(&ftrace_sysctl_lock);
+
ret = __register_ftrace_function(ops);
- ftrace_startup();
- mutex_unlock(&ftrace_sysctl_lock);
+ ftrace_startup(0);
+ mutex_unlock(&ftrace_sysctl_lock);
return ret;
}
@@ -1677,7 +1913,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
mutex_lock(&ftrace_sysctl_lock);
ret = __unregister_ftrace_function(ops);
- ftrace_shutdown();
+ ftrace_shutdown(0);
mutex_unlock(&ftrace_sysctl_lock);
return ret;
@@ -1725,3 +1961,154 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
mutex_unlock(&ftrace_sysctl_lock);
return ret;
}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+static atomic_t ftrace_graph_active;
+
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+{
+ return 0;
+}
+
+/* The callbacks that hook a function */
+trace_func_graph_ret_t ftrace_graph_return =
+ (trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
+
+/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
+static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
+{
+ int i;
+ int ret = 0;
+ unsigned long flags;
+ int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
+ struct task_struct *g, *t;
+
+ for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
+ ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
+ * sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack_list[i]) {
+ start = 0;
+ end = i;
+ ret = -ENOMEM;
+ goto free;
+ }
+ }
+
+ read_lock_irqsave(&tasklist_lock, flags);
+ do_each_thread(g, t) {
+ if (start == end) {
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ if (t->ret_stack == NULL) {
+ t->curr_ret_stack = -1;
+ /* Make sure IRQs see the -1 first: */
+ barrier();
+ t->ret_stack = ret_stack_list[start++];
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ }
+ } while_each_thread(g, t);
+
+unlock:
+ read_unlock_irqrestore(&tasklist_lock, flags);
+free:
+ for (i = start; i < end; i++)
+ kfree(ret_stack_list[i]);
+ return ret;
+}
+
+/* Allocate a return stack for each task */
+static int start_graph_tracing(void)
+{
+ struct ftrace_ret_stack **ret_stack_list;
+ int ret;
+
+ ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
+ sizeof(struct ftrace_ret_stack *),
+ GFP_KERNEL);
+
+ if (!ret_stack_list)
+ return -ENOMEM;
+
+ do {
+ ret = alloc_retstack_tasklist(ret_stack_list);
+ } while (ret == -EAGAIN);
+
+ kfree(ret_stack_list);
+ return ret;
+}
+
+int register_ftrace_graph(trace_func_graph_ret_t retfunc,
+ trace_func_graph_ent_t entryfunc)
+{
+ int ret = 0;
+
+ mutex_lock(&ftrace_sysctl_lock);
+
+ atomic_inc(&ftrace_graph_active);
+ ret = start_graph_tracing();
+ if (ret) {
+ atomic_dec(&ftrace_graph_active);
+ goto out;
+ }
+
+ ftrace_graph_return = retfunc;
+ ftrace_graph_entry = entryfunc;
+
+ ftrace_startup(FTRACE_START_FUNC_RET);
+
+out:
+ mutex_unlock(&ftrace_sysctl_lock);
+ return ret;
+}
+
+void unregister_ftrace_graph(void)
+{
+ mutex_lock(&ftrace_sysctl_lock);
+
+ atomic_dec(&ftrace_graph_active);
+ ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+ ftrace_graph_entry = ftrace_graph_entry_stub;
+ ftrace_shutdown(FTRACE_STOP_FUNC_RET);
+
+ mutex_unlock(&ftrace_sysctl_lock);
+}
+
+/* Allocate a return stack for newly created task */
+void ftrace_graph_init_task(struct task_struct *t)
+{
+ if (atomic_read(&ftrace_graph_active)) {
+ t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+ * sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!t->ret_stack)
+ return;
+ t->curr_ret_stack = -1;
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ } else
+ t->ret_stack = NULL;
+}
+
+void ftrace_graph_exit_task(struct task_struct *t)
+{
+ struct ftrace_ret_stack *ret_stack = t->ret_stack;
+
+ t->ret_stack = NULL;
+ /* NULL must become visible to IRQs before we free it: */
+ barrier();
+
+ kfree(ret_stack);
+}
+
+void ftrace_graph_stop(void)
+{
+ ftrace_stop();
+}
+#endif
+
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 0000000..1d601a7
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,2517 @@
+/*
+ * Generic ring buffer
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h> /* used for sched_clock() (for now) */
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+/*
+ * A fast way to enable or disable all ring buffers is to
+ * call tracing_on or tracing_off. Turning off the ring buffers
+ * prevents all ring buffers from being recorded to.
+ * Turning this switch on, makes it OK to write to the
+ * ring buffer, if the ring buffer is enabled itself.
+ *
+ * There's three layers that must be on in order to write
+ * to the ring buffer.
+ *
+ * 1) This global flag must be set.
+ * 2) The ring buffer must be enabled for recording.
+ * 3) The per cpu buffer must be enabled for recording.
+ *
+ * In case of an anomaly, this global flag has a bit set that
+ * will permantly disable all ring buffers.
+ */
+
+/*
+ * Global flag to disable all recording to ring buffers
+ * This has two bits: ON, DISABLED
+ *
+ * ON DISABLED
+ * ---- ----------
+ * 0 0 : ring buffers are off
+ * 1 0 : ring buffers are on
+ * X 1 : ring buffers are permanently disabled
+ */
+
+enum {
+ RB_BUFFERS_ON_BIT = 0,
+ RB_BUFFERS_DISABLED_BIT = 1,
+};
+
+enum {
+ RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
+ RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
+};
+
+static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+ set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
+}
+EXPORT_SYMBOL_GPL(tracing_on);
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+ clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
+}
+EXPORT_SYMBOL_GPL(tracing_off);
+
+/**
+ * tracing_off_permanent - permanently disable ring buffers
+ *
+ * This function, once called, will disable all ring buffers
+ * permanenty.
+ */
+void tracing_off_permanent(void)
+{
+ set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
+}
+
+#include "trace.h"
+
+/* Up this if you want to test the TIME_EXTENTS and normalization */
+#define DEBUG_SHIFT 0
+
+/* FIXME!!! */
+u64 ring_buffer_time_stamp(int cpu)
+{
+ u64 time;
+
+ preempt_disable_notrace();
+ /* shift to debug/test normalization and TIME_EXTENTS */
+ time = sched_clock() << DEBUG_SHIFT;
+ preempt_enable_no_resched_notrace();
+
+ return time;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
+
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
+{
+ /* Just stupid testing the normalize function and deltas */
+ *ts >>= DEBUG_SHIFT;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
+
+#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
+#define RB_ALIGNMENT_SHIFT 2
+#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
+#define RB_MAX_SMALL_DATA 28
+
+enum {
+ RB_LEN_TIME_EXTEND = 8,
+ RB_LEN_TIME_STAMP = 16,
+};
+
+/* inline for ring buffer fast paths */
+static inline unsigned
+rb_event_length(struct ring_buffer_event *event)
+{
+ unsigned length;
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ /* undefined */
+ return -1;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ return RB_LEN_TIME_EXTEND;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ return RB_LEN_TIME_STAMP;
+
+ case RINGBUF_TYPE_DATA:
+ if (event->len)
+ length = event->len << RB_ALIGNMENT_SHIFT;
+ else
+ length = event->array[0];
+ return length + RB_EVNT_HDR_SIZE;
+ default:
+ BUG();
+ }
+ /* not hit */
+ return 0;
+}
+
+/**
+ * ring_buffer_event_length - return the length of the event
+ * @event: the event to get the length of
+ */
+unsigned ring_buffer_event_length(struct ring_buffer_event *event)
+{
+ return rb_event_length(event);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_length);
+
+/* inline for ring buffer fast paths */
+static inline void *
+rb_event_data(struct ring_buffer_event *event)
+{
+ BUG_ON(event->type != RINGBUF_TYPE_DATA);
+ /* If length is in len field, then array[0] has the data */
+ if (event->len)
+ return (void *)&event->array[0];
+ /* Otherwise length is in array[0] and array[1] has the data */
+ return (void *)&event->array[1];
+}
+
+/**
+ * ring_buffer_event_data - return the data of the event
+ * @event: the event to get the data from
+ */
+void *ring_buffer_event_data(struct ring_buffer_event *event)
+{
+ return rb_event_data(event);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_data);
+
+#define for_each_buffer_cpu(buffer, cpu) \
+ for_each_cpu_mask(cpu, buffer->cpumask)
+
+#define TS_SHIFT 27
+#define TS_MASK ((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST (~TS_MASK)
+
+struct buffer_data_page {
+ u64 time_stamp; /* page time stamp */
+ local_t commit; /* write commited index */
+ unsigned char data[]; /* data of buffer page */
+};
+
+struct buffer_page {
+ local_t write; /* index for next write */
+ unsigned read; /* index for next read */
+ struct list_head list; /* list of free pages */
+ struct buffer_data_page *page; /* Actual data page */
+};
+
+static void rb_init_page(struct buffer_data_page *bpage)
+{
+ local_set(&bpage->commit, 0);
+}
+
+/*
+ * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
+ * this issue out.
+ */
+static inline void free_buffer_page(struct buffer_page *bpage)
+{
+ if (bpage->page)
+ free_page((unsigned long)bpage->page);
+ kfree(bpage);
+}
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline int test_time_stamp(u64 delta)
+{
+ if (delta & TS_DELTA_TEST)
+ return 1;
+ return 0;
+}
+
+#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
+
+/*
+ * head_page == tail_page && head == tail then buffer is empty.
+ */
+struct ring_buffer_per_cpu {
+ int cpu;
+ struct ring_buffer *buffer;
+ spinlock_t reader_lock; /* serialize readers */
+ raw_spinlock_t lock;
+ struct lock_class_key lock_key;
+ struct list_head pages;
+ struct buffer_page *head_page; /* read from head */
+ struct buffer_page *tail_page; /* write to tail */
+ struct buffer_page *commit_page; /* commited pages */
+ struct buffer_page *reader_page;
+ unsigned long overrun;
+ unsigned long entries;
+ u64 write_stamp;
+ u64 read_stamp;
+ atomic_t record_disabled;
+};
+
+struct ring_buffer {
+ unsigned pages;
+ unsigned flags;
+ int cpus;
+ cpumask_t cpumask;
+ atomic_t record_disabled;
+
+ struct mutex mutex;
+
+ struct ring_buffer_per_cpu **buffers;
+};
+
+struct ring_buffer_iter {
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long head;
+ struct buffer_page *head_page;
+ u64 read_stamp;
+};
+
+/* buffer may be either ring_buffer or ring_buffer_per_cpu */
+#define RB_WARN_ON(buffer, cond) \
+ ({ \
+ int _____ret = unlikely(cond); \
+ if (_____ret) { \
+ atomic_inc(&buffer->record_disabled); \
+ WARN_ON(1); \
+ } \
+ _____ret; \
+ })
+
+/**
+ * check_pages - integrity check of buffer pages
+ * @cpu_buffer: CPU buffer with pages to test
+ *
+ * As a safty measure we check to make sure the data pages have not
+ * been corrupted.
+ */
+static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct list_head *head = &cpu_buffer->pages;
+ struct buffer_page *bpage, *tmp;
+
+ if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
+ return -1;
+ if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
+ return -1;
+
+ list_for_each_entry_safe(bpage, tmp, head, list) {
+ if (RB_WARN_ON(cpu_buffer,
+ bpage->list.next->prev != &bpage->list))
+ return -1;
+ if (RB_WARN_ON(cpu_buffer,
+ bpage->list.prev->next != &bpage->list))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned nr_pages)
+{
+ struct list_head *head = &cpu_buffer->pages;
+ struct buffer_page *bpage, *tmp;
+ unsigned long addr;
+ LIST_HEAD(pages);
+ unsigned i;
+
+ for (i = 0; i < nr_pages; i++) {
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
+ GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
+ if (!bpage)
+ goto free_pages;
+ list_add(&bpage->list, &pages);
+
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ goto free_pages;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
+ }
+
+ list_splice(&pages, head);
+
+ rb_check_pages(cpu_buffer);
+
+ return 0;
+
+ free_pages:
+ list_for_each_entry_safe(bpage, tmp, &pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ return -ENOMEM;
+}
+
+static struct ring_buffer_per_cpu *
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *bpage;
+ unsigned long addr;
+ int ret;
+
+ cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!cpu_buffer)
+ return NULL;
+
+ cpu_buffer->cpu = cpu;
+ cpu_buffer->buffer = buffer;
+ spin_lock_init(&cpu_buffer->reader_lock);
+ cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ INIT_LIST_HEAD(&cpu_buffer->pages);
+
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!bpage)
+ goto fail_free_buffer;
+
+ cpu_buffer->reader_page = bpage;
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ goto fail_free_reader;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
+
+ INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+
+ ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+ if (ret < 0)
+ goto fail_free_reader;
+
+ cpu_buffer->head_page
+ = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+ cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
+
+ return cpu_buffer;
+
+ fail_free_reader:
+ free_buffer_page(cpu_buffer->reader_page);
+
+ fail_free_buffer:
+ kfree(cpu_buffer);
+ return NULL;
+}
+
+static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct list_head *head = &cpu_buffer->pages;
+ struct buffer_page *bpage, *tmp;
+
+ list_del_init(&cpu_buffer->reader_page->list);
+ free_buffer_page(cpu_buffer->reader_page);
+
+ list_for_each_entry_safe(bpage, tmp, head, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ kfree(cpu_buffer);
+}
+
+/*
+ * Causes compile errors if the struct buffer_page gets bigger
+ * than the struct page.
+ */
+extern int ring_buffer_page_too_big(void);
+
+/**
+ * ring_buffer_alloc - allocate a new ring_buffer
+ * @size: the size in bytes per cpu that is needed.
+ * @flags: attributes to set for the ring buffer.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ */
+struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+{
+ struct ring_buffer *buffer;
+ int bsize;
+ int cpu;
+
+ /* Paranoid! Optimizes out when all is well */
+ if (sizeof(struct buffer_page) > sizeof(struct page))
+ ring_buffer_page_too_big();
+
+
+ /* keep it in its own cache line */
+ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+ GFP_KERNEL);
+ if (!buffer)
+ return NULL;
+
+ buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+ buffer->flags = flags;
+
+ /* need at least two pages */
+ if (buffer->pages == 1)
+ buffer->pages++;
+
+ buffer->cpumask = cpu_possible_map;
+ buffer->cpus = nr_cpu_ids;
+
+ bsize = sizeof(void *) * nr_cpu_ids;
+ buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
+ GFP_KERNEL);
+ if (!buffer->buffers)
+ goto fail_free_buffer;
+
+ for_each_buffer_cpu(buffer, cpu) {
+ buffer->buffers[cpu] =
+ rb_allocate_cpu_buffer(buffer, cpu);
+ if (!buffer->buffers[cpu])
+ goto fail_free_buffers;
+ }
+
+ mutex_init(&buffer->mutex);
+
+ return buffer;
+
+ fail_free_buffers:
+ for_each_buffer_cpu(buffer, cpu) {
+ if (buffer->buffers[cpu])
+ rb_free_cpu_buffer(buffer->buffers[cpu]);
+ }
+ kfree(buffer->buffers);
+
+ fail_free_buffer:
+ kfree(buffer);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+
+/**
+ * ring_buffer_free - free a ring buffer.
+ * @buffer: the buffer to free.
+ */
+void
+ring_buffer_free(struct ring_buffer *buffer)
+{
+ int cpu;
+
+ for_each_buffer_cpu(buffer, cpu)
+ rb_free_cpu_buffer(buffer->buffers[cpu]);
+
+ kfree(buffer);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_free);
+
+static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
+
+static void
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+{
+ struct buffer_page *bpage;
+ struct list_head *p;
+ unsigned i;
+
+ atomic_inc(&cpu_buffer->record_disabled);
+ synchronize_sched();
+
+ for (i = 0; i < nr_pages; i++) {
+ if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+ return;
+ p = cpu_buffer->pages.next;
+ bpage = list_entry(p, struct buffer_page, list);
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+ return;
+
+ rb_reset_cpu(cpu_buffer);
+
+ rb_check_pages(cpu_buffer);
+
+ atomic_dec(&cpu_buffer->record_disabled);
+
+}
+
+static void
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ struct list_head *pages, unsigned nr_pages)
+{
+ struct buffer_page *bpage;
+ struct list_head *p;
+ unsigned i;
+
+ atomic_inc(&cpu_buffer->record_disabled);
+ synchronize_sched();
+
+ for (i = 0; i < nr_pages; i++) {
+ if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
+ return;
+ p = pages->next;
+ bpage = list_entry(p, struct buffer_page, list);
+ list_del_init(&bpage->list);
+ list_add_tail(&bpage->list, &cpu_buffer->pages);
+ }
+ rb_reset_cpu(cpu_buffer);
+
+ rb_check_pages(cpu_buffer);
+
+ atomic_dec(&cpu_buffer->record_disabled);
+}
+
+/**
+ * ring_buffer_resize - resize the ring buffer
+ * @buffer: the buffer to resize.
+ * @size: the new size.
+ *
+ * The tracer is responsible for making sure that the buffer is
+ * not being used while changing the size.
+ * Note: We may be able to change the above requirement by using
+ * RCU synchronizations.
+ *
+ * Minimum size is 2 * BUF_PAGE_SIZE.
+ *
+ * Returns -1 on failure.
+ */
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned nr_pages, rm_pages, new_pages;
+ struct buffer_page *bpage, *tmp;
+ unsigned long buffer_size;
+ unsigned long addr;
+ LIST_HEAD(pages);
+ int i, cpu;
+
+ /*
+ * Always succeed at resizing a non-existent buffer:
+ */
+ if (!buffer)
+ return size;
+
+ size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+ size *= BUF_PAGE_SIZE;
+ buffer_size = buffer->pages * BUF_PAGE_SIZE;
+
+ /* we need a minimum of two pages */
+ if (size < BUF_PAGE_SIZE * 2)
+ size = BUF_PAGE_SIZE * 2;
+
+ if (size == buffer_size)
+ return size;
+
+ mutex_lock(&buffer->mutex);
+
+ nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+
+ if (size < buffer_size) {
+
+ /* easy case, just free pages */
+ if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
+
+ rm_pages = buffer->pages - nr_pages;
+
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ rb_remove_pages(cpu_buffer, rm_pages);
+ }
+ goto out;
+ }
+
+ /*
+ * This is a bit more difficult. We only want to add pages
+ * when we can allocate enough for all CPUs. We do this
+ * by allocating all the pages and storing them on a local
+ * link list. If we succeed in our allocation, then we
+ * add these pages to the cpu_buffers. Otherwise we just free
+ * them all and return -ENOMEM;
+ */
+ if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
+
+ new_pages = nr_pages - buffer->pages;
+
+ for_each_buffer_cpu(buffer, cpu) {
+ for (i = 0; i < new_pages; i++) {
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage),
+ cache_line_size()),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!bpage)
+ goto free_pages;
+ list_add(&bpage->list, &pages);
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ goto free_pages;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
+ }
+ }
+
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ rb_insert_pages(cpu_buffer, &pages, new_pages);
+ }
+
+ if (RB_WARN_ON(buffer, !list_empty(&pages))) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
+
+ out:
+ buffer->pages = nr_pages;
+ mutex_unlock(&buffer->mutex);
+
+ return size;
+
+ free_pages:
+ list_for_each_entry_safe(bpage, tmp, &pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ mutex_unlock(&buffer->mutex);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_resize);
+
+static inline int rb_null_event(struct ring_buffer_event *event)
+{
+ return event->type == RINGBUF_TYPE_PADDING;
+}
+
+static inline void *
+__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
+{
+ return bpage->data + index;
+}
+
+static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
+{
+ return bpage->page->data + index;
+}
+
+static inline struct ring_buffer_event *
+rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return __rb_page_index(cpu_buffer->reader_page,
+ cpu_buffer->reader_page->read);
+}
+
+static inline struct ring_buffer_event *
+rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return __rb_page_index(cpu_buffer->head_page,
+ cpu_buffer->head_page->read);
+}
+
+static inline struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
+{
+ return __rb_page_index(iter->head_page, iter->head);
+}
+
+static inline unsigned rb_page_write(struct buffer_page *bpage)
+{
+ return local_read(&bpage->write);
+}
+
+static inline unsigned rb_page_commit(struct buffer_page *bpage)
+{
+ return local_read(&bpage->page->commit);
+}
+
+/* Size is determined by what has been commited */
+static inline unsigned rb_page_size(struct buffer_page *bpage)
+{
+ return rb_page_commit(bpage);
+}
+
+static inline unsigned
+rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return rb_page_commit(cpu_buffer->commit_page);
+}
+
+static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return rb_page_commit(cpu_buffer->head_page);
+}
+
+/*
+ * When the tail hits the head and the buffer is in overwrite mode,
+ * the head jumps to the next page and all content on the previous
+ * page is discarded. But before doing so, we update the overrun
+ * variable of the buffer.
+ */
+static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct ring_buffer_event *event;
+ unsigned long head;
+
+ for (head = 0; head < rb_head_size(cpu_buffer);
+ head += rb_event_length(event)) {
+
+ event = __rb_page_index(cpu_buffer->head_page, head);
+ if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+ return;
+ /* Only count data entries */
+ if (event->type != RINGBUF_TYPE_DATA)
+ continue;
+ cpu_buffer->overrun++;
+ cpu_buffer->entries--;
+ }
+}
+
+static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page **bpage)
+{
+ struct list_head *p = (*bpage)->list.next;
+
+ if (p == &cpu_buffer->pages)
+ p = p->next;
+
+ *bpage = list_entry(p, struct buffer_page, list);
+}
+
+static inline unsigned
+rb_event_index(struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+
+ return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
+}
+
+static inline int
+rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+ unsigned long index;
+
+ index = rb_event_index(event);
+ addr &= PAGE_MASK;
+
+ return cpu_buffer->commit_page->page == (void *)addr &&
+ rb_commit_index(cpu_buffer) == index;
+}
+
+static inline void
+rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+ unsigned long index;
+
+ index = rb_event_index(event);
+ addr &= PAGE_MASK;
+
+ while (cpu_buffer->commit_page->page != (void *)addr) {
+ if (RB_WARN_ON(cpu_buffer,
+ cpu_buffer->commit_page == cpu_buffer->tail_page))
+ return;
+ cpu_buffer->commit_page->page->commit =
+ cpu_buffer->commit_page->write;
+ rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ }
+
+ /* Now set the commit to the event's index */
+ local_set(&cpu_buffer->commit_page->page->commit, index);
+}
+
+static inline void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ /*
+ * We only race with interrupts and NMIs on this CPU.
+ * If we own the commit event, then we can commit
+ * all others that interrupted us, since the interruptions
+ * are in stack format (they finish before they come
+ * back to us). This allows us to do a simple loop to
+ * assign the commit to the tail.
+ */
+ again:
+ while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
+ cpu_buffer->commit_page->page->commit =
+ cpu_buffer->commit_page->write;
+ rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ /* add barrier to keep gcc from optimizing too much */
+ barrier();
+ }
+ while (rb_commit_index(cpu_buffer) !=
+ rb_page_write(cpu_buffer->commit_page)) {
+ cpu_buffer->commit_page->page->commit =
+ cpu_buffer->commit_page->write;
+ barrier();
+ }
+
+ /* again, keep gcc from optimizing */
+ barrier();
+
+ /*
+ * If an interrupt came in just after the first while loop
+ * and pushed the tail page forward, we will be left with
+ * a dangling commit that will never go forward.
+ */
+ if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+ goto again;
+}
+
+static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
+ cpu_buffer->reader_page->read = 0;
+}
+
+static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+ /*
+ * The iterator could be on the reader page (it starts there).
+ * But the head could have moved, since the reader was
+ * found. Check for this case and assign the iterator
+ * to the head page instead of next.
+ */
+ if (iter->head_page == cpu_buffer->reader_page)
+ iter->head_page = cpu_buffer->head_page;
+ else
+ rb_inc_page(cpu_buffer, &iter->head_page);
+
+ iter->read_stamp = iter->head_page->page->time_stamp;
+ iter->head = 0;
+}
+
+/**
+ * ring_buffer_update_event - update event type and data
+ * @event: the even to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static inline void
+rb_update_event(struct ring_buffer_event *event,
+ unsigned type, unsigned length)
+{
+ event->type = type;
+
+ switch (type) {
+
+ case RINGBUF_TYPE_PADDING:
+ break;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ event->len =
+ (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
+ >> RB_ALIGNMENT_SHIFT;
+ break;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ event->len =
+ (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
+ >> RB_ALIGNMENT_SHIFT;
+ break;
+
+ case RINGBUF_TYPE_DATA:
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA) {
+ event->len = 0;
+ event->array[0] = length;
+ } else
+ event->len =
+ (length + (RB_ALIGNMENT-1))
+ >> RB_ALIGNMENT_SHIFT;
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline unsigned rb_calculate_event_length(unsigned length)
+{
+ struct ring_buffer_event event; /* Used only for sizeof array */
+
+ /* zero length can cause confusions */
+ if (!length)
+ length = 1;
+
+ if (length > RB_MAX_SMALL_DATA)
+ length += sizeof(event.array[0]);
+
+ length += RB_EVNT_HDR_SIZE;
+ length = ALIGN(length, RB_ALIGNMENT);
+
+ return length;
+}
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned type, unsigned long length, u64 *ts)
+{
+ struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
+ unsigned long tail, write;
+ struct ring_buffer *buffer = cpu_buffer->buffer;
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ commit_page = cpu_buffer->commit_page;
+ /* we just need to protect against interrupts */
+ barrier();
+ tail_page = cpu_buffer->tail_page;
+ write = local_add_return(length, &tail_page->write);
+ tail = write - length;
+
+ /* See if we shot pass the end of this buffer page */
+ if (write > BUF_PAGE_SIZE) {
+ struct buffer_page *next_page = tail_page;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
+
+ rb_inc_page(cpu_buffer, &next_page);
+
+ head_page = cpu_buffer->head_page;
+ reader_page = cpu_buffer->reader_page;
+
+ /* we grabbed the lock before incrementing */
+ if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+ goto out_unlock;
+
+ /*
+ * If for some reason, we had an interrupt storm that made
+ * it all the way around the buffer, bail, and warn
+ * about it.
+ */
+ if (unlikely(next_page == commit_page)) {
+ WARN_ON_ONCE(1);
+ goto out_unlock;
+ }
+
+ if (next_page == head_page) {
+ if (!(buffer->flags & RB_FL_OVERWRITE)) {
+ /* reset write */
+ if (tail <= BUF_PAGE_SIZE)
+ local_set(&tail_page->write, tail);
+ goto out_unlock;
+ }
+
+ /* tail_page has not moved yet? */
+ if (tail_page == cpu_buffer->tail_page) {
+ /* count overflows */
+ rb_update_overflow(cpu_buffer);
+
+ rb_inc_page(cpu_buffer, &head_page);
+ cpu_buffer->head_page = head_page;
+ cpu_buffer->head_page->read = 0;
+ }
+ }
+
+ /*
+ * If the tail page is still the same as what we think
+ * it is, then it is up to us to update the tail
+ * pointer.
+ */
+ if (tail_page == cpu_buffer->tail_page) {
+ local_set(&next_page->write, 0);
+ local_set(&next_page->page->commit, 0);
+ cpu_buffer->tail_page = next_page;
+
+ /* reread the time stamp */
+ *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+ cpu_buffer->tail_page->page->time_stamp = *ts;
+ }
+
+ /*
+ * The actual tail page has moved forward.
+ */
+ if (tail < BUF_PAGE_SIZE) {
+ /* Mark the rest of the page with padding */
+ event = __rb_page_index(tail_page, tail);
+ event->type = RINGBUF_TYPE_PADDING;
+ }
+
+ if (tail <= BUF_PAGE_SIZE)
+ /* Set the write back to the previous setting */
+ local_set(&tail_page->write, tail);
+
+ /*
+ * If this was a commit entry that failed,
+ * increment that too
+ */
+ if (tail_page == cpu_buffer->commit_page &&
+ tail == rb_commit_index(cpu_buffer)) {
+ rb_set_commit_to_write(cpu_buffer);
+ }
+
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
+
+ /* fail and let the caller try again */
+ return ERR_PTR(-EAGAIN);
+ }
+
+ /* We reserved something on the buffer */
+
+ if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
+ return NULL;
+
+ event = __rb_page_index(tail_page, tail);
+ rb_update_event(event, type, length);
+
+ /*
+ * If this is a commit and the tail is zero, then update
+ * this page's time stamp.
+ */
+ if (!tail && rb_is_commit(cpu_buffer, event))
+ cpu_buffer->commit_page->page->time_stamp = *ts;
+
+ return event;
+
+ out_unlock:
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
+ return NULL;
+}
+
+static int
+rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+ u64 *ts, u64 *delta)
+{
+ struct ring_buffer_event *event;
+ static int once;
+ int ret;
+
+ if (unlikely(*delta > (1ULL << 59) && !once++)) {
+ printk(KERN_WARNING "Delta way too big! %llu"
+ " ts=%llu write stamp = %llu\n",
+ (unsigned long long)*delta,
+ (unsigned long long)*ts,
+ (unsigned long long)cpu_buffer->write_stamp);
+ WARN_ON(1);
+ }
+
+ /*
+ * The delta is too big, we to add a
+ * new timestamp.
+ */
+ event = __rb_reserve_next(cpu_buffer,
+ RINGBUF_TYPE_TIME_EXTEND,
+ RB_LEN_TIME_EXTEND,
+ ts);
+ if (!event)
+ return -EBUSY;
+
+ if (PTR_ERR(event) == -EAGAIN)
+ return -EAGAIN;
+
+ /* Only a commited time event can update the write stamp */
+ if (rb_is_commit(cpu_buffer, event)) {
+ /*
+ * If this is the first on the page, then we need to
+ * update the page itself, and just put in a zero.
+ */
+ if (rb_event_index(event)) {
+ event->time_delta = *delta & TS_MASK;
+ event->array[0] = *delta >> TS_SHIFT;
+ } else {
+ cpu_buffer->commit_page->page->time_stamp = *ts;
+ event->time_delta = 0;
+ event->array[0] = 0;
+ }
+ cpu_buffer->write_stamp = *ts;
+ /* let the caller know this was the commit */
+ ret = 1;
+ } else {
+ /* Darn, this is just wasted space */
+ event->time_delta = 0;
+ event->array[0] = 0;
+ ret = 0;
+ }
+
+ *delta = 0;
+
+ return ret;
+}
+
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned type, unsigned long length)
+{
+ struct ring_buffer_event *event;
+ u64 ts, delta;
+ int commit = 0;
+ int nr_loops = 0;
+
+ again:
+ /*
+ * We allow for interrupts to reenter here and do a trace.
+ * If one does, it will cause this original code to loop
+ * back here. Even with heavy interrupts happening, this
+ * should only happen a few times in a row. If this happens
+ * 1000 times in a row, there must be either an interrupt
+ * storm or we have something buggy.
+ * Bail!
+ */
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
+ return NULL;
+
+ ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+
+ /*
+ * Only the first commit can update the timestamp.
+ * Yes there is a race here. If an interrupt comes in
+ * just after the conditional and it traces too, then it
+ * will also check the deltas. More than one timestamp may
+ * also be made. But only the entry that did the actual
+ * commit will be something other than zero.
+ */
+ if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
+ rb_page_write(cpu_buffer->tail_page) ==
+ rb_commit_index(cpu_buffer)) {
+
+ delta = ts - cpu_buffer->write_stamp;
+
+ /* make sure this delta is calculated here */
+ barrier();
+
+ /* Did the write stamp get updated already? */
+ if (unlikely(ts < cpu_buffer->write_stamp))
+ delta = 0;
+
+ if (test_time_stamp(delta)) {
+
+ commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
+
+ if (commit == -EBUSY)
+ return NULL;
+
+ if (commit == -EAGAIN)
+ goto again;
+
+ RB_WARN_ON(cpu_buffer, commit < 0);
+ }
+ } else
+ /* Non commits have zero deltas */
+ delta = 0;
+
+ event = __rb_reserve_next(cpu_buffer, type, length, &ts);
+ if (PTR_ERR(event) == -EAGAIN)
+ goto again;
+
+ if (!event) {
+ if (unlikely(commit))
+ /*
+ * Ouch! We needed a timestamp and it was commited. But
+ * we didn't get our event reserved.
+ */
+ rb_set_commit_to_write(cpu_buffer);
+ return NULL;
+ }
+
+ /*
+ * If the timestamp was commited, make the commit our entry
+ * now so that we will update it when needed.
+ */
+ if (commit)
+ rb_set_commit_event(cpu_buffer, event);
+ else if (!rb_is_commit(cpu_buffer, event))
+ delta = 0;
+
+ event->time_delta = delta;
+
+ return event;
+}
+
+static DEFINE_PER_CPU(int, rb_need_resched);
+
+/**
+ * ring_buffer_lock_reserve - reserve a part of the buffer
+ * @buffer: the ring buffer to reserve from
+ * @length: the length of the data to reserve (excluding event header)
+ * @flags: a pointer to save the interrupt flags
+ *
+ * Returns a reseverd event on the ring buffer to copy directly to.
+ * The user of this interface will need to get the body to write into
+ * and can use the ring_buffer_event_data() interface.
+ *
+ * The length is the length of the data needed, not the event length
+ * which also includes the event header.
+ *
+ * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
+ * If NULL is returned, then nothing has been allocated or locked.
+ */
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct ring_buffer *buffer,
+ unsigned long length,
+ unsigned long *flags)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ int cpu, resched;
+
+ if (ring_buffer_flags != RB_BUFFERS_ON)
+ return NULL;
+
+ if (atomic_read(&buffer->record_disabled))
+ return NULL;
+
+ /* If we are tracing schedule, we don't want to recurse */
+ resched = ftrace_preempt_disable();
+
+ cpu = raw_smp_processor_id();
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ goto out;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ if (atomic_read(&cpu_buffer->record_disabled))
+ goto out;
+
+ length = rb_calculate_event_length(length);
+ if (length > BUF_PAGE_SIZE)
+ goto out;
+
+ event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+ if (!event)
+ goto out;
+
+ /*
+ * Need to store resched state on this cpu.
+ * Only the first needs to.
+ */
+
+ if (preempt_count() == 1)
+ per_cpu(rb_need_resched, cpu) = resched;
+
+ return event;
+
+ out:
+ ftrace_preempt_enable(resched);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
+
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ cpu_buffer->entries++;
+
+ /* Only process further if we own the commit */
+ if (!rb_is_commit(cpu_buffer, event))
+ return;
+
+ cpu_buffer->write_stamp += event->time_delta;
+
+ rb_set_commit_to_write(cpu_buffer);
+}
+
+/**
+ * ring_buffer_unlock_commit - commit a reserved
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ * @flags: the interrupt flags received from ring_buffer_lock_reserve.
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ unsigned long flags)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu = raw_smp_processor_id();
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ rb_commit(cpu_buffer, event);
+
+ /*
+ * Only the last preempt count needs to restore preemption.
+ */
+ if (preempt_count() == 1)
+ ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+ else
+ preempt_enable_no_resched_notrace();
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
+
+/**
+ * ring_buffer_write - write data to the buffer without reserving
+ * @buffer: The ring buffer to write to.
+ * @length: The length of the data being written (excluding the event header)
+ * @data: The data to write to the buffer.
+ *
+ * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
+ * one function. If you already have the data to write to the buffer, it
+ * may be easier to simply call this function.
+ *
+ * Note, like ring_buffer_lock_reserve, the length is the length of the data
+ * and not the length of the event which would hold the header.
+ */
+int ring_buffer_write(struct ring_buffer *buffer,
+ unsigned long length,
+ void *data)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ unsigned long event_length;
+ void *body;
+ int ret = -EBUSY;
+ int cpu, resched;
+
+ if (ring_buffer_flags != RB_BUFFERS_ON)
+ return -EBUSY;
+
+ if (atomic_read(&buffer->record_disabled))
+ return -EBUSY;
+
+ resched = ftrace_preempt_disable();
+
+ cpu = raw_smp_processor_id();
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ goto out;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ if (atomic_read(&cpu_buffer->record_disabled))
+ goto out;
+
+ event_length = rb_calculate_event_length(length);
+ event = rb_reserve_next_event(cpu_buffer,
+ RINGBUF_TYPE_DATA, event_length);
+ if (!event)
+ goto out;
+
+ body = rb_event_data(event);
+
+ memcpy(body, data, length);
+
+ rb_commit(cpu_buffer, event);
+
+ ret = 0;
+ out:
+ ftrace_preempt_enable(resched);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_write);
+
+static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct buffer_page *reader = cpu_buffer->reader_page;
+ struct buffer_page *head = cpu_buffer->head_page;
+ struct buffer_page *commit = cpu_buffer->commit_page;
+
+ return reader->read == rb_page_commit(reader) &&
+ (commit == reader ||
+ (commit == head &&
+ head->read == rb_page_commit(commit)));
+}
+
+/**
+ * ring_buffer_record_disable - stop all writes into the buffer
+ * @buffer: The ring buffer to stop writes to.
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable(struct ring_buffer *buffer)
+{
+ atomic_inc(&buffer->record_disabled);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
+
+/**
+ * ring_buffer_record_enable - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truely enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable(struct ring_buffer *buffer)
+{
+ atomic_dec(&buffer->record_disabled);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
+
+/**
+ * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
+ * @buffer: The ring buffer to stop writes to.
+ * @cpu: The CPU buffer to stop
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * The caller should call synchronize_sched() after this.
+ */
+void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return;
+
+ cpu_buffer = buffer->buffers[cpu];
+ atomic_inc(&cpu_buffer->record_disabled);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
+
+/**
+ * ring_buffer_record_enable_cpu - enable writes to the buffer
+ * @buffer: The ring buffer to enable writes
+ * @cpu: The CPU to enable.
+ *
+ * Note, multiple disables will need the same number of enables
+ * to truely enable the writing (much like preempt_disable).
+ */
+void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return;
+
+ cpu_buffer = buffer->buffers[cpu];
+ atomic_dec(&cpu_buffer->record_disabled);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
+
+/**
+ * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the entries from.
+ */
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ return cpu_buffer->entries;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ return cpu_buffer->overrun;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
+
+/**
+ * ring_buffer_entries - get the number of entries in a buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of entries in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_entries(struct ring_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long entries = 0;
+ int cpu;
+
+ /* if you care about this being correct, lock the buffer */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ entries += cpu_buffer->entries;
+ }
+
+ return entries;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_entries);
+
+/**
+ * ring_buffer_overrun_cpu - get the number of overruns in buffer
+ * @buffer: The ring buffer
+ *
+ * Returns the total number of overruns in the ring buffer
+ * (all CPU entries)
+ */
+unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long overruns = 0;
+ int cpu;
+
+ /* if you care about this being correct, lock the buffer */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ overruns += cpu_buffer->overrun;
+ }
+
+ return overruns;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_overruns);
+
+static void rb_iter_reset(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+ /* Iterator usage is expected to have record disabled */
+ if (list_empty(&cpu_buffer->reader_page->list)) {
+ iter->head_page = cpu_buffer->head_page;
+ iter->head = cpu_buffer->head_page->read;
+ } else {
+ iter->head_page = cpu_buffer->reader_page;
+ iter->head = cpu_buffer->reader_page->read;
+ }
+ if (iter->head)
+ iter->read_stamp = cpu_buffer->read_stamp;
+ else
+ iter->read_stamp = iter->head_page->page->time_stamp;
+}
+
+/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ rb_iter_reset(iter);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
+
+/**
+ * ring_buffer_iter_empty - check if an iterator has no more to read
+ * @iter: The iterator to check
+ */
+int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ cpu_buffer = iter->cpu_buffer;
+
+ return iter->head_page == cpu_buffer->commit_page &&
+ iter->head == rb_commit_index(cpu_buffer);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
+
+static void
+rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ u64 delta;
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ return;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ cpu_buffer->read_stamp += delta;
+ return;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ /* FIXME: not implemented */
+ return;
+
+ case RINGBUF_TYPE_DATA:
+ cpu_buffer->read_stamp += event->time_delta;
+ return;
+
+ default:
+ BUG();
+ }
+ return;
+}
+
+static void
+rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
+ struct ring_buffer_event *event)
+{
+ u64 delta;
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ return;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ iter->read_stamp += delta;
+ return;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ /* FIXME: not implemented */
+ return;
+
+ case RINGBUF_TYPE_DATA:
+ iter->read_stamp += event->time_delta;
+ return;
+
+ default:
+ BUG();
+ }
+ return;
+}
+
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct buffer_page *reader = NULL;
+ unsigned long flags;
+ int nr_loops = 0;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
+
+ again:
+ /*
+ * This should normally only loop twice. But because the
+ * start of the reader inserts an empty page, it causes
+ * a case where we will loop three times. There should be no
+ * reason to loop four times (that I know of).
+ */
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
+ reader = NULL;
+ goto out;
+ }
+
+ reader = cpu_buffer->reader_page;
+
+ /* If there's more to read, return this page */
+ if (cpu_buffer->reader_page->read < rb_page_size(reader))
+ goto out;
+
+ /* Never should we have an index greater than the size */
+ if (RB_WARN_ON(cpu_buffer,
+ cpu_buffer->reader_page->read > rb_page_size(reader)))
+ goto out;
+
+ /* check if we caught up to the tail */
+ reader = NULL;
+ if (cpu_buffer->commit_page == cpu_buffer->reader_page)
+ goto out;
+
+ /*
+ * Splice the empty reader page into the list around the head.
+ * Reset the reader page to size zero.
+ */
+
+ reader = cpu_buffer->head_page;
+ cpu_buffer->reader_page->list.next = reader->list.next;
+ cpu_buffer->reader_page->list.prev = reader->list.prev;
+
+ local_set(&cpu_buffer->reader_page->write, 0);
+ local_set(&cpu_buffer->reader_page->page->commit, 0);
+
+ /* Make the reader page now replace the head */
+ reader->list.prev->next = &cpu_buffer->reader_page->list;
+ reader->list.next->prev = &cpu_buffer->reader_page->list;
+
+ /*
+ * If the tail is on the reader, then we must set the head
+ * to the inserted page, otherwise we set it one before.
+ */
+ cpu_buffer->head_page = cpu_buffer->reader_page;
+
+ if (cpu_buffer->commit_page != reader)
+ rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+
+ /* Finally update the reader page to the new head */
+ cpu_buffer->reader_page = reader;
+ rb_reset_reader_page(cpu_buffer);
+
+ goto again;
+
+ out:
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
+
+ return reader;
+}
+
+static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ struct ring_buffer_event *event;
+ struct buffer_page *reader;
+ unsigned length;
+
+ reader = rb_get_reader_page(cpu_buffer);
+
+ /* This function should not be called when buffer is empty */
+ if (RB_WARN_ON(cpu_buffer, !reader))
+ return;
+
+ event = rb_reader_event(cpu_buffer);
+
+ if (event->type == RINGBUF_TYPE_DATA)
+ cpu_buffer->entries--;
+
+ rb_update_read_stamp(cpu_buffer, event);
+
+ length = rb_event_length(event);
+ cpu_buffer->reader_page->read += length;
+}
+
+static void rb_advance_iter(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer *buffer;
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ unsigned length;
+
+ cpu_buffer = iter->cpu_buffer;
+ buffer = cpu_buffer->buffer;
+
+ /*
+ * Check if we are at the end of the buffer.
+ */
+ if (iter->head >= rb_page_size(iter->head_page)) {
+ if (RB_WARN_ON(buffer,
+ iter->head_page == cpu_buffer->commit_page))
+ return;
+ rb_inc_iter(iter);
+ return;
+ }
+
+ event = rb_iter_head_event(iter);
+
+ length = rb_event_length(event);
+
+ /*
+ * This should not be called to advance the header if we are
+ * at the tail of the buffer.
+ */
+ if (RB_WARN_ON(cpu_buffer,
+ (iter->head_page == cpu_buffer->commit_page) &&
+ (iter->head + length > rb_commit_index(cpu_buffer))))
+ return;
+
+ rb_update_iter_read_stamp(iter, event);
+
+ iter->head += length;
+
+ /* check for end of page padding */
+ if ((iter->head >= rb_page_size(iter->head_page)) &&
+ (iter->head_page != cpu_buffer->commit_page))
+ rb_advance_iter(iter);
+}
+
+static struct ring_buffer_event *
+rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ struct buffer_page *reader;
+ int nr_loops = 0;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return NULL;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ again:
+ /*
+ * We repeat when a timestamp is encountered. It is possible
+ * to get multiple timestamps from an interrupt entering just
+ * as one timestamp is about to be written. The max times
+ * that this can happen is the number of nested interrupts we
+ * can have. Nesting 10 deep of interrupts is clearly
+ * an anomaly.
+ */
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+ return NULL;
+
+ reader = rb_get_reader_page(cpu_buffer);
+ if (!reader)
+ return NULL;
+
+ event = rb_reader_event(cpu_buffer);
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ RB_WARN_ON(cpu_buffer, 1);
+ rb_advance_reader(cpu_buffer);
+ return NULL;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ /* Internal data, OK to advance */
+ rb_advance_reader(cpu_buffer);
+ goto again;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ /* FIXME: not implemented */
+ rb_advance_reader(cpu_buffer);
+ goto again;
+
+ case RINGBUF_TYPE_DATA:
+ if (ts) {
+ *ts = cpu_buffer->read_stamp + event->time_delta;
+ ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+ }
+ return event;
+
+ default:
+ BUG();
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_peek);
+
+static struct ring_buffer_event *
+rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+ struct ring_buffer *buffer;
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_event *event;
+ int nr_loops = 0;
+
+ if (ring_buffer_iter_empty(iter))
+ return NULL;
+
+ cpu_buffer = iter->cpu_buffer;
+ buffer = cpu_buffer->buffer;
+
+ again:
+ /*
+ * We repeat when a timestamp is encountered. It is possible
+ * to get multiple timestamps from an interrupt entering just
+ * as one timestamp is about to be written. The max times
+ * that this can happen is the number of nested interrupts we
+ * can have. Nesting 10 deep of interrupts is clearly
+ * an anomaly.
+ */
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+ return NULL;
+
+ if (rb_per_cpu_empty(cpu_buffer))
+ return NULL;
+
+ event = rb_iter_head_event(iter);
+
+ switch (event->type) {
+ case RINGBUF_TYPE_PADDING:
+ rb_inc_iter(iter);
+ goto again;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ /* Internal data, OK to advance */
+ rb_advance_iter(iter);
+ goto again;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ /* FIXME: not implemented */
+ rb_advance_iter(iter);
+ goto again;
+
+ case RINGBUF_TYPE_DATA:
+ if (ts) {
+ *ts = iter->read_stamp + event->time_delta;
+ ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+ }
+ return event;
+
+ default:
+ BUG();
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
+
+/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peak at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_buffer_peek(buffer, cpu, ts);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_iter_peek(iter, ts);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+
+/**
+ * ring_buffer_consume - return an event and consume it
+ * @buffer: The ring buffer to get the next event from
+ *
+ * Returns the next event in the ring buffer, and that event is consumed.
+ * Meaning, that sequential reads will keep returning a different event,
+ * and eventually empty the ring buffer if the producer is slower.
+ */
+struct ring_buffer_event *
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return NULL;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ event = rb_buffer_peek(buffer, cpu, ts);
+ if (!event)
+ goto out;
+
+ rb_advance_reader(cpu_buffer);
+
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_consume);
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @buffer: The ring buffer to read from
+ * @cpu: The cpu buffer to iterate over
+ *
+ * This starts up an iteration through the buffer. It also disables
+ * the recording to the buffer until the reading is finished.
+ * This prevents the reading from being corrupted. This is not
+ * a consuming read, so a producer is not expected.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+struct ring_buffer_iter *
+ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_iter *iter;
+ unsigned long flags;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return NULL;
+
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ iter->cpu_buffer = cpu_buffer;
+
+ atomic_inc(&cpu_buffer->record_disabled);
+ synchronize_sched();
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ __raw_spin_lock(&cpu_buffer->lock);
+ rb_iter_reset(iter);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return iter;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_start);
+
+/**
+ * ring_buffer_finish - finish reading the iterator of the buffer
+ * @iter: The iterator retrieved by ring_buffer_start
+ *
+ * This re-enables the recording to the buffer, and frees the
+ * iterator.
+ */
+void
+ring_buffer_read_finish(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+ atomic_dec(&cpu_buffer->record_disabled);
+ kfree(iter);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
+
+/**
+ * ring_buffer_read - read the next item in the ring buffer by the iterator
+ * @iter: The ring buffer iterator
+ * @ts: The time stamp of the event read.
+ *
+ * This reads the next event in the ring buffer and increments the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
+{
+ struct ring_buffer_event *event;
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_iter_peek(iter, ts);
+ if (!event)
+ goto out;
+
+ rb_advance_iter(iter);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read);
+
+/**
+ * ring_buffer_size - return the size of the ring buffer (in bytes)
+ * @buffer: The ring buffer.
+ */
+unsigned long ring_buffer_size(struct ring_buffer *buffer)
+{
+ return BUF_PAGE_SIZE * buffer->pages;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_size);
+
+static void
+rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ cpu_buffer->head_page
+ = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+ local_set(&cpu_buffer->head_page->write, 0);
+ local_set(&cpu_buffer->head_page->page->commit, 0);
+
+ cpu_buffer->head_page->read = 0;
+
+ cpu_buffer->tail_page = cpu_buffer->head_page;
+ cpu_buffer->commit_page = cpu_buffer->head_page;
+
+ INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+ local_set(&cpu_buffer->reader_page->write, 0);
+ local_set(&cpu_buffer->reader_page->page->commit, 0);
+ cpu_buffer->reader_page->read = 0;
+
+ cpu_buffer->overrun = 0;
+ cpu_buffer->entries = 0;
+}
+
+/**
+ * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
+ * @buffer: The ring buffer to reset a per cpu buffer of
+ * @cpu: The CPU buffer to be reset
+ */
+void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ unsigned long flags;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ __raw_spin_lock(&cpu_buffer->lock);
+
+ rb_reset_cpu(cpu_buffer);
+
+ __raw_spin_unlock(&cpu_buffer->lock);
+
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
+
+/**
+ * ring_buffer_reset - reset a ring buffer
+ * @buffer: The ring buffer to reset all cpu buffers
+ */
+void ring_buffer_reset(struct ring_buffer *buffer)
+{
+ int cpu;
+
+ for_each_buffer_cpu(buffer, cpu)
+ ring_buffer_reset_cpu(buffer, cpu);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_reset);
+
+/**
+ * rind_buffer_empty - is the ring buffer empty?
+ * @buffer: The ring buffer to test
+ */
+int ring_buffer_empty(struct ring_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu;
+
+ /* yes this is racy, but if you don't like the race, lock the buffer */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ if (!rb_per_cpu_empty(cpu_buffer))
+ return 0;
+ }
+ return 1;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_empty);
+
+/**
+ * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
+ * @buffer: The ring buffer
+ * @cpu: The CPU buffer to test
+ */
+int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ if (!cpu_isset(cpu, buffer->cpumask))
+ return 1;
+
+ cpu_buffer = buffer->buffers[cpu];
+ return rb_per_cpu_empty(cpu_buffer);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
+
+/**
+ * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
+ * @buffer_a: One buffer to swap with
+ * @buffer_b: The other buffer to swap with
+ *
+ * This function is useful for tracers that want to take a "snapshot"
+ * of a CPU buffer and has another back up buffer lying around.
+ * it is expected that the tracer handles the cpu buffer not being
+ * used at the moment.
+ */
+int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
+ struct ring_buffer *buffer_b, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer_a;
+ struct ring_buffer_per_cpu *cpu_buffer_b;
+
+ if (!cpu_isset(cpu, buffer_a->cpumask) ||
+ !cpu_isset(cpu, buffer_b->cpumask))
+ return -EINVAL;
+
+ /* At least make sure the two buffers are somewhat the same */
+ if (buffer_a->pages != buffer_b->pages)
+ return -EINVAL;
+
+ cpu_buffer_a = buffer_a->buffers[cpu];
+ cpu_buffer_b = buffer_b->buffers[cpu];
+
+ /*
+ * We can't do a synchronize_sched here because this
+ * function can be called in atomic context.
+ * Normally this will be called from the same CPU as cpu.
+ * If not it's up to the caller to protect this.
+ */
+ atomic_inc(&cpu_buffer_a->record_disabled);
+ atomic_inc(&cpu_buffer_b->record_disabled);
+
+ buffer_a->buffers[cpu] = cpu_buffer_b;
+ buffer_b->buffers[cpu] = cpu_buffer_a;
+
+ cpu_buffer_b->buffer = buffer_a;
+ cpu_buffer_a->buffer = buffer_b;
+
+ atomic_dec(&cpu_buffer_a->record_disabled);
+ atomic_dec(&cpu_buffer_b->record_disabled);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
+
+static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_data_page *bpage)
+{
+ struct ring_buffer_event *event;
+ unsigned long head;
+
+ __raw_spin_lock(&cpu_buffer->lock);
+ for (head = 0; head < local_read(&bpage->commit);
+ head += rb_event_length(event)) {
+
+ event = __rb_data_page_index(bpage, head);
+ if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+ return;
+ /* Only count data entries */
+ if (event->type != RINGBUF_TYPE_DATA)
+ continue;
+ cpu_buffer->entries--;
+ }
+ __raw_spin_unlock(&cpu_buffer->lock);
+}
+
+/**
+ * ring_buffer_alloc_read_page - allocate a page to read from buffer
+ * @buffer: the buffer to allocate for.
+ *
+ * This function is used in conjunction with ring_buffer_read_page.
+ * When reading a full page from the ring buffer, these functions
+ * can be used to speed up the process. The calling function should
+ * allocate a few pages first with this function. Then when it
+ * needs to get pages from the ring buffer, it passes the result
+ * of this function into ring_buffer_read_page, which will swap
+ * the page that was allocated, with the read page of the buffer.
+ *
+ * Returns:
+ * The page allocated, or NULL on error.
+ */
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
+{
+ unsigned long addr;
+ struct buffer_data_page *bpage;
+
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ return NULL;
+
+ bpage = (void *)addr;
+
+ return bpage;
+}
+
+/**
+ * ring_buffer_free_read_page - free an allocated read page
+ * @buffer: the buffer the page was allocate for
+ * @data: the page to free
+ *
+ * Free a page allocated from ring_buffer_alloc_read_page.
+ */
+void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
+{
+ free_page((unsigned long)data);
+}
+
+/**
+ * ring_buffer_read_page - extract a page from the ring buffer
+ * @buffer: buffer to extract from
+ * @data_page: the page to use allocated from ring_buffer_alloc_read_page
+ * @cpu: the cpu of the buffer to extract
+ * @full: should the extraction only happen when the page is full.
+ *
+ * This function will pull out a page from the ring buffer and consume it.
+ * @data_page must be the address of the variable that was returned
+ * from ring_buffer_alloc_read_page. This is because the page might be used
+ * to swap with a page in the ring buffer.
+ *
+ * for example:
+ * rpage = ring_buffer_alloc_page(buffer);
+ * if (!rpage)
+ * return error;
+ * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
+ * if (ret)
+ * process_page(rpage);
+ *
+ * When @full is set, the function will not return true unless
+ * the writer is off the reader page.
+ *
+ * Note: it is up to the calling functions to handle sleeps and wakeups.
+ * The ring buffer can be used anywhere in the kernel and can not
+ * blindly call wake_up. The layer that uses the ring buffer must be
+ * responsible for that.
+ *
+ * Returns:
+ * 1 if data has been transferred
+ * 0 if no data has been transferred.
+ */
+int ring_buffer_read_page(struct ring_buffer *buffer,
+ void **data_page, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ struct buffer_data_page *bpage;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!data_page)
+ return 0;
+
+ bpage = *data_page;
+ if (!bpage)
+ return 0;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ /*
+ * rb_buffer_peek will get the next ring buffer if
+ * the current reader page is empty.
+ */
+ event = rb_buffer_peek(buffer, cpu, NULL);
+ if (!event)
+ goto out;
+
+ /* check for data */
+ if (!local_read(&cpu_buffer->reader_page->page->commit))
+ goto out;
+ /*
+ * If the writer is already off of the read page, then simply
+ * switch the read page with the given page. Otherwise
+ * we need to copy the data from the reader to the writer.
+ */
+ if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
+ unsigned int read = cpu_buffer->reader_page->read;
+
+ if (full)
+ goto out;
+ /* The writer is still on the reader page, we must copy */
+ bpage = cpu_buffer->reader_page->page;
+ memcpy(bpage->data,
+ cpu_buffer->reader_page->page->data + read,
+ local_read(&bpage->commit) - read);
+
+ /* consume what was read */
+ cpu_buffer->reader_page += read;
+
+ } else {
+ /* swap the pages */
+ rb_init_page(bpage);
+ bpage = cpu_buffer->reader_page->page;
+ cpu_buffer->reader_page->page = *data_page;
+ cpu_buffer->reader_page->read = 0;
+ *data_page = bpage;
+ }
+ ret = 1;
+
+ /* update the entry counter */
+ rb_remove_entries(cpu_buffer, bpage);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return ret;
+}
+
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ long *p = filp->private_data;
+ char buf[64];
+ int r;
+
+ if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
+ r = sprintf(buf, "permanently disabled\n");
+ else
+ r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ long *p = filp->private_data;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ if (val)
+ set_bit(RB_BUFFERS_ON_BIT, p);
+ else
+ clear_bit(RB_BUFFERS_ON_BIT, p);
+
+ (*ppos)++;
+
+ return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+ .open = tracing_open_generic,
+ .read = rb_simple_read,
+ .write = rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+ &ring_buffer_flags, &rb_simple_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+ return 0;
+}
+
+fs_initcall(rb_init_debugfs);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8f3fb3d..4185d52 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
#include <linux/utsrelease.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
+#include <linux/notifier.h>
#include <linux/debugfs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
@@ -22,6 +23,7 @@
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
+#include <linux/kdebug.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
@@ -31,24 +33,97 @@
#include <linux/writeback.h>
#include <linux/stacktrace.h>
+#include <linux/ring_buffer.h>
+#include <linux/irqflags.h>
#include "trace.h"
+#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
+
unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
unsigned long __read_mostly tracing_thresh;
-static unsigned long __read_mostly tracing_nr_buffers;
+/*
+ * We need to change this state when a selftest is running.
+ * A selftest will lurk into the ring-buffer to count the
+ * entries inserted during the selftest although some concurrent
+ * insertions into the ring-buffer such as ftrace_printk could occurred
+ * at the same time, giving false positive or negative results.
+ */
+static bool __read_mostly tracing_selftest_running;
+
+/* For tracers that don't implement custom flags */
+static struct tracer_opt dummy_tracer_opt[] = {
+ { }
+};
+
+static struct tracer_flags dummy_tracer_flags = {
+ .val = 0,
+ .opts = dummy_tracer_opt
+};
+
+static int dummy_set_flag(u32 old_flags, u32 bit, int set)
+{
+ return 0;
+}
+
+/*
+ * Kill all tracing for good (never come back).
+ * It is initialized to 1 but will turn to zero if the initialization
+ * of the tracer is successful. But that is the only place that sets
+ * this back to zero.
+ */
+int tracing_disabled = 1;
+
+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+ preempt_disable();
+ local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+ local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+ preempt_enable();
+}
+
static cpumask_t __read_mostly tracing_buffer_mask;
#define for_each_tracing_cpu(cpu) \
for_each_cpu_mask(cpu, tracing_buffer_mask)
-static int trace_alloc_page(void);
-static int trace_free_page(void);
+/*
+ * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+ *
+ * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
+ * is set, then ftrace_dump is called. This will output the contents
+ * of the ftrace buffers to the console. This is very useful for
+ * capturing traces that lead to crashes and outputing it to a
+ * serial console.
+ *
+ * It is default off, but you can enable it with either specifying
+ * "ftrace_dump_on_oops" in the kernel command line, or setting
+ * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ */
+int ftrace_dump_on_oops;
-static int tracing_disabled = 1;
+static int tracing_set_tracer(char *buf);
-static unsigned long tracing_pages_allocated;
+static int __init set_ftrace(char *str)
+{
+ tracing_set_tracer(str);
+ return 1;
+}
+__setup("ftrace", set_ftrace);
+
+static int __init set_ftrace_dump_on_oops(char *str)
+{
+ ftrace_dump_on_oops = 1;
+ return 1;
+}
+__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
long
ns2usecs(cycle_t nsec)
@@ -60,7 +135,9 @@ ns2usecs(cycle_t nsec)
cycle_t ftrace_now(int cpu)
{
- return cpu_clock(cpu);
+ u64 ts = ring_buffer_time_stamp(cpu);
+ ring_buffer_normalize_time_stamp(cpu, &ts);
+ return ts;
}
/*
@@ -96,15 +173,35 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
/* tracer_enabled is used to toggle activation of a tracer */
static int tracer_enabled = 1;
+/**
+ * tracing_is_enabled - return tracer_enabled status
+ *
+ * This function is used by other tracers to know the status
+ * of the tracer_enabled flag. Tracers may use this function
+ * to know if it should enable their features when starting
+ * up. See irqsoff tracer for an example (start_irqsoff_tracer).
+ */
+int tracing_is_enabled(void)
+{
+ return tracer_enabled;
+}
+
/* function tracing enabled */
int ftrace_function_enabled;
/*
- * trace_nr_entries is the number of entries that is allocated
- * for a buffer. Note, the number of entries is always rounded
- * to ENTRIES_PER_PAGE.
+ * trace_buf_size is the size in bytes that is allocated
+ * for a buffer. Note, the number of bytes is always rounded
+ * to page size.
+ *
+ * This number is purposely set to a low number of 16384.
+ * If the dump on oops happens, it will be much appreciated
+ * to not have to wait for all that output. Anyway this can be
+ * boot time and run time configurable.
*/
-static unsigned long trace_nr_entries = 65536UL;
+#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
+
+static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
/* trace_types holds a link list of available tracers. */
static struct tracer *trace_types __read_mostly;
@@ -130,26 +227,9 @@ static DEFINE_MUTEX(trace_types_lock);
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
-/* trace_flags holds iter_ctrl options */
-unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
-
-static notrace void no_trace_init(struct trace_array *tr)
-{
- int cpu;
-
- ftrace_function_enabled = 0;
- if(tr->ctrl)
- for_each_online_cpu(cpu)
- tracing_reset(tr->data[cpu]);
- tracer_enabled = 0;
-}
-
-/* dummy trace to disable tracing */
-static struct tracer no_tracer __read_mostly = {
- .name = "none",
- .init = no_trace_init
-};
-
+/* trace_flags holds trace_options default values */
+unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
+ TRACE_ITER_ANNOTATE;
/**
* trace_wake_up - wake up tasks waiting for trace input
@@ -167,51 +247,27 @@ void trace_wake_up(void)
wake_up(&trace_wait);
}
-#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
-
-static int __init set_nr_entries(char *str)
+static int __init set_buf_size(char *str)
{
- unsigned long nr_entries;
+ unsigned long buf_size;
int ret;
if (!str)
return 0;
- ret = strict_strtoul(str, 0, &nr_entries);
+ ret = strict_strtoul(str, 0, &buf_size);
/* nr_entries can not be zero */
- if (ret < 0 || nr_entries == 0)
+ if (ret < 0 || buf_size == 0)
return 0;
- trace_nr_entries = nr_entries;
+ trace_buf_size = buf_size;
return 1;
}
-__setup("trace_entries=", set_nr_entries);
+__setup("trace_buf_size=", set_buf_size);
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
return nsecs / 1000;
}
-/*
- * trace_flag_type is an enumeration that holds different
- * states when a trace occurs. These are:
- * IRQS_OFF - interrupts were disabled
- * NEED_RESCED - reschedule is requested
- * HARDIRQ - inside an interrupt handler
- * SOFTIRQ - inside a softirq handler
- */
-enum trace_flag_type {
- TRACE_FLAG_IRQS_OFF = 0x01,
- TRACE_FLAG_NEED_RESCHED = 0x02,
- TRACE_FLAG_HARDIRQ = 0x04,
- TRACE_FLAG_SOFTIRQ = 0x08,
-};
-
-/*
- * TRACE_ITER_SYM_MASK masks the options in trace_flags that
- * control the output of kernel symbols.
- */
-#define TRACE_ITER_SYM_MASK \
- (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
-
/* These must match the bit postions in trace_iterator_flags */
static const char *trace_options[] = {
"print-parent",
@@ -224,6 +280,13 @@ static const char *trace_options[] = {
"block",
"stacktrace",
"sched-tree",
+ "ftrace_printk",
+ "ftrace_preempt",
+ "branch",
+ "annotate",
+ "userstacktrace",
+ "sym-userobj",
+ "printk-msg-only",
NULL
};
@@ -257,7 +320,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
data->pid = tsk->pid;
- data->uid = tsk->uid;
+ data->uid = task_uid(tsk);
data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
data->policy = tsk->policy;
data->rt_priority = tsk->rt_priority;
@@ -266,54 +329,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
tracing_record_cmdline(current);
}
-#define CHECK_COND(cond) \
- if (unlikely(cond)) { \
- tracing_disabled = 1; \
- WARN_ON(1); \
- return -1; \
- }
-
-/**
- * check_pages - integrity check of trace buffers
- *
- * As a safty measure we check to make sure the data pages have not
- * been corrupted.
- */
-int check_pages(struct trace_array_cpu *data)
-{
- struct page *page, *tmp;
-
- CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
- CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
-
- list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
- CHECK_COND(page->lru.next->prev != &page->lru);
- CHECK_COND(page->lru.prev->next != &page->lru);
- }
-
- return 0;
-}
-
-/**
- * head_page - page address of the first page in per_cpu buffer.
- *
- * head_page returns the page address of the first page in
- * a per_cpu buffer. This also preforms various consistency
- * checks to make sure the buffer has not been corrupted.
- */
-void *head_page(struct trace_array_cpu *data)
-{
- struct page *page;
-
- if (list_empty(&data->trace_pages))
- return NULL;
-
- page = list_entry(data->trace_pages.next, struct page, lru);
- BUG_ON(&page->lru == &data->trace_pages);
-
- return page_address(page);
-}
-
/**
* trace_seq_printf - sequence printing of trace information
* @s: trace sequence descriptor
@@ -395,34 +410,51 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
return len;
}
-#define HEX_CHARS 17
-static const char hex2asc[] = "0123456789abcdef";
+#define MAX_MEMHEX_BYTES 8
+#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
static int
trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
{
unsigned char hex[HEX_CHARS];
unsigned char *data = mem;
- unsigned char byte;
int i, j;
- BUG_ON(len >= HEX_CHARS);
-
#ifdef __BIG_ENDIAN
for (i = 0, j = 0; i < len; i++) {
#else
for (i = len-1, j = 0; i >= 0; i--) {
#endif
- byte = data[i];
-
- hex[j++] = hex2asc[byte & 0x0f];
- hex[j++] = hex2asc[byte >> 4];
+ hex[j++] = hex_asc_hi(data[i]);
+ hex[j++] = hex_asc_lo(data[i]);
}
hex[j++] = ' ';
return trace_seq_putmem(s, hex, j);
}
+static int
+trace_seq_path(struct trace_seq *s, struct path *path)
+{
+ unsigned char *p;
+
+ if (s->len >= (PAGE_SIZE - 1))
+ return 0;
+ p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+ if (!IS_ERR(p)) {
+ p = mangle_path(s->buffer + s->len, p, "\n");
+ if (p) {
+ s->len = p - s->buffer;
+ return 1;
+ }
+ } else {
+ s->buffer[s->len++] = '?';
+ return 1;
+ }
+
+ return 0;
+}
+
static void
trace_seq_reset(struct trace_seq *s)
{
@@ -460,34 +492,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
trace_seq_reset(s);
}
-/*
- * flip the trace buffers between two trace descriptors.
- * This usually is the buffers between the global_trace and
- * the max_tr to record a snapshot of a current trace.
- *
- * The ftrace_max_lock must be held.
- */
-static void
-flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
-{
- struct list_head flip_pages;
-
- INIT_LIST_HEAD(&flip_pages);
-
- memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
- sizeof(struct trace_array_cpu) -
- offsetof(struct trace_array_cpu, trace_head_idx));
-
- check_pages(tr1);
- check_pages(tr2);
- list_splice_init(&tr1->trace_pages, &flip_pages);
- list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
- list_splice_init(&flip_pages, &tr2->trace_pages);
- BUG_ON(!list_empty(&flip_pages));
- check_pages(tr1);
- check_pages(tr2);
-}
-
/**
* update_max_tr - snapshot all trace buffers from global_trace to max_tr
* @tr: tracer
@@ -500,17 +504,17 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
- struct trace_array_cpu *data;
- int i;
+ struct ring_buffer *buf = tr->buffer;
WARN_ON_ONCE(!irqs_disabled());
__raw_spin_lock(&ftrace_max_lock);
- /* clear out all the previous traces */
- for_each_tracing_cpu(i) {
- data = tr->data[i];
- flip_trace(max_tr.data[i], data);
- tracing_reset(data);
- }
+
+ tr->buffer = max_tr.buffer;
+ max_tr.buffer = buf;
+
+ ftrace_disable_cpu();
+ ring_buffer_reset(tr->buffer);
+ ftrace_enable_cpu();
__update_max_tr(tr, tsk, cpu);
__raw_spin_unlock(&ftrace_max_lock);
@@ -527,16 +531,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
- struct trace_array_cpu *data = tr->data[cpu];
- int i;
+ int ret;
WARN_ON_ONCE(!irqs_disabled());
__raw_spin_lock(&ftrace_max_lock);
- for_each_tracing_cpu(i)
- tracing_reset(max_tr.data[i]);
- flip_trace(max_tr.data[cpu], data);
- tracing_reset(data);
+ ftrace_disable_cpu();
+
+ ring_buffer_reset(max_tr.buffer);
+ ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+
+ ftrace_enable_cpu();
+
+ WARN_ON_ONCE(ret);
__update_max_tr(tr, tsk, cpu);
__raw_spin_unlock(&ftrace_max_lock);
@@ -559,7 +566,17 @@ int register_tracer(struct tracer *type)
return -1;
}
+ /*
+ * When this gets called we hold the BKL which means that
+ * preemption is disabled. Various trace selftests however
+ * need to disable and enable preemption for successful tests.
+ * So we drop the BKL here and grab it after the tests again.
+ */
+ unlock_kernel();
mutex_lock(&trace_types_lock);
+
+ tracing_selftest_running = true;
+
for (t = trace_types; t; t = t->next) {
if (strcmp(type->name, t->name) == 0) {
/* already found */
@@ -570,13 +587,20 @@ int register_tracer(struct tracer *type)
}
}
+ if (!type->set_flag)
+ type->set_flag = &dummy_set_flag;
+ if (!type->flags)
+ type->flags = &dummy_tracer_flags;
+ else
+ if (!type->flags->opts)
+ type->flags->opts = dummy_tracer_opt;
+
#ifdef CONFIG_FTRACE_STARTUP_TEST
if (type->selftest) {
struct tracer *saved_tracer = current_trace;
- struct trace_array_cpu *data;
struct trace_array *tr = &global_trace;
- int saved_ctrl = tr->ctrl;
int i;
+
/*
* Run a selftest on this tracer.
* Here we reset the trace buffer, and set the current
@@ -584,31 +608,23 @@ int register_tracer(struct tracer *type)
* internal tracing to verify that everything is in order.
* If we fail, we do not register this tracer.
*/
- for_each_tracing_cpu(i) {
- data = tr->data[i];
- if (!head_page(data))
- continue;
- tracing_reset(data);
- }
+ for_each_tracing_cpu(i)
+ tracing_reset(tr, i);
+
current_trace = type;
- tr->ctrl = 0;
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr);
/* the test is responsible for resetting too */
current_trace = saved_tracer;
- tr->ctrl = saved_ctrl;
if (ret) {
printk(KERN_CONT "FAILED!\n");
goto out;
}
/* Only reset on passing, to avoid touching corrupted buffers */
- for_each_tracing_cpu(i) {
- data = tr->data[i];
- if (!head_page(data))
- continue;
- tracing_reset(data);
- }
+ for_each_tracing_cpu(i)
+ tracing_reset(tr, i);
+
printk(KERN_CONT "PASSED\n");
}
#endif
@@ -620,7 +636,9 @@ int register_tracer(struct tracer *type)
max_tracer_type_len = len;
out:
+ tracing_selftest_running = false;
mutex_unlock(&trace_types_lock);
+ lock_kernel();
return ret;
}
@@ -653,13 +671,21 @@ void unregister_tracer(struct tracer *type)
mutex_unlock(&trace_types_lock);
}
-void tracing_reset(struct trace_array_cpu *data)
+void tracing_reset(struct trace_array *tr, int cpu)
{
- data->trace_idx = 0;
- data->overrun = 0;
- data->trace_head = data->trace_tail = head_page(data);
- data->trace_head_idx = 0;
- data->trace_tail_idx = 0;
+ ftrace_disable_cpu();
+ ring_buffer_reset_cpu(tr->buffer, cpu);
+ ftrace_enable_cpu();
+}
+
+void tracing_reset_online_cpus(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
}
#define SAVED_CMDLINES 128
@@ -679,6 +705,91 @@ static void trace_init_cmdlines(void)
cmdline_idx = 0;
}
+static int trace_stop_count;
+static DEFINE_SPINLOCK(tracing_start_lock);
+
+/**
+ * ftrace_off_permanent - disable all ftrace code permanently
+ *
+ * This should only be called when a serious anomally has
+ * been detected. This will turn off the function tracing,
+ * ring buffers, and other tracing utilites. It takes no
+ * locks and can be called from any context.
+ */
+void ftrace_off_permanent(void)
+{
+ tracing_disabled = 1;
+ ftrace_stop();
+ tracing_off_permanent();
+}
+
+/**
+ * tracing_start - quick start of the tracer
+ *
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
+ */
+void tracing_start(void)
+{
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ if (tracing_disabled)
+ return;
+
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (--trace_stop_count)
+ goto out;
+
+ if (trace_stop_count < 0) {
+ /* Someone screwed up their debugging */
+ WARN_ON_ONCE(1);
+ trace_stop_count = 0;
+ goto out;
+ }
+
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ ftrace_start();
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
+/**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Light weight way to stop tracing. Use in conjunction with
+ * tracing_start.
+ */
+void tracing_stop(void)
+{
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ ftrace_stop();
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (trace_stop_count++)
+ goto out;
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
@@ -716,7 +827,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
spin_unlock(&trace_cmdline_lock);
}
-static char *trace_find_cmdline(int pid)
+char *trace_find_cmdline(int pid)
{
char *cmdline = "<...>";
unsigned map;
@@ -745,82 +856,21 @@ void tracing_record_cmdline(struct task_struct *tsk)
trace_save_cmdline(tsk);
}
-static inline struct list_head *
-trace_next_list(struct trace_array_cpu *data, struct list_head *next)
-{
- /*
- * Roundrobin - but skip the head (which is not a real page):
- */
- next = next->next;
- if (unlikely(next == &data->trace_pages))
- next = next->next;
- BUG_ON(next == &data->trace_pages);
-
- return next;
-}
-
-static inline void *
-trace_next_page(struct trace_array_cpu *data, void *addr)
-{
- struct list_head *next;
- struct page *page;
-
- page = virt_to_page(addr);
-
- next = trace_next_list(data, &page->lru);
- page = list_entry(next, struct page, lru);
-
- return page_address(page);
-}
-
-static inline struct trace_entry *
-tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
-{
- unsigned long idx, idx_next;
- struct trace_entry *entry;
-
- data->trace_idx++;
- idx = data->trace_head_idx;
- idx_next = idx + 1;
-
- BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
-
- entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
-
- if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
- data->trace_head = trace_next_page(data, data->trace_head);
- idx_next = 0;
- }
-
- if (data->trace_head == data->trace_tail &&
- idx_next == data->trace_tail_idx) {
- /* overrun */
- data->overrun++;
- data->trace_tail_idx++;
- if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
- data->trace_tail =
- trace_next_page(data, data->trace_tail);
- data->trace_tail_idx = 0;
- }
- }
-
- data->trace_head_idx = idx_next;
-
- return entry;
-}
-
-static inline void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
+void
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
+ int pc)
{
struct task_struct *tsk = current;
- unsigned long pc;
- pc = preempt_count();
-
- entry->preempt_count = pc & 0xff;
- entry->pid = (tsk) ? tsk->pid : 0;
- entry->t = ftrace_now(raw_smp_processor_id());
- entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+ entry->preempt_count = pc & 0xff;
+ entry->pid = (tsk) ? tsk->pid : 0;
+ entry->tgid = (tsk) ? tsk->tgid : 0;
+ entry->flags =
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+ (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+#else
+ TRACE_FLAG_IRQS_NOSUPPORT |
+#endif
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -828,145 +878,233 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
void
trace_function(struct trace_array *tr, struct trace_array_cpu *data,
- unsigned long ip, unsigned long parent_ip, unsigned long flags)
+ unsigned long ip, unsigned long parent_ip, unsigned long flags,
+ int pc)
{
- struct trace_entry *entry;
+ struct ring_buffer_event *event;
+ struct ftrace_entry *entry;
unsigned long irq_flags;
- raw_local_irq_save(irq_flags);
- __raw_spin_lock(&data->lock);
- entry = tracing_get_trace_entry(tr, data);
- tracing_generic_entry_update(entry, flags);
- entry->type = TRACE_FN;
- entry->fn.ip = ip;
- entry->fn.parent_ip = parent_ip;
- __raw_spin_unlock(&data->lock);
- raw_local_irq_restore(irq_flags);
-}
+ /* If we are reading the ring buffer, don't trace */
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
-void
-ftrace(struct trace_array *tr, struct trace_array_cpu *data,
- unsigned long ip, unsigned long parent_ip, unsigned long flags)
-{
- if (likely(!atomic_read(&data->disabled)))
- trace_function(tr, data, ip, parent_ip, flags);
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_FN;
+ entry->ip = ip;
+ entry->parent_ip = parent_ip;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static void __trace_graph_entry(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_graph_ent *trace,
+ unsigned long flags,
+ int pc)
+{
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ent_entry *entry;
+ unsigned long irq_flags;
+
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_GRAPH_ENT;
+ entry->graph_ent = *trace;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
}
-#ifdef CONFIG_MMIOTRACE
-void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
- struct mmiotrace_rw *rw)
+static void __trace_graph_return(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_graph_ret *trace,
+ unsigned long flags,
+ int pc)
{
- struct trace_entry *entry;
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ret_entry *entry;
unsigned long irq_flags;
- raw_local_irq_save(irq_flags);
- __raw_spin_lock(&data->lock);
-
- entry = tracing_get_trace_entry(tr, data);
- tracing_generic_entry_update(entry, 0);
- entry->type = TRACE_MMIO_RW;
- entry->mmiorw = *rw;
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
- __raw_spin_unlock(&data->lock);
- raw_local_irq_restore(irq_flags);
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_GRAPH_RET;
+ entry->ret = *trace;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+}
+#endif
- trace_wake_up();
+void
+ftrace(struct trace_array *tr, struct trace_array_cpu *data,
+ unsigned long ip, unsigned long parent_ip, unsigned long flags,
+ int pc)
+{
+ if (likely(!atomic_read(&data->disabled)))
+ trace_function(tr, data, ip, parent_ip, flags, pc);
}
-void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
- struct mmiotrace_map *map)
+static void ftrace_trace_stack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags,
+ int skip, int pc)
{
- struct trace_entry *entry;
+#ifdef CONFIG_STACKTRACE
+ struct ring_buffer_event *event;
+ struct stack_entry *entry;
+ struct stack_trace trace;
unsigned long irq_flags;
- raw_local_irq_save(irq_flags);
- __raw_spin_lock(&data->lock);
+ if (!(trace_flags & TRACE_ITER_STACKTRACE))
+ return;
- entry = tracing_get_trace_entry(tr, data);
- tracing_generic_entry_update(entry, 0);
- entry->type = TRACE_MMIO_MAP;
- entry->mmiomap = *map;
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_STACK;
- __raw_spin_unlock(&data->lock);
- raw_local_irq_restore(irq_flags);
+ memset(&entry->caller, 0, sizeof(entry->caller));
- trace_wake_up();
-}
+ trace.nr_entries = 0;
+ trace.max_entries = FTRACE_STACK_ENTRIES;
+ trace.skip = skip;
+ trace.entries = entry->caller;
+
+ save_stack_trace(&trace);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
#endif
+}
void __trace_stack(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long flags,
int skip)
{
- struct trace_entry *entry;
+ ftrace_trace_stack(tr, data, flags, skip, preempt_count());
+}
+
+static void ftrace_trace_userstack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags, int pc)
+{
+#ifdef CONFIG_STACKTRACE
+ struct ring_buffer_event *event;
+ struct userstack_entry *entry;
struct stack_trace trace;
+ unsigned long irq_flags;
- if (!(trace_flags & TRACE_ITER_STACKTRACE))
+ if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
- entry = tracing_get_trace_entry(tr, data);
- tracing_generic_entry_update(entry, flags);
- entry->type = TRACE_STACK;
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_USER_STACK;
- memset(&entry->stack, 0, sizeof(entry->stack));
+ memset(&entry->caller, 0, sizeof(entry->caller));
trace.nr_entries = 0;
trace.max_entries = FTRACE_STACK_ENTRIES;
- trace.skip = skip;
- trace.entries = entry->stack.caller;
+ trace.skip = 0;
+ trace.entries = entry->caller;
- save_stack_trace(&trace);
+ save_stack_trace_user(&trace);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+#endif
}
-void
-__trace_special(void *__tr, void *__data,
- unsigned long arg1, unsigned long arg2, unsigned long arg3)
+void __trace_userstack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags)
{
+ ftrace_trace_userstack(tr, data, flags, preempt_count());
+}
+
+static void
+ftrace_trace_special(void *__tr, void *__data,
+ unsigned long arg1, unsigned long arg2, unsigned long arg3,
+ int pc)
+{
+ struct ring_buffer_event *event;
struct trace_array_cpu *data = __data;
struct trace_array *tr = __tr;
- struct trace_entry *entry;
+ struct special_entry *entry;
unsigned long irq_flags;
- raw_local_irq_save(irq_flags);
- __raw_spin_lock(&data->lock);
- entry = tracing_get_trace_entry(tr, data);
- tracing_generic_entry_update(entry, 0);
- entry->type = TRACE_SPECIAL;
- entry->special.arg1 = arg1;
- entry->special.arg2 = arg2;
- entry->special.arg3 = arg3;
- __trace_stack(tr, data, irq_flags, 4);
- __raw_spin_unlock(&data->lock);
- raw_local_irq_restore(irq_flags);
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, pc);
+ entry->ent.type = TRACE_SPECIAL;
+ entry->arg1 = arg1;
+ entry->arg2 = arg2;
+ entry->arg3 = arg3;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+ ftrace_trace_userstack(tr, data, irq_flags, pc);
trace_wake_up();
}
void
+__trace_special(void *__tr, void *__data,
+ unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+ ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
+}
+
+void
tracing_sched_switch_trace(struct trace_array *tr,
struct trace_array_cpu *data,
struct task_struct *prev,
struct task_struct *next,
- unsigned long flags)
+ unsigned long flags, int pc)
{
- struct trace_entry *entry;
+ struct ring_buffer_event *event;
+ struct ctx_switch_entry *entry;
unsigned long irq_flags;
- raw_local_irq_save(irq_flags);
- __raw_spin_lock(&data->lock);
- entry = tracing_get_trace_entry(tr, data);
- tracing_generic_entry_update(entry, flags);
- entry->type = TRACE_CTX;
- entry->ctx.prev_pid = prev->pid;
- entry->ctx.prev_prio = prev->prio;
- entry->ctx.prev_state = prev->state;
- entry->ctx.next_pid = next->pid;
- entry->ctx.next_prio = next->prio;
- entry->ctx.next_state = next->state;
- __trace_stack(tr, data, flags, 5);
- __raw_spin_unlock(&data->lock);
- raw_local_irq_restore(irq_flags);
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_CTX;
+ entry->prev_pid = prev->pid;
+ entry->prev_prio = prev->prio;
+ entry->prev_state = prev->state;
+ entry->next_pid = next->pid;
+ entry->next_prio = next->prio;
+ entry->next_state = next->state;
+ entry->next_cpu = task_cpu(next);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ftrace_trace_stack(tr, data, flags, 5, pc);
+ ftrace_trace_userstack(tr, data, flags, pc);
}
void
@@ -974,25 +1112,29 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
struct trace_array_cpu *data,
struct task_struct *wakee,
struct task_struct *curr,
- unsigned long flags)
+ unsigned long flags, int pc)
{
- struct trace_entry *entry;
+ struct ring_buffer_event *event;
+ struct ctx_switch_entry *entry;
unsigned long irq_flags;
- raw_local_irq_save(irq_flags);
- __raw_spin_lock(&data->lock);
- entry = tracing_get_trace_entry(tr, data);
- tracing_generic_entry_update(entry, flags);
- entry->type = TRACE_WAKE;
- entry->ctx.prev_pid = curr->pid;
- entry->ctx.prev_prio = curr->prio;
- entry->ctx.prev_state = curr->state;
- entry->ctx.next_pid = wakee->pid;
- entry->ctx.next_prio = wakee->prio;
- entry->ctx.next_state = wakee->state;
- __trace_stack(tr, data, flags, 6);
- __raw_spin_unlock(&data->lock);
- raw_local_irq_restore(irq_flags);
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_WAKE;
+ entry->prev_pid = curr->pid;
+ entry->prev_prio = curr->prio;
+ entry->prev_state = curr->state;
+ entry->next_pid = wakee->pid;
+ entry->next_prio = wakee->prio;
+ entry->next_state = wakee->state;
+ entry->next_cpu = task_cpu(wakee);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ftrace_trace_stack(tr, data, flags, 6, pc);
+ ftrace_trace_userstack(tr, data, flags, pc);
trace_wake_up();
}
@@ -1003,25 +1145,52 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
unsigned long flags;
- long disabled;
int cpu;
+ int pc;
- if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
+ if (tracing_disabled)
return;
+ pc = preempt_count();
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
+
+ if (likely(atomic_inc_return(&data->disabled) == 1))
+ ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+static void
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu, resched;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ pc = preempt_count();
+ resched = ftrace_preempt_disable();
+ local_save_flags(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
- __trace_special(tr, data, arg1, arg2, arg3);
+ trace_function(tr, data, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
- local_irq_restore(flags);
+ ftrace_preempt_enable(resched);
}
-#ifdef CONFIG_FTRACE
static void
function_trace_call(unsigned long ip, unsigned long parent_ip)
{
@@ -1030,24 +1199,85 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
unsigned long flags;
long disabled;
int cpu;
+ int pc;
if (unlikely(!ftrace_function_enabled))
return;
- if (skip_trace(ip))
- return;
+ /*
+ * Need to use raw, since this must be called before the
+ * recursive protection is performed.
+ */
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+ }
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+int trace_graph_entry(struct ftrace_graph_ent *trace)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (!ftrace_trace_task(current))
+ return 0;
+
+ if (!ftrace_graph_addr(trace->func))
+ return 0;
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_graph_entry(tr, data, trace, flags, pc);
+ }
+ /* Only do the atomic if it is not already set */
+ if (!test_tsk_trace_graph(current))
+ set_tsk_trace_graph(current);
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
- if (likely(disabled == 1))
- trace_function(tr, data, ip, parent_ip, flags);
+ return 1;
+}
+void trace_graph_return(struct ftrace_graph_ret *trace)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_graph_return(tr, data, trace, flags, pc);
+ }
+ if (!trace->depth)
+ clear_tsk_trace_graph(current);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
static struct ftrace_ops trace_ops __read_mostly =
{
@@ -1057,9 +1287,14 @@ static struct ftrace_ops trace_ops __read_mostly =
void tracing_start_function_trace(void)
{
ftrace_function_enabled = 0;
+
+ if (trace_flags & TRACE_ITER_PREEMPTONLY)
+ trace_ops.func = function_trace_call_preempt_only;
+ else
+ trace_ops.func = function_trace_call;
+
register_ftrace_function(&trace_ops);
- if (tracer_enabled)
- ftrace_function_enabled = 1;
+ ftrace_function_enabled = 1;
}
void tracing_stop_function_trace(void)
@@ -1071,113 +1306,99 @@ void tracing_stop_function_trace(void)
enum trace_file_type {
TRACE_FILE_LAT_FMT = 1,
+ TRACE_FILE_ANNOTATE = 2,
};
-static struct trace_entry *
-trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
- struct trace_iterator *iter, int cpu)
+static void trace_iterator_increment(struct trace_iterator *iter)
{
- struct page *page;
- struct trace_entry *array;
+ /* Don't allow ftrace to trace into the ring buffers */
+ ftrace_disable_cpu();
- if (iter->next_idx[cpu] >= tr->entries ||
- iter->next_idx[cpu] >= data->trace_idx ||
- (data->trace_head == data->trace_tail &&
- data->trace_head_idx == data->trace_tail_idx))
- return NULL;
+ iter->idx++;
+ if (iter->buffer_iter[iter->cpu])
+ ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
- if (!iter->next_page[cpu]) {
- /* Initialize the iterator for this cpu trace buffer */
- WARN_ON(!data->trace_tail);
- page = virt_to_page(data->trace_tail);
- iter->next_page[cpu] = &page->lru;
- iter->next_page_idx[cpu] = data->trace_tail_idx;
- }
+ ftrace_enable_cpu();
+}
- page = list_entry(iter->next_page[cpu], struct page, lru);
- BUG_ON(&data->trace_pages == &page->lru);
+static struct trace_entry *
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
+{
+ struct ring_buffer_event *event;
+ struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
- array = page_address(page);
+ /* Don't allow ftrace to trace into the ring buffers */
+ ftrace_disable_cpu();
- WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
- return &array[iter->next_page_idx[cpu]];
+ if (buf_iter)
+ event = ring_buffer_iter_peek(buf_iter, ts);
+ else
+ event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+
+ ftrace_enable_cpu();
+
+ return event ? ring_buffer_event_data(event) : NULL;
}
static struct trace_entry *
-find_next_entry(struct trace_iterator *iter, int *ent_cpu)
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
{
- struct trace_array *tr = iter->tr;
+ struct ring_buffer *buffer = iter->tr->buffer;
struct trace_entry *ent, *next = NULL;
+ u64 next_ts = 0, ts;
int next_cpu = -1;
int cpu;
for_each_tracing_cpu(cpu) {
- if (!head_page(tr->data[cpu]))
+
+ if (ring_buffer_empty_cpu(buffer, cpu))
continue;
- ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+
+ ent = peek_next_entry(iter, cpu, &ts);
+
/*
* Pick the entry with the smallest timestamp:
*/
- if (ent && (!next || ent->t < next->t)) {
+ if (ent && (!next || ts < next_ts)) {
next = ent;
next_cpu = cpu;
+ next_ts = ts;
}
}
if (ent_cpu)
*ent_cpu = next_cpu;
+ if (ent_ts)
+ *ent_ts = next_ts;
+
return next;
}
-static void trace_iterator_increment(struct trace_iterator *iter)
+/* Find the next real entry, without updating the iterator itself */
+static struct trace_entry *
+find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
{
- iter->idx++;
- iter->next_idx[iter->cpu]++;
- iter->next_page_idx[iter->cpu]++;
-
- if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
- struct trace_array_cpu *data = iter->tr->data[iter->cpu];
-
- iter->next_page_idx[iter->cpu] = 0;
- iter->next_page[iter->cpu] =
- trace_next_list(data, iter->next_page[iter->cpu]);
- }
+ return __find_next_entry(iter, ent_cpu, ent_ts);
}
-static void trace_consume(struct trace_iterator *iter)
+/* Find the next real entry, and increment the iterator to the next entry */
+static void *find_next_entry_inc(struct trace_iterator *iter)
{
- struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+ iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
- data->trace_tail_idx++;
- if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
- data->trace_tail = trace_next_page(data, data->trace_tail);
- data->trace_tail_idx = 0;
- }
+ if (iter->ent)
+ trace_iterator_increment(iter);
- /* Check if we empty it, then reset the index */
- if (data->trace_head == data->trace_tail &&
- data->trace_head_idx == data->trace_tail_idx)
- data->trace_idx = 0;
+ return iter->ent ? iter : NULL;
}
-static void *find_next_entry_inc(struct trace_iterator *iter)
+static void trace_consume(struct trace_iterator *iter)
{
- struct trace_entry *next;
- int next_cpu = -1;
-
- next = find_next_entry(iter, &next_cpu);
-
- iter->prev_ent = iter->ent;
- iter->prev_cpu = iter->cpu;
-
- iter->ent = next;
- iter->cpu = next_cpu;
-
- if (next)
- trace_iterator_increment(iter);
-
- return next ? iter : NULL;
+ /* Don't allow ftrace to trace into the ring buffers */
+ ftrace_disable_cpu();
+ ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+ ftrace_enable_cpu();
}
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1210,7 +1431,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
struct trace_iterator *iter = m->private;
void *p = NULL;
loff_t l = 0;
- int i;
+ int cpu;
mutex_lock(&trace_types_lock);
@@ -1221,22 +1442,19 @@ static void *s_start(struct seq_file *m, loff_t *pos)
atomic_inc(&trace_record_cmdline_disabled);
- /* let the tracer grab locks here if needed */
- if (current_trace->start)
- current_trace->start(iter);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
iter->idx = -1;
- iter->prev_ent = NULL;
- iter->prev_cpu = -1;
- for_each_tracing_cpu(i) {
- iter->next_idx[i] = 0;
- iter->next_page[i] = NULL;
+ ftrace_disable_cpu();
+
+ for_each_tracing_cpu(cpu) {
+ ring_buffer_iter_reset(iter->buffer_iter[cpu]);
}
+ ftrace_enable_cpu();
+
for (p = iter; p && l < *pos; p = s_next(m, p, &l))
;
@@ -1250,28 +1468,24 @@ static void *s_start(struct seq_file *m, loff_t *pos)
static void s_stop(struct seq_file *m, void *p)
{
- struct trace_iterator *iter = m->private;
-
atomic_dec(&trace_record_cmdline_disabled);
-
- /* let the tracer release locks here if needed */
- if (current_trace && current_trace == iter->trace && iter->trace->stop)
- iter->trace->stop(iter);
-
mutex_unlock(&trace_types_lock);
}
-#define KRETPROBE_MSG "[unknown/kretprobe'd]"
-
#ifdef CONFIG_KRETPROBES
-static inline int kretprobed(unsigned long addr)
+static inline const char *kretprobed(const char *name)
{
- return addr == (unsigned long)kretprobe_trampoline;
+ static const char tramp_name[] = "kretprobe_trampoline";
+ int size = sizeof(tramp_name);
+
+ if (strncmp(tramp_name, name, size) == 0)
+ return "[unknown/kretprobe'd]";
+ return name;
}
#else
-static inline int kretprobed(unsigned long addr)
+static inline const char *kretprobed(const char *name)
{
- return 0;
+ return name;
}
#endif /* CONFIG_KRETPROBES */
@@ -1280,10 +1494,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
char str[KSYM_SYMBOL_LEN];
+ const char *name;
kallsyms_lookup(address, NULL, NULL, NULL, str);
- return trace_seq_printf(s, fmt, str);
+ name = kretprobed(str);
+
+ return trace_seq_printf(s, fmt, name);
#endif
return 1;
}
@@ -1294,9 +1511,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
{
#ifdef CONFIG_KALLSYMS
char str[KSYM_SYMBOL_LEN];
+ const char *name;
sprint_symbol(str, address);
- return trace_seq_printf(s, fmt, str);
+ name = kretprobed(str);
+
+ return trace_seq_printf(s, fmt, name);
#endif
return 1;
}
@@ -1307,7 +1527,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
# define IP_FMT "%016lx"
#endif
-static int
+int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
{
int ret;
@@ -1328,23 +1548,95 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
return ret;
}
+static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+ unsigned long ip, unsigned long sym_flags)
+{
+ struct file *file = NULL;
+ unsigned long vmstart = 0;
+ int ret = 1;
+
+ if (mm) {
+ const struct vm_area_struct *vma;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ip);
+ if (vma) {
+ file = vma->vm_file;
+ vmstart = vma->vm_start;
+ }
+ if (file) {
+ ret = trace_seq_path(s, &file->f_path);
+ if (ret)
+ ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
+ }
+ up_read(&mm->mmap_sem);
+ }
+ if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
+ ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+ return ret;
+}
+
+static int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+ unsigned long sym_flags)
+{
+ struct mm_struct *mm = NULL;
+ int ret = 1;
+ unsigned int i;
+
+ if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+ struct task_struct *task;
+ /*
+ * we do the lookup on the thread group leader,
+ * since individual threads might have already quit!
+ */
+ rcu_read_lock();
+ task = find_task_by_vpid(entry->ent.tgid);
+ if (task)
+ mm = get_task_mm(task);
+ rcu_read_unlock();
+ }
+
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ unsigned long ip = entry->caller[i];
+
+ if (ip == ULONG_MAX || !ret)
+ break;
+ if (i && ret)
+ ret = trace_seq_puts(s, " <- ");
+ if (!ip) {
+ if (ret)
+ ret = trace_seq_puts(s, "??");
+ continue;
+ }
+ if (!ret)
+ break;
+ if (ret)
+ ret = seq_print_user_ip(s, mm, ip, sym_flags);
+ }
+
+ if (mm)
+ mmput(mm);
+ return ret;
+}
+
static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _------=> CPU# \n");
- seq_puts(m, "# / _-----=> irqs-off \n");
- seq_puts(m, "# | / _----=> need-resched \n");
- seq_puts(m, "# || / _---=> hardirq/softirq \n");
- seq_puts(m, "# ||| / _--=> preempt-depth \n");
- seq_puts(m, "# |||| / \n");
- seq_puts(m, "# ||||| delay \n");
- seq_puts(m, "# cmd pid ||||| time | caller \n");
- seq_puts(m, "# \\ / ||||| \\ | / \n");
+ seq_puts(m, "# _------=> CPU# \n");
+ seq_puts(m, "# / _-----=> irqs-off \n");
+ seq_puts(m, "# | / _----=> need-resched \n");
+ seq_puts(m, "# || / _---=> hardirq/softirq \n");
+ seq_puts(m, "# ||| / _--=> preempt-depth \n");
+ seq_puts(m, "# |||| / \n");
+ seq_puts(m, "# ||||| delay \n");
+ seq_puts(m, "# cmd pid ||||| time | caller \n");
+ seq_puts(m, "# \\ / ||||| \\ | / \n");
}
static void print_func_help_header(struct seq_file *m)
{
- seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
- seq_puts(m, "# | | | | |\n");
+ seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | |\n");
}
@@ -1355,23 +1647,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
struct trace_array *tr = iter->tr;
struct trace_array_cpu *data = tr->data[tr->cpu];
struct tracer *type = current_trace;
- unsigned long total = 0;
- unsigned long entries = 0;
- int cpu;
+ unsigned long total;
+ unsigned long entries;
const char *name = "preemption";
if (type)
name = type->name;
- for_each_tracing_cpu(cpu) {
- if (head_page(tr->data[cpu])) {
- total += tr->data[cpu]->trace_idx;
- if (tr->data[cpu]->trace_idx > tr->entries)
- entries += tr->entries;
- else
- entries += tr->data[cpu]->trace_idx;
- }
- }
+ entries = ring_buffer_entries(iter->tr->buffer);
+ total = entries +
+ ring_buffer_overruns(iter->tr->buffer);
seq_printf(m, "%s latency trace v1.1.5 on %s\n",
name, UTS_RELEASE);
@@ -1428,9 +1713,10 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
comm = trace_find_cmdline(entry->pid);
trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
- trace_seq_printf(s, "%d", cpu);
+ trace_seq_printf(s, "%3d", cpu);
trace_seq_printf(s, "%c%c",
- (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
+ (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -1457,7 +1743,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
unsigned long preempt_mark_thresh = 100;
static void
-lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
+lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
unsigned long rel_usecs)
{
trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1471,34 +1757,101 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-static int
+static int task_state_char(unsigned long state)
+{
+ int bit = state ? __ffs(state) + 1 : 0;
+
+ return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
+}
+
+/*
+ * The message is supposed to contain an ending newline.
+ * If the printing stops prematurely, try to add a newline of our own.
+ */
+void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
+{
+ struct trace_entry *ent;
+ struct trace_field_cont *cont;
+ bool ok = true;
+
+ ent = peek_next_entry(iter, iter->cpu, NULL);
+ if (!ent || ent->type != TRACE_CONT) {
+ trace_seq_putc(s, '\n');
+ return;
+ }
+
+ do {
+ cont = (struct trace_field_cont *)ent;
+ if (ok)
+ ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
+
+ ftrace_disable_cpu();
+
+ if (iter->buffer_iter[iter->cpu])
+ ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+ else
+ ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+ ftrace_enable_cpu();
+
+ ent = peek_next_entry(iter, iter->cpu, NULL);
+ } while (ent && ent->type == TRACE_CONT);
+
+ if (!ok)
+ trace_seq_putc(s, '\n');
+}
+
+static void test_cpu_buff_start(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+
+ if (!(trace_flags & TRACE_ITER_ANNOTATE))
+ return;
+
+ if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
+ return;
+
+ if (cpu_isset(iter->cpu, iter->started))
+ return;
+
+ cpu_set(iter->cpu, iter->started);
+ trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+}
+
+static enum print_line_t
print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
{
struct trace_seq *s = &iter->seq;
unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
- struct trace_entry *next_entry = find_next_entry(iter, NULL);
+ struct trace_entry *next_entry;
unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
struct trace_entry *entry = iter->ent;
unsigned long abs_usecs;
unsigned long rel_usecs;
+ u64 next_ts;
char *comm;
int S, T;
int i;
- unsigned state;
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
+ test_cpu_buff_start(iter);
+
+ next_entry = find_next_entry(iter, NULL, &next_ts);
if (!next_entry)
- next_entry = entry;
- rel_usecs = ns2usecs(next_entry->t - entry->t);
- abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
+ next_ts = iter->ts;
+ rel_usecs = ns2usecs(next_ts - iter->ts);
+ abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
if (verbose) {
comm = trace_find_cmdline(entry->pid);
- trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
+ trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
" %ld.%03ldms (+%ld.%03ldms): ",
comm,
entry->pid, cpu, entry->flags,
entry->preempt_count, trace_idx,
- ns2usecs(entry->t),
+ ns2usecs(iter->ts),
abs_usecs/1000,
abs_usecs % 1000, rel_usecs/1000,
rel_usecs % 1000);
@@ -1507,52 +1860,99 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
lat_print_timestamp(s, abs_usecs, rel_usecs);
}
switch (entry->type) {
- case TRACE_FN:
- seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_ip_sym(s, field->ip, sym_flags);
trace_seq_puts(s, " (");
- if (kretprobed(entry->fn.parent_ip))
- trace_seq_puts(s, KRETPROBE_MSG);
- else
- seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
+ seq_print_ip_sym(s, field->parent_ip, sym_flags);
trace_seq_puts(s, ")\n");
break;
+ }
case TRACE_CTX:
- case TRACE_WAKE:
- T = entry->ctx.next_state < sizeof(state_to_char) ?
- state_to_char[entry->ctx.next_state] : 'X';
-
- state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
- S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
- comm = trace_find_cmdline(entry->ctx.next_pid);
- trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
- entry->ctx.prev_pid,
- entry->ctx.prev_prio,
+ case TRACE_WAKE: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ T = task_state_char(field->next_state);
+ S = task_state_char(field->prev_state);
+ comm = trace_find_cmdline(field->next_pid);
+ trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+ field->prev_pid,
+ field->prev_prio,
S, entry->type == TRACE_CTX ? "==>" : " +",
- entry->ctx.next_pid,
- entry->ctx.next_prio,
+ field->next_cpu,
+ field->next_pid,
+ field->next_prio,
T, comm);
break;
- case TRACE_SPECIAL:
+ }
+ case TRACE_SPECIAL: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
trace_seq_printf(s, "# %ld %ld %ld\n",
- entry->special.arg1,
- entry->special.arg2,
- entry->special.arg3);
+ field->arg1,
+ field->arg2,
+ field->arg3);
break;
- case TRACE_STACK:
+ }
+ case TRACE_STACK: {
+ struct stack_entry *field;
+
+ trace_assign_type(field, entry);
+
for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
if (i)
trace_seq_puts(s, " <= ");
- seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
+ seq_print_ip_sym(s, field->caller[i], sym_flags);
}
trace_seq_puts(s, "\n");
break;
+ }
+ case TRACE_PRINT: {
+ struct print_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_ip_sym(s, field->ip, sym_flags);
+ trace_seq_printf(s, ": %s", field->buf);
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+ break;
+ }
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
+ case TRACE_USER_STACK: {
+ struct userstack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_userip_objs(field, s, sym_flags);
+ trace_seq_putc(s, '\n');
+ break;
+ }
default:
trace_seq_printf(s, "Unknown type %d\n", entry->type);
}
- return 1;
+ return TRACE_TYPE_HANDLED;
}
-static int print_trace_fmt(struct trace_iterator *iter)
+static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1567,90 +1967,154 @@ static int print_trace_fmt(struct trace_iterator *iter)
entry = iter->ent;
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
+ test_cpu_buff_start(iter);
+
comm = trace_find_cmdline(iter->ent->pid);
- t = ns2usecs(entry->t);
+ t = ns2usecs(iter->ts);
usec_rem = do_div(t, 1000000ULL);
secs = (unsigned long)t;
ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
if (!ret)
- return 0;
- ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
switch (entry->type) {
- case TRACE_FN:
- ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = seq_print_ip_sym(s, field->ip, sym_flags);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
- entry->fn.parent_ip) {
+ field->parent_ip) {
ret = trace_seq_printf(s, " <-");
if (!ret)
- return 0;
- if (kretprobed(entry->fn.parent_ip))
- ret = trace_seq_puts(s, KRETPROBE_MSG);
- else
- ret = seq_print_ip_sym(s, entry->fn.parent_ip,
- sym_flags);
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = seq_print_ip_sym(s,
+ field->parent_ip,
+ sym_flags);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
}
ret = trace_seq_printf(s, "\n");
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
break;
+ }
case TRACE_CTX:
- case TRACE_WAKE:
- S = entry->ctx.prev_state < sizeof(state_to_char) ?
- state_to_char[entry->ctx.prev_state] : 'X';
- T = entry->ctx.next_state < sizeof(state_to_char) ?
- state_to_char[entry->ctx.next_state] : 'X';
- ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
- entry->ctx.prev_pid,
- entry->ctx.prev_prio,
+ case TRACE_WAKE: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ T = task_state_char(field->next_state);
+ S = task_state_char(field->prev_state);
+ ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
+ field->prev_pid,
+ field->prev_prio,
S,
entry->type == TRACE_CTX ? "==>" : " +",
- entry->ctx.next_pid,
- entry->ctx.next_prio,
+ field->next_cpu,
+ field->next_pid,
+ field->next_prio,
T);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
break;
- case TRACE_SPECIAL:
+ }
+ case TRACE_SPECIAL: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
ret = trace_seq_printf(s, "# %ld %ld %ld\n",
- entry->special.arg1,
- entry->special.arg2,
- entry->special.arg3);
+ field->arg1,
+ field->arg2,
+ field->arg3);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
break;
- case TRACE_STACK:
+ }
+ case TRACE_STACK: {
+ struct stack_entry *field;
+
+ trace_assign_type(field, entry);
+
for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
if (i) {
ret = trace_seq_puts(s, " <= ");
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
}
- ret = seq_print_ip_sym(s, entry->stack.caller[i],
+ ret = seq_print_ip_sym(s, field->caller[i],
sym_flags);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
}
ret = trace_seq_puts(s, "\n");
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
break;
}
- return 1;
+ case TRACE_PRINT: {
+ struct print_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_ip_sym(s, field->ip, sym_flags);
+ trace_seq_printf(s, ": %s", field->buf);
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+ break;
+ }
+ case TRACE_GRAPH_RET: {
+ return print_graph_function(iter);
+ }
+ case TRACE_GRAPH_ENT: {
+ return print_graph_function(iter);
+ }
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
+ case TRACE_USER_STACK: {
+ struct userstack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = seq_print_userip_objs(field, s, sym_flags);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_putc(s, '\n');
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
+ }
+ return TRACE_TYPE_HANDLED;
}
-static int print_raw_fmt(struct trace_iterator *iter)
+static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *entry;
@@ -1659,47 +2123,75 @@ static int print_raw_fmt(struct trace_iterator *iter)
entry = iter->ent;
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
ret = trace_seq_printf(s, "%d %d %llu ",
- entry->pid, iter->cpu, entry->t);
+ entry->pid, iter->cpu, iter->ts);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
switch (entry->type) {
- case TRACE_FN:
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
ret = trace_seq_printf(s, "%x %x\n",
- entry->fn.ip, entry->fn.parent_ip);
+ field->ip,
+ field->parent_ip);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
break;
+ }
case TRACE_CTX:
- case TRACE_WAKE:
- S = entry->ctx.prev_state < sizeof(state_to_char) ?
- state_to_char[entry->ctx.prev_state] : 'X';
- T = entry->ctx.next_state < sizeof(state_to_char) ?
- state_to_char[entry->ctx.next_state] : 'X';
- if (entry->type == TRACE_WAKE)
- S = '+';
- ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
- entry->ctx.prev_pid,
- entry->ctx.prev_prio,
+ case TRACE_WAKE: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ T = task_state_char(field->next_state);
+ S = entry->type == TRACE_WAKE ? '+' :
+ task_state_char(field->prev_state);
+ ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
+ field->prev_pid,
+ field->prev_prio,
S,
- entry->ctx.next_pid,
- entry->ctx.next_prio,
+ field->next_cpu,
+ field->next_pid,
+ field->next_prio,
T);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
break;
+ }
case TRACE_SPECIAL:
- case TRACE_STACK:
+ case TRACE_USER_STACK:
+ case TRACE_STACK: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
ret = trace_seq_printf(s, "# %ld %ld %ld\n",
- entry->special.arg1,
- entry->special.arg2,
- entry->special.arg3);
+ field->arg1,
+ field->arg2,
+ field->arg3);
if (!ret)
- return 0;
+ return TRACE_TYPE_PARTIAL_LINE;
break;
}
- return 1;
+ case TRACE_PRINT: {
+ struct print_entry *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+ break;
+ }
+ }
+ return TRACE_TYPE_HANDLED;
}
#define SEQ_PUT_FIELD_RET(s, x) \
@@ -1710,11 +2202,12 @@ do { \
#define SEQ_PUT_HEX_FIELD_RET(s, x) \
do { \
+ BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
return 0; \
} while (0)
-static int print_hex_fmt(struct trace_iterator *iter)
+static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
unsigned char newline = '\n';
@@ -1723,97 +2216,162 @@ static int print_hex_fmt(struct trace_iterator *iter)
entry = iter->ent;
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
- SEQ_PUT_HEX_FIELD_RET(s, entry->t);
+ SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
switch (entry->type) {
- case TRACE_FN:
- SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
- SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+ SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
break;
+ }
case TRACE_CTX:
- case TRACE_WAKE:
- S = entry->ctx.prev_state < sizeof(state_to_char) ?
- state_to_char[entry->ctx.prev_state] : 'X';
- T = entry->ctx.next_state < sizeof(state_to_char) ?
- state_to_char[entry->ctx.next_state] : 'X';
- if (entry->type == TRACE_WAKE)
- S = '+';
- SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
- SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
+ case TRACE_WAKE: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ T = task_state_char(field->next_state);
+ S = entry->type == TRACE_WAKE ? '+' :
+ task_state_char(field->prev_state);
+ SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+ SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
SEQ_PUT_HEX_FIELD_RET(s, S);
- SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
- SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
- SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
SEQ_PUT_HEX_FIELD_RET(s, T);
break;
+ }
case TRACE_SPECIAL:
- case TRACE_STACK:
- SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
- SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
- SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
+ case TRACE_USER_STACK:
+ case TRACE_STACK: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
break;
}
+ }
SEQ_PUT_FIELD_RET(s, newline);
- return 1;
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct print_entry *field;
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, field->buf);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+
+ return TRACE_TYPE_HANDLED;
}
-static int print_bin_fmt(struct trace_iterator *iter)
+static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *entry;
entry = iter->ent;
+ if (entry->type == TRACE_CONT)
+ return TRACE_TYPE_HANDLED;
+
SEQ_PUT_FIELD_RET(s, entry->pid);
SEQ_PUT_FIELD_RET(s, entry->cpu);
- SEQ_PUT_FIELD_RET(s, entry->t);
+ SEQ_PUT_FIELD_RET(s, iter->ts);
switch (entry->type) {
- case TRACE_FN:
- SEQ_PUT_FIELD_RET(s, entry->fn.ip);
- SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
+ case TRACE_FN: {
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_FIELD_RET(s, field->ip);
+ SEQ_PUT_FIELD_RET(s, field->parent_ip);
break;
- case TRACE_CTX:
- SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
- SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
- SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
- SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
- SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
- SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
+ }
+ case TRACE_CTX: {
+ struct ctx_switch_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_FIELD_RET(s, field->prev_pid);
+ SEQ_PUT_FIELD_RET(s, field->prev_prio);
+ SEQ_PUT_FIELD_RET(s, field->prev_state);
+ SEQ_PUT_FIELD_RET(s, field->next_pid);
+ SEQ_PUT_FIELD_RET(s, field->next_prio);
+ SEQ_PUT_FIELD_RET(s, field->next_state);
break;
+ }
case TRACE_SPECIAL:
- case TRACE_STACK:
- SEQ_PUT_FIELD_RET(s, entry->special.arg1);
- SEQ_PUT_FIELD_RET(s, entry->special.arg2);
- SEQ_PUT_FIELD_RET(s, entry->special.arg3);
+ case TRACE_USER_STACK:
+ case TRACE_STACK: {
+ struct special_entry *field;
+
+ trace_assign_type(field, entry);
+
+ SEQ_PUT_FIELD_RET(s, field->arg1);
+ SEQ_PUT_FIELD_RET(s, field->arg2);
+ SEQ_PUT_FIELD_RET(s, field->arg3);
break;
}
+ }
return 1;
}
static int trace_empty(struct trace_iterator *iter)
{
- struct trace_array_cpu *data;
int cpu;
for_each_tracing_cpu(cpu) {
- data = iter->tr->data[cpu];
-
- if (head_page(data) && data->trace_idx &&
- (data->trace_tail != data->trace_head ||
- data->trace_tail_idx != data->trace_head_idx))
- return 0;
+ if (iter->buffer_iter[cpu]) {
+ if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+ return 0;
+ } else {
+ if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+ return 0;
+ }
}
+
return 1;
}
-static int print_trace_line(struct trace_iterator *iter)
+static enum print_line_t print_trace_line(struct trace_iterator *iter)
{
- if (iter->trace && iter->trace->print_line)
- return iter->trace->print_line(iter);
+ enum print_line_t ret;
+
+ if (iter->trace && iter->trace->print_line) {
+ ret = iter->trace->print_line(iter);
+ if (ret != TRACE_TYPE_UNHANDLED)
+ return ret;
+ }
+
+ if (iter->ent->type == TRACE_PRINT &&
+ trace_flags & TRACE_ITER_PRINTK &&
+ trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+ return print_printk_msg_only(iter);
if (trace_flags & TRACE_ITER_BIN)
return print_bin_fmt(iter);
@@ -1839,7 +2397,9 @@ static int s_show(struct seq_file *m, void *v)
seq_printf(m, "# tracer: %s\n", iter->trace->name);
seq_puts(m, "#\n");
}
- if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+ if (iter->trace && iter->trace->print_header)
+ iter->trace->print_header(m);
+ else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return 0;
@@ -1869,6 +2429,8 @@ static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, int *ret)
{
struct trace_iterator *iter;
+ struct seq_file *m;
+ int cpu;
if (tracing_disabled) {
*ret = -ENODEV;
@@ -1889,28 +2451,49 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
iter->trace = current_trace;
iter->pos = -1;
+ /* Notify the tracer early; before we stop tracing. */
+ if (iter->trace && iter->trace->open)
+ iter->trace->open(iter);
+
+ /* Annotate start of buffers if we had overruns */
+ if (ring_buffer_overruns(iter->tr->buffer))
+ iter->iter_flags |= TRACE_FILE_ANNOTATE;
+
+
+ for_each_tracing_cpu(cpu) {
+
+ iter->buffer_iter[cpu] =
+ ring_buffer_read_start(iter->tr->buffer, cpu);
+
+ if (!iter->buffer_iter[cpu])
+ goto fail_buffer;
+ }
+
/* TODO stop tracer */
*ret = seq_open(file, &tracer_seq_ops);
- if (!*ret) {
- struct seq_file *m = file->private_data;
- m->private = iter;
+ if (*ret)
+ goto fail_buffer;
- /* stop the trace while dumping */
- if (iter->tr->ctrl) {
- tracer_enabled = 0;
- ftrace_function_enabled = 0;
- }
+ m = file->private_data;
+ m->private = iter;
+
+ /* stop the trace while dumping */
+ tracing_stop();
- if (iter->trace && iter->trace->open)
- iter->trace->open(iter);
- } else {
- kfree(iter);
- iter = NULL;
- }
mutex_unlock(&trace_types_lock);
out:
return iter;
+
+ fail_buffer:
+ for_each_tracing_cpu(cpu) {
+ if (iter->buffer_iter[cpu])
+ ring_buffer_read_finish(iter->buffer_iter[cpu]);
+ }
+ mutex_unlock(&trace_types_lock);
+ kfree(iter);
+
+ return ERR_PTR(-ENOMEM);
}
int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -1926,20 +2509,19 @@ int tracing_release(struct inode *inode, struct file *file)
{
struct seq_file *m = (struct seq_file *)file->private_data;
struct trace_iterator *iter = m->private;
+ int cpu;
mutex_lock(&trace_types_lock);
+ for_each_tracing_cpu(cpu) {
+ if (iter->buffer_iter[cpu])
+ ring_buffer_read_finish(iter->buffer_iter[cpu]);
+ }
+
if (iter->trace && iter->trace->close)
iter->trace->close(iter);
/* reenable tracing if it was previously enabled */
- if (iter->tr->ctrl) {
- tracer_enabled = 1;
- /*
- * It is safe to enable function tracing even if it
- * isn't used
- */
- ftrace_function_enabled = 1;
- }
+ tracing_start();
mutex_unlock(&trace_types_lock);
seq_release(inode, file);
@@ -2117,7 +2699,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
if (err)
goto err_unlock;
- raw_local_irq_disable();
+ local_irq_disable();
__raw_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
/*
@@ -2134,7 +2716,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
}
}
__raw_spin_unlock(&ftrace_max_lock);
- raw_local_irq_enable();
+ local_irq_enable();
tracing_cpumask = tracing_cpumask_new;
@@ -2155,13 +2737,16 @@ static struct file_operations tracing_cpumask_fops = {
};
static ssize_t
-tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
+tracing_trace_options_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ int i;
char *buf;
int r = 0;
int len = 0;
- int i;
+ u32 tracer_flags = current_trace->flags->val;
+ struct tracer_opt *trace_opts = current_trace->flags->opts;
+
/* calulate max size */
for (i = 0; trace_options[i]; i++) {
@@ -2169,6 +2754,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
len += 3; /* "no" and space */
}
+ /*
+ * Increase the size with names of options specific
+ * of the current tracer.
+ */
+ for (i = 0; trace_opts[i].name; i++) {
+ len += strlen(trace_opts[i].name);
+ len += 3; /* "no" and space */
+ }
+
/* +2 for \n and \0 */
buf = kmalloc(len + 2, GFP_KERNEL);
if (!buf)
@@ -2181,6 +2775,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
r += sprintf(buf + r, "no%s ", trace_options[i]);
}
+ for (i = 0; trace_opts[i].name; i++) {
+ if (tracer_flags & trace_opts[i].bit)
+ r += sprintf(buf + r, "%s ",
+ trace_opts[i].name);
+ else
+ r += sprintf(buf + r, "no%s ",
+ trace_opts[i].name);
+ }
+
r += sprintf(buf + r, "\n");
WARN_ON(r >= len + 2);
@@ -2191,13 +2794,48 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
return r;
}
+/* Try to assign a tracer specific option */
+static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
+{
+ struct tracer_flags *trace_flags = trace->flags;
+ struct tracer_opt *opts = NULL;
+ int ret = 0, i = 0;
+ int len;
+
+ for (i = 0; trace_flags->opts[i].name; i++) {
+ opts = &trace_flags->opts[i];
+ len = strlen(opts->name);
+
+ if (strncmp(cmp, opts->name, len) == 0) {
+ ret = trace->set_flag(trace_flags->val,
+ opts->bit, !neg);
+ break;
+ }
+ }
+ /* Not found */
+ if (!trace_flags->opts[i].name)
+ return -EINVAL;
+
+ /* Refused to handle */
+ if (ret)
+ return ret;
+
+ if (neg)
+ trace_flags->val &= ~opts->bit;
+ else
+ trace_flags->val |= opts->bit;
+
+ return 0;
+}
+
static ssize_t
-tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
+tracing_trace_options_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
char *cmp = buf;
int neg = 0;
+ int ret;
int i;
if (cnt >= sizeof(buf))
@@ -2224,11 +2862,13 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
break;
}
}
- /*
- * If no option could be set, return an error:
- */
- if (!trace_options[i])
- return -EINVAL;
+
+ /* If no option could be set, test the specific tracer options */
+ if (!trace_options[i]) {
+ ret = set_tracer_option(current_trace, cmp, neg);
+ if (ret)
+ return ret;
+ }
filp->f_pos += cnt;
@@ -2237,8 +2877,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
static struct file_operations tracing_iter_fops = {
.open = tracing_open_generic,
- .read = tracing_iter_ctrl_read,
- .write = tracing_iter_ctrl_write,
+ .read = tracing_trace_options_read,
+ .write = tracing_trace_options_write,
};
static const char readme_msg[] =
@@ -2252,9 +2892,9 @@ static const char readme_msg[] =
"# echo sched_switch > /debug/tracing/current_tracer\n"
"# cat /debug/tracing/current_tracer\n"
"sched_switch\n"
- "# cat /debug/tracing/iter_ctrl\n"
+ "# cat /debug/tracing/trace_options\n"
"noprint-parent nosym-offset nosym-addr noverbose\n"
- "# echo print-parent > /debug/tracing/iter_ctrl\n"
+ "# echo print-parent > /debug/tracing/trace_options\n"
"# echo 1 > /debug/tracing/tracing_enabled\n"
"# cat /debug/tracing/trace > /tmp/trace.txt\n"
"echo 0 > /debug/tracing/tracing_enabled\n"
@@ -2277,11 +2917,10 @@ static ssize_t
tracing_ctrl_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct trace_array *tr = filp->private_data;
char buf[64];
int r;
- r = sprintf(buf, "%ld\n", tr->ctrl);
+ r = sprintf(buf, "%u\n", tracer_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
@@ -2309,16 +2948,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
val = !!val;
mutex_lock(&trace_types_lock);
- if (tr->ctrl ^ val) {
- if (val)
+ if (tracer_enabled ^ val) {
+ if (val) {
tracer_enabled = 1;
- else
+ if (current_trace->start)
+ current_trace->start(tr);
+ tracing_start();
+ } else {
tracer_enabled = 0;
-
- tr->ctrl = val;
-
- if (current_trace && current_trace->ctrl_update)
- current_trace->ctrl_update(tr);
+ tracing_stop();
+ if (current_trace->stop)
+ current_trace->stop(tr);
+ }
}
mutex_unlock(&trace_types_lock);
@@ -2344,14 +2985,52 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
+static int tracing_set_tracer(char *buf)
+{
+ struct trace_array *tr = &global_trace;
+ struct tracer *t;
+ int ret = 0;
+
+ mutex_lock(&trace_types_lock);
+ for (t = trace_types; t; t = t->next) {
+ if (strcmp(t->name, buf) == 0)
+ break;
+ }
+ if (!t) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (t == current_trace)
+ goto out;
+
+ trace_branch_disable();
+ if (current_trace && current_trace->reset)
+ current_trace->reset(tr);
+
+ current_trace = t;
+ if (t->init) {
+ ret = t->init(tr);
+ if (ret)
+ goto out;
+ }
+
+ trace_branch_enable(tr);
+ out:
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
+
static ssize_t
tracing_set_trace_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct trace_array *tr = &global_trace;
- struct tracer *t;
char buf[max_tracer_type_len+1];
int i;
+ size_t ret;
+ int err;
+
+ ret = cnt;
if (cnt > max_tracer_type_len)
cnt = max_tracer_type_len;
@@ -2365,27 +3044,13 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
buf[i] = 0;
- mutex_lock(&trace_types_lock);
- for (t = trace_types; t; t = t->next) {
- if (strcmp(t->name, buf) == 0)
- break;
- }
- if (!t || t == current_trace)
- goto out;
-
- if (current_trace && current_trace->reset)
- current_trace->reset(tr);
-
- current_trace = t;
- if (t->init)
- t->init(tr);
+ err = tracing_set_tracer(buf);
+ if (err)
+ return err;
- out:
- mutex_unlock(&trace_types_lock);
+ filp->f_pos += ret;
- filp->f_pos += cnt;
-
- return cnt;
+ return ret;
}
static ssize_t
@@ -2450,6 +3115,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
return -ENOMEM;
mutex_lock(&trace_types_lock);
+
+ /* trace pipe does not show start of buffer */
+ cpus_setall(iter->started);
+
iter->tr = &global_trace;
iter->trace = current_trace;
filp->private_data = iter;
@@ -2500,20 +3169,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_iterator *iter = filp->private_data;
- struct trace_array_cpu *data;
- static cpumask_t mask;
- unsigned long flags;
-#ifdef CONFIG_FTRACE
- int ftrace_save;
-#endif
- int cpu;
ssize_t sret;
/* return any leftover data */
sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
if (sret != -EBUSY)
return sret;
- sret = 0;
trace_seq_reset(&iter->seq);
@@ -2524,6 +3185,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
goto out;
}
+waitagain:
+ sret = 0;
while (trace_empty(iter)) {
if ((filp->f_flags & O_NONBLOCK)) {
@@ -2588,46 +3251,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
offsetof(struct trace_iterator, seq));
iter->pos = -1;
- /*
- * We need to stop all tracing on all CPUS to read the
- * the next buffer. This is a bit expensive, but is
- * not done often. We fill all what we can read,
- * and then release the locks again.
- */
-
- cpus_clear(mask);
- local_irq_save(flags);
-#ifdef CONFIG_FTRACE
- ftrace_save = ftrace_enabled;
- ftrace_enabled = 0;
-#endif
- smp_wmb();
- for_each_tracing_cpu(cpu) {
- data = iter->tr->data[cpu];
-
- if (!head_page(data) || !data->trace_idx)
- continue;
-
- atomic_inc(&data->disabled);
- cpu_set(cpu, mask);
- }
-
- for_each_cpu_mask(cpu, mask) {
- data = iter->tr->data[cpu];
- __raw_spin_lock(&data->lock);
-
- if (data->overrun > iter->last_overrun[cpu])
- iter->overrun[cpu] +=
- data->overrun - iter->last_overrun[cpu];
- iter->last_overrun[cpu] = data->overrun;
- }
-
while (find_next_entry_inc(iter) != NULL) {
- int ret;
+ enum print_line_t ret;
int len = iter->seq.len;
ret = print_trace_line(iter);
- if (!ret) {
+ if (ret == TRACE_TYPE_PARTIAL_LINE) {
/* don't print partial lines */
iter->seq.len = len;
break;
@@ -2639,26 +3268,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
break;
}
- for_each_cpu_mask(cpu, mask) {
- data = iter->tr->data[cpu];
- __raw_spin_unlock(&data->lock);
- }
-
- for_each_cpu_mask(cpu, mask) {
- data = iter->tr->data[cpu];
- atomic_dec(&data->disabled);
- }
-#ifdef CONFIG_FTRACE
- ftrace_enabled = ftrace_save;
-#endif
- local_irq_restore(flags);
-
/* Now copy what we have to the user */
sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
if (iter->seq.readpos >= iter->seq.len)
trace_seq_reset(&iter->seq);
+
+ /*
+ * If there was nothing to send to user, inspite of consuming trace
+ * entries, go back to wait for more entries.
+ */
if (sret == -EBUSY)
- sret = 0;
+ goto waitagain;
out:
mutex_unlock(&trace_types_lock);
@@ -2674,7 +3294,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
char buf[64];
int r;
- r = sprintf(buf, "%lu\n", tr->entries);
+ r = sprintf(buf, "%lu\n", tr->entries >> 10);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
@@ -2684,7 +3304,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
{
unsigned long val;
char buf[64];
- int i, ret;
+ int ret, cpu;
if (cnt >= sizeof(buf))
return -EINVAL;
@@ -2704,71 +3324,109 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
mutex_lock(&trace_types_lock);
- if (current_trace != &no_tracer) {
- cnt = -EBUSY;
- pr_info("ftrace: set current_tracer to none"
- " before modifying buffer size\n");
- goto out;
- }
-
- if (val > global_trace.entries) {
- long pages_requested;
- unsigned long freeable_pages;
-
- /* make sure we have enough memory before mapping */
- pages_requested =
- (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
-
- /* account for each buffer (and max_tr) */
- pages_requested *= tracing_nr_buffers * 2;
+ tracing_stop();
- /* Check for overflow */
- if (pages_requested < 0) {
- cnt = -ENOMEM;
- goto out;
- }
+ /* disable all cpu buffers */
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_inc(&max_tr.data[cpu]->disabled);
+ }
- freeable_pages = determine_dirtyable_memory();
+ /* value is in KB */
+ val <<= 10;
- /* we only allow to request 1/4 of useable memory */
- if (pages_requested >
- ((freeable_pages + tracing_pages_allocated) / 4)) {
- cnt = -ENOMEM;
+ if (val != global_trace.entries) {
+ ret = ring_buffer_resize(global_trace.buffer, val);
+ if (ret < 0) {
+ cnt = ret;
goto out;
}
- while (global_trace.entries < val) {
- if (trace_alloc_page()) {
- cnt = -ENOMEM;
- goto out;
+ ret = ring_buffer_resize(max_tr.buffer, val);
+ if (ret < 0) {
+ int r;
+ cnt = ret;
+ r = ring_buffer_resize(global_trace.buffer,
+ global_trace.entries);
+ if (r < 0) {
+ /* AARGH! We are left with different
+ * size max buffer!!!! */
+ WARN_ON(1);
+ tracing_disabled = 1;
}
- /* double check that we don't go over the known pages */
- if (tracing_pages_allocated > pages_requested)
- break;
+ goto out;
}
- } else {
- /* include the number of entries in val (inc of page entries) */
- while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
- trace_free_page();
+ global_trace.entries = val;
}
- /* check integrity */
- for_each_tracing_cpu(i)
- check_pages(global_trace.data[i]);
-
filp->f_pos += cnt;
/* If check pages failed, return ENOMEM */
if (tracing_disabled)
cnt = -ENOMEM;
out:
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_dec(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_dec(&max_tr.data[cpu]->disabled);
+ }
+
+ tracing_start();
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
return cnt;
}
+static int mark_printk(const char *fmt, ...)
+{
+ int ret;
+ va_list args;
+ va_start(args, fmt);
+ ret = trace_vprintk(0, -1, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static ssize_t
+tracing_mark_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *fpos)
+{
+ char *buf;
+ char *end;
+
+ if (tracing_disabled)
+ return -EINVAL;
+
+ if (cnt > TRACE_BUF_SIZE)
+ cnt = TRACE_BUF_SIZE;
+
+ buf = kmalloc(cnt + 1, GFP_KERNEL);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, ubuf, cnt)) {
+ kfree(buf);
+ return -EFAULT;
+ }
+
+ /* Cut from the first nil or newline. */
+ buf[cnt] = '\0';
+ end = strchr(buf, '\n');
+ if (end)
+ *end = '\0';
+
+ cnt = mark_printk("%s\n", buf);
+ kfree(buf);
+ *fpos += cnt;
+
+ return cnt;
+}
+
static struct file_operations tracing_max_lat_fops = {
.open = tracing_open_generic,
.read = tracing_max_lat_read,
@@ -2800,24 +3458,45 @@ static struct file_operations tracing_entries_fops = {
.write = tracing_entries_write,
};
+static struct file_operations tracing_mark_fops = {
+ .open = tracing_open_generic,
+ .write = tracing_mark_write,
+};
+
#ifdef CONFIG_DYNAMIC_FTRACE
+int __weak ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ return 0;
+}
+
static ssize_t
-tracing_read_long(struct file *filp, char __user *ubuf,
+tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ static char ftrace_dyn_info_buffer[1024];
+ static DEFINE_MUTEX(dyn_info_mutex);
unsigned long *p = filp->private_data;
- char buf[64];
+ char *buf = ftrace_dyn_info_buffer;
+ int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
int r;
- r = sprintf(buf, "%ld\n", *p);
+ mutex_lock(&dyn_info_mutex);
+ r = sprintf(buf, "%ld ", *p);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
+ buf[r++] = '\n';
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ mutex_unlock(&dyn_info_mutex);
+
+ return r;
}
-static struct file_operations tracing_read_long_fops = {
+static struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
- .read = tracing_read_long,
+ .read = tracing_read_dyn_info,
};
#endif
@@ -2846,7 +3525,7 @@ struct dentry *tracing_init_dentry(void)
#include "trace_selftest.c"
#endif
-static __init void tracer_init_debugfs(void)
+static __init int tracer_init_debugfs(void)
{
struct dentry *d_tracer;
struct dentry *entry;
@@ -2858,10 +3537,10 @@ static __init void tracer_init_debugfs(void)
if (!entry)
pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
- entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
+ entry = debugfs_create_file("trace_options", 0644, d_tracer,
NULL, &tracing_iter_fops);
if (!entry)
- pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
+ pr_warning("Could not create debugfs 'trace_options' entry\n");
entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
NULL, &tracing_cpumask_fops);
@@ -2881,12 +3560,12 @@ static __init void tracer_init_debugfs(void)
entry = debugfs_create_file("available_tracers", 0444, d_tracer,
&global_trace, &show_traces_fops);
if (!entry)
- pr_warning("Could not create debugfs 'trace' entry\n");
+ pr_warning("Could not create debugfs 'available_tracers' entry\n");
entry = debugfs_create_file("current_tracer", 0444, d_tracer,
&global_trace, &set_tracer_fops);
if (!entry)
- pr_warning("Could not create debugfs 'trace' entry\n");
+ pr_warning("Could not create debugfs 'current_tracer' entry\n");
entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
&tracing_max_latency,
@@ -2899,7 +3578,7 @@ static __init void tracer_init_debugfs(void)
&tracing_thresh, &tracing_max_lat_fops);
if (!entry)
pr_warning("Could not create debugfs "
- "'tracing_threash' entry\n");
+ "'tracing_thresh' entry\n");
entry = debugfs_create_file("README", 0644, d_tracer,
NULL, &tracing_readme_fops);
if (!entry)
@@ -2909,18 +3588,24 @@ static __init void tracer_init_debugfs(void)
NULL, &tracing_pipe_fops);
if (!entry)
pr_warning("Could not create debugfs "
- "'tracing_threash' entry\n");
+ "'trace_pipe' entry\n");
- entry = debugfs_create_file("trace_entries", 0644, d_tracer,
+ entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
if (!entry)
pr_warning("Could not create debugfs "
- "'tracing_threash' entry\n");
+ "'buffer_size_kb' entry\n");
+
+ entry = debugfs_create_file("trace_marker", 0220, d_tracer,
+ NULL, &tracing_mark_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'trace_marker' entry\n");
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
- &tracing_read_long_fops);
+ &tracing_dyn_info_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'dyn_ftrace_total_info' entry\n");
@@ -2928,230 +3613,268 @@ static __init void tracer_init_debugfs(void)
#ifdef CONFIG_SYSPROF_TRACER
init_tracer_sysprof_debugfs(d_tracer);
#endif
+ return 0;
}
-static int trace_alloc_page(void)
+int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
{
+ static DEFINE_SPINLOCK(trace_buf_lock);
+ static char trace_buf[TRACE_BUF_SIZE];
+
+ struct ring_buffer_event *event;
+ struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
- struct page *page, *tmp;
- LIST_HEAD(pages);
- void *array;
- unsigned pages_allocated = 0;
- int i;
+ int cpu, len = 0, size, pc;
+ struct print_entry *entry;
+ unsigned long irq_flags;
- /* first allocate a page for each CPU */
- for_each_tracing_cpu(i) {
- array = (void *)__get_free_page(GFP_KERNEL);
- if (array == NULL) {
- printk(KERN_ERR "tracer: failed to allocate page"
- "for trace buffer!\n");
- goto free_pages;
- }
+ if (tracing_disabled || tracing_selftest_running)
+ return 0;
- pages_allocated++;
- page = virt_to_page(array);
- list_add(&page->lru, &pages);
+ pc = preempt_count();
+ preempt_disable_notrace();
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
-/* Only allocate if we are actually using the max trace */
-#ifdef CONFIG_TRACER_MAX_TRACE
- array = (void *)__get_free_page(GFP_KERNEL);
- if (array == NULL) {
- printk(KERN_ERR "tracer: failed to allocate page"
- "for trace buffer!\n");
- goto free_pages;
- }
- pages_allocated++;
- page = virt_to_page(array);
- list_add(&page->lru, &pages);
-#endif
- }
+ if (unlikely(atomic_read(&data->disabled)))
+ goto out;
- /* Now that we successfully allocate a page per CPU, add them */
- for_each_tracing_cpu(i) {
- data = global_trace.data[i];
- page = list_entry(pages.next, struct page, lru);
- list_del_init(&page->lru);
- list_add_tail(&page->lru, &data->trace_pages);
- ClearPageLRU(page);
+ pause_graph_tracing();
+ spin_lock_irqsave(&trace_buf_lock, irq_flags);
+ len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+
+ len = min(len, TRACE_BUF_SIZE-1);
+ trace_buf[len] = 0;
+
+ size = sizeof(*entry) + len + 1;
+ event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
+ if (!event)
+ goto out_unlock;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, irq_flags, pc);
+ entry->ent.type = TRACE_PRINT;
+ entry->ip = ip;
+ entry->depth = depth;
+
+ memcpy(&entry->buf, trace_buf, len);
+ entry->buf[len] = 0;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ out_unlock:
+ spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
+ unpause_graph_tracing();
+ out:
+ preempt_enable_notrace();
-#ifdef CONFIG_TRACER_MAX_TRACE
- data = max_tr.data[i];
- page = list_entry(pages.next, struct page, lru);
- list_del_init(&page->lru);
- list_add_tail(&page->lru, &data->trace_pages);
- SetPageLRU(page);
-#endif
- }
- tracing_pages_allocated += pages_allocated;
- global_trace.entries += ENTRIES_PER_PAGE;
+ return len;
+}
+EXPORT_SYMBOL_GPL(trace_vprintk);
- return 0;
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+{
+ int ret;
+ va_list ap;
- free_pages:
- list_for_each_entry_safe(page, tmp, &pages, lru) {
- list_del_init(&page->lru);
- __free_page(page);
+ if (!(trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ va_start(ap, fmt);
+ ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
+ va_end(ap);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__ftrace_printk);
+
+static int trace_panic_handler(struct notifier_block *this,
+ unsigned long event, void *unused)
+{
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
+ return NOTIFY_OK;
+}
+
+static struct notifier_block trace_panic_notifier = {
+ .notifier_call = trace_panic_handler,
+ .next = NULL,
+ .priority = 150 /* priority: INT_MAX >= x >= 0 */
+};
+
+static int trace_die_handler(struct notifier_block *self,
+ unsigned long val,
+ void *data)
+{
+ switch (val) {
+ case DIE_OOPS:
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
+ break;
+ default:
+ break;
}
- return -ENOMEM;
+ return NOTIFY_OK;
+}
+
+static struct notifier_block trace_die_notifier = {
+ .notifier_call = trace_die_handler,
+ .priority = 200
+};
+
+/*
+ * printk is set to max of 1024, we really don't need it that big.
+ * Nothing should be printing 1000 characters anyway.
+ */
+#define TRACE_MAX_PRINT 1000
+
+/*
+ * Define here KERN_TRACE so that we have one place to modify
+ * it if we decide to change what log level the ftrace dump
+ * should be at.
+ */
+#define KERN_TRACE KERN_INFO
+
+static void
+trace_printk_seq(struct trace_seq *s)
+{
+ /* Probably should print a warning here. */
+ if (s->len >= 1000)
+ s->len = 1000;
+
+ /* should be zero ended, but we are paranoid. */
+ s->buffer[s->len] = 0;
+
+ printk(KERN_TRACE "%s", s->buffer);
+
+ trace_seq_reset(s);
}
-static int trace_free_page(void)
+void ftrace_dump(void)
{
- struct trace_array_cpu *data;
- struct page *page;
- struct list_head *p;
- int i;
- int ret = 0;
+ static DEFINE_SPINLOCK(ftrace_dump_lock);
+ /* use static because iter can be a bit big for the stack */
+ static struct trace_iterator iter;
+ static cpumask_t mask;
+ static int dump_ran;
+ unsigned long flags;
+ int cnt = 0, cpu;
- /* free one page from each buffer */
- for_each_tracing_cpu(i) {
- data = global_trace.data[i];
- p = data->trace_pages.next;
- if (p == &data->trace_pages) {
- /* should never happen */
- WARN_ON(1);
- tracing_disabled = 1;
- ret = -1;
- break;
- }
- page = list_entry(p, struct page, lru);
- ClearPageLRU(page);
- list_del(&page->lru);
- tracing_pages_allocated--;
- tracing_pages_allocated--;
- __free_page(page);
+ /* only one dump */
+ spin_lock_irqsave(&ftrace_dump_lock, flags);
+ if (dump_ran)
+ goto out;
- tracing_reset(data);
+ dump_ran = 1;
-#ifdef CONFIG_TRACER_MAX_TRACE
- data = max_tr.data[i];
- p = data->trace_pages.next;
- if (p == &data->trace_pages) {
- /* should never happen */
- WARN_ON(1);
- tracing_disabled = 1;
- ret = -1;
- break;
+ /* No turning back! */
+ ftrace_kill();
+
+ for_each_tracing_cpu(cpu) {
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ }
+
+ /* don't look at user memory in panic mode */
+ trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+
+ printk(KERN_TRACE "Dumping ftrace buffer:\n");
+
+ iter.tr = &global_trace;
+ iter.trace = current_trace;
+
+ /*
+ * We need to stop all tracing on all CPUS to read the
+ * the next buffer. This is a bit expensive, but is
+ * not done often. We fill all what we can read,
+ * and then release the locks again.
+ */
+
+ cpus_clear(mask);
+
+ while (!trace_empty(&iter)) {
+
+ if (!cnt)
+ printk(KERN_TRACE "---------------------------------\n");
+
+ cnt++;
+
+ /* reset all but tr, trace, and overruns */
+ memset(&iter.seq, 0,
+ sizeof(struct trace_iterator) -
+ offsetof(struct trace_iterator, seq));
+ iter.iter_flags |= TRACE_FILE_LAT_FMT;
+ iter.pos = -1;
+
+ if (find_next_entry_inc(&iter) != NULL) {
+ print_trace_line(&iter);
+ trace_consume(&iter);
}
- page = list_entry(p, struct page, lru);
- ClearPageLRU(page);
- list_del(&page->lru);
- __free_page(page);
- tracing_reset(data);
-#endif
+ trace_printk_seq(&iter.seq);
}
- global_trace.entries -= ENTRIES_PER_PAGE;
- return ret;
+ if (!cnt)
+ printk(KERN_TRACE " (ftrace buffer empty)\n");
+ else
+ printk(KERN_TRACE "---------------------------------\n");
+
+ out:
+ spin_unlock_irqrestore(&ftrace_dump_lock, flags);
}
__init static int tracer_alloc_buffers(void)
{
struct trace_array_cpu *data;
- void *array;
- struct page *page;
- int pages = 0;
- int ret = -ENOMEM;
int i;
/* TODO: make the number of buffers hot pluggable with CPUS */
- tracing_nr_buffers = num_possible_cpus();
tracing_buffer_mask = cpu_possible_map;
- /* Allocate the first page for all buffers */
- for_each_tracing_cpu(i) {
- data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
- max_tr.data[i] = &per_cpu(max_data, i);
-
- array = (void *)__get_free_page(GFP_KERNEL);
- if (array == NULL) {
- printk(KERN_ERR "tracer: failed to allocate page"
- "for trace buffer!\n");
- goto free_buffers;
- }
-
- /* set the array to the list */
- INIT_LIST_HEAD(&data->trace_pages);
- page = virt_to_page(array);
- list_add(&page->lru, &data->trace_pages);
- /* use the LRU flag to differentiate the two buffers */
- ClearPageLRU(page);
-
- data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
- max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ global_trace.buffer = ring_buffer_alloc(trace_buf_size,
+ TRACE_BUFFER_FLAGS);
+ if (!global_trace.buffer) {
+ printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
+ WARN_ON(1);
+ return 0;
+ }
+ global_trace.entries = ring_buffer_size(global_trace.buffer);
-/* Only allocate if we are actually using the max trace */
#ifdef CONFIG_TRACER_MAX_TRACE
- array = (void *)__get_free_page(GFP_KERNEL);
- if (array == NULL) {
- printk(KERN_ERR "tracer: failed to allocate page"
- "for trace buffer!\n");
- goto free_buffers;
- }
-
- INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
- page = virt_to_page(array);
- list_add(&page->lru, &max_tr.data[i]->trace_pages);
- SetPageLRU(page);
-#endif
+ max_tr.buffer = ring_buffer_alloc(trace_buf_size,
+ TRACE_BUFFER_FLAGS);
+ if (!max_tr.buffer) {
+ printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
+ WARN_ON(1);
+ ring_buffer_free(global_trace.buffer);
+ return 0;
}
+ max_tr.entries = ring_buffer_size(max_tr.buffer);
+ WARN_ON(max_tr.entries != global_trace.entries);
+#endif
- /*
- * Since we allocate by orders of pages, we may be able to
- * round up a bit.
- */
- global_trace.entries = ENTRIES_PER_PAGE;
- pages++;
-
- while (global_trace.entries < trace_nr_entries) {
- if (trace_alloc_page())
- break;
- pages++;
+ /* Allocate the first page for all buffers */
+ for_each_tracing_cpu(i) {
+ data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
+ max_tr.data[i] = &per_cpu(max_data, i);
}
- max_tr.entries = global_trace.entries;
-
- pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
- pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
- pr_info(" actual entries %ld\n", global_trace.entries);
-
- tracer_init_debugfs();
trace_init_cmdlines();
- register_tracer(&no_tracer);
- current_trace = &no_tracer;
+ register_tracer(&nop_trace);
+#ifdef CONFIG_BOOT_TRACER
+ register_tracer(&boot_tracer);
+ current_trace = &boot_tracer;
+ current_trace->init(&global_trace);
+#else
+ current_trace = &nop_trace;
+#endif
/* All seems OK, enable tracing */
- global_trace.ctrl = tracer_enabled;
tracing_disabled = 0;
- return 0;
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &trace_panic_notifier);
- free_buffers:
- for (i-- ; i >= 0; i--) {
- struct page *page, *tmp;
- struct trace_array_cpu *data = global_trace.data[i];
-
- if (data) {
- list_for_each_entry_safe(page, tmp,
- &data->trace_pages, lru) {
- list_del_init(&page->lru);
- __free_page(page);
- }
- }
+ register_die_notifier(&trace_die_notifier);
-#ifdef CONFIG_TRACER_MAX_TRACE
- data = max_tr.data[i];
- if (data) {
- list_for_each_entry_safe(page, tmp,
- &data->trace_pages, lru) {
- list_del_init(&page->lru);
- __free_page(page);
- }
- }
-#endif
- }
- return ret;
+ return 0;
}
-fs_initcall(tracer_alloc_buffers);
+early_initcall(tracer_alloc_buffers);
+fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f867..cc7a4f8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,7 +5,10 @@
#include <asm/atomic.h>
#include <linux/sched.h>
#include <linux/clocksource.h>
+#include <linux/ring_buffer.h>
#include <linux/mmiotrace.h>
+#include <linux/ftrace.h>
+#include <trace/boot.h>
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -13,38 +16,80 @@ enum trace_type {
TRACE_FN,
TRACE_CTX,
TRACE_WAKE,
+ TRACE_CONT,
TRACE_STACK,
+ TRACE_PRINT,
TRACE_SPECIAL,
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
+ TRACE_BRANCH,
+ TRACE_BOOT_CALL,
+ TRACE_BOOT_RET,
+ TRACE_GRAPH_RET,
+ TRACE_GRAPH_ENT,
+ TRACE_USER_STACK,
+ TRACE_HW_BRANCHES,
+ TRACE_POWER,
__TRACE_LAST_TYPE
};
/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+ unsigned char type;
+ unsigned char cpu;
+ unsigned char flags;
+ unsigned char preempt_count;
+ int pid;
+ int tgid;
+};
+
+/*
* Function trace entry - function address and parent function addres:
*/
struct ftrace_entry {
+ struct trace_entry ent;
unsigned long ip;
unsigned long parent_ip;
};
+/* Function call entry */
+struct ftrace_graph_ent_entry {
+ struct trace_entry ent;
+ struct ftrace_graph_ent graph_ent;
+};
+
+/* Function return entry */
+struct ftrace_graph_ret_entry {
+ struct trace_entry ent;
+ struct ftrace_graph_ret ret;
+};
+extern struct tracer boot_tracer;
+
/*
* Context switch trace entry - which task (and prio) we switched from/to:
*/
struct ctx_switch_entry {
+ struct trace_entry ent;
unsigned int prev_pid;
unsigned char prev_prio;
unsigned char prev_state;
unsigned int next_pid;
unsigned char next_prio;
unsigned char next_state;
+ unsigned int next_cpu;
};
/*
* Special (free-form) trace entry:
*/
struct special_entry {
+ struct trace_entry ent;
unsigned long arg1;
unsigned long arg2;
unsigned long arg3;
@@ -57,33 +102,94 @@ struct special_entry {
#define FTRACE_STACK_ENTRIES 8
struct stack_entry {
+ struct trace_entry ent;
+ unsigned long caller[FTRACE_STACK_ENTRIES];
+};
+
+struct userstack_entry {
+ struct trace_entry ent;
unsigned long caller[FTRACE_STACK_ENTRIES];
};
/*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
+ * ftrace_printk entry:
*/
-struct trace_entry {
- char type;
- char cpu;
- char flags;
- char preempt_count;
- int pid;
- cycle_t t;
- union {
- struct ftrace_entry fn;
- struct ctx_switch_entry ctx;
- struct special_entry special;
- struct stack_entry stack;
- struct mmiotrace_rw mmiorw;
- struct mmiotrace_map mmiomap;
- };
+struct print_entry {
+ struct trace_entry ent;
+ unsigned long ip;
+ int depth;
+ char buf[];
+};
+
+#define TRACE_OLD_SIZE 88
+
+struct trace_field_cont {
+ unsigned char type;
+ /* Temporary till we get rid of this completely */
+ char buf[TRACE_OLD_SIZE - 1];
};
-#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
+struct trace_mmiotrace_rw {
+ struct trace_entry ent;
+ struct mmiotrace_rw rw;
+};
+
+struct trace_mmiotrace_map {
+ struct trace_entry ent;
+ struct mmiotrace_map map;
+};
+
+struct trace_boot_call {
+ struct trace_entry ent;
+ struct boot_trace_call boot_call;
+};
+
+struct trace_boot_ret {
+ struct trace_entry ent;
+ struct boot_trace_ret boot_ret;
+};
+
+#define TRACE_FUNC_SIZE 30
+#define TRACE_FILE_SIZE 20
+struct trace_branch {
+ struct trace_entry ent;
+ unsigned line;
+ char func[TRACE_FUNC_SIZE+1];
+ char file[TRACE_FILE_SIZE+1];
+ char correct;
+};
+
+struct hw_branch_entry {
+ struct trace_entry ent;
+ u64 from;
+ u64 to;
+};
+
+struct trace_power {
+ struct trace_entry ent;
+ struct power_trace state_data;
+};
+
+/*
+ * trace_flag_type is an enumeration that holds different
+ * states when a trace occurs. These are:
+ * IRQS_OFF - interrupts were disabled
+ * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
+ * NEED_RESCED - reschedule is requested
+ * HARDIRQ - inside an interrupt handler
+ * SOFTIRQ - inside a softirq handler
+ * CONT - multiple entries hold the trace item
+ */
+enum trace_flag_type {
+ TRACE_FLAG_IRQS_OFF = 0x01,
+ TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
+ TRACE_FLAG_NEED_RESCHED = 0x04,
+ TRACE_FLAG_HARDIRQ = 0x08,
+ TRACE_FLAG_SOFTIRQ = 0x10,
+ TRACE_FLAG_CONT = 0x20,
+};
+
+#define TRACE_BUF_SIZE 1024
/*
* The CPU trace array - it consists of thousands of trace entries
@@ -91,16 +197,9 @@ struct trace_entry {
* the trace, etc.)
*/
struct trace_array_cpu {
- struct list_head trace_pages;
atomic_t disabled;
- raw_spinlock_t lock;
- struct lock_class_key lock_key;
/* these fields get copied into max-trace: */
- unsigned trace_head_idx;
- unsigned trace_tail_idx;
- void *trace_head; /* producer */
- void *trace_tail; /* consumer */
unsigned long trace_idx;
unsigned long overrun;
unsigned long saved_latency;
@@ -124,37 +223,123 @@ struct trace_iterator;
* They have on/off state as well:
*/
struct trace_array {
+ struct ring_buffer *buffer;
unsigned long entries;
- long ctrl;
int cpu;
cycle_t time_start;
struct task_struct *waiter;
struct trace_array_cpu *data[NR_CPUS];
};
+#define FTRACE_CMP_TYPE(var, type) \
+ __builtin_types_compatible_p(typeof(var), type *)
+
+#undef IF_ASSIGN
+#define IF_ASSIGN(var, entry, etype, id) \
+ if (FTRACE_CMP_TYPE(var, etype)) { \
+ var = (typeof(var))(entry); \
+ WARN_ON(id && (entry)->type != id); \
+ break; \
+ }
+
+/* Will cause compile errors if type is not found. */
+extern void __ftrace_bad_type(void);
+
+/*
+ * The trace_assign_type is a verifier that the entry type is
+ * the same as the type being assigned. To add new types simply
+ * add a line with the following format:
+ *
+ * IF_ASSIGN(var, ent, type, id);
+ *
+ * Where "type" is the trace type that includes the trace_entry
+ * as the "ent" item. And "id" is the trace identifier that is
+ * used in the trace_type enum.
+ *
+ * If the type can have more than one id, then use zero.
+ */
+#define trace_assign_type(var, ent) \
+ do { \
+ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
+ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
+ IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
+ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
+ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
+ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
+ IF_ASSIGN(var, ent, struct special_entry, 0); \
+ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
+ TRACE_MMIO_RW); \
+ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
+ TRACE_MMIO_MAP); \
+ IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
+ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
+ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
+ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
+ TRACE_GRAPH_ENT); \
+ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
+ TRACE_GRAPH_RET); \
+ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
+ IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+ __ftrace_bad_type(); \
+ } while (0)
+
+/* Return values for print_line callback */
+enum print_line_t {
+ TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
+ TRACE_TYPE_HANDLED = 1,
+ TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
+};
+
+
+/*
+ * An option specific to a tracer. This is a boolean value.
+ * The bit is the bit index that sets its value on the
+ * flags value in struct tracer_flags.
+ */
+struct tracer_opt {
+ const char *name; /* Will appear on the trace_options file */
+ u32 bit; /* Mask assigned in val field in tracer_flags */
+};
+
+/*
+ * The set of specific options for a tracer. Your tracer
+ * have to set the initial value of the flags val.
+ */
+struct tracer_flags {
+ u32 val;
+ struct tracer_opt *opts;
+};
+
+/* Makes more easy to define a tracer opt */
+#define TRACER_OPT(s, b) .name = #s, .bit = b
+
/*
* A specific tracer, represented by methods that operate on a trace array:
*/
struct tracer {
const char *name;
- void (*init)(struct trace_array *tr);
+ /* Your tracer should raise a warning if init fails */
+ int (*init)(struct trace_array *tr);
void (*reset)(struct trace_array *tr);
+ void (*start)(struct trace_array *tr);
+ void (*stop)(struct trace_array *tr);
void (*open)(struct trace_iterator *iter);
void (*pipe_open)(struct trace_iterator *iter);
void (*close)(struct trace_iterator *iter);
- void (*start)(struct trace_iterator *iter);
- void (*stop)(struct trace_iterator *iter);
ssize_t (*read)(struct trace_iterator *iter,
struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos);
- void (*ctrl_update)(struct trace_array *tr);
#ifdef CONFIG_FTRACE_STARTUP_TEST
int (*selftest)(struct tracer *trace,
struct trace_array *tr);
#endif
- int (*print_line)(struct trace_iterator *iter);
+ void (*print_header)(struct seq_file *m);
+ enum print_line_t (*print_line)(struct trace_iterator *iter);
+ /* If you handled the flag setting, return 0 */
+ int (*set_flag)(u32 old_flags, u32 bit, int set);
struct tracer *next;
int print_max;
+ struct tracer_flags *flags;
};
struct trace_seq {
@@ -171,60 +356,72 @@ struct trace_iterator {
struct trace_array *tr;
struct tracer *trace;
void *private;
- long last_overrun[NR_CPUS];
- long overrun[NR_CPUS];
+ struct ring_buffer_iter *buffer_iter[NR_CPUS];
/* The below is zeroed out in pipe_read */
struct trace_seq seq;
struct trace_entry *ent;
int cpu;
-
- struct trace_entry *prev_ent;
- int prev_cpu;
+ u64 ts;
unsigned long iter_flags;
loff_t pos;
- unsigned long next_idx[NR_CPUS];
- struct list_head *next_page[NR_CPUS];
- unsigned next_page_idx[NR_CPUS];
long idx;
+
+ cpumask_t started;
};
-void tracing_reset(struct trace_array_cpu *data);
+int tracing_is_enabled(void);
+void trace_wake_up(void);
+void tracing_reset(struct trace_array *tr, int cpu);
+void tracing_reset_online_cpus(struct trace_array *tr);
int tracing_open_generic(struct inode *inode, struct file *filp);
struct dentry *tracing_init_dentry(void);
void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
+struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
+ struct trace_array_cpu *data);
+void tracing_generic_entry_update(struct trace_entry *entry,
+ unsigned long flags,
+ int pc);
+
void ftrace(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long ip,
unsigned long parent_ip,
- unsigned long flags);
+ unsigned long flags, int pc);
void tracing_sched_switch_trace(struct trace_array *tr,
struct trace_array_cpu *data,
struct task_struct *prev,
struct task_struct *next,
- unsigned long flags);
+ unsigned long flags, int pc);
void tracing_record_cmdline(struct task_struct *tsk);
void tracing_sched_wakeup_trace(struct trace_array *tr,
struct trace_array_cpu *data,
struct task_struct *wakee,
struct task_struct *cur,
- unsigned long flags);
+ unsigned long flags, int pc);
void trace_special(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long arg1,
unsigned long arg2,
- unsigned long arg3);
+ unsigned long arg3, int pc);
void trace_function(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long ip,
unsigned long parent_ip,
- unsigned long flags);
+ unsigned long flags, int pc);
+
+void trace_graph_return(struct ftrace_graph_ret *trace);
+int trace_graph_entry(struct ftrace_graph_ent *trace);
+void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
+void tracing_sched_switch_assign_trace(struct trace_array *tr);
+void tracing_stop_sched_switch_record(void);
+void tracing_start_sched_switch_record(void);
int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
@@ -239,7 +436,7 @@ void update_max_tr_single(struct trace_array *tr,
extern cycle_t ftrace_now(int cpu);
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
void tracing_start_function_trace(void);
void tracing_stop_function_trace(void);
#else
@@ -260,6 +457,7 @@ struct tracer_switch_ops {
struct tracer_switch_ops *next;
};
+char *trace_find_cmdline(int pid);
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -268,54 +466,96 @@ extern unsigned long ftrace_update_tot_cnt;
extern int DYN_FTRACE_TEST_NAME(void);
#endif
-#ifdef CONFIG_MMIOTRACE
-extern void __trace_mmiotrace_rw(struct trace_array *tr,
- struct trace_array_cpu *data,
- struct mmiotrace_rw *rw);
-extern void __trace_mmiotrace_map(struct trace_array *tr,
- struct trace_array_cpu *data,
- struct mmiotrace_map *map);
-#endif
-
#ifdef CONFIG_FTRACE_STARTUP_TEST
-#ifdef CONFIG_FTRACE
extern int trace_selftest_startup_function(struct tracer *trace,
struct trace_array *tr);
-#endif
-#ifdef CONFIG_IRQSOFF_TRACER
extern int trace_selftest_startup_irqsoff(struct tracer *trace,
struct trace_array *tr);
-#endif
-#ifdef CONFIG_PREEMPT_TRACER
extern int trace_selftest_startup_preemptoff(struct tracer *trace,
struct trace_array *tr);
-#endif
-#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
struct trace_array *tr);
-#endif
-#ifdef CONFIG_SCHED_TRACER
extern int trace_selftest_startup_wakeup(struct tracer *trace,
struct trace_array *tr);
-#endif
-#ifdef CONFIG_CONTEXT_SWITCH_TRACER
+extern int trace_selftest_startup_nop(struct tracer *trace,
+ struct trace_array *tr);
extern int trace_selftest_startup_sched_switch(struct tracer *trace,
struct trace_array *tr);
-#endif
-#ifdef CONFIG_SYSPROF_TRACER
extern int trace_selftest_startup_sysprof(struct tracer *trace,
struct trace_array *tr);
-#endif
+extern int trace_selftest_startup_branch(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
+extern void trace_seq_print_cont(struct trace_seq *s,
+ struct trace_iterator *iter);
+
+extern int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
+ unsigned long sym_flags);
extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
size_t cnt);
extern long ns2usecs(cycle_t nsec);
+extern int
+trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
extern unsigned long trace_flags;
+/* Standard output formatting function used for function return traces */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+extern enum print_line_t print_graph_function(struct trace_iterator *iter);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/* TODO: make this variable */
+#define FTRACE_GRAPH_MAX_FUNCS 32
+extern int ftrace_graph_count;
+extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
+
+static inline int ftrace_graph_addr(unsigned long addr)
+{
+ int i;
+
+ if (!ftrace_graph_count || test_tsk_trace_graph(current))
+ return 1;
+
+ for (i = 0; i < ftrace_graph_count; i++) {
+ if (addr == ftrace_graph_funcs[i])
+ return 1;
+ }
+
+ return 0;
+}
+#else
+static inline int ftrace_trace_addr(unsigned long addr)
+{
+ return 1;
+}
+static inline int ftrace_graph_addr(unsigned long addr)
+{
+ return 1;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#else /* CONFIG_FUNCTION_GRAPH_TRACER */
+static inline enum print_line_t
+print_graph_function(struct trace_iterator *iter)
+{
+ return TRACE_TYPE_UNHANDLED;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+extern struct pid *ftrace_pid_trace;
+
+static inline int ftrace_trace_task(struct task_struct *task)
+{
+ if (!ftrace_pid_trace)
+ return 1;
+
+ return test_tsk_trace_trace(task);
+}
+
/*
* trace_iterator_flags is an enumeration that defines bit
* positions into trace_flags that controls the output.
@@ -334,6 +574,94 @@ enum trace_iterator_flags {
TRACE_ITER_BLOCK = 0x80,
TRACE_ITER_STACKTRACE = 0x100,
TRACE_ITER_SCHED_TREE = 0x200,
+ TRACE_ITER_PRINTK = 0x400,
+ TRACE_ITER_PREEMPTONLY = 0x800,
+ TRACE_ITER_BRANCH = 0x1000,
+ TRACE_ITER_ANNOTATE = 0x2000,
+ TRACE_ITER_USERSTACKTRACE = 0x4000,
+ TRACE_ITER_SYM_USEROBJ = 0x8000,
+ TRACE_ITER_PRINTK_MSGONLY = 0x10000
};
+/*
+ * TRACE_ITER_SYM_MASK masks the options in trace_flags that
+ * control the output of kernel symbols.
+ */
+#define TRACE_ITER_SYM_MASK \
+ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
+
+extern struct tracer nop_trace;
+
+/**
+ * ftrace_preempt_disable - disable preemption scheduler safe
+ *
+ * When tracing can happen inside the scheduler, there exists
+ * cases that the tracing might happen before the need_resched
+ * flag is checked. If this happens and the tracer calls
+ * preempt_enable (after a disable), a schedule might take place
+ * causing an infinite recursion.
+ *
+ * To prevent this, we read the need_recshed flag before
+ * disabling preemption. When we want to enable preemption we
+ * check the flag, if it is set, then we call preempt_enable_no_resched.
+ * Otherwise, we call preempt_enable.
+ *
+ * The rational for doing the above is that if need resched is set
+ * and we have yet to reschedule, we are either in an atomic location
+ * (where we do not need to check for scheduling) or we are inside
+ * the scheduler and do not want to resched.
+ */
+static inline int ftrace_preempt_disable(void)
+{
+ int resched;
+
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ return resched;
+}
+
+/**
+ * ftrace_preempt_enable - enable preemption scheduler safe
+ * @resched: the return value from ftrace_preempt_disable
+ *
+ * This is a scheduler safe way to enable preemption and not miss
+ * any preemption checks. The disabled saved the state of preemption.
+ * If resched is set, then we were either inside an atomic or
+ * are inside the scheduler (we would have already scheduled
+ * otherwise). In this case, we do not want to call normal
+ * preempt_enable, but preempt_enable_no_resched instead.
+ */
+static inline void ftrace_preempt_enable(int resched)
+{
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+}
+
+#ifdef CONFIG_BRANCH_TRACER
+extern int enable_branch_tracing(struct trace_array *tr);
+extern void disable_branch_tracing(void);
+static inline int trace_branch_enable(struct trace_array *tr)
+{
+ if (trace_flags & TRACE_ITER_BRANCH)
+ return enable_branch_tracing(tr);
+ return 0;
+}
+static inline void trace_branch_disable(void)
+{
+ /* due to races, always disable */
+ disable_branch_tracing();
+}
+#else
+static inline int trace_branch_enable(struct trace_array *tr)
+{
+ return 0;
+}
+static inline void trace_branch_disable(void)
+{
+}
+#endif /* CONFIG_BRANCH_TRACER */
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 0000000..3ccebde
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,186 @@
+/*
+ * ring buffer based initcalls tracer
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+
+#include "trace.h"
+
+static struct trace_array *boot_trace;
+static bool pre_initcalls_finished;
+
+/* Tells the boot tracer that the pre_smp_initcalls are finished.
+ * So we are ready .
+ * It doesn't enable sched events tracing however.
+ * You have to call enable_boot_trace to do so.
+ */
+void start_boot_trace(void)
+{
+ pre_initcalls_finished = true;
+}
+
+void enable_boot_trace(void)
+{
+ if (pre_initcalls_finished)
+ tracing_start_sched_switch_record();
+}
+
+void disable_boot_trace(void)
+{
+ if (pre_initcalls_finished)
+ tracing_stop_sched_switch_record();
+}
+
+static int boot_trace_init(struct trace_array *tr)
+{
+ int cpu;
+ boot_trace = tr;
+
+ for_each_cpu_mask(cpu, cpu_possible_map)
+ tracing_reset(tr, cpu);
+
+ tracing_sched_switch_assign_trace(tr);
+ return 0;
+}
+
+static enum print_line_t
+initcall_call_print_line(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_boot_call *field;
+ struct boot_trace_call *call;
+ u64 ts;
+ unsigned long nsec_rem;
+ int ret;
+
+ trace_assign_type(field, entry);
+ call = &field->boot_call;
+ ts = iter->ts;
+ nsec_rem = do_div(ts, 1000000000);
+
+ ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
+ (unsigned long)ts, nsec_rem, call->func, call->caller);
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ else
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+initcall_ret_print_line(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_boot_ret *field;
+ struct boot_trace_ret *init_ret;
+ u64 ts;
+ unsigned long nsec_rem;
+ int ret;
+
+ trace_assign_type(field, entry);
+ init_ret = &field->boot_ret;
+ ts = iter->ts;
+ nsec_rem = do_div(ts, 1000000000);
+
+ ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
+ "returned %d after %llu msecs\n",
+ (unsigned long) ts,
+ nsec_rem,
+ init_ret->func, init_ret->result, init_ret->duration);
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ else
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t initcall_print_line(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+
+ switch (entry->type) {
+ case TRACE_BOOT_CALL:
+ return initcall_call_print_line(iter);
+ case TRACE_BOOT_RET:
+ return initcall_ret_print_line(iter);
+ default:
+ return TRACE_TYPE_UNHANDLED;
+ }
+}
+
+struct tracer boot_tracer __read_mostly =
+{
+ .name = "initcall",
+ .init = boot_trace_init,
+ .reset = tracing_reset_online_cpus,
+ .print_line = initcall_print_line,
+};
+
+void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
+{
+ struct ring_buffer_event *event;
+ struct trace_boot_call *entry;
+ unsigned long irq_flags;
+ struct trace_array *tr = boot_trace;
+
+ if (!pre_initcalls_finished)
+ return;
+
+ /* Get its name now since this function could
+ * disappear because it is in the .init section.
+ */
+ sprint_symbol(bt->func, (unsigned long)fn);
+ preempt_disable();
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, 0);
+ entry->ent.type = TRACE_BOOT_CALL;
+ entry->boot_call = *bt;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+
+ out:
+ preempt_enable();
+}
+
+void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
+{
+ struct ring_buffer_event *event;
+ struct trace_boot_ret *entry;
+ unsigned long irq_flags;
+ struct trace_array *tr = boot_trace;
+
+ if (!pre_initcalls_finished)
+ return;
+
+ sprint_symbol(bt->func, (unsigned long)fn);
+ preempt_disable();
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, 0);
+ entry->ent.type = TRACE_BOOT_RET;
+ entry->boot_ret = *bt;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+
+ out:
+ preempt_enable();
+}
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
new file mode 100644
index 0000000..6c00feb
--- /dev/null
+++ b/kernel/trace/trace_branch.c
@@ -0,0 +1,342 @@
+/*
+ * unlikely profiler
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/irqflags.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/hash.h>
+#include <linux/fs.h>
+#include <asm/local.h>
+#include "trace.h"
+
+#ifdef CONFIG_BRANCH_TRACER
+
+static int branch_tracing_enabled __read_mostly;
+static DEFINE_MUTEX(branch_tracing_mutex);
+static struct trace_array *branch_tracer;
+
+static void
+probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+{
+ struct trace_array *tr = branch_tracer;
+ struct ring_buffer_event *event;
+ struct trace_branch *entry;
+ unsigned long flags, irq_flags;
+ int cpu, pc;
+ const char *p;
+
+ /*
+ * I would love to save just the ftrace_likely_data pointer, but
+ * this code can also be used by modules. Ugly things can happen
+ * if the module is unloaded, and then we go and read the
+ * pointer. This is slower, but much safer.
+ */
+
+ if (unlikely(!tr))
+ return;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
+ goto out;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ goto out;
+
+ pc = preempt_count();
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_BRANCH;
+
+ /* Strip off the path, only save the file */
+ p = f->file + strlen(f->file);
+ while (p >= f->file && *p != '/')
+ p--;
+ p++;
+
+ strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
+ strncpy(entry->file, p, TRACE_FILE_SIZE);
+ entry->func[TRACE_FUNC_SIZE] = 0;
+ entry->file[TRACE_FILE_SIZE] = 0;
+ entry->line = f->line;
+ entry->correct = val == expect;
+
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ out:
+ atomic_dec(&tr->data[cpu]->disabled);
+ local_irq_restore(flags);
+}
+
+static inline
+void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+{
+ if (!branch_tracing_enabled)
+ return;
+
+ probe_likely_condition(f, val, expect);
+}
+
+int enable_branch_tracing(struct trace_array *tr)
+{
+ int ret = 0;
+
+ mutex_lock(&branch_tracing_mutex);
+ branch_tracer = tr;
+ /*
+ * Must be seen before enabling. The reader is a condition
+ * where we do not need a matching rmb()
+ */
+ smp_wmb();
+ branch_tracing_enabled++;
+ mutex_unlock(&branch_tracing_mutex);
+
+ return ret;
+}
+
+void disable_branch_tracing(void)
+{
+ mutex_lock(&branch_tracing_mutex);
+
+ if (!branch_tracing_enabled)
+ goto out_unlock;
+
+ branch_tracing_enabled--;
+
+ out_unlock:
+ mutex_unlock(&branch_tracing_mutex);
+}
+
+static void start_branch_trace(struct trace_array *tr)
+{
+ enable_branch_tracing(tr);
+}
+
+static void stop_branch_trace(struct trace_array *tr)
+{
+ disable_branch_tracing();
+}
+
+static int branch_trace_init(struct trace_array *tr)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ start_branch_trace(tr);
+ return 0;
+}
+
+static void branch_trace_reset(struct trace_array *tr)
+{
+ stop_branch_trace(tr);
+}
+
+struct tracer branch_trace __read_mostly =
+{
+ .name = "branch",
+ .init = branch_trace_init,
+ .reset = branch_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_branch,
+#endif
+};
+
+__init static int init_branch_trace(void)
+{
+ return register_tracer(&branch_trace);
+}
+
+device_initcall(init_branch_trace);
+#else
+static inline
+void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+{
+}
+#endif /* CONFIG_BRANCH_TRACER */
+
+void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
+{
+ /*
+ * I would love to have a trace point here instead, but the
+ * trace point code is so inundated with unlikely and likely
+ * conditions that the recursive nightmare that exists is too
+ * much to try to get working. At least for now.
+ */
+ trace_likely_condition(f, val, expect);
+
+ /* FIXME: Make this atomic! */
+ if (val == expect)
+ f->correct++;
+ else
+ f->incorrect++;
+}
+EXPORT_SYMBOL(ftrace_likely_update);
+
+struct ftrace_pointer {
+ void *start;
+ void *stop;
+ int hit;
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ const struct ftrace_pointer *f = m->private;
+ struct ftrace_branch_data *p = v;
+
+ (*pos)++;
+
+ if (v == (void *)1)
+ return f->start;
+
+ ++p;
+
+ if ((void *)p >= (void *)f->stop)
+ return NULL;
+
+ return p;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ void *t = (void *)1;
+ loff_t l = 0;
+
+ for (; t && l < *pos; t = t_next(m, t, &l))
+ ;
+
+ return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ const struct ftrace_pointer *fp = m->private;
+ struct ftrace_branch_data *p = v;
+ const char *f;
+ long percent;
+
+ if (v == (void *)1) {
+ if (fp->hit)
+ seq_printf(m, " miss hit %% ");
+ else
+ seq_printf(m, " correct incorrect %% ");
+ seq_printf(m, " Function "
+ " File Line\n"
+ " ------- --------- - "
+ " -------- "
+ " ---- ----\n");
+ return 0;
+ }
+
+ /* Only print the file, not the path */
+ f = p->file + strlen(p->file);
+ while (f >= p->file && *f != '/')
+ f--;
+ f++;
+
+ /*
+ * The miss is overlayed on correct, and hit on incorrect.
+ */
+ if (p->correct) {
+ percent = p->incorrect * 100;
+ percent /= p->correct + p->incorrect;
+ } else
+ percent = p->incorrect ? 100 : -1;
+
+ seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
+ if (percent < 0)
+ seq_printf(m, " X ");
+ else
+ seq_printf(m, "%3ld ", percent);
+ seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
+ return 0;
+}
+
+static struct seq_operations tracing_likely_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int tracing_branch_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ ret = seq_open(file, &tracing_likely_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = (void *)inode->i_private;
+ }
+
+ return ret;
+}
+
+static const struct file_operations tracing_branch_fops = {
+ .open = tracing_branch_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+#ifdef CONFIG_PROFILE_ALL_BRANCHES
+extern unsigned long __start_branch_profile[];
+extern unsigned long __stop_branch_profile[];
+
+static const struct ftrace_pointer ftrace_branch_pos = {
+ .start = __start_branch_profile,
+ .stop = __stop_branch_profile,
+ .hit = 1,
+};
+
+#endif /* CONFIG_PROFILE_ALL_BRANCHES */
+
+extern unsigned long __start_annotated_branch_profile[];
+extern unsigned long __stop_annotated_branch_profile[];
+
+static const struct ftrace_pointer ftrace_annotated_branch_pos = {
+ .start = __start_annotated_branch_profile,
+ .stop = __stop_annotated_branch_profile,
+};
+
+static __init int ftrace_branch_init(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
+ (void *)&ftrace_annotated_branch_pos,
+ &tracing_branch_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'profile_annotatet_branch' entry\n");
+
+#ifdef CONFIG_PROFILE_ALL_BRANCHES
+ entry = debugfs_create_file("profile_branch", 0444, d_tracer,
+ (void *)&ftrace_branch_pos,
+ &tracing_branch_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs"
+ " 'profile_branch' entry\n");
+#endif
+
+ return 0;
+}
+
+device_initcall(ftrace_branch_init);
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 3121448..9236d7e 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -16,20 +16,10 @@
#include "trace.h"
-static void function_reset(struct trace_array *tr)
-{
- int cpu;
-
- tr->time_start = ftrace_now(tr->cpu);
-
- for_each_online_cpu(cpu)
- tracing_reset(tr->data[cpu]);
-}
-
static void start_function_trace(struct trace_array *tr)
{
tr->cpu = get_cpu();
- function_reset(tr);
+ tracing_reset_online_cpus(tr);
put_cpu();
tracing_start_cmdline_record();
@@ -42,32 +32,28 @@ static void stop_function_trace(struct trace_array *tr)
tracing_stop_cmdline_record();
}
-static void function_trace_init(struct trace_array *tr)
+static int function_trace_init(struct trace_array *tr)
{
- if (tr->ctrl)
- start_function_trace(tr);
+ start_function_trace(tr);
+ return 0;
}
static void function_trace_reset(struct trace_array *tr)
{
- if (tr->ctrl)
- stop_function_trace(tr);
+ stop_function_trace(tr);
}
-static void function_trace_ctrl_update(struct trace_array *tr)
+static void function_trace_start(struct trace_array *tr)
{
- if (tr->ctrl)
- start_function_trace(tr);
- else
- stop_function_trace(tr);
+ tracing_reset_online_cpus(tr);
}
static struct tracer function_trace __read_mostly =
{
- .name = "ftrace",
+ .name = "function",
.init = function_trace_init,
.reset = function_trace_reset,
- .ctrl_update = function_trace_ctrl_update,
+ .start = function_trace_start,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
#endif
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
new file mode 100644
index 0000000..4bf39fc
--- /dev/null
+++ b/kernel/trace/trace_functions_graph.c
@@ -0,0 +1,669 @@
+/*
+ *
+ * Function graph tracer.
+ * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Mostly borrowed from function tracer which
+ * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+#define TRACE_GRAPH_INDENT 2
+
+/* Flag options */
+#define TRACE_GRAPH_PRINT_OVERRUN 0x1
+#define TRACE_GRAPH_PRINT_CPU 0x2
+#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
+#define TRACE_GRAPH_PRINT_PROC 0x8
+
+static struct tracer_opt trace_opts[] = {
+ /* Display overruns ? */
+ { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+ /* Display CPU ? */
+ { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
+ /* Display Overhead ? */
+ { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
+ /* Display proc name/pid */
+ { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
+ { } /* Empty entry */
+};
+
+static struct tracer_flags tracer_flags = {
+ /* Don't display overruns and proc by default */
+ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
+ .opts = trace_opts
+};
+
+/* pid on the last trace processed */
+static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
+
+static int graph_trace_init(struct trace_array *tr)
+{
+ int cpu, ret;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ ret = register_ftrace_graph(&trace_graph_return,
+ &trace_graph_entry);
+ if (ret)
+ return ret;
+ tracing_start_cmdline_record();
+
+ return 0;
+}
+
+static void graph_trace_reset(struct trace_array *tr)
+{
+ tracing_stop_cmdline_record();
+ unregister_ftrace_graph();
+}
+
+static inline int log10_cpu(int nb)
+{
+ if (nb / 100)
+ return 3;
+ if (nb / 10)
+ return 2;
+ return 1;
+}
+
+static enum print_line_t
+print_graph_cpu(struct trace_seq *s, int cpu)
+{
+ int i;
+ int ret;
+ int log10_this = log10_cpu(cpu);
+ int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+
+
+ /*
+ * Start with a space character - to make it stand out
+ * to the right a bit when trace output is pasted into
+ * email:
+ */
+ ret = trace_seq_printf(s, " ");
+
+ /*
+ * Tricky - we space the CPU field according to the max
+ * number of online CPUs. On a 2-cpu system it would take
+ * a maximum of 1 digit - on a 128 cpu system it would
+ * take up to 3 digits:
+ */
+ for (i = 0; i < log10_all - log10_this; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ ret = trace_seq_printf(s, "%d) ", cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+#define TRACE_GRAPH_PROCINFO_LENGTH 14
+
+static enum print_line_t
+print_graph_proc(struct trace_seq *s, pid_t pid)
+{
+ int i;
+ int ret;
+ int len;
+ char comm[8];
+ int spaces = 0;
+ /* sign + log10(MAX_INT) + '\0' */
+ char pid_str[11];
+
+ strncpy(comm, trace_find_cmdline(pid), 7);
+ comm[7] = '\0';
+ sprintf(pid_str, "%d", pid);
+
+ /* 1 stands for the "-" character */
+ len = strlen(comm) + strlen(pid_str) + 1;
+
+ if (len < TRACE_GRAPH_PROCINFO_LENGTH)
+ spaces = TRACE_GRAPH_PROCINFO_LENGTH - len;
+
+ /* First spaces to align center */
+ for (i = 0; i < spaces / 2; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = trace_seq_printf(s, "%s-%s", comm, pid_str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Last spaces to align center */
+ for (i = 0; i < spaces - (spaces / 2); i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ return TRACE_TYPE_HANDLED;
+}
+
+
+/* If the pid changed since the last trace, output this event */
+static enum print_line_t
+verif_pid(struct trace_seq *s, pid_t pid, int cpu)
+{
+ pid_t prev_pid;
+ int ret;
+
+ if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
+ return TRACE_TYPE_HANDLED;
+
+ prev_pid = last_pid[cpu];
+ last_pid[cpu] = pid;
+
+/*
+ * Context-switch trace line:
+
+ ------------------------------------------
+ | 1) migration/0--1 => sshd-1755
+ ------------------------------------------
+
+ */
+ ret = trace_seq_printf(s,
+ " ------------------------------------------\n");
+ if (!ret)
+ TRACE_TYPE_PARTIAL_LINE;
+
+ ret = print_graph_cpu(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ TRACE_TYPE_PARTIAL_LINE;
+
+ ret = print_graph_proc(s, prev_pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, " => ");
+ if (!ret)
+ TRACE_TYPE_PARTIAL_LINE;
+
+ ret = print_graph_proc(s, pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s,
+ "\n ------------------------------------------\n\n");
+ if (!ret)
+ TRACE_TYPE_PARTIAL_LINE;
+
+ return ret;
+}
+
+static bool
+trace_branch_is_leaf(struct trace_iterator *iter,
+ struct ftrace_graph_ent_entry *curr)
+{
+ struct ring_buffer_iter *ring_iter;
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ret_entry *next;
+
+ ring_iter = iter->buffer_iter[iter->cpu];
+
+ if (!ring_iter)
+ return false;
+
+ event = ring_buffer_iter_peek(ring_iter, NULL);
+
+ if (!event)
+ return false;
+
+ next = ring_buffer_event_data(event);
+
+ if (next->ent.type != TRACE_GRAPH_RET)
+ return false;
+
+ if (curr->ent.pid != next->ent.pid ||
+ curr->graph_ent.func != next->ret.func)
+ return false;
+
+ return true;
+}
+
+static enum print_line_t
+print_graph_irq(struct trace_seq *s, unsigned long addr,
+ enum trace_type type, int cpu, pid_t pid)
+{
+ int ret;
+
+ if (addr < (unsigned long)__irqentry_text_start ||
+ addr >= (unsigned long)__irqentry_text_end)
+ return TRACE_TYPE_UNHANDLED;
+
+ if (type == TRACE_GRAPH_ENT) {
+ ret = trace_seq_printf(s, "==========> | ");
+ } else {
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ /* Proc */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
+ ret = print_graph_proc(s, pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* No overhead */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = trace_seq_printf(s, "<========== |\n");
+ }
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_duration(unsigned long long duration, struct trace_seq *s)
+{
+ unsigned long nsecs_rem = do_div(duration, 1000);
+ /* log10(ULONG_MAX) + '\0' */
+ char msecs_str[21];
+ char nsecs_str[5];
+ int ret, len;
+ int i;
+
+ sprintf(msecs_str, "%lu", (unsigned long) duration);
+
+ /* Print msecs */
+ ret = trace_seq_printf(s, msecs_str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ len = strlen(msecs_str);
+
+ /* Print nsecs (we don't want to exceed 7 numbers) */
+ if (len < 7) {
+ snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem);
+ ret = trace_seq_printf(s, ".%s", nsecs_str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ len += strlen(nsecs_str);
+ }
+
+ ret = trace_seq_printf(s, " us ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Print remaining spaces to fit the row's width */
+ for (i = len; i < 7; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = trace_seq_printf(s, "| ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_HANDLED;
+
+}
+
+/* Signal a overhead of time execution to the output */
+static int
+print_graph_overhead(unsigned long long duration, struct trace_seq *s)
+{
+ /* Duration exceeded 100 msecs */
+ if (duration > 100000ULL)
+ return trace_seq_printf(s, "! ");
+
+ /* Duration exceeded 10 msecs */
+ if (duration > 10000ULL)
+ return trace_seq_printf(s, "+ ");
+
+ return trace_seq_printf(s, " ");
+}
+
+/* Case of a leaf function on its call entry */
+static enum print_line_t
+print_graph_entry_leaf(struct trace_iterator *iter,
+ struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
+{
+ struct ftrace_graph_ret_entry *ret_entry;
+ struct ftrace_graph_ret *graph_ret;
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ent *call;
+ unsigned long long duration;
+ int ret;
+ int i;
+
+ event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+ ret_entry = ring_buffer_event_data(event);
+ graph_ret = &ret_entry->ret;
+ call = &entry->graph_ent;
+ duration = graph_ret->rettime - graph_ret->calltime;
+
+ /* Overhead */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Duration */
+ ret = print_graph_duration(duration, s);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Function */
+ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = seq_print_ip_sym(s, call->func, 0);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, "();\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
+ struct trace_seq *s, pid_t pid, int cpu)
+{
+ int i;
+ int ret;
+ struct ftrace_graph_ent *call = &entry->graph_ent;
+
+ /* No overhead */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Interrupt */
+ ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid);
+ if (ret == TRACE_TYPE_UNHANDLED) {
+ /* No time */
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ } else {
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+
+ /* Function */
+ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = seq_print_ip_sym(s, call->func, 0);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, "() {\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
+ struct trace_iterator *iter, int cpu)
+{
+ int ret;
+ struct trace_entry *ent = iter->ent;
+
+ /* Pid */
+ if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Proc */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
+ ret = print_graph_proc(s, ent->pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (trace_branch_is_leaf(iter, field))
+ return print_graph_entry_leaf(iter, field, s);
+ else
+ return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
+
+}
+
+static enum print_line_t
+print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
+ struct trace_entry *ent, int cpu)
+{
+ int i;
+ int ret;
+ unsigned long long duration = trace->rettime - trace->calltime;
+
+ /* Pid */
+ if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Proc */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
+ ret = print_graph_proc(s, ent->pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Overhead */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Duration */
+ ret = print_graph_duration(duration, s);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Closing brace */
+ for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = trace_seq_printf(s, "}\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Overrun */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
+ ret = trace_seq_printf(s, " (Overruns: %lu)\n",
+ trace->overrun);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_comment(struct print_entry *trace, struct trace_seq *s,
+ struct trace_entry *ent, struct trace_iterator *iter)
+{
+ int i;
+ int ret;
+
+ /* Pid */
+ if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, iter->cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Proc */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
+ ret = print_graph_proc(s, ent->pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* No overhead */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* No time */
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Indentation */
+ if (trace->depth > 0)
+ for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* The comment */
+ ret = trace_seq_printf(s, "/* %s", trace->buf);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (ent->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+
+ ret = trace_seq_printf(s, " */\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+
+enum print_line_t
+print_graph_function(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+
+ switch (entry->type) {
+ case TRACE_GRAPH_ENT: {
+ struct ftrace_graph_ent_entry *field;
+ trace_assign_type(field, entry);
+ return print_graph_entry(field, s, iter,
+ iter->cpu);
+ }
+ case TRACE_GRAPH_RET: {
+ struct ftrace_graph_ret_entry *field;
+ trace_assign_type(field, entry);
+ return print_graph_return(&field->ret, s, entry, iter->cpu);
+ }
+ case TRACE_PRINT: {
+ struct print_entry *field;
+ trace_assign_type(field, entry);
+ return print_graph_comment(field, s, entry, iter);
+ }
+ default:
+ return TRACE_TYPE_UNHANDLED;
+ }
+}
+
+static void print_graph_headers(struct seq_file *s)
+{
+ /* 1st line */
+ seq_printf(s, "# ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
+ seq_printf(s, "CPU ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
+ seq_printf(s, "TASK/PID ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD)
+ seq_printf(s, "OVERHEAD/");
+ seq_printf(s, "DURATION FUNCTION CALLS\n");
+
+ /* 2nd line */
+ seq_printf(s, "# ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
+ seq_printf(s, "| ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
+ seq_printf(s, "| | ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ seq_printf(s, "| ");
+ seq_printf(s, "| | | | |\n");
+ } else
+ seq_printf(s, " | | | | |\n");
+}
+static struct tracer graph_trace __read_mostly = {
+ .name = "function_graph",
+ .init = graph_trace_init,
+ .reset = graph_trace_reset,
+ .print_line = print_graph_function,
+ .print_header = print_graph_headers,
+ .flags = &tracer_flags,
+};
+
+static __init int init_graph_trace(void)
+{
+ return register_tracer(&graph_trace);
+}
+
+device_initcall(init_graph_trace);
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
new file mode 100644
index 0000000..b6a3e20
--- /dev/null
+++ b/kernel/trace/trace_hw_branches.c
@@ -0,0 +1,195 @@
+/*
+ * h/w branch tracer for x86 based on bts
+ *
+ * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+
+#include <asm/ds.h>
+
+#include "trace.h"
+
+
+#define SIZEOF_BTS (1 << 13)
+
+static DEFINE_PER_CPU(struct bts_tracer *, tracer);
+static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
+
+#define this_tracer per_cpu(tracer, smp_processor_id())
+#define this_buffer per_cpu(buffer, smp_processor_id())
+
+
+static void bts_trace_start_cpu(void *arg)
+{
+ if (this_tracer)
+ ds_release_bts(this_tracer);
+
+ this_tracer =
+ ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
+ /* ovfl = */ NULL, /* th = */ (size_t)-1,
+ BTS_KERNEL);
+ if (IS_ERR(this_tracer)) {
+ this_tracer = NULL;
+ return;
+ }
+}
+
+static void bts_trace_start(struct trace_array *tr)
+{
+ int cpu;
+
+ tracing_reset_online_cpus(tr);
+
+ for_each_cpu_mask(cpu, cpu_possible_map)
+ smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
+}
+
+static void bts_trace_stop_cpu(void *arg)
+{
+ if (this_tracer) {
+ ds_release_bts(this_tracer);
+ this_tracer = NULL;
+ }
+}
+
+static void bts_trace_stop(struct trace_array *tr)
+{
+ int cpu;
+
+ for_each_cpu_mask(cpu, cpu_possible_map)
+ smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
+}
+
+static int bts_trace_init(struct trace_array *tr)
+{
+ tracing_reset_online_cpus(tr);
+ bts_trace_start(tr);
+
+ return 0;
+}
+
+static void bts_trace_print_header(struct seq_file *m)
+{
+ seq_puts(m,
+ "# CPU# FROM TO FUNCTION\n");
+ seq_puts(m,
+ "# | | | |\n");
+}
+
+static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *seq = &iter->seq;
+ struct hw_branch_entry *it;
+
+ trace_assign_type(it, entry);
+
+ if (entry->type == TRACE_HW_BRANCHES) {
+ if (trace_seq_printf(seq, "%4d ", entry->cpu) &&
+ trace_seq_printf(seq, "0x%016llx -> 0x%016llx ",
+ it->from, it->to) &&
+ (!it->from ||
+ seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
+ trace_seq_printf(seq, "\n"))
+ return TRACE_TYPE_HANDLED;
+ return TRACE_TYPE_PARTIAL_LINE;;
+ }
+ return TRACE_TYPE_UNHANDLED;
+}
+
+void trace_hw_branch(struct trace_array *tr, u64 from, u64 to)
+{
+ struct ring_buffer_event *event;
+ struct hw_branch_entry *entry;
+ unsigned long irq;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, from);
+ entry->ent.type = TRACE_HW_BRANCHES;
+ entry->ent.cpu = smp_processor_id();
+ entry->from = from;
+ entry->to = to;
+ ring_buffer_unlock_commit(tr->buffer, event, irq);
+}
+
+static void trace_bts_at(struct trace_array *tr,
+ const struct bts_trace *trace, void *at)
+{
+ struct bts_struct bts;
+ int err = 0;
+
+ WARN_ON_ONCE(!trace->read);
+ if (!trace->read)
+ return;
+
+ err = trace->read(this_tracer, at, &bts);
+ if (err < 0)
+ return;
+
+ switch (bts.qualifier) {
+ case BTS_BRANCH:
+ trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to);
+ break;
+ }
+}
+
+static void trace_bts_cpu(void *arg)
+{
+ struct trace_array *tr = (struct trace_array *) arg;
+ const struct bts_trace *trace;
+ unsigned char *at;
+
+ if (!this_tracer)
+ return;
+
+ ds_suspend_bts(this_tracer);
+ trace = ds_read_bts(this_tracer);
+ if (!trace)
+ goto out;
+
+ for (at = trace->ds.top; (void *)at < trace->ds.end;
+ at += trace->ds.size)
+ trace_bts_at(tr, trace, at);
+
+ for (at = trace->ds.begin; (void *)at < trace->ds.top;
+ at += trace->ds.size)
+ trace_bts_at(tr, trace, at);
+
+out:
+ ds_resume_bts(this_tracer);
+}
+
+static void trace_bts_prepare(struct trace_iterator *iter)
+{
+ int cpu;
+
+ for_each_cpu_mask(cpu, cpu_possible_map)
+ smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
+}
+
+struct tracer bts_tracer __read_mostly =
+{
+ .name = "hw-branch-tracer",
+ .init = bts_trace_init,
+ .reset = bts_trace_stop,
+ .print_header = bts_trace_print_header,
+ .print_line = bts_trace_print_line,
+ .start = bts_trace_start,
+ .stop = bts_trace_stop,
+ .open = trace_bts_prepare
+};
+
+__init static int init_bts_trace(void)
+{
+ return register_tracer(&bts_tracer);
+}
+device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ece6cfb..7c2e326 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
*/
static __cacheline_aligned_in_smp unsigned long max_sequence;
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
- trace_function(tr, data, ip, parent_ip, flags);
+ trace_function(tr, data, ip, parent_ip, flags, preempt_count());
atomic_dec(&data->disabled);
}
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
{
.func = irqsoff_tracer_call,
};
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
/*
* Should this new latency be reported/recorded?
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
unsigned long latency, t0, t1;
cycle_t T0, T1, delta;
unsigned long flags;
+ int pc;
/*
* usecs conversion is slow so we try to delay the conversion
@@ -141,6 +142,8 @@ check_critical_timing(struct trace_array *tr,
local_save_flags(flags);
+ pc = preempt_count();
+
if (!report_latency(delta))
goto out;
@@ -150,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
if (!report_latency(delta))
goto out_unlock;
- trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+ trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
latency = nsecs_to_usecs(delta);
@@ -173,8 +176,8 @@ out_unlock:
out:
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
- tracing_reset(data);
- trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+ tracing_reset(tr, cpu);
+ trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
}
static inline void
@@ -203,11 +206,11 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
data->critical_start = parent_ip ? : ip;
- tracing_reset(data);
+ tracing_reset(tr, cpu);
local_save_flags(flags);
- trace_function(tr, data, ip, parent_ip, flags);
+ trace_function(tr, data, ip, parent_ip, flags, preempt_count());
per_cpu(tracing_cpu, cpu) = 1;
@@ -234,14 +237,14 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
data = tr->data[cpu];
- if (unlikely(!data) || unlikely(!head_page(data)) ||
+ if (unlikely(!data) ||
!data->critical_start || atomic_read(&data->disabled))
return;
atomic_inc(&data->disabled);
local_save_flags(flags);
- trace_function(tr, data, ip, parent_ip, flags);
+ trace_function(tr, data, ip, parent_ip, flags, preempt_count());
check_critical_timing(tr, data, parent_ip ? : ip, cpu);
data->critical_start = 0;
atomic_dec(&data->disabled);
@@ -350,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
}
#endif /* CONFIG_PREEMPT_TRACER */
+/*
+ * save_tracer_enabled is used to save the state of the tracer_enabled
+ * variable when we disable it when we open a trace output file.
+ */
+static int save_tracer_enabled;
+
static void start_irqsoff_tracer(struct trace_array *tr)
{
register_ftrace_function(&trace_ops);
- tracer_enabled = 1;
+ if (tracing_is_enabled()) {
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+ } else {
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
+ }
}
static void stop_irqsoff_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
+ save_tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
}
@@ -367,53 +383,55 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
-
- if (tr->ctrl)
- start_irqsoff_tracer(tr);
+ start_irqsoff_tracer(tr);
}
static void irqsoff_tracer_reset(struct trace_array *tr)
{
- if (tr->ctrl)
- stop_irqsoff_tracer(tr);
+ stop_irqsoff_tracer(tr);
}
-static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
+static void irqsoff_tracer_start(struct trace_array *tr)
{
- if (tr->ctrl)
- start_irqsoff_tracer(tr);
- else
- stop_irqsoff_tracer(tr);
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+}
+
+static void irqsoff_tracer_stop(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
}
static void irqsoff_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
- if (iter->tr->ctrl)
- stop_irqsoff_tracer(iter->tr);
+ tracer_enabled = 0;
}
static void irqsoff_tracer_close(struct trace_iterator *iter)
{
- if (iter->tr->ctrl)
- start_irqsoff_tracer(iter->tr);
+ /* restart tracing */
+ tracer_enabled = save_tracer_enabled;
}
#ifdef CONFIG_IRQSOFF_TRACER
-static void irqsoff_tracer_init(struct trace_array *tr)
+static int irqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF;
__irqsoff_tracer_init(tr);
+ return 0;
}
static struct tracer irqsoff_tracer __read_mostly =
{
.name = "irqsoff",
.init = irqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
- .ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_irqsoff,
@@ -425,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly =
#endif
#ifdef CONFIG_PREEMPT_TRACER
-static void preemptoff_tracer_init(struct trace_array *tr)
+static int preemptoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_PREEMPT_OFF;
__irqsoff_tracer_init(tr);
+ return 0;
}
static struct tracer preemptoff_tracer __read_mostly =
@@ -437,9 +456,10 @@ static struct tracer preemptoff_tracer __read_mostly =
.name = "preemptoff",
.init = preemptoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
- .ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptoff,
@@ -453,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly =
#if defined(CONFIG_IRQSOFF_TRACER) && \
defined(CONFIG_PREEMPT_TRACER)
-static void preemptirqsoff_tracer_init(struct trace_array *tr)
+static int preemptirqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
__irqsoff_tracer_init(tr);
+ return 0;
}
static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -465,9 +486,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
.name = "preemptirqsoff",
.init = preemptirqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
- .ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19..fffcb06 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -18,46 +18,39 @@ struct header_iter {
static struct trace_array *mmio_trace_array;
static bool overrun_detected;
+static unsigned long prev_overruns;
static void mmio_reset_data(struct trace_array *tr)
{
- int cpu;
-
overrun_detected = false;
- tr->time_start = ftrace_now(tr->cpu);
+ prev_overruns = 0;
- for_each_online_cpu(cpu)
- tracing_reset(tr->data[cpu]);
+ tracing_reset_online_cpus(tr);
}
-static void mmio_trace_init(struct trace_array *tr)
+static int mmio_trace_init(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
mmio_trace_array = tr;
- if (tr->ctrl) {
- mmio_reset_data(tr);
- enable_mmiotrace();
- }
+
+ mmio_reset_data(tr);
+ enable_mmiotrace();
+ return 0;
}
static void mmio_trace_reset(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
- if (tr->ctrl)
- disable_mmiotrace();
+
+ disable_mmiotrace();
mmio_reset_data(tr);
mmio_trace_array = NULL;
}
-static void mmio_trace_ctrl_update(struct trace_array *tr)
+static void mmio_trace_start(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
- if (tr->ctrl) {
- mmio_reset_data(tr);
- enable_mmiotrace();
- } else {
- disable_mmiotrace();
- }
+ mmio_reset_data(tr);
}
static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -128,12 +121,12 @@ static void mmio_close(struct trace_iterator *iter)
static unsigned long count_overruns(struct trace_iterator *iter)
{
- int cpu;
unsigned long cnt = 0;
- for_each_online_cpu(cpu) {
- cnt += iter->overrun[cpu];
- iter->overrun[cpu] = 0;
- }
+ unsigned long over = ring_buffer_overruns(iter->tr->buffer);
+
+ if (over > prev_overruns)
+ cnt = over - prev_overruns;
+ prev_overruns = over;
return cnt;
}
@@ -171,17 +164,21 @@ print_out:
return (ret == -EBUSY) ? 0 : ret;
}
-static int mmio_print_rw(struct trace_iterator *iter)
+static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
- struct mmiotrace_rw *rw = &entry->mmiorw;
+ struct trace_mmiotrace_rw *field;
+ struct mmiotrace_rw *rw;
struct trace_seq *s = &iter->seq;
- unsigned long long t = ns2usecs(entry->t);
+ unsigned long long t = ns2usecs(iter->ts);
unsigned long usec_rem = do_div(t, 1000000ULL);
unsigned secs = (unsigned long)t;
int ret = 1;
- switch (entry->mmiorw.opcode) {
+ trace_assign_type(field, entry);
+ rw = &field->rw;
+
+ switch (rw->opcode) {
case MMIO_READ:
ret = trace_seq_printf(s,
"R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -209,21 +206,25 @@ static int mmio_print_rw(struct trace_iterator *iter)
break;
}
if (ret)
- return 1;
- return 0;
+ return TRACE_TYPE_HANDLED;
+ return TRACE_TYPE_PARTIAL_LINE;
}
-static int mmio_print_map(struct trace_iterator *iter)
+static enum print_line_t mmio_print_map(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
- struct mmiotrace_map *m = &entry->mmiomap;
+ struct trace_mmiotrace_map *field;
+ struct mmiotrace_map *m;
struct trace_seq *s = &iter->seq;
- unsigned long long t = ns2usecs(entry->t);
+ unsigned long long t = ns2usecs(iter->ts);
unsigned long usec_rem = do_div(t, 1000000ULL);
unsigned secs = (unsigned long)t;
- int ret = 1;
+ int ret;
- switch (entry->mmiorw.opcode) {
+ trace_assign_type(field, entry);
+ m = &field->map;
+
+ switch (m->opcode) {
case MMIO_PROBE:
ret = trace_seq_printf(s,
"MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -241,20 +242,43 @@ static int mmio_print_map(struct trace_iterator *iter)
break;
}
if (ret)
- return 1;
- return 0;
+ return TRACE_TYPE_HANDLED;
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct print_entry *print = (struct print_entry *)entry;
+ const char *msg = print->buf;
+ struct trace_seq *s = &iter->seq;
+ unsigned long long t = ns2usecs(iter->ts);
+ unsigned long usec_rem = do_div(t, 1000000ULL);
+ unsigned secs = (unsigned long)t;
+ int ret;
+
+ /* The trailing newline must be in the message. */
+ ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+
+ return TRACE_TYPE_HANDLED;
}
-/* return 0 to abort printing without consuming current entry in pipe mode */
-static int mmio_print_line(struct trace_iterator *iter)
+static enum print_line_t mmio_print_line(struct trace_iterator *iter)
{
switch (iter->ent->type) {
case TRACE_MMIO_RW:
return mmio_print_rw(iter);
case TRACE_MMIO_MAP:
return mmio_print_map(iter);
+ case TRACE_PRINT:
+ return mmio_print_mark(iter);
default:
- return 1; /* ignore unknown entries */
+ return TRACE_TYPE_HANDLED; /* ignore unknown entries */
}
}
@@ -263,10 +287,10 @@ static struct tracer mmio_tracer __read_mostly =
.name = "mmiotrace",
.init = mmio_trace_init,
.reset = mmio_trace_reset,
+ .start = mmio_trace_start,
.pipe_open = mmio_pipe_open,
.close = mmio_close,
.read = mmio_read,
- .ctrl_update = mmio_trace_ctrl_update,
.print_line = mmio_print_line,
};
@@ -276,6 +300,27 @@ __init static int init_mmio_trace(void)
}
device_initcall(init_mmio_trace);
+static void __trace_mmiotrace_rw(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct mmiotrace_rw *rw)
+{
+ struct ring_buffer_event *event;
+ struct trace_mmiotrace_rw *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+ entry->ent.type = TRACE_MMIO_RW;
+ entry->rw = *rw;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+}
+
void mmio_trace_rw(struct mmiotrace_rw *rw)
{
struct trace_array *tr = mmio_trace_array;
@@ -283,6 +328,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
__trace_mmiotrace_rw(tr, data, rw);
}
+static void __trace_mmiotrace_map(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct mmiotrace_map *map)
+{
+ struct ring_buffer_event *event;
+ struct trace_mmiotrace_map *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+ entry->ent.type = TRACE_MMIO_MAP;
+ entry->map = *map;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+}
+
void mmio_trace_mapping(struct mmiotrace_map *map)
{
struct trace_array *tr = mmio_trace_array;
@@ -293,3 +359,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
__trace_mmiotrace_map(tr, data, map);
preempt_enable();
}
+
+int mmio_trace_printk(const char *fmt, va_list args)
+{
+ return trace_vprintk(0, -1, fmt, args);
+}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 0000000..b9767ac
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,105 @@
+/*
+ * nop tracer
+ *
+ * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+/* Our two options */
+enum {
+ TRACE_NOP_OPT_ACCEPT = 0x1,
+ TRACE_NOP_OPT_REFUSE = 0x2
+};
+
+/* Options for the tracer (see trace_options file) */
+static struct tracer_opt nop_opts[] = {
+ /* Option that will be accepted by set_flag callback */
+ { TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) },
+ /* Option that will be refused by set_flag callback */
+ { TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) },
+ { } /* Always set a last empty entry */
+};
+
+static struct tracer_flags nop_flags = {
+ /* You can check your flags value here when you want. */
+ .val = 0, /* By default: all flags disabled */
+ .opts = nop_opts
+};
+
+static struct trace_array *ctx_trace;
+
+static void start_nop_trace(struct trace_array *tr)
+{
+ /* Nothing to do! */
+}
+
+static void stop_nop_trace(struct trace_array *tr)
+{
+ /* Nothing to do! */
+}
+
+static int nop_trace_init(struct trace_array *tr)
+{
+ int cpu;
+ ctx_trace = tr;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ start_nop_trace(tr);
+ return 0;
+}
+
+static void nop_trace_reset(struct trace_array *tr)
+{
+ stop_nop_trace(tr);
+}
+
+/* It only serves as a signal handler and a callback to
+ * accept or refuse tthe setting of a flag.
+ * If you don't implement it, then the flag setting will be
+ * automatically accepted.
+ */
+static int nop_set_flag(u32 old_flags, u32 bit, int set)
+{
+ /*
+ * Note that you don't need to update nop_flags.val yourself.
+ * The tracing Api will do it automatically if you return 0
+ */
+ if (bit == TRACE_NOP_OPT_ACCEPT) {
+ printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept."
+ " Now cat trace_options to see the result\n",
+ set);
+ return 0;
+ }
+
+ if (bit == TRACE_NOP_OPT_REFUSE) {
+ printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
+ "Now cat trace_options to see the result\n",
+ set);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+struct tracer nop_trace __read_mostly =
+{
+ .name = "nop",
+ .init = nop_trace_init,
+ .reset = nop_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_nop,
+#endif
+ .flags = &nop_flags,
+ .set_flag = nop_set_flag
+};
+
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
new file mode 100644
index 0000000..a7172a3
--- /dev/null
+++ b/kernel/trace/trace_power.c
@@ -0,0 +1,179 @@
+/*
+ * ring buffer based C-state tracer
+ *
+ * Arjan van de Ven <arjan@linux.intel.com>
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * Much is borrowed from trace_boot.c which is
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+
+#include "trace.h"
+
+static struct trace_array *power_trace;
+static int __read_mostly trace_power_enabled;
+
+
+static void start_power_trace(struct trace_array *tr)
+{
+ trace_power_enabled = 1;
+}
+
+static void stop_power_trace(struct trace_array *tr)
+{
+ trace_power_enabled = 0;
+}
+
+
+static int power_trace_init(struct trace_array *tr)
+{
+ int cpu;
+ power_trace = tr;
+
+ trace_power_enabled = 1;
+
+ for_each_cpu_mask(cpu, cpu_possible_map)
+ tracing_reset(tr, cpu);
+ return 0;
+}
+
+static enum print_line_t power_print_line(struct trace_iterator *iter)
+{
+ int ret = 0;
+ struct trace_entry *entry = iter->ent;
+ struct trace_power *field ;
+ struct power_trace *it;
+ struct trace_seq *s = &iter->seq;
+ struct timespec stamp;
+ struct timespec duration;
+
+ trace_assign_type(field, entry);
+ it = &field->state_data;
+ stamp = ktime_to_timespec(it->stamp);
+ duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
+
+ if (entry->type == TRACE_POWER) {
+ if (it->type == POWER_CSTATE)
+ ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
+ stamp.tv_sec,
+ stamp.tv_nsec,
+ it->state, iter->cpu,
+ duration.tv_sec,
+ duration.tv_nsec);
+ if (it->type == POWER_PSTATE)
+ ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
+ stamp.tv_sec,
+ stamp.tv_nsec,
+ it->state, iter->cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_HANDLED;
+ }
+ return TRACE_TYPE_UNHANDLED;
+}
+
+static struct tracer power_tracer __read_mostly =
+{
+ .name = "power",
+ .init = power_trace_init,
+ .start = start_power_trace,
+ .stop = stop_power_trace,
+ .reset = stop_power_trace,
+ .print_line = power_print_line,
+};
+
+static int init_power_trace(void)
+{
+ return register_tracer(&power_tracer);
+}
+device_initcall(init_power_trace);
+
+void trace_power_start(struct power_trace *it, unsigned int type,
+ unsigned int level)
+{
+ if (!trace_power_enabled)
+ return;
+
+ memset(it, 0, sizeof(struct power_trace));
+ it->state = level;
+ it->type = type;
+ it->stamp = ktime_get();
+}
+EXPORT_SYMBOL_GPL(trace_power_start);
+
+
+void trace_power_end(struct power_trace *it)
+{
+ struct ring_buffer_event *event;
+ struct trace_power *entry;
+ struct trace_array_cpu *data;
+ unsigned long irq_flags;
+ struct trace_array *tr = power_trace;
+
+ if (!trace_power_enabled)
+ return;
+
+ preempt_disable();
+ it->end = ktime_get();
+ data = tr->data[smp_processor_id()];
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, 0);
+ entry->ent.type = TRACE_POWER;
+ entry->state_data = *it;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+
+ out:
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(trace_power_end);
+
+void trace_power_mark(struct power_trace *it, unsigned int type,
+ unsigned int level)
+{
+ struct ring_buffer_event *event;
+ struct trace_power *entry;
+ struct trace_array_cpu *data;
+ unsigned long irq_flags;
+ struct trace_array *tr = power_trace;
+
+ if (!trace_power_enabled)
+ return;
+
+ memset(it, 0, sizeof(struct power_trace));
+ it->state = level;
+ it->type = type;
+ it->stamp = ktime_get();
+ preempt_disable();
+ it->end = it->stamp;
+ data = tr->data[smp_processor_id()];
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, 0);
+ entry->ent.type = TRACE_POWER;
+ entry->state_data = *it;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+
+ out:
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(trace_power_mark);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a2..df175cb 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,25 +9,27 @@
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
-#include <linux/marker.h>
#include <linux/ftrace.h>
+#include <trace/sched.h>
#include "trace.h"
static struct trace_array *ctx_trace;
static int __read_mostly tracer_enabled;
-static atomic_t sched_ref;
+static int sched_ref;
+static DEFINE_MUTEX(sched_register_mutex);
static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
struct task_struct *next)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
- long disabled;
int cpu;
+ int pc;
+
+ if (!sched_ref)
+ return;
tracing_record_cmdline(prev);
tracing_record_cmdline(next);
@@ -35,183 +37,95 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
if (!tracer_enabled)
return;
+ pc = preempt_count();
local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
+ data = ctx_trace->data[cpu];
- if (likely(disabled == 1))
- tracing_sched_switch_trace(tr, data, prev, next, flags);
+ if (likely(!atomic_read(&data->disabled)))
+ tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
- atomic_dec(&data->disabled);
local_irq_restore(flags);
}
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- if (!atomic_read(&sched_ref))
- return;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- sched_switch_func(probe_data, __rq, prev, next);
-}
-
static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
- task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
- long disabled;
- int cpu;
+ int cpu, pc;
- if (!tracer_enabled)
+ if (!likely(tracer_enabled))
return;
- tracing_record_cmdline(curr);
+ pc = preempt_count();
+ tracing_record_cmdline(current);
local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
+ data = ctx_trace->data[cpu];
- if (likely(disabled == 1))
- tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+ if (likely(!atomic_read(&data->disabled)))
+ tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+ flags, pc);
- atomic_dec(&data->disabled);
local_irq_restore(flags);
}
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_func(probe_data, __rq, task, curr);
-}
-
-static void sched_switch_reset(struct trace_array *tr)
-{
- int cpu;
-
- tr->time_start = ftrace_now(tr->cpu);
-
- for_each_online_cpu(cpu)
- tracing_reset(tr->data[cpu]);
-}
-
static int tracing_sched_register(void)
{
int ret;
- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup(probe_sched_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return ret;
}
- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &ctx_trace);
+ ret = register_trace_sched_switch(probe_sched_switch);
if (ret) {
- pr_info("sched trace: Couldn't add marker"
+ pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
return ret;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
return ret;
}
static void tracing_sched_unregister(void)
{
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_switch(probe_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
}
static void tracing_start_sched_switch(void)
{
- long ref;
-
- ref = atomic_inc_return(&sched_ref);
- if (ref == 1)
+ mutex_lock(&sched_register_mutex);
+ if (!(sched_ref++))
tracing_sched_register();
+ mutex_unlock(&sched_register_mutex);
}
static void tracing_stop_sched_switch(void)
{
- long ref;
-
- ref = atomic_dec_and_test(&sched_ref);
- if (ref)
+ mutex_lock(&sched_register_mutex);
+ if (!(--sched_ref))
tracing_sched_unregister();
+ mutex_unlock(&sched_register_mutex);
}
void tracing_start_cmdline_record(void)
@@ -224,40 +138,86 @@ void tracing_stop_cmdline_record(void)
tracing_stop_sched_switch();
}
+/**
+ * tracing_start_sched_switch_record - start tracing context switches
+ *
+ * Turns on context switch tracing for a tracer.
+ */
+void tracing_start_sched_switch_record(void)
+{
+ if (unlikely(!ctx_trace)) {
+ WARN_ON(1);
+ return;
+ }
+
+ tracing_start_sched_switch();
+
+ mutex_lock(&sched_register_mutex);
+ tracer_enabled++;
+ mutex_unlock(&sched_register_mutex);
+}
+
+/**
+ * tracing_stop_sched_switch_record - start tracing context switches
+ *
+ * Turns off context switch tracing for a tracer.
+ */
+void tracing_stop_sched_switch_record(void)
+{
+ mutex_lock(&sched_register_mutex);
+ tracer_enabled--;
+ WARN_ON(tracer_enabled < 0);
+ mutex_unlock(&sched_register_mutex);
+
+ tracing_stop_sched_switch();
+}
+
+/**
+ * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
+ * @tr: trace array pointer to assign
+ *
+ * Some tracers might want to record the context switches in their
+ * trace. This function lets those tracers assign the trace array
+ * to use.
+ */
+void tracing_sched_switch_assign_trace(struct trace_array *tr)
+{
+ ctx_trace = tr;
+}
+
static void start_sched_trace(struct trace_array *tr)
{
- sched_switch_reset(tr);
- tracing_start_cmdline_record();
- tracer_enabled = 1;
+ tracing_reset_online_cpus(tr);
+ tracing_start_sched_switch_record();
}
static void stop_sched_trace(struct trace_array *tr)
{
- tracer_enabled = 0;
- tracing_stop_cmdline_record();
+ tracing_stop_sched_switch_record();
}
-static void sched_switch_trace_init(struct trace_array *tr)
+static int sched_switch_trace_init(struct trace_array *tr)
{
ctx_trace = tr;
-
- if (tr->ctrl)
- start_sched_trace(tr);
+ start_sched_trace(tr);
+ return 0;
}
static void sched_switch_trace_reset(struct trace_array *tr)
{
- if (tr->ctrl)
+ if (sched_ref)
stop_sched_trace(tr);
}
-static void sched_switch_trace_ctrl_update(struct trace_array *tr)
+static void sched_switch_trace_start(struct trace_array *tr)
{
- /* When starting a new trace, reset the buffers */
- if (tr->ctrl)
- start_sched_trace(tr);
- else
- stop_sched_trace(tr);
+ tracing_reset_online_cpus(tr);
+ tracing_start_sched_switch();
+}
+
+static void sched_switch_trace_stop(struct trace_array *tr)
+{
+ tracing_stop_sched_switch();
}
static struct tracer sched_switch_trace __read_mostly =
@@ -265,7 +225,8 @@ static struct tracer sched_switch_trace __read_mostly =
.name = "sched_switch",
.init = sched_switch_trace_init,
.reset = sched_switch_trace_reset,
- .ctrl_update = sched_switch_trace_ctrl_update,
+ .start = sched_switch_trace_start,
+ .stop = sched_switch_trace_stop,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sched_switch,
#endif
@@ -273,14 +234,7 @@ static struct tracer sched_switch_trace __read_mostly =
__init static int init_sched_switch_trace(void)
{
- int ret = 0;
-
- if (atomic_read(&sched_ref))
- ret = tracing_sched_register();
- if (ret) {
- pr_info("error registering scheduler trace\n");
- return ret;
- }
return register_tracer(&sched_switch_trace);
}
device_initcall(init_sched_switch_trace);
+
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb..43586b6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
-#include <linux/marker.h>
+#include <trace/sched.h>
#include "trace.h"
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
static void __wakeup_reset(struct trace_array *tr);
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
@@ -44,12 +44,13 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
long disabled;
int resched;
int cpu;
+ int pc;
if (likely(!wakeup_task))
return;
- resched = need_resched();
- preempt_disable_notrace();
+ pc = preempt_count();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -70,7 +71,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
if (task_cpu(wakeup_task) != cpu)
goto unlock;
- trace_function(tr, data, ip, parent_ip, flags);
+ trace_function(tr, data, ip, parent_ip, flags, pc);
unlock:
__raw_spin_unlock(&wakeup_lock);
@@ -79,22 +80,14 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
out:
atomic_dec(&data->disabled);
- /*
- * To prevent recursion from the scheduler, if the
- * resched flag was set before we entered, then
- * don't reschedule.
- */
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
}
static struct ftrace_ops trace_ops __read_mostly =
{
.func = wakeup_tracer_call,
};
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
/*
* Should this new latency be reported/recorded?
@@ -112,17 +105,18 @@ static int report_latency(cycle_t delta)
}
static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
unsigned long latency = 0, t0 = 0, t1 = 0;
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
long disabled;
int cpu;
+ int pc;
+
+ tracing_record_cmdline(prev);
if (unlikely(!tracer_enabled))
return;
@@ -139,12 +133,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
if (next != wakeup_task)
return;
+ pc = preempt_count();
+
/* The task we are waiting for is waking up */
- data = tr->data[wakeup_cpu];
+ data = wakeup_trace->data[wakeup_cpu];
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (likely(disabled != 1))
goto out;
@@ -155,7 +151,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
- trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+ trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
/*
* usecs conversion is slow so we try to delay the conversion
@@ -174,39 +170,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);
- update_max_tr(tr, wakeup_task, wakeup_cpu);
+ update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
out_unlock:
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);
__raw_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- tracing_record_cmdline(prev);
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- wakeup_sched_switch(probe_data, __rq, prev, next);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
static void __wakeup_reset(struct trace_array *tr)
@@ -216,7 +187,7 @@ static void __wakeup_reset(struct trace_array *tr)
for_each_possible_cpu(cpu) {
data = tr->data[cpu];
- tracing_reset(data);
+ tracing_reset(tr, cpu);
}
wakeup_cpu = -1;
@@ -240,19 +211,26 @@ static void wakeup_reset(struct trace_array *tr)
}
static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
- struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p, int success)
{
int cpu = smp_processor_id();
unsigned long flags;
long disabled;
+ int pc;
+
+ if (likely(!tracer_enabled))
+ return;
+
+ tracing_record_cmdline(p);
+ tracing_record_cmdline(current);
if (likely(!rt_task(p)) ||
p->prio >= wakeup_prio ||
- p->prio >= curr->prio)
+ p->prio >= current->prio)
return;
- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ pc = preempt_count();
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (unlikely(disabled != 1))
goto out;
@@ -264,7 +242,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
goto out_locked;
/* reset the trace */
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);
wakeup_cpu = task_cpu(p);
wakeup_prio = p->prio;
@@ -274,74 +252,43 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
local_save_flags(flags);
- tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
- trace_function(tr, tr->data[wakeup_cpu],
- CALLER_ADDR1, CALLER_ADDR2, flags);
+ wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+ trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
+ CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked:
__raw_spin_unlock(&wakeup_lock);
out:
- atomic_dec(&tr->data[cpu]->disabled);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct trace_array **ptr = probe_data;
- struct trace_array *tr = *ptr;
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_check_start(tr, task, curr);
-}
+/*
+ * save_tracer_enabled is used to save the state of the tracer_enabled
+ * variable when we disable it when we open a trace output file.
+ */
+static int save_tracer_enabled;
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup(probe_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return;
}
- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup_new(probe_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &wakeup_trace);
+ ret = register_trace_sched_switch(probe_wakeup_sched_switch);
if (ret) {
- pr_info("sched trace: Couldn't add marker"
+ pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
@@ -359,71 +306,71 @@ static void start_wakeup_tracer(struct trace_array *tr)
register_ftrace_function(&trace_ops);
- tracer_enabled = 1;
+ if (tracing_is_enabled()) {
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+ } else {
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
+ }
return;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup(probe_wakeup);
}
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
+ save_tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_switch(probe_wakeup_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
+ unregister_trace_sched_wakeup(probe_wakeup);
}
-static void wakeup_tracer_init(struct trace_array *tr)
+static int wakeup_tracer_init(struct trace_array *tr)
{
wakeup_trace = tr;
-
- if (tr->ctrl)
- start_wakeup_tracer(tr);
+ start_wakeup_tracer(tr);
+ return 0;
}
static void wakeup_tracer_reset(struct trace_array *tr)
{
- if (tr->ctrl) {
- stop_wakeup_tracer(tr);
- /* make sure we put back any tasks we are tracing */
- wakeup_reset(tr);
- }
+ stop_wakeup_tracer(tr);
+ /* make sure we put back any tasks we are tracing */
+ wakeup_reset(tr);
}
-static void wakeup_tracer_ctrl_update(struct trace_array *tr)
+static void wakeup_tracer_start(struct trace_array *tr)
{
- if (tr->ctrl)
- start_wakeup_tracer(tr);
- else
- stop_wakeup_tracer(tr);
+ wakeup_reset(tr);
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+}
+
+static void wakeup_tracer_stop(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
}
static void wakeup_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
- if (iter->tr->ctrl)
- stop_wakeup_tracer(iter->tr);
+ tracer_enabled = 0;
}
static void wakeup_tracer_close(struct trace_iterator *iter)
{
/* forget about any processes we were recording */
- if (iter->tr->ctrl)
- start_wakeup_tracer(iter->tr);
+ if (save_tracer_enabled) {
+ wakeup_reset(iter->tr);
+ tracer_enabled = 1;
+ }
}
static struct tracer wakeup_tracer __read_mostly =
@@ -431,9 +378,10 @@ static struct tracer wakeup_tracer __read_mostly =
.name = "wakeup",
.init = wakeup_tracer_init,
.reset = wakeup_tracer_reset,
+ .start = wakeup_tracer_start,
+ .stop = wakeup_tracer_stop,
.open = wakeup_tracer_open,
.close = wakeup_tracer_close,
- .ctrl_update = wakeup_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e..88c8eb7 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,65 +9,30 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_FN:
case TRACE_CTX:
case TRACE_WAKE:
+ case TRACE_CONT:
case TRACE_STACK:
+ case TRACE_PRINT:
case TRACE_SPECIAL:
+ case TRACE_BRANCH:
return 1;
}
return 0;
}
-static int
-trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
+static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
{
- struct trace_entry *entries;
- struct page *page;
- int idx = 0;
- int i;
+ struct ring_buffer_event *event;
+ struct trace_entry *entry;
- BUG_ON(list_empty(&data->trace_pages));
- page = list_entry(data->trace_pages.next, struct page, lru);
- entries = page_address(page);
+ while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
+ entry = ring_buffer_event_data(event);
- check_pages(data);
- if (head_page(data) != entries)
- goto failed;
-
- /*
- * The starting trace buffer always has valid elements,
- * if any element exists.
- */
- entries = head_page(data);
-
- for (i = 0; i < tr->entries; i++) {
-
- if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
+ if (!trace_valid_entry(entry)) {
printk(KERN_CONT ".. invalid entry %d ",
- entries[idx].type);
+ entry->type);
goto failed;
}
-
- idx++;
- if (idx >= ENTRIES_PER_PAGE) {
- page = virt_to_page(entries);
- if (page->lru.next == &data->trace_pages) {
- if (i != tr->entries - 1) {
- printk(KERN_CONT ".. entries buffer mismatch");
- goto failed;
- }
- } else {
- page = list_entry(page->lru.next, struct page, lru);
- entries = page_address(page);
- }
- idx = 0;
- }
}
-
- page = virt_to_page(entries);
- if (page->lru.next != &data->trace_pages) {
- printk(KERN_CONT ".. too many entries");
- goto failed;
- }
-
return 0;
failed:
@@ -87,20 +52,18 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
int cpu, ret = 0;
/* Don't allow flipping of max traces now */
- raw_local_irq_save(flags);
+ local_irq_save(flags);
__raw_spin_lock(&ftrace_max_lock);
- for_each_possible_cpu(cpu) {
- if (!head_page(tr->data[cpu]))
- continue;
- cnt += tr->data[cpu]->trace_idx;
+ cnt = ring_buffer_entries(tr->buffer);
- ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
+ for_each_possible_cpu(cpu) {
+ ret = trace_test_buffer_cpu(tr, cpu);
if (ret)
break;
}
__raw_spin_unlock(&ftrace_max_lock);
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
if (count)
*count = cnt;
@@ -108,7 +71,12 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
return ret;
}
-#ifdef CONFIG_FTRACE
+static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
+{
+ printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n",
+ trace->name, init_ret);
+}
+#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -120,11 +88,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
struct trace_array *tr,
int (*func)(void))
{
- unsigned long count;
- int ret;
int save_ftrace_enabled = ftrace_enabled;
int save_tracer_enabled = tracer_enabled;
+ unsigned long count;
char *func_name;
+ int ret;
/* The ftrace test PASSED */
printk(KERN_CONT "PASSED\n");
@@ -137,13 +105,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
/* passed in by parameter to fool gcc from optimizing */
func();
- /* update the records */
- ret = ftrace_force_update();
- if (ret) {
- printk(KERN_CONT ".. ftraced failed .. ");
- return ret;
- }
-
/*
* Some archs *cough*PowerPC*cough* add charachters to the
* start of the function names. We simply put a '*' to
@@ -155,8 +116,12 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
ftrace_set_filter(func_name, strlen(func_name), 1);
/* enable tracing */
- tr->ctrl = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ goto out;
+ }
+
/* Sleep for a 1/10 of a second */
msleep(100);
@@ -178,13 +143,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
+ tracing_start();
/* we should only have one item */
if (!ret && count != 1) {
@@ -192,6 +157,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
ret = -1;
goto out;
}
+
out:
ftrace_enabled = save_ftrace_enabled;
tracer_enabled = save_tracer_enabled;
@@ -212,37 +178,34 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
int
trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
{
- unsigned long count;
- int ret;
int save_ftrace_enabled = ftrace_enabled;
int save_tracer_enabled = tracer_enabled;
+ unsigned long count;
+ int ret;
/* make sure msleep has been recorded */
msleep(1);
- /* force the recorded functions to be traced */
- ret = ftrace_force_update();
- if (ret) {
- printk(KERN_CONT ".. ftraced failed .. ");
- return ret;
- }
-
/* start the tracing */
ftrace_enabled = 1;
tracer_enabled = 1;
- tr->ctrl = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ goto out;
+ }
+
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
+ tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
@@ -263,7 +226,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
return ret;
}
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_IRQSOFF_TRACER
int
@@ -274,8 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- tr->ctrl = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* reset the max latency */
tracing_max_latency = 0;
/* disable interrupts for a bit */
@@ -283,13 +250,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
udelay(100);
local_irq_enable();
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
+ tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
@@ -310,9 +277,26 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
unsigned long count;
int ret;
+ /*
+ * Now that the big kernel lock is no longer preemptable,
+ * and this is called with the BKL held, it will always
+ * fail. If preemption is already disabled, simply
+ * pass the test. When the BKL is removed, or becomes
+ * preemptible again, we will once again test this,
+ * so keep it in.
+ */
+ if (preempt_count()) {
+ printk(KERN_CONT "can not test ... force ");
+ return 0;
+ }
+
/* start the tracing */
- tr->ctrl = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* reset the max latency */
tracing_max_latency = 0;
/* disable preemption for a bit */
@@ -320,13 +304,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
udelay(100);
preempt_enable();
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
+ tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
@@ -347,9 +331,25 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
unsigned long count;
int ret;
+ /*
+ * Now that the big kernel lock is no longer preemptable,
+ * and this is called with the BKL held, it will always
+ * fail. If preemption is already disabled, simply
+ * pass the test. When the BKL is removed, or becomes
+ * preemptible again, we will once again test this,
+ * so keep it in.
+ */
+ if (preempt_count()) {
+ printk(KERN_CONT "can not test ... force ");
+ return 0;
+ }
+
/* start the tracing */
- tr->ctrl = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ goto out;
+ }
/* reset the max latency */
tracing_max_latency = 0;
@@ -363,27 +363,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
local_irq_enable();
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
- if (ret)
+ if (ret) {
+ tracing_start();
goto out;
+ }
ret = trace_test_buffer(&max_tr, &count);
- if (ret)
+ if (ret) {
+ tracing_start();
goto out;
+ }
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
+ tracing_start();
goto out;
}
/* do the test by disabling interrupts first this time */
tracing_max_latency = 0;
- tr->ctrl = 1;
- trace->ctrl_update(tr);
+ tracing_start();
preempt_disable();
local_irq_disable();
udelay(100);
@@ -392,8 +395,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
local_irq_enable();
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (ret)
@@ -409,12 +411,22 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
out:
trace->reset(tr);
+ tracing_start();
tracing_max_latency = save_max;
return ret;
}
#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
+#ifdef CONFIG_NOP_TRACER
+int
+trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
+{
+ /* What could possibly go wrong? */
+ return 0;
+}
+#endif
+
#ifdef CONFIG_SCHED_TRACER
static int trace_wakeup_test_thread(void *data)
{
@@ -465,8 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
wait_for_completion(&isrt);
/* start the tracing */
- tr->ctrl = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* reset the max latency */
tracing_max_latency = 0;
@@ -486,9 +502,11 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
wake_up_process(p);
+ /* give a little time to let the thread wake up */
+ msleep(100);
+
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
@@ -496,6 +514,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
trace->reset(tr);
+ tracing_start();
tracing_max_latency = save_max;
@@ -519,16 +538,20 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
int ret;
/* start the tracing */
- tr->ctrl = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
+ tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
@@ -547,17 +570,48 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- tr->ctrl = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return 0;
+ }
+
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
+ tracing_start();
return ret;
}
#endif /* CONFIG_SYSPROF_TRACER */
+
+#ifdef CONFIG_BRANCH_TRACER
+int
+trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+ return ret;
+}
+#endif /* CONFIG_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 0000000..d0871bc
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include "trace.h"
+
+#define STACK_TRACE_ENTRIES 500
+
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
+ { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
+static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
+
+static struct stack_trace max_stack_trace = {
+ .max_entries = STACK_TRACE_ENTRIES,
+ .entries = stack_dump_trace,
+};
+
+static unsigned long max_stack_size;
+static raw_spinlock_t max_stack_lock =
+ (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int stack_trace_disabled __read_mostly;
+static DEFINE_PER_CPU(int, trace_active);
+static DEFINE_MUTEX(stack_sysctl_mutex);
+
+int stack_tracer_enabled;
+static int last_stack_tracer_enabled;
+
+static inline void check_stack(void)
+{
+ unsigned long this_size, flags;
+ unsigned long *p, *top, *start;
+ int i;
+
+ this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+ this_size = THREAD_SIZE - this_size;
+
+ if (this_size <= max_stack_size)
+ return;
+
+ /* we do not handle interrupt stacks yet */
+ if (!object_is_on_stack(&this_size))
+ return;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&max_stack_lock);
+
+ /* a race could have already updated it */
+ if (this_size <= max_stack_size)
+ goto out;
+
+ max_stack_size = this_size;
+
+ max_stack_trace.nr_entries = 0;
+ max_stack_trace.skip = 3;
+
+ save_stack_trace(&max_stack_trace);
+
+ /*
+ * Now find where in the stack these are.
+ */
+ i = 0;
+ start = &this_size;
+ top = (unsigned long *)
+ (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
+
+ /*
+ * Loop through all the entries. One of the entries may
+ * for some reason be missed on the stack, so we may
+ * have to account for them. If they are all there, this
+ * loop will only happen once. This code only takes place
+ * on a new max, so it is far from a fast path.
+ */
+ while (i < max_stack_trace.nr_entries) {
+ int found = 0;
+
+ stack_dump_index[i] = this_size;
+ p = start;
+
+ for (; p < top && i < max_stack_trace.nr_entries; p++) {
+ if (*p == stack_dump_trace[i]) {
+ this_size = stack_dump_index[i++] =
+ (top - p) * sizeof(unsigned long);
+ found = 1;
+ /* Start the search from here */
+ start = p + 1;
+ }
+ }
+
+ if (!found)
+ i++;
+ }
+
+ out:
+ __raw_spin_unlock(&max_stack_lock);
+ local_irq_restore(flags);
+}
+
+static void
+stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu, resched;
+
+ if (unlikely(!ftrace_enabled || stack_trace_disabled))
+ return;
+
+ resched = ftrace_preempt_disable();
+
+ cpu = raw_smp_processor_id();
+ /* no atomic needed, we only modify this variable by this cpu */
+ if (per_cpu(trace_active, cpu)++ != 0)
+ goto out;
+
+ check_stack();
+
+ out:
+ per_cpu(trace_active, cpu)--;
+ /* prevent recursion in schedule */
+ ftrace_preempt_enable(resched);
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = stack_trace_call,
+};
+
+static ssize_t
+stack_max_size_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long *ptr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
+ if (r > sizeof(buf))
+ r = sizeof(buf);
+ return simple_read_from_buffer(ubuf, count, ppos, buf, r);
+}
+
+static ssize_t
+stack_max_size_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ long *ptr = filp->private_data;
+ unsigned long val, flags;
+ char buf[64];
+ int ret;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&max_stack_lock);
+ *ptr = val;
+ __raw_spin_unlock(&max_stack_lock);
+ local_irq_restore(flags);
+
+ return count;
+}
+
+static const struct file_operations stack_max_size_fops = {
+ .open = tracing_open_generic,
+ .read = stack_max_size_read,
+ .write = stack_max_size_write,
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ long i;
+
+ (*pos)++;
+
+ if (v == SEQ_START_TOKEN)
+ i = 0;
+ else {
+ i = *(long *)v;
+ i++;
+ }
+
+ if (i >= max_stack_trace.nr_entries ||
+ stack_dump_trace[i] == ULONG_MAX)
+ return NULL;
+
+ m->private = (void *)i;
+
+ return &m->private;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ void *t = SEQ_START_TOKEN;
+ loff_t l = 0;
+
+ local_irq_disable();
+ __raw_spin_lock(&max_stack_lock);
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (; t && l < *pos; t = t_next(m, t, &l))
+ ;
+
+ return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+ __raw_spin_unlock(&max_stack_lock);
+ local_irq_enable();
+}
+
+static int trace_lookup_stack(struct seq_file *m, long i)
+{
+ unsigned long addr = stack_dump_trace[i];
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+
+ sprint_symbol(str, addr);
+
+ return seq_printf(m, "%s\n", str);
+#else
+ return seq_printf(m, "%p\n", (void*)addr);
+#endif
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ long i;
+ int size;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(m, " Depth Size Location"
+ " (%d entries)\n"
+ " ----- ---- --------\n",
+ max_stack_trace.nr_entries);
+ return 0;
+ }
+
+ i = *(long *)v;
+
+ if (i >= max_stack_trace.nr_entries ||
+ stack_dump_trace[i] == ULONG_MAX)
+ return 0;
+
+ if (i+1 == max_stack_trace.nr_entries ||
+ stack_dump_trace[i+1] == ULONG_MAX)
+ size = stack_dump_index[i];
+ else
+ size = stack_dump_index[i] - stack_dump_index[i+1];
+
+ seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size);
+
+ trace_lookup_stack(m, i);
+
+ return 0;
+}
+
+static const struct seq_operations stack_trace_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int stack_trace_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ ret = seq_open(file, &stack_trace_seq_ops);
+
+ return ret;
+}
+
+static const struct file_operations stack_trace_fops = {
+ .open = stack_trace_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+int
+stack_trace_sysctl(struct ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+
+ mutex_lock(&stack_sysctl_mutex);
+
+ ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+
+ if (ret || !write ||
+ (last_stack_tracer_enabled == stack_tracer_enabled))
+ goto out;
+
+ last_stack_tracer_enabled = stack_tracer_enabled;
+
+ if (stack_tracer_enabled)
+ register_ftrace_function(&trace_ops);
+ else
+ unregister_ftrace_function(&trace_ops);
+
+ out:
+ mutex_unlock(&stack_sysctl_mutex);
+ return ret;
+}
+
+static __init int enable_stacktrace(char *str)
+{
+ stack_tracer_enabled = 1;
+ last_stack_tracer_enabled = 1;
+ return 1;
+}
+__setup("stacktrace", enable_stacktrace);
+
+static __init int stack_trace_init(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
+ &max_stack_size, &stack_max_size_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+
+ entry = debugfs_create_file("stack_trace", 0444, d_tracer,
+ NULL, &stack_trace_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'stack_trace' entry\n");
+
+ if (stack_tracer_enabled)
+ register_ftrace_function(&trace_ops);
+
+ return 0;
+}
+
+device_initcall(stack_trace_init);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index db58fb6..a5779bd 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,6 @@ static void start_stack_timer(int cpu)
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn;
- hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
}
@@ -234,20 +233,10 @@ static void stop_stack_timers(void)
stop_stack_timer(cpu);
}
-static void stack_reset(struct trace_array *tr)
-{
- int cpu;
-
- tr->time_start = ftrace_now(tr->cpu);
-
- for_each_online_cpu(cpu)
- tracing_reset(tr->data[cpu]);
-}
-
static void start_stack_trace(struct trace_array *tr)
{
mutex_lock(&sample_timer_lock);
- stack_reset(tr);
+ tracing_reset_online_cpus(tr);
start_stack_timers();
tracer_enabled = 1;
mutex_unlock(&sample_timer_lock);
@@ -261,27 +250,17 @@ static void stop_stack_trace(struct trace_array *tr)
mutex_unlock(&sample_timer_lock);
}
-static void stack_trace_init(struct trace_array *tr)
+static int stack_trace_init(struct trace_array *tr)
{
sysprof_trace = tr;
- if (tr->ctrl)
- start_stack_trace(tr);
+ start_stack_trace(tr);
+ return 0;
}
static void stack_trace_reset(struct trace_array *tr)
{
- if (tr->ctrl)
- stop_stack_trace(tr);
-}
-
-static void stack_trace_ctrl_update(struct trace_array *tr)
-{
- /* When starting a new trace, reset the buffers */
- if (tr->ctrl)
- start_stack_trace(tr);
- else
- stop_stack_trace(tr);
+ stop_stack_trace(tr);
}
static struct tracer stack_trace __read_mostly =
@@ -289,7 +268,6 @@ static struct tracer stack_trace __read_mostly =
.name = "sysprof",
.init = stack_trace_init,
.reset = stack_trace_reset,
- .ctrl_update = stack_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sysprof,
#endif
OpenPOWER on IntegriCloud