From bd3bff9e20f454b242d979ec2f9a4dca0d5fa06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: sched: add latency tracer callbacks to the scheduler add 3 lightweight callbacks to the tracer backend. zero impact if tracing is turned off. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a..463dcdb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2467,6 +2467,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ + ftrace_wake_up_task(p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2611,6 +2612,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } + ftrace_wake_up_new_task(p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2783,6 +2785,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); + ftrace_ctx_switch(prev, next); mm = next->mm; oldmm = prev->active_mm; /* -- cgit v1.1 From 7c731e0a495e25e79dc1e9e68772a67a55721a65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: make the task state char-string visible to all The tracer wants to be able to convert the state number into a user visible character. This patch pulls that conversion string out the scheduler into the header. This way if it were to ever change, other parts of the kernel will know. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 463dcdb..73e6008 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5729,7 +5729,7 @@ out_unlock: return retval; } -static const char stat_nam[] = "RSDTtZX"; +static const char stat_nam[] = TASK_STATE_TO_CHAR_STR; void sched_show_task(struct task_struct *p) { -- cgit v1.1 From 6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace preempt off critical timings Add preempt off timings. A lot of kernel core code is taken from the RT patch latency trace that was written by Ingo Molnar. This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers Now instead of just tracing irqs off, preemption off can be selected to be recorded. When this is selected, it shares the same files as irqs off timings. One can either trace preemption off, irqs off, or one or the other off. By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording of preempt off only is performed. "irqsoff" will only record the time irqs are disabled, but "preemptirqsoff" will take the total time irqs or preemption are disabled. Runtime switching of these options is now supported by simpling echoing in the appropriate trace name into /debugfs/tracing/current_tracer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 73e6008..328494e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -4365,26 +4366,44 @@ void scheduler_tick(void) #endif } -#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) +#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ + defined(CONFIG_PREEMPT_TRACER)) + +static inline unsigned long get_parent_ip(unsigned long addr) +{ + if (in_lock_functions(addr)) { + addr = CALLER_ADDR2; + if (in_lock_functions(addr)) + addr = CALLER_ADDR3; + } + return addr; +} void __kprobes add_preempt_count(int val) { +#ifdef CONFIG_DEBUG_PREEMPT /* * Underflow? */ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) return; +#endif preempt_count() += val; +#ifdef CONFIG_DEBUG_PREEMPT /* * Spinlock count overflowing soon? */ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK - 10); +#endif + if (preempt_count() == val) + trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); } EXPORT_SYMBOL(add_preempt_count); void __kprobes sub_preempt_count(int val) { +#ifdef CONFIG_DEBUG_PREEMPT /* * Underflow? */ @@ -4396,7 +4415,10 @@ void __kprobes sub_preempt_count(int val) if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK))) return; +#endif + if (preempt_count() == val) + trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); preempt_count() -= val; } EXPORT_SYMBOL(sub_preempt_count); -- cgit v1.1 From 8ac0fca4ccb355ce50471d7aa3f10f5900b28b95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: sched tracer fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 328494e..53ab117 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2613,7 +2613,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_new_task(p, rq->curr); + ftrace_wake_up_task(p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) -- cgit v1.1 From 4e65551905fb0300ae7e667cbaa41ee2e3f29a13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: sched tracer, trace full rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 53ab117..b9208a0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2394,6 +2394,35 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ +#ifdef CONFIG_CONTEXT_SWITCH_TRACER + +void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) +{ + struct sched_entity *se; + struct task_struct *p; + struct rb_node *curr; + struct rq *rq = __rq; + + curr = first_fair(&rq->cfs); + if (!curr) + return; + + while (curr) { + se = rb_entry(curr, struct sched_entity, run_node); + if (!entity_is_task(se)) + continue; + + p = task_of(se); + + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); + + curr = rb_next(curr); + } +} + +#endif + /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread @@ -2468,7 +2497,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ - ftrace_wake_up_task(p, rq->curr); + ftrace_wake_up_task(rq, p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2613,7 +2642,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_task(p, rq->curr); + ftrace_wake_up_task(rq, p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2786,7 +2815,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - ftrace_ctx_switch(prev, next); + ftrace_ctx_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* -- cgit v1.1 From 24cd5d111e8c713e62cda7ca1d01232402e3d3c9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: trace curr/next tasks Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index b9208a0..673b588 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2398,8 +2398,8 @@ static int sched_balance_self(int cpu, int flag) void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { - struct sched_entity *se; struct task_struct *p; + struct sched_entity *se; struct rb_node *curr; struct rq *rq = __rq; @@ -2407,6 +2407,17 @@ void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) if (!curr) return; + if (rq->cfs.curr) { + p = task_of(rq->cfs.curr); + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); + } + if (rq->cfs.next) { + p = task_of(rq->cfs.next); + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); + } + while (curr) { se = rb_entry(curr, struct sched_entity, run_node); if (!entity_is_task(se)) -- cgit v1.1 From 017730c11241e26577673eb9d957cfc66172ea91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix wakeups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 673b588..9ca4a2e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -642,6 +642,24 @@ static inline void update_rq_clock(struct rq *rq) # define const_debug static const #endif +/** + * runqueue_is_locked + * + * Returns true if the current cpu runqueue is locked. + * This interface allows printk to be called with the runqueue lock + * held and know whether or not it is OK to wake up the klogd. + */ +int runqueue_is_locked(void) +{ + int cpu = get_cpu(); + struct rq *rq = cpu_rq(cpu); + int ret; + + ret = spin_is_locked(&rq->lock); + put_cpu(); + return ret; +} + /* * Debugging: various feature bits */ -- cgit v1.1 From 36fc25a9f48deacd8aa08cd2d1c186a4e412604f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched tree fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 9ca4a2e..3bc7c53 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2414,6 +2414,23 @@ static int sched_balance_self(int cpu, int flag) #ifdef CONFIG_CONTEXT_SWITCH_TRACER +void ftrace_task(struct task_struct *p, void *__tr, void *__data) +{ +#if 0 + /* + * trace timeline tree + */ + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); +#else + /* + * trace balance metrics + */ + __trace_special(__tr, __data, + p->pid, p->se.avg_overlap, 0); +#endif +} + void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { struct task_struct *p; @@ -2421,32 +2438,22 @@ void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) struct rb_node *curr; struct rq *rq = __rq; - curr = first_fair(&rq->cfs); - if (!curr) - return; - if (rq->cfs.curr) { p = task_of(rq->cfs.curr); - __trace_special(__tr, __data, - p->pid, p->se.vruntime, p->se.sum_exec_runtime); + ftrace_task(p, __tr, __data); } if (rq->cfs.next) { p = task_of(rq->cfs.next); - __trace_special(__tr, __data, - p->pid, p->se.vruntime, p->se.sum_exec_runtime); + ftrace_task(p, __tr, __data); } - while (curr) { + for (curr = first_fair(&rq->cfs); curr; curr = rb_next(curr)) { se = rb_entry(curr, struct sched_entity, run_node); if (!entity_is_task(se)) continue; p = task_of(se); - - __trace_special(__tr, __data, - p->pid, p->se.vruntime, p->se.sum_exec_runtime); - - curr = rb_next(curr); + ftrace_task(p, __tr, __data); } } -- cgit v1.1 From 5429db2d26a59903c81a4f6c6dae7eb9daaea5fc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: fix wakeup callback Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 3bc7c53..1ec3fb2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2533,7 +2533,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ - ftrace_wake_up_task(rq, p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2548,6 +2547,7 @@ out_activate: success = 1; out_running: + ftrace_wake_up_task(rq, p, rq->curr); check_preempt_curr(rq, p); p->state = TASK_RUNNING; -- cgit v1.1 From 4d9493c90f8e6e1b164aede3814010a290161abb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:54 +0200 Subject: ftrace: remove add-hoc code Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 1ec3fb2..ad95cca 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2412,53 +2412,6 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ -#ifdef CONFIG_CONTEXT_SWITCH_TRACER - -void ftrace_task(struct task_struct *p, void *__tr, void *__data) -{ -#if 0 - /* - * trace timeline tree - */ - __trace_special(__tr, __data, - p->pid, p->se.vruntime, p->se.sum_exec_runtime); -#else - /* - * trace balance metrics - */ - __trace_special(__tr, __data, - p->pid, p->se.avg_overlap, 0); -#endif -} - -void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ - struct task_struct *p; - struct sched_entity *se; - struct rb_node *curr; - struct rq *rq = __rq; - - if (rq->cfs.curr) { - p = task_of(rq->cfs.curr); - ftrace_task(p, __tr, __data); - } - if (rq->cfs.next) { - p = task_of(rq->cfs.next); - ftrace_task(p, __tr, __data); - } - - for (curr = first_fair(&rq->cfs); curr; curr = rb_next(curr)) { - se = rb_entry(curr, struct sched_entity, run_node); - if (!entity_is_task(se)) - continue; - - p = task_of(se); - ftrace_task(p, __tr, __data); - } -} - -#endif - /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread -- cgit v1.1 From 5b82a1b08a00b2adca3d9dd9777efff40b7aaaa1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Port ftrace to markers Porting ftrace to the marker infrastructure. Don't need to chain to the wakeup tracer from the sched tracer, because markers support multiple probes connected. Signed-off-by: Mathieu Desnoyers CC: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index ad95cca..e2e985e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2500,7 +2500,9 @@ out_activate: success = 1; out_running: - ftrace_wake_up_task(rq, p, rq->curr); + trace_mark(kernel_sched_wakeup, + "pid %d state %ld ## rq %p task %p rq->curr %p", + p->pid, p->state, rq, p, rq->curr); check_preempt_curr(rq, p); p->state = TASK_RUNNING; @@ -2631,7 +2633,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_task(rq, p, rq->curr); + trace_mark(kernel_sched_wakeup_new, + "pid %d state %ld ## rq %p task %p rq->curr %p", + p->pid, p->state, rq, p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2804,7 +2808,11 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - ftrace_ctx_switch(rq, prev, next); + trace_mark(kernel_sched_schedule, + "prev_pid %d next_pid %d prev_state %ld " + "## rq %p prev %p next %p", + prev->pid, next->pid, prev->state, + rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* -- cgit v1.1 From c6531cce6e6e4b99bcda46b6268d6f2d9e30aea4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:14 +0200 Subject: sched: do not trace sched_clock The tracer uses sched_clock, so do not trace it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index e2e985e..6590a82 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -884,12 +884,12 @@ static unsigned long long __cpu_clock(int cpu) * For kernel-internal use: high-speed (but slightly incorrect) per-cpu * clock constructed from sched_clock(): */ -unsigned long long cpu_clock(int cpu) +unsigned long long notrace cpu_clock(int cpu) { unsigned long long prev_cpu_time, time, delta_time; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prev_cpu_time = per_cpu(prev_cpu_time, cpu); time = __cpu_clock(cpu) + per_cpu(time_offset, cpu); delta_time = time-prev_cpu_time; @@ -898,7 +898,7 @@ unsigned long long cpu_clock(int cpu) time = __sync_cpu_clock(time, cpu); per_cpu(prev_cpu_time, cpu) = time; } - local_irq_restore(flags); + raw_local_irq_restore(flags); return time; } -- cgit v1.1 From 040ec23d07f95285e9777a85cda29cb339a3065b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jun 2008 01:45:29 -0700 Subject: sched: sched_clock() lockdep fix Sitsofe Wheeler bisected the following commit to cause a lockdep to warn about itself and turn itself off: > commit c6531cce6e6e4b99bcda46b6268d6f2d9e30aea4 > Author: Ingo Molnar > Date: Mon May 12 21:21:14 2008 +0200 > > sched: do not trace sched_clock do not use raw irq flags in cpu_clock() as it causes lockdep to lose track of the true state of the IRQ flag. Reported-and-bisected-by: Sitsofe Wheeler Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 6590a82..b8c9fe6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -889,7 +889,7 @@ unsigned long long notrace cpu_clock(int cpu) unsigned long long prev_cpu_time, time, delta_time; unsigned long flags; - raw_local_irq_save(flags); + local_irq_save(flags); prev_cpu_time = per_cpu(prev_cpu_time, cpu); time = __cpu_clock(cpu) + per_cpu(time_offset, cpu); delta_time = time-prev_cpu_time; @@ -898,7 +898,7 @@ unsigned long long notrace cpu_clock(int cpu) time = __sync_cpu_clock(time, cpu); per_cpu(prev_cpu_time, cpu) = time; } - raw_local_irq_restore(flags); + local_irq_restore(flags); return time; } -- cgit v1.1