From 45afb1734fa6323a8ba08bd6c392ee227df67dde Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Sat, 7 Jul 2012 16:49:02 +0900
Subject: sched: Use task_rq_unlock() in __sched_setscheduler()

It seems there's no specific reason to open-code it.  I guess
commit 0122ec5b02f76 ("sched: Add p->pi_lock to task_rq_lock()")
simply missed it.  Let's be consistent with others.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1341647342-6742-1-git-send-email-namhyung@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5d011ef..2cb4e77 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4340,9 +4340,7 @@ recheck:
 	 */
 	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
 			param->sched_priority == p->rt_priority))) {
-
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		return 0;
 	}
 
-- 
cgit v1.1


From 014acbf0d5c8445e0ff88ae60edd676dd9cc461c Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue0@gmail.com>
Date: Thu, 12 Jul 2012 15:03:42 +0800
Subject: sched: Fix minor code style issues

Delete redudant spaces between type name and data name or operators.

Signed-off-by: Ying Xue <ying.xue0@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1342076622-6606-1-git-send-email-ying.xue0@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/cpupri.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index d72586f..23aa789 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -65,8 +65,8 @@ static int convert_prio(int prio)
 int cpupri_find(struct cpupri *cp, struct task_struct *p,
 		struct cpumask *lowest_mask)
 {
-	int                  idx      = 0;
-	int                  task_pri = convert_prio(p->prio);
+	int idx = 0;
+	int task_pri = convert_prio(p->prio);
 
 	if (task_pri >= MAX_RT_PRIO)
 		return 0;
@@ -137,9 +137,9 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
  */
 void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 {
-	int                 *currpri = &cp->cpu_to_pri[cpu];
-	int                  oldpri  = *currpri;
-	int                  do_mb = 0;
+	int *currpri = &cp->cpu_to_pri[cpu];
+	int oldpri = *currpri;
+	int do_mb = 0;
 
 	newpri = convert_prio(newpri);
 
-- 
cgit v1.1


From b9403130a5350fca59a50ed11c198cb8c7e54119 Mon Sep 17 00:00:00 2001
From: Michael Wang <wangyun@linux.vnet.ibm.com>
Date: Thu, 12 Jul 2012 16:10:13 +0800
Subject: sched/cleanups: Add load balance cpumask pointer to 'struct lb_env'

With this patch struct ld_env will have a pointer of the load balancing
cpumask and we don't need to pass a cpumask around anymore.

Signed-off-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4FFE8665.3080705@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 22321db..d0cc03b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3069,6 +3069,9 @@ struct lb_env {
 	int			new_dst_cpu;
 	enum cpu_idle_type	idle;
 	long			imbalance;
+	/* The set of CPUs under consideration for load-balancing */
+	struct cpumask		*cpus;
+
 	unsigned int		flags;
 
 	unsigned int		loop;
@@ -3653,8 +3656,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  */
 static inline void update_sg_lb_stats(struct lb_env *env,
 			struct sched_group *group, int load_idx,
-			int local_group, const struct cpumask *cpus,
-			int *balance, struct sg_lb_stats *sgs)
+			int local_group, int *balance, struct sg_lb_stats *sgs)
 {
 	unsigned long nr_running, max_nr_running, min_nr_running;
 	unsigned long load, max_cpu_load, min_cpu_load;
@@ -3671,7 +3673,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	max_nr_running = 0;
 	min_nr_running = ~0UL;
 
-	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
 		nr_running = rq->nr_running;
@@ -3800,8 +3802,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
  * @sds: variable to hold the statistics for this sched_domain.
  */
 static inline void update_sd_lb_stats(struct lb_env *env,
-				      const struct cpumask *cpus,
-				      int *balance, struct sd_lb_stats *sds)
+					int *balance, struct sd_lb_stats *sds)
 {
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
@@ -3818,8 +3819,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 
 		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(env, sg, load_idx, local_group,
-				   cpus, balance, &sgs);
+		update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);
 
 		if (local_group && !(*balance))
 			return;
@@ -4055,7 +4055,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  * to restore balance.
  *
  * @env: The load balancing environment.
- * @cpus: The set of CPUs under consideration for load-balancing.
  * @balance: Pointer to a variable indicating if this_cpu
  *	is the appropriate cpu to perform load balancing at this_level.
  *
@@ -4065,7 +4064,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  *		   put to idle by rebalancing its tasks onto our group.
  */
 static struct sched_group *
-find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
+find_busiest_group(struct lb_env *env, int *balance)
 {
 	struct sd_lb_stats sds;
 
@@ -4075,7 +4074,7 @@ find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
 	 * Compute the various statistics relavent for load balancing at
 	 * this level.
 	 */
-	update_sd_lb_stats(env, cpus, balance, &sds);
+	update_sd_lb_stats(env, balance, &sds);
 
 	/*
 	 * this_cpu is not the appropriate cpu to perform load balancing at
@@ -4155,8 +4154,7 @@ ret:
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
 static struct rq *find_busiest_queue(struct lb_env *env,
-				     struct sched_group *group,
-				     const struct cpumask *cpus)
+				     struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -4171,7 +4169,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		if (!capacity)
 			capacity = fix_small_capacity(env->sd, group);
 
-		if (!cpumask_test_cpu(i, cpus))
+		if (!cpumask_test_cpu(i, env->cpus))
 			continue;
 
 		rq = cpu_rq(i);
@@ -4252,6 +4250,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.dst_grpmask    = sched_group_cpus(sd->groups),
 		.idle		= idle,
 		.loop_break	= sched_nr_migrate_break,
+		.cpus		= cpus,
 	};
 
 	cpumask_copy(cpus, cpu_active_mask);
@@ -4260,7 +4259,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	schedstat_inc(sd, lb_count[idle]);
 
 redo:
-	group = find_busiest_group(&env, cpus, balance);
+	group = find_busiest_group(&env, balance);
 
 	if (*balance == 0)
 		goto out_balanced;
@@ -4270,7 +4269,7 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(&env, group, cpus);
+	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
-- 
cgit v1.1


From a35b6466aabb051568b844e8c63f87a356d3d129 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Aug 2012 21:46:40 +0200
Subject: sched, cgroup: Reduce rq->lock hold times for large cgroup
 hierarchies

Peter Portante reported that for large cgroup hierarchies (and or on
large CPU counts) we get immense lock contention on rq->lock and stuff
stops working properly.

His workload was a ton of processes, each in their own cgroup,
everybody idling except for a sporadic wakeup once every so often.

It was found that:

  schedule()
    idle_balance()
      load_balance()
        local_irq_save()
        double_rq_lock()
        update_h_load()
          walk_tg_tree(tg_load_down)
            tg_load_down()

Results in an entire cgroup hierarchy walk under rq->lock for every
new-idle balance and since new-idle balance isn't throttled this
results in a lot of work while holding the rq->lock.

This patch does two things, it removes the work from under rq->lock
based on the good principle of race and pray which is widely employed
in the load-balancer as a whole. And secondly it throttles the
update_h_load() calculation to max once per jiffy.

I considered excluding update_h_load() for new-idle balance
all-together, but purely relying on regular balance passes to update
this data might not work out under some rare circumstances where the
new-idle busiest isn't the regular busiest for a while (unlikely, but
a nightmare to debug if someone hits it and suffers).

Cc: pjt@google.com
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Reported-by: Peter Portante <pportant@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-aaarrzfpnaam7pqrekofu8a6@git.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/fair.c  | 11 +++++++++--
 kernel/sched/sched.h |  6 +++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d0cc03b..c219bf8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3387,6 +3387,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -4293,11 +4301,10 @@ redo:
 		env.src_rq    = busiest;
 		env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+		update_h_load(env.src_cpu);
 more_balance:
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
-		if (!env.loop)
-			update_h_load(env.src_cpu);
 
 		/*
 		 * cur_ld_moved - load moved in current iteration
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c35a1a7..531411b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -374,7 +374,11 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
-#endif
+#ifdef CONFIG_SMP
+	unsigned long h_load_throttle;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 #ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif
-- 
cgit v1.1


From bea6832cc8c4a0a9a65dd17da6aaa657fe27bc3e Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 8 Aug 2012 11:27:15 +0200
Subject: sched: fix divide by zero at {thread_group,task}_times

On architectures where cputime_t is 64 bit type, is possible to trigger
divide by zero on do_div(temp, (__force u32) total) line, if total is a
non zero number but has lower 32 bit's zeroed. Removing casting is not
a good solution since some do_div() implementations do cast to u32
internally.

This problem can be triggered in practice on very long lived processes:

  PID: 2331   TASK: ffff880472814b00  CPU: 2   COMMAND: "oraagent.bin"
   #0 [ffff880472a51b70] machine_kexec at ffffffff8103214b
   #1 [ffff880472a51bd0] crash_kexec at ffffffff810b91c2
   #2 [ffff880472a51ca0] oops_end at ffffffff814f0b00
   #3 [ffff880472a51cd0] die at ffffffff8100f26b
   #4 [ffff880472a51d00] do_trap at ffffffff814f03f4
   #5 [ffff880472a51d60] do_divide_error at ffffffff8100cfff
   #6 [ffff880472a51e00] divide_error at ffffffff8100be7b
      [exception RIP: thread_group_times+0x56]
      RIP: ffffffff81056a16  RSP: ffff880472a51eb8  RFLAGS: 00010046
      RAX: bc3572c9fe12d194  RBX: ffff880874150800  RCX: 0000000110266fad
      RDX: 0000000000000000  RSI: ffff880472a51eb8  RDI: 001038ae7d9633dc
      RBP: ffff880472a51ef8   R8: 00000000b10a3a64   R9: ffff880874150800
      R10: 00007fcba27ab680  R11: 0000000000000202  R12: ffff880472a51f08
      R13: ffff880472a51f10  R14: 0000000000000000  R15: 0000000000000007
      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
   #7 [ffff880472a51f00] do_sys_times at ffffffff8108845d
   #8 [ffff880472a51f40] sys_times at ffffffff81088524
   #9 [ffff880472a51f80] system_call_fastpath at ffffffff8100b0f2
      RIP: 0000003808caac3a  RSP: 00007fcba27ab6d8  RFLAGS: 00000202
      RAX: 0000000000000064  RBX: ffffffff8100b0f2  RCX: 0000000000000000
      RDX: 00007fcba27ab6e0  RSI: 000000000076d58e  RDI: 00007fcba27ab6e0
      RBP: 00007fcba27ab700   R8: 0000000000000020   R9: 000000000000091b
      R10: 00007fcba27ab680  R11: 0000000000000202  R12: 00007fff9ca41940
      R13: 0000000000000000  R14: 00007fcba27ac9c0  R15: 00007fff9ca41940
      ORIG_RAX: 0000000000000064  CS: 0033  SS: 002b

Cc: stable@vger.kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120808092714.GA3580@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/core.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2cb4e77..e2c5841 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
-- 
cgit v1.1


From 35cf4e50b16331def6cfcbee11e49270b6db07f5 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 7 Aug 2012 05:00:13 +0200
Subject: sched,cgroup: Fix up task_groups list

With multiple instances of task_groups, for_each_rt_rq() is a noop,
no task groups having been added to the rt.c list instance.  This
renders __enable/disable_runtime() and print_rt_stats() noop, the
user (non) visible effect being that rt task groups are missing in
/proc/sched_debug.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: stable@kernel.org # v3.3+
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1344308413.6846.7.camel@marge.simpson.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/core.c  | 1 +
 kernel/sched/sched.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e2c5841..6aa212f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7252,6 +7252,7 @@ int in_sched_functions(unsigned long addr)
 
 #ifdef CONFIG_CGROUP_SCHED
 struct task_group root_task_group;
+LIST_HEAD(task_groups);
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 531411b..f6714d0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex;
 struct cfs_rq;
 struct rt_rq;
 
-static LIST_HEAD(task_groups);
+extern struct list_head task_groups;
 
 struct cfs_bandwidth {
 #ifdef CONFIG_CFS_BANDWIDTH
-- 
cgit v1.1


From e221d028bb08b47e624c5f0a31732c642db9d19a Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 7 Aug 2012 10:02:38 +0200
Subject: sched,rt: fix isolated CPUs leaving root_task_group indefinitely
 throttled

Root task group bandwidth replenishment must service all CPUs, regardless of
where the timer was last started, and regardless of the isolation mechanism,
lest 'Quoth the Raven, "Nevermore"' become rt scheduling policy.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1344326558.6968.25.camel@marge.simpson.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/rt.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'kernel/sched')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 573e1ca..944cb68 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -788,6 +788,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	const struct cpumask *span;
 
 	span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * FIXME: isolated CPUs should really leave the root task group,
+	 * whether they are isolcpus or were isolated via cpusets, lest
+	 * the timer run on a CPU which does not service all runqueues,
+	 * potentially leaving other CPUs indefinitely throttled.  If
+	 * isolation is really required, the user will turn the throttle
+	 * off to kill the perturbations it causes anyway.  Meanwhile,
+	 * this maintains functionality for boot and/or troubleshooting.
+	 */
+	if (rt_b == &root_task_group.rt_bandwidth)
+		span = cpu_online_mask;
+#endif
 	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
-- 
cgit v1.1


From 8f6189684eb4e85e6c593cd710693f09c944450a Mon Sep 17 00:00:00 2001
From: Mike Galbraith <mgalbraith@suse.de>
Date: Sat, 4 Aug 2012 05:44:14 +0200
Subject: sched: Fix migration thread runtime bogosity

Make stop scheduler class do the same accounting as other classes,

Migration threads can be caught in the act while doing exec balancing,
leading to the below due to use of unmaintained ->se.exec_start.  The
load that triggered this particular instance was an apparently out of
control heavily threaded application that does system monitoring in
what equated to an exec bomb, with one of the VERY frequently migrated
tasks being ps.

%CPU   PID USER     CMD
99.3    45 root     [migration/10]
97.7    53 root     [migration/12]
97.0    57 root     [migration/13]
90.1    49 root     [migration/11]
89.6    65 root     [migration/15]
88.7    17 root     [migration/3]
80.4    37 root     [migration/8]
78.1    41 root     [migration/9]
44.2    13 root     [migration/2]

Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1344051854.6739.19.camel@marge.simpson.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/stop_task.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7b386e8..da5eb5b 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->on_rq)
+	if (stop && stop->on_rq) {
+		stop->se.exec_start = rq->clock_task;
 		return stop;
+	}
 
 	return NULL;
 }
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
 
 static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
+	struct task_struct *curr = rq->curr;
+	u64 delta_exec;
+
+	delta_exec = rq->clock_task - curr->se.exec_start;
+	if (unlikely((s64)delta_exec < 0))
+		delta_exec = 0;
+
+	schedstat_set(curr->se.statistics.exec_max,
+			max(curr->se.statistics.exec_max, delta_exec));
+
+	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
+	curr->se.exec_start = rq->clock_task;
+	cpuacct_charge(curr, delta_exec);
 }
 
 static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
 
 static void set_curr_task_stop(struct rq *rq)
 {
+	struct task_struct *stop = rq->stop;
+
+	stop->se.exec_start = rq->clock_task;
 }
 
 static void switched_to_stop(struct rq *rq, struct task_struct *p)
-- 
cgit v1.1