sched, cgroup: Optimize load_balance_fair()

Use for_each_leaf_cfs_rq() instead of list_for_each_entry_rcu(). This ensures that load_balance_fair() only iterates those task_groups that actually have tasks on busiest, and that we iterate bottom-up, trying to move light groups before the heavier ones. No idea if it will actually work out to be beneficial in practice; does anybody have a cgroup workload that might show a difference one way or the other? [ Also move update_h_load() to sched_fair.c, losing the #ifdef-ery. ] Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Reviewed-by: Paul Turner <pjt@google.com> Link: http://lkml.kernel.org/r/1310557009.2586.28.camel@twins Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2011-07-13 13:09:25 +0200
committer: Ingo Molnar <mingo@elte.hu> 2011-07-21 18:01:46 +0200
commit: 9763b67fb9f3050c6da739105888327587c30c4d (patch)
tree: 822e6a5243c3d872f86d9c9b980896bc4cd8a491 /kernel/sched_fair.c
parent: 9598c82dcacadc3b9daa8170613fd054c6124d30 (diff)
download: op-kernel-dev-9763b67fb9f3050c6da739105888327587c30c4d.zip
op-kernel-dev-9763b67fb9f3050c6da739105888327587c30c4d.tar.gz
1 files changed, 35 insertions, 5 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6cdff84..180bcf1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2232,11 +2232,43 @@ static void update_shares(int cpu)
 	struct rq *rq = cpu_rq(cpu);
 
 	rcu_read_lock();
+	/*
+	 * Iterates the task_group tree in a bottom up fashion, see
+	 * list_add_leaf_cfs_rq() for details.
+	 */
 	for_each_leaf_cfs_rq(rq, cfs_rq)
 		update_shares_cpu(cfs_rq->tg, cpu);
 	rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parent's load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+	unsigned long load;
+	long cpu = (long)data;
+
+	if (!tg->parent) {
+		load = cpu_rq(cpu)->load.weight;
+	} else {
+		load = tg->parent->cfs_rq[cpu]->h_load;
+		load *= tg->se[cpu]->load.weight;
+		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+	}
+
+	tg->cfs_rq[cpu]->h_load = load;
+
+	return 0;
+}
+
+static void update_h_load(long cpu)
+{
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -2244,14 +2276,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
-	int busiest_cpu = cpu_of(busiest);
-	struct task_group *tg;
+	struct cfs_rq *busiest_cfs_rq;
 
 	rcu_read_lock();
-	update_h_load(busiest_cpu);
+	update_h_load(cpu_of(busiest));
 
-	list_for_each_entry_rcu(tg, &task_groups, list) {
-		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+	for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
 		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
 		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
 		u64 rem_load, moved_load;
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-07-13 13:09:25 +0200
committer	Ingo Molnar <mingo@elte.hu>	2011-07-21 18:01:46 +0200
commit	9763b67fb9f3050c6da739105888327587c30c4d (patch)
tree	822e6a5243c3d872f86d9c9b980896bc4cd8a491 /kernel/sched_fair.c
parent	9598c82dcacadc3b9daa8170613fd054c6124d30 (diff)
download	op-kernel-dev-9763b67fb9f3050c6da739105888327587c30c4d.zip op-kernel-dev-9763b67fb9f3050c6da739105888327587c30c4d.tar.gz