sched: Micro-optimize the smart wake-affine logic

Smart wake-affine currently uses the node size as its factor, but the overhead of the cpumask operation needed to compute it is high. This patch therefore introduces the 'sd_llc_size' per-CPU variable, which records the size of the highest cache-sharing domain, and uses it as the new factor. This both reduces the overhead and makes the factor more reasonable.

Tested-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
Tested-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Link: http://lkml.kernel.org/r/51D5008E.6030102@linux.vnet.ibm.com
[ Tidied up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Peter Zijlstra <peterz@infradead.org> 2013-07-04 12:56:46 +0800
committer: Ingo Molnar <mingo@kernel.org> 2013-07-23 12:22:06 +0200
commit: 7d9ffa8961482232d964173cccba6e14d2d543b2 (patch)
tree: 80fd615fb64b1bd82e0de0e5d1e8be2bae8cb06d /kernel
parent: 62470419e993f8d9d93db0effd3af4296ecb79a5 (diff)
download: op-kernel-dev-7d9ffa8961482232d964173cccba6e14d2d543b2.zip
op-kernel-dev-7d9ffa8961482232d964173cccba6e14d2d543b2.tar.gz
3 files changed, 8 insertions, 2 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7c32cb..6df0fbe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5083,18 +5083,23 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * two cpus are in the same cache domain, see cpus_share_cache().
  */
 DEFINE_PER_CPU(struct sched_domain *, sd_llc);
+DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
 
 static void update_top_cache_domain(int cpu)
 {
 	struct sched_domain *sd;
 	int id = cpu;
+	int size = 1;
 
 	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-	if (sd)
+	if (sd) {
 		id = cpumask_first(sched_domain_span(sd));
+		size = cpumask_weight(sched_domain_span(sd));
+	}
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
+	per_cpu(sd_llc_size, cpu) = size;
 	per_cpu(sd_llc_id, cpu) = id;
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 860063a..f237437 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3175,7 +3175,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 static int wake_wide(struct task_struct *p)
 {
-	int factor = nr_cpus_node(cpu_to_node(smp_processor_id()));
+	int factor = this_cpu_read(sd_llc_size);
 
 	/*
 	 * Yeah, it's the switching-frequency, could means many wakee or
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5e129ef..4c1cb80 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -594,6 +594,7 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 }
 
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
+DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
 
 struct sched_group_power {
author	Peter Zijlstra <peterz@infradead.org>	2013-07-04 12:56:46 +0800
committer	Ingo Molnar <mingo@kernel.org>	2013-07-23 12:22:06 +0200
commit	7d9ffa8961482232d964173cccba6e14d2d543b2 (patch)
tree	80fd615fb64b1bd82e0de0e5d1e8be2bae8cb06d /kernel
parent	62470419e993f8d9d93db0effd3af4296ecb79a5 (diff)
download	op-kernel-dev-7d9ffa8961482232d964173cccba6e14d2d543b2.zip op-kernel-dev-7d9ffa8961482232d964173cccba6e14d2d543b2.tar.gz