diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-10-17 19:27:02 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-20 14:05:02 +0200 |
commit | ffda12a17a324103e9900fa1035309811eecbfe5 (patch) | |
tree | 79fe8aae79a41b467f2cdd055036b3017642a9f6 /kernel | |
parent | b0aa51b999c449e5e3f9faa1ee406e052d407fe7 (diff) | |
download | op-kernel-dev-ffda12a17a324103e9900fa1035309811eecbfe5.zip op-kernel-dev-ffda12a17a324103e9900fa1035309811eecbfe5.tar.gz |
sched: optimize group load balancer
I noticed that tg_shares_up() unconditionally takes rq-locks for all cpus
in the sched_domain. This hurts.
We need the rq-locks whenever we change the weight of the per-cpu group sched
entities. To alleviate this a little, only change the weight when the new
weight is at least shares_thresh away from the old value.
This avoids the rq-lock for the top level entries, since those will never
be re-weighted, and fuzzes the lower level entries a little to gain performance
in semi-stable situations.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 45 | ||||
-rw-r--r-- | kernel/sysctl.c | 10 |
2 files changed, 35 insertions, 20 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index c530b84..11ca390 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -818,6 +818,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; unsigned int sysctl_sched_shares_ratelimit = 250000; /* + * Inject some fuzzyness into changing the per-cpu group shares + * this avoids remote rq-locks at the expense of fairness. + * default: 4 + */ +unsigned int sysctl_sched_shares_thresh = 4; + +/* * period over which we measure -rt task cpu usage in us. * default: 1s */ @@ -1453,8 +1460,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); * Calculate and set the cpu's group shares. */ static void -__update_group_shares_cpu(struct task_group *tg, int cpu, - unsigned long sd_shares, unsigned long sd_rq_weight) +update_group_shares_cpu(struct task_group *tg, int cpu, + unsigned long sd_shares, unsigned long sd_rq_weight) { int boost = 0; unsigned long shares; @@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, * */ shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); + shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); - /* - * record the actual number of shares, not the boosted amount. - */ - tg->cfs_rq[cpu]->shares = boost ? 0 : shares; - tg->cfs_rq[cpu]->rq_weight = rq_weight; + if (abs(shares - tg->se[cpu]->load.weight) > + sysctl_sched_shares_thresh) { + struct rq *rq = cpu_rq(cpu); + unsigned long flags; - if (shares < MIN_SHARES) - shares = MIN_SHARES; - else if (shares > MAX_SHARES) - shares = MAX_SHARES; + spin_lock_irqsave(&rq->lock, flags); + /* + * record the actual number of shares, not the boosted amount. + */ + tg->cfs_rq[cpu]->shares = boost ? 
0 : shares; + tg->cfs_rq[cpu]->rq_weight = rq_weight; - __set_se_shares(tg->se[cpu], shares); + __set_se_shares(tg->se[cpu], shares); + spin_unlock_irqrestore(&rq->lock, flags); + } } /* @@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_group *tg, void *data) if (!rq_weight) rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; - for_each_cpu_mask(i, sd->span) { - struct rq *rq = cpu_rq(i); - unsigned long flags; - - spin_lock_irqsave(&rq->lock, flags); - __update_group_shares_cpu(tg, i, shares, rq_weight); - spin_unlock_irqrestore(&rq->lock, flags); - } + for_each_cpu_mask(i, sd->span) + update_group_shares_cpu(tg, i, shares, rq_weight); return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 617d41e..3d804f4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = { }, { .ctl_name = CTL_UNNUMBERED, + .procname = "sched_shares_thresh", + .data = &sysctl_sched_shares_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + { + .ctl_name = CTL_UNNUMBERED, .procname = "sched_child_runs_first", .data = &sysctl_sched_child_runs_first, .maxlen = sizeof(unsigned int), |