summaryrefslogtreecommitdiffstats
path: root/sys/kern/sched_ule.c
diff options
context:
space:
mode:
authormav <mav@FreeBSD.org>2012-04-09 18:24:58 +0000
committermav <mav@FreeBSD.org>2012-04-09 18:24:58 +0000
commite1ffe54fb78e2405ff70048e1e10c582be09a67c (patch)
treeff15f388d9d66904d2142b1c9bdefc92e5f42f49 /sys/kern/sched_ule.c
parent628004ddfb14301b6958eff7fd70dc889f1b6f5e (diff)
downloadFreeBSD-src-e1ffe54fb78e2405ff70048e1e10c582be09a67c.zip
FreeBSD-src-e1ffe54fb78e2405ff70048e1e10c582be09a67c.tar.gz
Microoptimize cpu_search().
According to profiling, this makes cpu_search() take 6% of CPU time on hackbench, with its million context switches per second, instead of the 8% it took before.
Diffstat (limited to 'sys/kern/sched_ule.c')
-rw-r--r--sys/kern/sched_ule.c52
1 files changed, 28 insertions, 24 deletions
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index f930afc..7e5af09 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -594,32 +594,34 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
cpuset_t cpumask;
struct cpu_group *child;
struct tdq *tdq;
- int cpu, i, hload, lload, load, total, rnd;
+ int cpu, i, hload, lload, load, total, rnd, *rndptr;
total = 0;
cpumask = cg->cg_mask;
if (match & CPU_SEARCH_LOWEST) {
lload = INT_MAX;
- low->cs_load = INT_MAX;
lgroup = *low;
}
if (match & CPU_SEARCH_HIGHEST) {
- hload = -1;
- high->cs_load = -1;
+ hload = INT_MIN;
hgroup = *high;
}
/* Iterate through the child CPU groups and then remaining CPUs. */
- for (i = 0, cpu = 0; i <= cg->cg_children; ) {
- if (i >= cg->cg_children) {
- while (cpu <= mp_maxid && !CPU_ISSET(cpu, &cpumask))
- cpu++;
- if (cpu > mp_maxid)
+ for (i = cg->cg_children, cpu = mp_maxid; i >= 0; ) {
+ if (i == 0) {
+ while (cpu >= 0 && !CPU_ISSET(cpu, &cpumask))
+ cpu--;
+ if (cpu < 0)
break;
child = NULL;
} else
- child = &cg->cg_child[i];
+ child = &cg->cg_child[i - 1];
+ if (match & CPU_SEARCH_LOWEST)
+ lgroup.cs_cpu = -1;
+ if (match & CPU_SEARCH_HIGHEST)
+ hgroup.cs_cpu = -1;
if (child) { /* Handle child CPU group. */
CPU_NAND(&cpumask, &child->cg_mask);
switch (match) {
@@ -636,23 +638,23 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
} else { /* Handle child CPU. */
tdq = TDQ_CPU(cpu);
load = tdq->tdq_load * 256;
- rnd = DPCPU_SET(randomval,
- DPCPU_GET(randomval) * 69069 + 5) >> 26;
+ rndptr = DPCPU_PTR(randomval);
+ rnd = (*rndptr = *rndptr * 69069 + 5) >> 26;
if (match & CPU_SEARCH_LOWEST) {
if (cpu == low->cs_prefer)
load -= 64;
/* If that CPU is allowed and get data. */
- if (CPU_ISSET(cpu, &lgroup.cs_mask) &&
- tdq->tdq_lowpri > lgroup.cs_pri &&
- tdq->tdq_load <= lgroup.cs_limit) {
+ if (tdq->tdq_lowpri > lgroup.cs_pri &&
+ tdq->tdq_load <= lgroup.cs_limit &&
+ CPU_ISSET(cpu, &lgroup.cs_mask)) {
lgroup.cs_cpu = cpu;
lgroup.cs_load = load - rnd;
}
}
if (match & CPU_SEARCH_HIGHEST)
- if (CPU_ISSET(cpu, &hgroup.cs_mask) &&
- tdq->tdq_load >= hgroup.cs_limit &&
- tdq->tdq_transferable) {
+ if (tdq->tdq_load >= hgroup.cs_limit &&
+ tdq->tdq_transferable &&
+ CPU_ISSET(cpu, &hgroup.cs_mask)) {
hgroup.cs_cpu = cpu;
hgroup.cs_load = load - rnd;
}
@@ -661,7 +663,7 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
/* We have info about child item. Compare it. */
if (match & CPU_SEARCH_LOWEST) {
- if (lgroup.cs_load != INT_MAX &&
+ if (lgroup.cs_cpu >= 0 &&
(load < lload ||
(load == lload && lgroup.cs_load < low->cs_load))) {
lload = load;
@@ -670,17 +672,19 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
}
}
if (match & CPU_SEARCH_HIGHEST)
- if (hgroup.cs_load >= 0 &&
+ if (hgroup.cs_cpu >= 0 &&
(load > hload ||
(load == hload && hgroup.cs_load > high->cs_load))) {
hload = load;
high->cs_cpu = hgroup.cs_cpu;
high->cs_load = hgroup.cs_load;
}
- if (child)
- i++;
- else
- cpu++;
+ if (child) {
+ i--;
+ if (i == 0 && CPU_EMPTY(&cpumask))
+ break;
+ } else
+ cpu--;
}
return (total);
}
OpenPOWER on IntegriCloud