author		jeff <jeff@FreeBSD.org>	2008-04-17 04:20:10 +0000
committer	jeff <jeff@FreeBSD.org>	2008-04-17 04:20:10 +0000
commit		9d30d1d7a4245f9915c17e74f97d6909fb40ada9 (patch)
tree		670d4df2ed30d93745545294f2c7cb18db2f3d2c /sys/kern
parent		a61cdf99c2c0d01312b2493fb6f7b245ba4f0ba9 (diff)
- Make SCHED_STATS more generic by adding a wrapper to create the
  variables and sysctl nodes.
- In reset walk the children of kern_sched_stats and reset the counters
  via the oid_arg1 pointer.  This allows us to add arbitrary counters to
  the tree and still reset them properly.
- Define a set of switch types to be passed with flags to mi_switch().
  These types are named SWT_*.  These types correspond to SCHED_STATS
  counters and are automatically handled in this way.
- Make the new SWT_ types more specific than the older switch stats.
  There are now stats for idle switches, remote idle wakeups, remote
  preemptions, ithreads idling, etc.
- Add switch statistics for ULE's pickcpu algorithm.  These stats include
  how much migration there is, how often affinity was successful, how
  often threads were migrated to the local cpu on wakeup, etc.

Sponsored by:	Nokia
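For readers skimming the diff, here is a minimal userland C sketch of the counting scheme this commit introduces: the SWT_* switch type rides in the low bits of the mi_switch() flags word alongside SW_VOL/SW_INVOL/SW_PREEMPT and indexes a counter array, and a reset simply zeroes every counter.  The numeric constant values and the record_switch(), reset_stats(), and main() helpers below are illustrative assumptions only; the real kernel bumps sched_switch_stats[] with SCHED_STAT_INC() and resets the counters by walking the kern.sched.stats sysctl children via their oid_arg1 pointers, as the kern_switch.c hunk below shows.

	#include <stdio.h>

	/*
	 * Illustrative flag layout (values assumed for this sketch; the real
	 * constants live in sys/sched.h and may differ).  The low byte carries
	 * the SWT_* switch type, the upper bits carry the SW_* class flags.
	 */
	#define	SW_TYPE_MASK	0x0ff	/* mask extracting the SWT_* type */
	#define	SW_VOL		0x100	/* voluntary switch */
	#define	SW_INVOL	0x200	/* involuntary switch */
	#define	SW_PREEMPT	0x400	/* preemption-driven switch */

	enum {
		SWT_NONE = 0,		/* uncategorized */
		SWT_PREEMPT,		/* preempted by a higher priority thread */
		SWT_RELINQUISH,		/* thread yielded the cpu */
		SWT_IDLE,		/* idle thread switched away */
		SWT_COUNT		/* array size, not a real type */
	};

	static long sched_switch_stats[SWT_COUNT];

	/* Stand-in for the SCHED_STAT_INC() call done once in mi_switch(). */
	static void
	record_switch(int flags)
	{
		sched_switch_stats[flags & SW_TYPE_MASK]++;
	}

	/* Stand-in for the sysctl reset handler: zero every counter. */
	static void
	reset_stats(void)
	{
		int i;

		for (i = 0; i < SWT_COUNT; i++)
			sched_switch_stats[i] = 0;
	}

	int
	main(void)
	{
		record_switch(SW_VOL | SWT_RELINQUISH);
		record_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT);
		printf("relinquish=%ld preempt=%ld\n",
		    sched_switch_stats[SWT_RELINQUISH],
		    sched_switch_stats[SWT_PREEMPT]);
		reset_stats();
		return (0);
	}

Because the type is simply an array index taken from the flags word, callers do not need a switch statement per reason, and new counters can be added by defining another SWT_ value and passing it from the relevant mi_switch() call site.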
Diffstat (limited to 'sys/kern')
-rw-r--r--	sys/kern/kern_intr.c		4
-rw-r--r--	sys/kern/kern_subr.c		2
-rw-r--r--	sys/kern/kern_switch.c		66
-rw-r--r--	sys/kern/kern_synch.c		6
-rw-r--r--	sys/kern/kern_thread.c		4
-rw-r--r--	sys/kern/sched_4bsd.c		10
-rw-r--r--	sys/kern/sched_ule.c		38
-rw-r--r--	sys/kern/subr_sleepqueue.c	6
-rw-r--r--	sys/kern/subr_trap.c		3
-rw-r--r--	sys/kern/subr_turnstile.c	3
10 files changed, 89 insertions, 53 deletions
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index e626988..5e464f9 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -1231,7 +1231,7 @@ ithread_loop(void *arg)
if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
thread_unlock(td);
}
@@ -1389,7 +1389,7 @@ ithread_loop(void *arg)
if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
thread_unlock(td);
}
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
index 2101026..c93f262 100644
--- a/sys/kern/kern_subr.c
+++ b/sys/kern/kern_subr.c
@@ -456,7 +456,7 @@ uio_yield(void)
DROP_GIANT();
thread_lock(td);
sched_prio(td, td->td_user_pri);
- mi_switch(SW_INVOL, NULL);
+ mi_switch(SW_INVOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
PICKUP_GIANT();
}
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 10bfb73..b7cb0b6 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -73,25 +73,35 @@ static int kern_sched_preemption = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
&kern_sched_preemption, 0, "Kernel preemption enabled");
+/*
+ * Support for scheduler stats exported via kern.sched.stats. All stats may
+ * be reset with kern.sched.stats.reset = 1. Stats may be defined elsewhere
+ * with SCHED_STAT_DEFINE().
+ */
#ifdef SCHED_STATS
-long switch_preempt;
-long switch_owepreempt;
-long switch_turnstile;
-long switch_sleepq;
-long switch_sleepqtimo;
-long switch_relinquish;
-long switch_needresched;
-static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, "");
+long sched_switch_stats[SWT_COUNT]; /* Switch reasons from mi_switch(). */
+
+SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
+SCHED_STAT_DEFINE_VAR(uncategorized, &sched_switch_stats[SWT_NONE], "");
+SCHED_STAT_DEFINE_VAR(preempt, &sched_switch_stats[SWT_PREEMPT], "");
+SCHED_STAT_DEFINE_VAR(owepreempt, &sched_switch_stats[SWT_OWEPREEMPT], "");
+SCHED_STAT_DEFINE_VAR(turnstile, &sched_switch_stats[SWT_TURNSTILE], "");
+SCHED_STAT_DEFINE_VAR(sleepq, &sched_switch_stats[SWT_SLEEPQ], "");
+SCHED_STAT_DEFINE_VAR(sleepqtimo, &sched_switch_stats[SWT_SLEEPQTIMO], "");
+SCHED_STAT_DEFINE_VAR(relinquish, &sched_switch_stats[SWT_RELINQUISH], "");
+SCHED_STAT_DEFINE_VAR(needresched, &sched_switch_stats[SWT_NEEDRESCHED], "");
+SCHED_STAT_DEFINE_VAR(idle, &sched_switch_stats[SWT_IDLE], "");
+SCHED_STAT_DEFINE_VAR(iwait, &sched_switch_stats[SWT_IWAIT], "");
+SCHED_STAT_DEFINE_VAR(suspend, &sched_switch_stats[SWT_SUSPEND], "");
+SCHED_STAT_DEFINE_VAR(remotepreempt, &sched_switch_stats[SWT_REMOTEPREEMPT],
+ "");
+SCHED_STAT_DEFINE_VAR(remotewakeidle, &sched_switch_stats[SWT_REMOTEWAKEIDLE],
+ "");
+
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
+ struct sysctl_oid *p;
int error;
int val;
@@ -101,14 +111,15 @@ sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
return (error);
if (val == 0)
return (0);
- switch_preempt = 0;
- switch_owepreempt = 0;
- switch_turnstile = 0;
- switch_sleepq = 0;
- switch_sleepqtimo = 0;
- switch_relinquish = 0;
- switch_needresched = 0;
-
+ /*
+ * Traverse the list of children of _kern_sched_stats and reset each
+ * to 0. Skip the reset entry.
+ */
+ SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
+ if (p == oidp || p->oid_arg1 == NULL)
+ continue;
+ *(long *)p->oid_arg1 = 0;
+ }
return (0);
}
@@ -164,6 +175,7 @@ void
critical_exit(void)
{
struct thread *td;
+ int flags;
td = curthread;
KASSERT(td->td_critnest != 0,
@@ -175,8 +187,12 @@ critical_exit(void)
td->td_critnest = 1;
thread_lock(td);
td->td_critnest--;
- SCHED_STAT_INC(switch_owepreempt);
- mi_switch(SW_INVOL|SW_PREEMPT, NULL);
+ flags = SW_INVOL | SW_PREEMPT;
+ if (TD_IS_IDLETHREAD(td))
+ flags |= SWT_IDLE;
+ else
+ flags |= SWT_OWEPREEMPT;
+ mi_switch(flags, NULL);
thread_unlock(td);
}
} else
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index b4defe9..c322ace 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -38,6 +38,7 @@
__FBSDID("$FreeBSD$");
#include "opt_ktrace.h"
+#include "opt_sched.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -390,6 +391,9 @@ mi_switch(int flags, struct thread *newtd)
td->td_ru.ru_nvcsw++;
else
td->td_ru.ru_nivcsw++;
+#ifdef SCHED_STATS
+ SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
+#endif
/*
* Compute the amount of time during which the current
* thread was running, and add that to its total so far.
@@ -533,7 +537,7 @@ yield(struct thread *td, struct yield_args *uap)
thread_lock(td);
sched_prio(td, PRI_MAX_TIMESHARE);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
td->td_retval[0] = 0;
return (0);
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 8745e5e..a3d5da7 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -723,7 +723,7 @@ thread_suspend_check(int return_instead)
td->td_flags |= TDF_BOUNDARY;
}
PROC_SUNLOCK(p);
- mi_switch(SW_INVOL, NULL);
+ mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
if (return_instead == 0)
td->td_flags &= ~TDF_BOUNDARY;
thread_unlock(td);
@@ -756,7 +756,7 @@ thread_suspend_switch(struct thread *td)
sched_sleep(td, 0);
PROC_SUNLOCK(p);
DROP_GIANT();
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_SUSPEND, NULL);
thread_unlock(td);
PICKUP_GIANT();
PROC_LOCK(p);
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 6879801..ed5cf62 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -316,8 +316,7 @@ maybe_preempt(struct thread *td)
TD_SET_RUNNING(td);
CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
td->td_proc->p_pid, td->td_name);
- SCHED_STAT_INC(switch_preempt);
- mi_switch(SW_INVOL|SW_PREEMPT, td);
+ mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, td);
/*
* td's lock pointer may have changed. We have to return with it
* locked.
@@ -1332,7 +1331,7 @@ sched_preempt(struct thread *td)
if (td->td_critnest > 1)
td->td_owepreempt = 1;
else
- mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+ mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
thread_unlock(td);
}
@@ -1397,8 +1396,7 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
- SCHED_STAT_INC(switch_relinquish);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
}
@@ -1448,7 +1446,7 @@ sched_idletd(void *dummy)
cpu_idle();
mtx_lock_spin(&sched_lock);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IDLE, NULL);
mtx_unlock_spin(&sched_lock);
}
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 911b169..c03f7c8 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -909,7 +909,7 @@ tdq_idled(struct tdq *tdq)
}
spinlock_exit();
TDQ_UNLOCK(steal);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IDLE, NULL);
thread_unlock(curthread);
return (0);
@@ -1073,6 +1073,13 @@ sched_setcpu(struct thread *td, int cpu, int flags)
return (tdq);
}
+SCHED_STAT_DEFINE(pickcpu_intrbind, "Soft interrupt binding");
+SCHED_STAT_DEFINE(pickcpu_idle_affinity, "Picked idle cpu based on affinity");
+SCHED_STAT_DEFINE(pickcpu_affinity, "Picked cpu based on affinity");
+SCHED_STAT_DEFINE(pickcpu_lowest, "Selected lowest load");
+SCHED_STAT_DEFINE(pickcpu_local, "Migrated to current cpu");
+SCHED_STAT_DEFINE(pickcpu_migration, "Selection may have caused migration");
+
static int
sched_pickcpu(struct thread *td, int flags)
{
@@ -1098,8 +1105,10 @@ sched_pickcpu(struct thread *td, int flags)
* the interrupt.
*/
if (td->td_priority <= PRI_MAX_ITHD && THREAD_CAN_SCHED(td, self) &&
- curthread->td_intr_nesting_level)
+ curthread->td_intr_nesting_level && ts->ts_cpu != self) {
+ SCHED_STAT_INC(pickcpu_intrbind);
ts->ts_cpu = self;
+ }
/*
* If the thread can run on the last cpu and the affinity has not
* expired or it is idle run it there.
@@ -1107,10 +1116,14 @@ sched_pickcpu(struct thread *td, int flags)
pri = td->td_priority;
tdq = TDQ_CPU(ts->ts_cpu);
if (THREAD_CAN_SCHED(td, ts->ts_cpu)) {
- if (tdq->tdq_lowpri > PRI_MIN_IDLE)
+ if (tdq->tdq_lowpri > PRI_MIN_IDLE) {
+ SCHED_STAT_INC(pickcpu_idle_affinity);
return (ts->ts_cpu);
- if (SCHED_AFFINITY(ts, CG_SHARE_L2) && tdq->tdq_lowpri > pri)
+ }
+ if (SCHED_AFFINITY(ts, CG_SHARE_L2) && tdq->tdq_lowpri > pri) {
+ SCHED_STAT_INC(pickcpu_affinity);
return (ts->ts_cpu);
+ }
}
/*
* Search for the highest level in the tree that still has affinity.
@@ -1129,8 +1142,13 @@ sched_pickcpu(struct thread *td, int flags)
* Compare the lowest loaded cpu to current cpu.
*/
if (THREAD_CAN_SCHED(td, self) && TDQ_CPU(self)->tdq_lowpri > pri &&
- TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE)
+ TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE) {
+ SCHED_STAT_INC(pickcpu_local);
cpu = self;
+ } else
+ SCHED_STAT_INC(pickcpu_lowest);
+ if (cpu != ts->ts_cpu)
+ SCHED_STAT_INC(pickcpu_migration);
KASSERT(cpu != -1, ("sched_pickcpu: Failed to find a cpu."));
return (cpu);
}
@@ -1989,10 +2007,15 @@ sched_preempt(struct thread *td)
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
tdq->tdq_ipipending = 0;
if (td->td_priority > tdq->tdq_lowpri) {
+ int flags;
+
+ flags = SW_INVOL | SW_PREEMPT;
if (td->td_critnest > 1)
td->td_owepreempt = 1;
+ else if (TD_IS_IDLETHREAD(td))
+ mi_switch(flags | SWT_REMOTEWAKEIDLE, NULL);
else
- mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+ mi_switch(flags | SWT_REMOTEPREEMPT, NULL);
}
thread_unlock(td);
}
@@ -2378,8 +2401,7 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
- SCHED_STAT_INC(switch_relinquish);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
}
diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c
index 9edd56e..1dbd1db 100644
--- a/sys/kern/subr_sleepqueue.c
+++ b/sys/kern/subr_sleepqueue.c
@@ -486,8 +486,7 @@ sleepq_switch(void *wchan, int pri)
sched_sleep(td, pri);
thread_lock_set(td, &sc->sc_lock);
TD_SET_SLEEPING(td);
- SCHED_STAT_INC(switch_sleepq);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
@@ -527,8 +526,7 @@ sleepq_check_timeout(void)
else if (callout_stop(&td->td_slpcallout) == 0) {
td->td_flags |= TDF_TIMEOUT;
TD_SET_SLEEPING(td);
- SCHED_STAT_INC(switch_sleepqtimo);
- mi_switch(SW_INVOL, NULL);
+ mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
}
return (0);
}
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index a92abd2..3d1948d 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -211,8 +211,7 @@ ast(struct trapframe *framep)
#endif
thread_lock(td);
sched_prio(td, td->td_user_pri);
- SCHED_STAT_INC(switch_needresched);
- mi_switch(SW_INVOL, NULL);
+ mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
thread_unlock(td);
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index fd6cdbd..7b8270a 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -741,8 +741,7 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue)
td->td_tid, lock, lock->lo_name);
THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
- SCHED_STAT_INC(switch_turnstile);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_TURNSTILE, NULL);
if (LOCK_LOG_TEST(lock, 0))
CTR4(KTR_LOCK, "%s: td %d free from blocked on [%p] %s",