-rw-r--r--  sys/kern/kern_intr.c        4
-rw-r--r--  sys/kern/kern_subr.c        2
-rw-r--r--  sys/kern/kern_switch.c     66
-rw-r--r--  sys/kern/kern_synch.c       6
-rw-r--r--  sys/kern/kern_thread.c      4
-rw-r--r--  sys/kern/sched_4bsd.c      10
-rw-r--r--  sys/kern/sched_ule.c       38
-rw-r--r--  sys/kern/subr_sleepqueue.c  6
-rw-r--r--  sys/kern/subr_trap.c        3
-rw-r--r--  sys/kern/subr_turnstile.c   3
-rw-r--r--  sys/sys/proc.h             24
-rw-r--r--  sys/sys/sched.h            18
-rw-r--r--  sys/sys/sysctl.h            1
-rw-r--r--  sys/vm/vm_glue.c            2
-rw-r--r--  sys/vm/vm_zeroidle.c        2
15 files changed, 122 insertions, 67 deletions
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index e626988..5e464f9 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -1231,7 +1231,7 @@ ithread_loop(void *arg)
if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
thread_unlock(td);
}
@@ -1389,7 +1389,7 @@ ithread_loop(void *arg)
if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
thread_unlock(td);
}
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
index 2101026..c93f262 100644
--- a/sys/kern/kern_subr.c
+++ b/sys/kern/kern_subr.c
@@ -456,7 +456,7 @@ uio_yield(void)
DROP_GIANT();
thread_lock(td);
sched_prio(td, td->td_user_pri);
- mi_switch(SW_INVOL, NULL);
+ mi_switch(SW_INVOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
PICKUP_GIANT();
}
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 10bfb73..b7cb0b6 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -73,25 +73,35 @@ static int kern_sched_preemption = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
&kern_sched_preemption, 0, "Kernel preemption enabled");
+/*
+ * Support for scheduler stats exported via kern.sched.stats. All stats may
+ * be reset with kern.sched.stats.reset = 1. Stats may be defined elsewhere
+ * with SCHED_STAT_DEFINE().
+ */
#ifdef SCHED_STATS
-long switch_preempt;
-long switch_owepreempt;
-long switch_turnstile;
-long switch_sleepq;
-long switch_sleepqtimo;
-long switch_relinquish;
-long switch_needresched;
-static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, "");
-SYSCTL_INT(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, "");
+long sched_switch_stats[SWT_COUNT]; /* Switch reasons from mi_switch(). */
+
+SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
+SCHED_STAT_DEFINE_VAR(uncategorized, &sched_switch_stats[SWT_NONE], "");
+SCHED_STAT_DEFINE_VAR(preempt, &sched_switch_stats[SWT_PREEMPT], "");
+SCHED_STAT_DEFINE_VAR(owepreempt, &sched_switch_stats[SWT_OWEPREEMPT], "");
+SCHED_STAT_DEFINE_VAR(turnstile, &sched_switch_stats[SWT_TURNSTILE], "");
+SCHED_STAT_DEFINE_VAR(sleepq, &sched_switch_stats[SWT_SLEEPQ], "");
+SCHED_STAT_DEFINE_VAR(sleepqtimo, &sched_switch_stats[SWT_SLEEPQTIMO], "");
+SCHED_STAT_DEFINE_VAR(relinquish, &sched_switch_stats[SWT_RELINQUISH], "");
+SCHED_STAT_DEFINE_VAR(needresched, &sched_switch_stats[SWT_NEEDRESCHED], "");
+SCHED_STAT_DEFINE_VAR(idle, &sched_switch_stats[SWT_IDLE], "");
+SCHED_STAT_DEFINE_VAR(iwait, &sched_switch_stats[SWT_IWAIT], "");
+SCHED_STAT_DEFINE_VAR(suspend, &sched_switch_stats[SWT_SUSPEND], "");
+SCHED_STAT_DEFINE_VAR(remotepreempt, &sched_switch_stats[SWT_REMOTEPREEMPT],
+ "");
+SCHED_STAT_DEFINE_VAR(remotewakeidle, &sched_switch_stats[SWT_REMOTEWAKEIDLE],
+ "");
+
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
+ struct sysctl_oid *p;
int error;
int val;
@@ -101,14 +111,15 @@ sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
return (error);
if (val == 0)
return (0);
- switch_preempt = 0;
- switch_owepreempt = 0;
- switch_turnstile = 0;
- switch_sleepq = 0;
- switch_sleepqtimo = 0;
- switch_relinquish = 0;
- switch_needresched = 0;
-
+ /*
+ * Traverse the list of children of _kern_sched_stats and reset each
+ * to 0. Skip the reset entry.
+ */
+ SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
+ if (p == oidp || p->oid_arg1 == NULL)
+ continue;
+ *(long *)p->oid_arg1 = 0;
+ }
return (0);
}
@@ -164,6 +175,7 @@ void
critical_exit(void)
{
struct thread *td;
+ int flags;
td = curthread;
KASSERT(td->td_critnest != 0,
@@ -175,8 +187,12 @@ critical_exit(void)
td->td_critnest = 1;
thread_lock(td);
td->td_critnest--;
- SCHED_STAT_INC(switch_owepreempt);
- mi_switch(SW_INVOL|SW_PREEMPT, NULL);
+ flags = SW_INVOL | SW_PREEMPT;
+ if (TD_IS_IDLETHREAD(td))
+ flags |= SWT_IDLE;
+ else
+ flags |= SWT_OWEPREEMPT;
+ mi_switch(flags, NULL);
thread_unlock(td);
}
} else
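The reset handler above walks the sysctl children of kern.sched.stats instead of naming each counter, so stats defined elsewhere with SCHED_STAT_DEFINE() are picked up without touching the reset code. A minimal userland sketch of consuming the tree, assuming a kernel built with options SCHED_STATS (the counter nodes only exist then) and that the reset node accepts an int write as the handler suggests:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		long preempt;
		size_t len = sizeof(preempt);
		int one = 1;

		/* Read one per-reason counter exported by SCHED_STAT_DEFINE_VAR(). */
		if (sysctlbyname("kern.sched.stats.preempt", &preempt, &len,
		    NULL, 0) == 0)
			printf("preempt switches: %ld\n", preempt);
		/* Any nonzero write zeroes every sibling counter. */
		(void)sysctlbyname("kern.sched.stats.reset", NULL, NULL,
		    &one, sizeof(one));
		return (0);
	}
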
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index b4defe9..c322ace 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -38,6 +38,7 @@
__FBSDID("$FreeBSD$");
#include "opt_ktrace.h"
+#include "opt_sched.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -390,6 +391,9 @@ mi_switch(int flags, struct thread *newtd)
td->td_ru.ru_nvcsw++;
else
td->td_ru.ru_nivcsw++;
+#ifdef SCHED_STATS
+ SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
+#endif
/*
* Compute the amount of time during which the current
* thread was running, and add that to its total so far.
@@ -533,7 +537,7 @@ yield(struct thread *td, struct yield_args *uap)
thread_lock(td);
sched_prio(td, PRI_MAX_TIMESHARE);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
td->td_retval[0] = 0;
return (0);
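The single line added to mi_switch() is what retires the scattered SCHED_STAT_INC(switch_*) calls deleted elsewhere in this change: every context switch now bumps one array slot selected by the type byte of the flags argument. Per the SCHED_STAT_INC definition in sys/sys/sched.h, it amounts to:

	atomic_add_long(&sched_switch_stats[flags & SW_TYPE_MASK], 1);
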
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 8745e5e..a3d5da7 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -723,7 +723,7 @@ thread_suspend_check(int return_instead)
td->td_flags |= TDF_BOUNDARY;
}
PROC_SUNLOCK(p);
- mi_switch(SW_INVOL, NULL);
+ mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
if (return_instead == 0)
td->td_flags &= ~TDF_BOUNDARY;
thread_unlock(td);
@@ -756,7 +756,7 @@ thread_suspend_switch(struct thread *td)
sched_sleep(td, 0);
PROC_SUNLOCK(p);
DROP_GIANT();
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_SUSPEND, NULL);
thread_unlock(td);
PICKUP_GIANT();
PROC_LOCK(p);
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 6879801..ed5cf62 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -316,8 +316,7 @@ maybe_preempt(struct thread *td)
TD_SET_RUNNING(td);
CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
td->td_proc->p_pid, td->td_name);
- SCHED_STAT_INC(switch_preempt);
- mi_switch(SW_INVOL|SW_PREEMPT, td);
+ mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, td);
/*
* td's lock pointer may have changed. We have to return with it
* locked.
@@ -1332,7 +1331,7 @@ sched_preempt(struct thread *td)
if (td->td_critnest > 1)
td->td_owepreempt = 1;
else
- mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+ mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
thread_unlock(td);
}
@@ -1397,8 +1396,7 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
- SCHED_STAT_INC(switch_relinquish);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
}
@@ -1448,7 +1446,7 @@ sched_idletd(void *dummy)
cpu_idle();
mtx_lock_spin(&sched_lock);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IDLE, NULL);
mtx_unlock_spin(&sched_lock);
}
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 911b169..c03f7c8 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -909,7 +909,7 @@ tdq_idled(struct tdq *tdq)
}
spinlock_exit();
TDQ_UNLOCK(steal);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IDLE, NULL);
thread_unlock(curthread);
return (0);
@@ -1073,6 +1073,13 @@ sched_setcpu(struct thread *td, int cpu, int flags)
return (tdq);
}
+SCHED_STAT_DEFINE(pickcpu_intrbind, "Soft interrupt binding");
+SCHED_STAT_DEFINE(pickcpu_idle_affinity, "Picked idle cpu based on affinity");
+SCHED_STAT_DEFINE(pickcpu_affinity, "Picked cpu based on affinity");
+SCHED_STAT_DEFINE(pickcpu_lowest, "Selected lowest load");
+SCHED_STAT_DEFINE(pickcpu_local, "Migrated to current cpu");
+SCHED_STAT_DEFINE(pickcpu_migration, "Selection may have caused migration");
+
static int
sched_pickcpu(struct thread *td, int flags)
{
@@ -1098,8 +1105,10 @@ sched_pickcpu(struct thread *td, int flags)
* the interrupt.
*/
if (td->td_priority <= PRI_MAX_ITHD && THREAD_CAN_SCHED(td, self) &&
- curthread->td_intr_nesting_level)
+ curthread->td_intr_nesting_level && ts->ts_cpu != self) {
+ SCHED_STAT_INC(pickcpu_intrbind);
ts->ts_cpu = self;
+ }
/*
* If the thread can run on the last cpu and the affinity has not
* expired or it is idle run it there.
@@ -1107,10 +1116,14 @@ sched_pickcpu(struct thread *td, int flags)
pri = td->td_priority;
tdq = TDQ_CPU(ts->ts_cpu);
if (THREAD_CAN_SCHED(td, ts->ts_cpu)) {
- if (tdq->tdq_lowpri > PRI_MIN_IDLE)
+ if (tdq->tdq_lowpri > PRI_MIN_IDLE) {
+ SCHED_STAT_INC(pickcpu_idle_affinity);
return (ts->ts_cpu);
- if (SCHED_AFFINITY(ts, CG_SHARE_L2) && tdq->tdq_lowpri > pri)
+ }
+ if (SCHED_AFFINITY(ts, CG_SHARE_L2) && tdq->tdq_lowpri > pri) {
+ SCHED_STAT_INC(pickcpu_affinity);
return (ts->ts_cpu);
+ }
}
/*
* Search for the highest level in the tree that still has affinity.
@@ -1129,8 +1142,13 @@ sched_pickcpu(struct thread *td, int flags)
* Compare the lowest loaded cpu to current cpu.
*/
if (THREAD_CAN_SCHED(td, self) && TDQ_CPU(self)->tdq_lowpri > pri &&
- TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE)
+ TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE) {
+ SCHED_STAT_INC(pickcpu_local);
cpu = self;
+ } else
+ SCHED_STAT_INC(pickcpu_lowest);
+ if (cpu != ts->ts_cpu)
+ SCHED_STAT_INC(pickcpu_migration);
KASSERT(cpu != -1, ("sched_pickcpu: Failed to find a cpu."));
return (cpu);
}
@@ -1989,10 +2007,15 @@ sched_preempt(struct thread *td)
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
tdq->tdq_ipipending = 0;
if (td->td_priority > tdq->tdq_lowpri) {
+ int flags;
+
+ flags = SW_INVOL | SW_PREEMPT;
if (td->td_critnest > 1)
td->td_owepreempt = 1;
+ else if (TD_IS_IDLETHREAD(td))
+ mi_switch(flags | SWT_REMOTEWAKEIDLE, NULL);
else
- mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+ mi_switch(flags | SWT_REMOTEPREEMPT, NULL);
}
thread_unlock(td);
}
@@ -2378,8 +2401,7 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
- SCHED_STAT_INC(switch_relinquish);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
}
diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c
index 9edd56e..1dbd1db 100644
--- a/sys/kern/subr_sleepqueue.c
+++ b/sys/kern/subr_sleepqueue.c
@@ -486,8 +486,7 @@ sleepq_switch(void *wchan, int pri)
sched_sleep(td, pri);
thread_lock_set(td, &sc->sc_lock);
TD_SET_SLEEPING(td);
- SCHED_STAT_INC(switch_sleepq);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
@@ -527,8 +526,7 @@ sleepq_check_timeout(void)
else if (callout_stop(&td->td_slpcallout) == 0) {
td->td_flags |= TDF_TIMEOUT;
TD_SET_SLEEPING(td);
- SCHED_STAT_INC(switch_sleepqtimo);
- mi_switch(SW_INVOL, NULL);
+ mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
}
return (0);
}
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index a92abd2..3d1948d 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -211,8 +211,7 @@ ast(struct trapframe *framep)
#endif
thread_lock(td);
sched_prio(td, td->td_user_pri);
- SCHED_STAT_INC(switch_needresched);
- mi_switch(SW_INVOL, NULL);
+ mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
thread_unlock(td);
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index fd6cdbd..7b8270a 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -741,8 +741,7 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue)
td->td_tid, lock, lock->lo_name);
THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
- SCHED_STAT_INC(switch_turnstile);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_TURNSTILE, NULL);
if (LOCK_LOG_TEST(lock, 0))
CTR4(KTR_LOCK, "%s: td %d free from blocked on [%p] %s",
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index a3e055f..86adbb1 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -588,10 +588,26 @@ struct proc {
#ifdef _KERNEL
-/* Flags for mi_switch(). */
-#define SW_VOL 0x0001 /* Voluntary switch. */
-#define SW_INVOL 0x0002 /* Involuntary switch. */
-#define SW_PREEMPT 0x0004 /* The invol switch is a preemption */
+/* Types and flags for mi_switch(). */
+#define SW_TYPE_MASK 0xff /* First 8 bits are switch type */
+#define SWT_NONE 0 /* Unspecified switch. */
+#define SWT_PREEMPT 1 /* Switching due to preemption. */
+#define SWT_OWEPREEMPT 2 /* Switching due to owepreempt. */
+#define SWT_TURNSTILE 3 /* Turnstile contention. */
+#define SWT_SLEEPQ 4 /* Sleepq wait. */
+#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */
+#define SWT_RELINQUISH 6 /* yield call. */
+#define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */
+#define SWT_IDLE 8 /* Switching from the idle thread. */
+#define SWT_IWAIT 9 /* Waiting for interrupts. */
+#define SWT_SUSPEND 10 /* Thread suspended. */
+#define SWT_REMOTEPREEMPT 11 /* Remote processor preempted. */
+#define SWT_REMOTEWAKEIDLE 12 /* Remote processor preempted idle. */
+#define SWT_COUNT 13 /* Number of switch types. */
+/* Flags */
+#define SW_VOL 0x0100 /* Voluntary switch. */
+#define SW_INVOL 0x0200 /* Involuntary switch. */
+#define SW_PREEMPT 0x0400 /* The invol switch is a preemption */
/* How values for thread_single(). */
#define SINGLE_NO_EXIT 0
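The new encoding packs exactly one SWT_* reason into the low byte and keeps the SW_* kind bits above it, so a single int argument still carries both. A worked example (illustrative values, not from the commit):

	flags = SW_INVOL | SW_PREEMPT | SWT_PREEMPT; /* 0x0200 | 0x0400 | 1 == 0x0601 */
	type  = flags & SW_TYPE_MASK;                /* 0x0601 & 0xff == SWT_PREEMPT */

Because the type field is an index rather than a bitmask, callers OR in a single SWT_* value, never a combination.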
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index fa57055..bbd2199 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -154,17 +154,19 @@ sched_unpin(void)
#define SRQ_PREEMPTED 0x0008 /* has been preempted.. be kind */
#define SRQ_BORROWING 0x0010 /* Priority updated due to prio_lend */
-/* Switch stats. */
+/* Scheduler stats. */
#ifdef SCHED_STATS
-extern long switch_preempt;
-extern long switch_owepreempt;
-extern long switch_turnstile;
-extern long switch_sleepq;
-extern long switch_sleepqtimo;
-extern long switch_relinquish;
-extern long switch_needresched;
+extern long sched_switch_stats[SWT_COUNT];
+
+#define SCHED_STAT_DEFINE_VAR(name, ptr, descr) \
+ SYSCTL_LONG(_kern_sched_stats, OID_AUTO, name, CTLFLAG_RD, ptr, 0, descr)
+#define SCHED_STAT_DEFINE(name, descr) \
+ unsigned long name; \
+ SCHED_STAT_DEFINE_VAR(name, &name, descr)
#define SCHED_STAT_INC(var) atomic_add_long(&(var), 1)
#else
+#define SCHED_STAT_DEFINE_VAR(name, ptr, descr)
+#define SCHED_STAT_DEFINE(name, descr)
#define SCHED_STAT_INC(var)
#endif
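The pair of macros gives a one-line idiom for adding a counter: SCHED_STAT_DEFINE() both declares the unsigned long and registers it under kern.sched.stats, so the reset handler in kern_switch.c finds it automatically. A sketch of the pattern sched_ule.c uses above; the wrapper function is hypothetical, shown only to place the increment at an event site:

	SCHED_STAT_DEFINE(pickcpu_local, "Migrated to current cpu");

	static void
	record_local_pick(void)		/* hypothetical illustration */
	{

		/* Compiles away entirely in kernels without SCHED_STATS. */
		SCHED_STAT_INC(pickcpu_local);
	}
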
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index 59543ba..ebd83e7 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -632,6 +632,7 @@ SYSCTL_DECL(_kern_features);
SYSCTL_DECL(_kern_ipc);
SYSCTL_DECL(_kern_proc);
SYSCTL_DECL(_kern_sched);
+SYSCTL_DECL(_kern_sched_stats);
SYSCTL_DECL(_sysctl);
SYSCTL_DECL(_vm);
SYSCTL_DECL(_vm_stats);
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 9f9dc5a..462c460 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -735,7 +735,7 @@ loop:
thread_lock(&thread0);
if (!proc0_rescan) {
TD_SET_IWAIT(&thread0);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
proc0_rescan = 0;
thread_unlock(&thread0);
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index c82de5a..9e1970a 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -127,7 +127,7 @@ vm_pagezero(void __unused *arg)
#ifndef PREEMPTION
if (sched_runnable()) {
thread_lock(curthread);
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL | SWT_IDLE, NULL);
thread_unlock(curthread);
}
#endif