From 0e425be7bbb51e540023cafb50cb3a0de3b8abf9 Mon Sep 17 00:00:00 2001 From: mav Date: Wed, 17 Sep 2014 14:06:21 +0000 Subject: MFC r271604, r271616: Add a couple of memory barriers to order tdq_cpu_idle and tdq_load accesses. This change fixes transient performance drops in some of my benchmarks, which vanished as soon as I tried to collect any stats from the scheduler. It looks like reordered accesses to those variables sometimes caused the loss of an IPI_PREEMPT, which delayed thread execution until some later interrupt. Approved by: re (marius) --- sys/kern/sched_ule.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'sys/kern/sched_ule.c') diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index a655440..961f80d 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -1037,6 +1037,14 @@ tdq_notify(struct tdq *tdq, struct thread *td) ctd = pcpu_find(cpu)->pc_curthread; if (!sched_shouldpreempt(pri, ctd->td_priority, 1)) return; + + /* + * Make sure that tdq_load updated before calling this function + * is globally visible before we read tdq_cpu_idle. Idle thread + * accesses both of them without locks, and the order is important. + */ + mb(); + if (TD_IS_IDLETHREAD(ctd)) { /* * If the MD code has an idle wakeup routine try that before @@ -2645,6 +2653,12 @@ sched_idletd(void *dummy) /* Run main MD idle handler. */ tdq->tdq_cpu_idle = 1; + /* + * Make sure that tdq_cpu_idle update is globally visible + * before cpu_idle() read tdq_load. The order is important + * to avoid race with tdq_notify. + */ + mb(); cpu_idle(switchcnt * 4 > sched_idlespinthresh); tdq->tdq_cpu_idle = 0; -- cgit v1.1