summaryrefslogtreecommitdiffstats
path: root/kernel/sched
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-11 14:18:38 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-11 14:18:38 -0800
commit24af98c4cf5f5e69266e270c7f3fb34b82ff6656 (patch)
tree70d71381c841c92b2d28397bf0c5d6a7d9bbbaac /kernel/sched
parent9061cbe62adeccf8c986883bcd40f4aeee59ea75 (diff)
parent337f13046ff03717a9e99675284a817527440a49 (diff)
downloadop-kernel-dev-24af98c4cf5f5e69266e270c7f3fb34b82ff6656.zip
op-kernel-dev-24af98c4cf5f5e69266e270c7f3fb34b82ff6656.tar.gz
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: "So we have a laundry list of locking subsystem changes: - continuing barrier API and code improvements - futex enhancements - atomics API improvements - pvqspinlock enhancements: in particular lock stealing and adaptive spinning - qspinlock micro-enhancements" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op futex: Cleanup the goto confusion in requeue_pi() futex: Remove pointless put_pi_state calls in requeue() futex: Document pi_state refcounting in requeue code futex: Rename free_pi_state() to put_pi_state() futex: Drop refcount if requeue_pi() acquired the rtmutex locking/barriers, arch: Remove ambiguous statement in the smp_store_mb() documentation lcoking/barriers, arch: Use smp barriers in smp_store_release() locking/cmpxchg, arch: Remove tas() definitions locking/pvqspinlock: Queue node adaptive spinning locking/pvqspinlock: Allow limited lock stealing locking/pvqspinlock: Collect slowpath lock statistics sched/core, locking: Document Program-Order guarantees locking, sched: Introduce smp_cond_acquire() and use it locking/pvqspinlock, x86: Optimize the PV unlock code path locking/qspinlock: Avoid redundant read of next pointer locking/qspinlock: Prefetch the next node cacheline locking/qspinlock: Use _acquire/_release() versions of cmpxchg() & xchg() atomics: Add test for atomic operations with _relaxed variants
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c99
-rw-r--r--kernel/sched/sched.h2
2 files changed, 93 insertions, 8 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1ef0d7a..34cb9f7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1905,6 +1905,97 @@ static void ttwu_queue(struct task_struct *p, int cpu)
raw_spin_unlock(&rq->lock);
}
+/*
+ * Notes on Program-Order guarantees on SMP systems.
+ *
+ * MIGRATION
+ *
+ * The basic program-order guarantee on SMP systems is that when a task [t]
+ * migrates, all its activity on its old cpu [c0] happens-before any subsequent
+ * execution on its new cpu [c1].
+ *
+ * For migration (of runnable tasks) this is provided by the following means:
+ *
+ * A) UNLOCK of the rq(c0)->lock scheduling out task t
+ * B) migration for t is required to synchronize *both* rq(c0)->lock and
+ * rq(c1)->lock (if not at the same time, then in that order).
+ * C) LOCK of the rq(c1)->lock scheduling in task
+ *
+ * Transitivity guarantees that B happens after A and C after B.
+ * Note: we only require RCpc transitivity.
+ * Note: the cpu doing B need not be c0 or c1
+ *
+ * Example:
+ *
+ * CPU0 CPU1 CPU2
+ *
+ * LOCK rq(0)->lock
+ * sched-out X
+ * sched-in Y
+ * UNLOCK rq(0)->lock
+ *
+ * LOCK rq(0)->lock // orders against CPU0
+ * dequeue X
+ * UNLOCK rq(0)->lock
+ *
+ * LOCK rq(1)->lock
+ * enqueue X
+ * UNLOCK rq(1)->lock
+ *
+ * LOCK rq(1)->lock // orders against CPU2
+ * sched-out Z
+ * sched-in X
+ * UNLOCK rq(1)->lock
+ *
+ *
+ * BLOCKING -- aka. SLEEP + WAKEUP
+ *
+ * For blocking we (obviously) need to provide the same guarantee as for
+ * migration. However the means are completely different as there is no lock
+ * chain to provide order. Instead we do:
+ *
+ * 1) smp_store_release(X->on_cpu, 0)
+ * 2) smp_cond_acquire(!X->on_cpu)
+ *
+ * Example:
+ *
+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule)
+ *
+ * LOCK rq(0)->lock LOCK X->pi_lock
+ * dequeue X
+ * sched-out X
+ * smp_store_release(X->on_cpu, 0);
+ *
+ * smp_cond_acquire(!X->on_cpu);
+ * X->state = WAKING
+ * set_task_cpu(X,2)
+ *
+ * LOCK rq(2)->lock
+ * enqueue X
+ * X->state = RUNNING
+ * UNLOCK rq(2)->lock
+ *
+ * LOCK rq(2)->lock // orders against CPU1
+ * sched-out Z
+ * sched-in X
+ * UNLOCK rq(2)->lock
+ *
+ * UNLOCK X->pi_lock
+ * UNLOCK rq(0)->lock
+ *
+ *
+ * However; for wakeups there is a second guarantee we must provide, namely we
+ * must observe the state that lead to our wakeup. That is, not only must our
+ * task observe its own prior state, it must also observe the stores prior to
+ * its wakeup.
+ *
+ * This means that any means of doing remote wakeups must order the CPU doing
+ * the wakeup against the CPU the task is going to end up running on. This,
+ * however, is already required for the regular Program-Order guarantee above,
+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire).
+ *
+ */
+
/**
* try_to_wake_up - wake up a thread
* @p: the thread to be awakened
@@ -1968,19 +2059,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
/*
* If the owning (remote) cpu is still in the middle of schedule() with
* this task as prev, wait until its done referencing the task.
- */
- while (p->on_cpu)
- cpu_relax();
- /*
- * Combined with the control dependency above, we have an effective
- * smp_load_acquire() without the need for full barriers.
*
* Pairs with the smp_store_release() in finish_lock_switch().
*
* This ensures that tasks getting woken will be fully ordered against
* their previous state and preserve Program Order.
*/
- smp_rmb();
+ smp_cond_acquire(!p->on_cpu);
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b242775..1e0bb4a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1076,7 +1076,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
* In particular, the load of prev->state in finish_task_switch() must
* happen before this.
*
- * Pairs with the control dependency and rmb in try_to_wake_up().
+ * Pairs with the smp_cond_acquire() in try_to_wake_up().
*/
smp_store_release(&prev->on_cpu, 0);
#endif
OpenPOWER on IntegriCloud