summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c40
-rw-r--r--kernel/cpuset.c2
-rw-r--r--kernel/sched.c108
-rw-r--r--kernel/sched_fair.c3
-rw-r--r--kernel/sched_rt.c75
5 files changed, 130 insertions, 98 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cfb1d43..033603c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void)
cpu_hotplug.refcount = 0;
}
+cpumask_t cpu_active_map;
+
#ifdef CONFIG_HOTPLUG_CPU
void get_online_cpus(void)
@@ -291,11 +293,30 @@ int __ref cpu_down(unsigned int cpu)
int err = 0;
cpu_maps_update_begin();
- if (cpu_hotplug_disabled)
+
+ if (cpu_hotplug_disabled) {
err = -EBUSY;
- else
- err = _cpu_down(cpu, 0);
+ goto out;
+ }
+
+ cpu_clear(cpu, cpu_active_map);
+
+ /*
+ * Make sure the all cpus did the reschedule and are not
+ * using stale version of the cpu_active_map.
+ * This is not strictly necessary becuase stop_machine()
+ * that we run down the line already provides the required
+ * synchronization. But it's really a side effect and we do not
+ * want to depend on the innards of the stop_machine here.
+ */
+ synchronize_sched();
+
+ err = _cpu_down(cpu, 0);
+ if (cpu_online(cpu))
+ cpu_set(cpu, cpu_active_map);
+
+out:
cpu_maps_update_done();
return err;
}
@@ -355,11 +376,18 @@ int __cpuinit cpu_up(unsigned int cpu)
}
cpu_maps_update_begin();
- if (cpu_hotplug_disabled)
+
+ if (cpu_hotplug_disabled) {
err = -EBUSY;
- else
- err = _cpu_up(cpu, 0);
+ goto out;
+ }
+
+ err = _cpu_up(cpu, 0);
+ if (cpu_online(cpu))
+ cpu_set(cpu, cpu_active_map);
+
+out:
cpu_maps_update_done();
return err;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601..3c3ef02 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
* partition_sched_domains().
*/
-static void rebuild_sched_domains(void)
+void rebuild_sched_domains(void)
{
struct kfifo *q; /* queue of cpusets to be scanned */
struct cpuset *cp; /* scans q */
diff --git a/kernel/sched.c b/kernel/sched.c
index c13c75e9..85cf246 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2802,7 +2802,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
rq = task_rq_lock(p, &flags);
if (!cpu_isset(dest_cpu, p->cpus_allowed)
- || unlikely(cpu_is_offline(dest_cpu)))
+ || unlikely(!cpu_active(dest_cpu)))
goto out;
/* force the process onto the specified CPU */
@@ -3770,7 +3770,7 @@ int select_nohz_load_balancer(int stop_tick)
/*
* If we are going offline and still the leader, give up!
*/
- if (cpu_is_offline(cpu) &&
+ if (!cpu_active(cpu) &&
atomic_read(&nohz.load_balancer) == cpu) {
if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
BUG();
@@ -5794,7 +5794,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
struct rq *rq_dest, *rq_src;
int ret = 0, on_rq;
- if (unlikely(cpu_is_offline(dest_cpu)))
+ if (unlikely(!cpu_active(dest_cpu)))
return ret;
rq_src = cpu_rq(src_cpu);
@@ -7472,18 +7472,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
}
/*
- * Free current domain masks.
- * Called after all cpus are attached to NULL domain.
- */
-static void free_sched_domains(void)
-{
- ndoms_cur = 0;
- if (doms_cur != &fallback_doms)
- kfree(doms_cur);
- doms_cur = &fallback_doms;
-}
-
-/*
* Set up scheduler domains and groups. Callers must hold the hotplug lock.
* For now this just excludes isolated cpus, but could be used to
* exclude other special cases in the future.
@@ -7561,7 +7549,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
* ownership of it and will kfree it when done with it. If the caller
* failed the kmalloc call, then it can pass in doms_new == NULL,
* and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms'.
+ * 'fallback_doms', it also forces the domains to be rebuilt.
*
* Call with hotplug lock held
*/
@@ -7575,12 +7563,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
/* always unregister in case we don't destroy any domains */
unregister_sched_domain_sysctl();
- if (doms_new == NULL) {
- ndoms_new = 1;
- doms_new = &fallback_doms;
- cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
- dattr_new = NULL;
- }
+ if (doms_new == NULL)
+ ndoms_new = 0;
/* Destroy deleted domains */
for (i = 0; i < ndoms_cur; i++) {
@@ -7595,6 +7579,14 @@ match1:
;
}
+ if (doms_new == NULL) {
+ ndoms_cur = 0;
+ ndoms_new = 1;
+ doms_new = &fallback_doms;
+ cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+ dattr_new = NULL;
+ }
+
/* Build new domains */
for (i = 0; i < ndoms_new; i++) {
for (j = 0; j < ndoms_cur; j++) {
@@ -7625,17 +7617,10 @@ match2:
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
int arch_reinit_sched_domains(void)
{
- int err;
-
get_online_cpus();
- mutex_lock(&sched_domains_mutex);
- detach_destroy_domains(&cpu_online_map);
- free_sched_domains();
- err = arch_init_sched_domains(&cpu_online_map);
- mutex_unlock(&sched_domains_mutex);
+ rebuild_sched_domains();
put_online_cpus();
-
- return err;
+ return 0;
}
static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7701,59 +7686,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
}
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#ifndef CONFIG_CPUSETS
/*
- * Force a reinitialization of the sched domains hierarchy. The domains
- * and groups cannot be updated in place without racing with the balancing
- * code, so we temporarily attach all running cpus to the NULL domain
- * which will prevent rebalancing while the sched domains are recalculated.
+ * Add online and remove offline CPUs from the scheduler domains.
+ * When cpusets are enabled they take over this function.
*/
static int update_sched_domains(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ partition_sched_domains(0, NULL, NULL);
+ return NOTIFY_OK;
+
+ default:
+ return NOTIFY_DONE;
+ }
+}
+#endif
+
+static int update_runtime(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
int cpu = (int)(long)hcpu;
switch (action) {
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
disable_runtime(cpu_rq(cpu));
- /* fall-through */
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- detach_destroy_domains(&cpu_online_map);
- free_sched_domains();
return NOTIFY_OK;
-
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
enable_runtime(cpu_rq(cpu));
- /* fall-through */
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /*
- * Fall through and re-initialise the domains.
- */
- break;
+ return NOTIFY_OK;
+
default:
return NOTIFY_DONE;
}
-
-#ifndef CONFIG_CPUSETS
- /*
- * Create default domain partitioning if cpusets are disabled.
- * Otherwise we let cpusets rebuild the domains based on the
- * current setup.
- */
-
- /* The hotplug lock is already held by cpu_up/cpu_down */
- arch_init_sched_domains(&cpu_online_map);
-#endif
-
- return NOTIFY_OK;
}
void __init sched_init_smp(void)
@@ -7773,8 +7748,15 @@ void __init sched_init_smp(void)
cpu_set(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);
put_online_cpus();
+
+#ifndef CONFIG_CPUSETS
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);
+#endif
+
+ /* RT runtime code needs to handle some hotplug events */
+ hotcpu_notifier(update_runtime, 0);
+
init_hrtick();
/* Move init over to a non-isolated CPU */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6893b3e..7f70026 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1003,6 +1003,8 @@ static void yield_task_fair(struct rq *rq)
* not idle and an idle cpu is available. The span of cpus to
* search starts with cpus closest then further out as needed,
* so we always favor a closer, idle cpu.
+ * Domains may include CPUs that are not usable for migration,
+ * hence we need to mask them out (cpu_active_map)
*
* Returns the CPU we should wake onto.
*/
@@ -1030,6 +1032,7 @@ static int wake_idle(int cpu, struct task_struct *p)
|| ((sd->flags & SD_WAKE_IDLE_FAR)
&& !task_hot(p, task_rq(p)->clock, sd))) {
cpus_and(tmp, sd->span, p->cpus_allowed);
+ cpus_and(tmp, tmp, cpu_active_map);
for_each_cpu_mask(i, tmp) {
if (idle_cpu(i)) {
if (i != task_cpu(p)) {
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 147004c6..24621ce 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -601,11 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
return;
- if (rt_se->nr_cpus_allowed == 1)
- list_add(&rt_se->run_list, queue);
- else
- list_add_tail(&rt_se->run_list, queue);
-
+ list_add_tail(&rt_se->run_list, queue);
__set_bit(rt_se_prio(rt_se), array->bitmap);
inc_rt_tasks(rt_se, rt_rq);
@@ -690,32 +686,34 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
* Put task to the end of the run list without the overhead of dequeue
* followed by enqueue.
*/
-static
-void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+static void
+requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
- struct rt_prio_array *array = &rt_rq->active;
-
if (on_rt_rq(rt_se)) {
- list_del_init(&rt_se->run_list);
- list_add_tail(&rt_se->run_list,
- array->queue + rt_se_prio(rt_se));
+ struct rt_prio_array *array = &rt_rq->active;
+ struct list_head *queue = array->queue + rt_se_prio(rt_se);
+
+ if (head)
+ list_move(&rt_se->run_list, queue);
+ else
+ list_move_tail(&rt_se->run_list, queue);
}
}
-static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
struct sched_rt_entity *rt_se = &p->rt;
struct rt_rq *rt_rq;
for_each_sched_rt_entity(rt_se) {
rt_rq = rt_rq_of_se(rt_se);
- requeue_rt_entity(rt_rq, rt_se);
+ requeue_rt_entity(rt_rq, rt_se, head);
}
}
static void yield_task_rt(struct rq *rq)
{
- requeue_task_rt(rq, rq->curr);
+ requeue_task_rt(rq, rq->curr, 0);
}
#ifdef CONFIG_SMP
@@ -755,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
*/
return task_cpu(p);
}
+
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+{
+ cpumask_t mask;
+
+ if (rq->curr->rt.nr_cpus_allowed == 1)
+ return;
+
+ if (p->rt.nr_cpus_allowed != 1
+ && cpupri_find(&rq->rd->cpupri, p, &mask))
+ return;
+
+ if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+ return;
+
+ /*
+ * There appears to be other cpus that can accept
+ * current and none to run 'p', so lets reschedule
+ * to try and push current away:
+ */
+ requeue_task_rt(rq, p, 1);
+ resched_task(rq->curr);
+}
+
#endif /* CONFIG_SMP */
/*
@@ -780,18 +802,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
* to move current somewhere else, making room for our non-migratable
* task.
*/
- if((p->prio == rq->curr->prio)
- && p->rt.nr_cpus_allowed == 1
- && rq->curr->rt.nr_cpus_allowed != 1) {
- cpumask_t mask;
-
- if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
- /*
- * There appears to be other cpus that can accept
- * current, so lets reschedule to try and push it away
- */
- resched_task(rq->curr);
- }
+ if (p->prio == rq->curr->prio && !need_resched())
+ check_preempt_equal_prio(rq, p);
#endif
}
@@ -924,6 +936,13 @@ static int find_lowest_rq(struct task_struct *task)
return -1; /* No targets found */
/*
+ * Only consider CPUs that are usable for migration.
+ * I guess we might want to change cpupri_find() to ignore those
+ * in the first place.
+ */
+ cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+
+ /*
* At this point we have built a mask of cpus representing the
* lowest priority tasks in the system. Now we want to elect
* the best one based on our affinity and topology.
@@ -1417,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
* on the queue:
*/
if (p->rt.run_list.prev != p->rt.run_list.next) {
- requeue_task_rt(rq, p);
+ requeue_task_rt(rq, p, 0);
set_tsk_need_resched(p);
}
}
OpenPOWER on IntegriCloud