summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/sched_fair.c99
2 files changed, 94 insertions, 7 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 8ec1e7a..5db05f6f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -402,7 +402,7 @@ struct cfs_rq {
u64 runtime_expires;
s64 runtime_remaining;
- int throttled;
+ int throttled, throttle_count;
struct list_head throttled_list;
#endif
#endif
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 7641195..5a20894 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -706,6 +706,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
}
#ifdef CONFIG_FAIR_GROUP_SCHED
+/* we need this in update_cfs_load and load-balance functions below */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
# ifdef CONFIG_SMP
static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
int global_update)
@@ -728,7 +730,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
u64 now, delta;
unsigned long load = cfs_rq->load.weight;
- if (cfs_rq->tg == &root_task_group)
+ if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq))
return;
now = rq_of(cfs_rq)->clock_task;
@@ -837,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
tg = cfs_rq->tg;
se = tg->se[cpu_of(rq_of(cfs_rq))];
- if (!se)
+ if (!se || throttled_hierarchy(cfs_rq))
return;
#ifndef CONFIG_SMP
if (likely(se->load.weight == tg->shares))
@@ -1403,6 +1405,65 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
return cfs_rq->throttled;
}
+/* check whether cfs_rq, or any parent, is throttled */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->throttle_count;
+}
+
+/*
+ * Ensure that neither of the group entities corresponding to src_cpu or
+ * dest_cpu are members of a throttled hierarchy when performing group
+ * load-balance operations.
+ */
+static inline int throttled_lb_pair(struct task_group *tg,
+ int src_cpu, int dest_cpu)
+{
+ struct cfs_rq *src_cfs_rq, *dest_cfs_rq;
+
+ src_cfs_rq = tg->cfs_rq[src_cpu];
+ dest_cfs_rq = tg->cfs_rq[dest_cpu];
+
+ return throttled_hierarchy(src_cfs_rq) ||
+ throttled_hierarchy(dest_cfs_rq);
+}
+
+/* updated child weight may affect parent so we have to do this bottom up */
+static int tg_unthrottle_up(struct task_group *tg, void *data)
+{
+ struct rq *rq = data;
+ struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+ cfs_rq->throttle_count--;
+#ifdef CONFIG_SMP
+ if (!cfs_rq->throttle_count) {
+ u64 delta = rq->clock_task - cfs_rq->load_stamp;
+
+ /* leaving throttled state, advance shares averaging windows */
+ cfs_rq->load_stamp += delta;
+ cfs_rq->load_last += delta;
+
+ /* update entity weight now that we are on_rq again */
+ update_cfs_shares(cfs_rq);
+ }
+#endif
+
+ return 0;
+}
+
+static int tg_throttle_down(struct task_group *tg, void *data)
+{
+ struct rq *rq = data;
+ struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+ /* group is entering throttled state, record last load */
+ if (!cfs_rq->throttle_count)
+ update_cfs_load(cfs_rq, 0);
+ cfs_rq->throttle_count++;
+
+ return 0;
+}
+
static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
@@ -1413,7 +1474,9 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
/* account load preceding throttle */
- update_cfs_load(cfs_rq, 0);
+ rcu_read_lock();
+ walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
+ rcu_read_unlock();
task_delta = cfs_rq->h_nr_running;
for_each_sched_entity(se) {
@@ -1454,6 +1517,10 @@ static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
list_del_rcu(&cfs_rq->throttled_list);
raw_spin_unlock(&cfs_b->lock);
+ update_rq_clock(rq);
+ /* update hierarchical throttle state */
+ walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
+
if (!cfs_rq->load.weight)
return;
@@ -1598,6 +1665,17 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
return 0;
}
+
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+ return 0;
+}
+
+static inline int throttled_lb_pair(struct task_group *tg,
+ int src_cpu, int dest_cpu)
+{
+ return 0;
+}
#endif
/**************************************************
@@ -2493,6 +2571,9 @@ move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
for_each_leaf_cfs_rq(busiest, cfs_rq) {
list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
+ if (throttled_lb_pair(task_group(p),
+ busiest->cpu, this_cpu))
+ break;
if (!can_migrate_task(p, busiest, this_cpu,
sd, idle, &pinned))
@@ -2608,8 +2689,13 @@ static void update_shares(int cpu)
* Iterates the task_group tree in a bottom up fashion, see
* list_add_leaf_cfs_rq() for details.
*/
- for_each_leaf_cfs_rq(rq, cfs_rq)
+ for_each_leaf_cfs_rq(rq, cfs_rq) {
+ /* throttled entities do not contribute to load */
+ if (throttled_hierarchy(cfs_rq))
+ continue;
+
update_shares_cpu(cfs_rq->tg, cpu);
+ }
rcu_read_unlock();
}
@@ -2659,9 +2745,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
u64 rem_load, moved_load;
/*
- * empty group
+ * empty group or part of a throttled hierarchy
*/
- if (!busiest_cfs_rq->task_weight)
+ if (!busiest_cfs_rq->task_weight ||
+ throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu))
continue;
rem_load = (u64)rem_load_move * busiest_weight;
OpenPOWER on IntegriCloud