diff options
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 248 |
1 files changed, 180 insertions, 68 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 9573c33..98345e4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) return se->parent; } +/* return depth at which a sched entity is present in the hierarchy */ +static inline int depth_se(struct sched_entity *se) +{ + int depth = 0; + + for_each_sched_entity(se) + depth++; + + return depth; +} + +static void +find_matching_se(struct sched_entity **se, struct sched_entity **pse) +{ + int se_depth, pse_depth; + + /* + * preemption test can be made between sibling entities who are in the + * same cfs_rq i.e who have a common parent. Walk up the hierarchy of + * both tasks until we find their ancestors who are siblings of common + * parent. + */ + + /* First walk up until both entities are at same depth */ + se_depth = depth_se(*se); + pse_depth = depth_se(*pse); + + while (se_depth > pse_depth) { + se_depth--; + *se = parent_entity(*se); + } + + while (pse_depth > se_depth) { + pse_depth--; + *pse = parent_entity(*pse); + } + + while (!is_same_group(*se, *pse)) { + *se = parent_entity(*se); + *pse = parent_entity(*pse); + } +} + #else /* CONFIG_FAIR_GROUP_SCHED */ static inline struct rq *rq_of(struct cfs_rq *cfs_rq) @@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) return NULL; } +static inline void +find_matching_se(struct sched_entity **se, struct sched_entity **pse) +{ +} + #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) return se->vruntime - cfs_rq->min_vruntime; } +static void update_min_vruntime(struct cfs_rq *cfs_rq) +{ + u64 vruntime = cfs_rq->min_vruntime; + + if (cfs_rq->curr) + vruntime = cfs_rq->curr->vruntime; + + if (cfs_rq->rb_leftmost) { + struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost, + struct sched_entity, + run_node); + + if (vruntime == cfs_rq->min_vruntime) + vruntime = se->vruntime; + else + vruntime = min_vruntime(vruntime, se->vruntime); + } + + cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); +} + /* * Enqueue an entity into the rb-tree: */ @@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) * Maintain a cache of leftmost tree entries (it is frequently * used): */ - if (leftmost) { + if (leftmost) cfs_rq->rb_leftmost = &se->run_node; - /* - * maintain cfs_rq->min_vruntime to be a monotonic increasing - * value tracking the leftmost vruntime in the tree. - */ - cfs_rq->min_vruntime = - max_vruntime(cfs_rq->min_vruntime, se->vruntime); - } rb_link_node(&se->run_node, parent, link); rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); @@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { if (cfs_rq->rb_leftmost == &se->run_node) { struct rb_node *next_node; - struct sched_entity *next; next_node = rb_next(&se->run_node); cfs_rq->rb_leftmost = next_node; - - if (next_node) { - next = rb_entry(next_node, - struct sched_entity, run_node); - cfs_rq->min_vruntime = - max_vruntime(cfs_rq->min_vruntime, - next->vruntime); - } } - if (cfs_rq->next == se) - cfs_rq->next = NULL; - rb_erase(&se->run_node, &cfs_rq->tasks_timeline); } -static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) -{ - return cfs_rq->rb_leftmost; -} - static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) { - return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); + struct rb_node *left = cfs_rq->rb_leftmost; + + if (!left) + return NULL; + + return rb_entry(left, struct sched_entity, run_node); } -static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); @@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, schedstat_add(cfs_rq, exec_clock, delta_exec); delta_exec_weighted = calc_delta_fair(delta_exec, curr); curr->vruntime += delta_exec_weighted; + update_min_vruntime(cfs_rq); } static void update_curr(struct cfs_rq *cfs_rq) @@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) { - u64 vruntime; - - if (first_fair(cfs_rq)) { - vruntime = min_vruntime(cfs_rq->min_vruntime, - __pick_next_entity(cfs_rq)->vruntime); - } else - vruntime = cfs_rq->min_vruntime; + u64 vruntime = cfs_rq->min_vruntime; /* * The 'current' period is already promised to the current tasks, @@ -671,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) __enqueue_entity(cfs_rq, se); } +static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + if (cfs_rq->last == se) + cfs_rq->last = NULL; + + if (cfs_rq->next == se) + cfs_rq->next = NULL; +} + static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) { @@ -693,9 +747,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) #endif } + clear_buddies(cfs_rq, se); + if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); + update_min_vruntime(cfs_rq); } /* @@ -742,29 +799,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) se->prev_sum_exec_runtime = se->sum_exec_runtime; } -static struct sched_entity * -pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) -{ - struct rq *rq = rq_of(cfs_rq); - u64 pair_slice = rq->clock - cfs_rq->pair_start; - - if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) { - cfs_rq->pair_start = rq->clock; - return se; - } - - return cfs_rq->next; -} +static int +wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { - struct sched_entity *se = NULL; + struct sched_entity *se = __pick_next_entity(cfs_rq); - if (first_fair(cfs_rq)) { - se = __pick_next_entity(cfs_rq); - se = pick_next(cfs_rq, se); - set_next_entity(cfs_rq, se); - } + if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1) + return cfs_rq->next; + + if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1) + return cfs_rq->last; return se; } @@ -936,6 +982,8 @@ static void yield_task_fair(struct rq *rq) if (unlikely(cfs_rq->nr_running == 1)) return; + clear_buddies(cfs_rq, se); + if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { update_rq_clock(rq); /* @@ -1122,10 +1170,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) return 0; - if (!sync && sched_feat(SYNC_WAKEUPS) && - curr->se.avg_overlap < sysctl_sched_migration_cost && - p->se.avg_overlap < sysctl_sched_migration_cost) - sync = 1; + if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || + p->se.avg_overlap > sysctl_sched_migration_cost)) + sync = 0; /* * If sync wakeup then subtract the (maximum possible) @@ -1244,33 +1291,88 @@ static unsigned long wakeup_gran(struct sched_entity *se) * More easily preempt - nice tasks, while not making it harder for * + nice tasks. */ - if (sched_feat(ASYM_GRAN)) - gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); + if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD) + gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); return gran; } /* + * Should 'se' preempt 'curr'. + * + * |s1 + * |s2 + * |s3 + * g + * |<--->|c + * + * w(c, s1) = -1 + * w(c, s2) = 0 + * w(c, s3) = 1 + * + */ +static int +wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) +{ + s64 gran, vdiff = curr->vruntime - se->vruntime; + + if (vdiff <= 0) + return -1; + + gran = wakeup_gran(curr); + if (vdiff > gran) + return 1; + + return 0; +} + +static void set_last_buddy(struct sched_entity *se) +{ + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; +} + +static void set_next_buddy(struct sched_entity *se) +{ + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; +} + +/* * Preempt the current task with a newly woken task if needed: */ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) { struct task_struct *curr = rq->curr; - struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se, *pse = &p->se; - s64 delta_exec; if (unlikely(rt_prio(p->prio))) { + struct cfs_rq *cfs_rq = task_cfs_rq(curr); + update_rq_clock(rq); update_curr(cfs_rq); resched_task(curr); return; } + if (unlikely(p->sched_class != &fair_sched_class)) + return; + if (unlikely(se == pse)) return; - cfs_rq_of(pse)->next = pse; + /* + * Only set the backward buddy when the current task is still on the + * rq. This can happen when a wakeup gets interleaved with schedule on + * the ->pre_schedule() or idle_balance() point, either of which can + * drop the rq lock. + * + * Also, during early boot the idle thread is in the fair class, for + * obvious reasons its a bad idea to schedule back to the idle thread. + */ + if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) + set_last_buddy(se); + set_next_buddy(pse); /* * We can come here with TIF_NEED_RESCHED already set from new task @@ -1296,9 +1398,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) return; } - delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; - if (delta_exec > wakeup_gran(pse)) - resched_task(curr); + find_matching_se(&se, &pse); + + while (se) { + BUG_ON(!pse); + + if (wakeup_preempt_entity(se, pse) == 1) { + resched_task(curr); + break; + } + + se = parent_entity(se); + pse = parent_entity(pse); + } } static struct task_struct *pick_next_task_fair(struct rq *rq) @@ -1312,6 +1424,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) do { se = pick_next_entity(cfs_rq); + set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); @@ -1594,9 +1707,6 @@ static const struct sched_class fair_sched_class = { .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_fair, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_wakeup, @@ -1604,6 +1714,8 @@ static const struct sched_class fair_sched_class = { .put_prev_task = put_prev_task_fair, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_fair, + .load_balance = load_balance_fair, .move_one_task = move_one_task_fair, #endif |