From db66d756c74acb886c51f11b501c2fe622018a0a Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 18 Apr 2014 01:59:15 +0900 Subject: sched/docbook: Fix 'make htmldocs' warnings caused by missing description When 'flags' argument to sched_{set,get}attr() syscalls were added in: 6d35ab48090b ("sched: Add 'flags' argument to sched_{set,get}attr() syscalls") no description for 'flags' was added. It causes the following warnings on "make htmldocs": Warning(/kernel/sched/core.c:3645): No description found for parameter 'flags' Warning(/kernel/sched/core.c:3789): No description found for parameter 'flags' Signed-off-by: Masanari Iida Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/1397753955-2914-1-git-send-email-standby24x7@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45e..9fe2190 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3639,6 +3639,7 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) * sys_sched_setattr - same as above, but with extended sched_attr * @pid: the pid in question. * @uattr: structure containing the extended parameters. + * @flags: for future extension. */ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, unsigned int, flags) @@ -3783,6 +3784,7 @@ err_size: * @pid: the pid in question. * @uattr: structure containing the extended parameters. * @size: sizeof(attr) for fwd/bwd comp. + * @flags: for future extension. */ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, unsigned int, size, unsigned int, flags) -- cgit v1.1 From 722a9f9299ca720a3f14660e7c0dce7b76a9cb42 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 2 May 2014 00:44:38 +0200 Subject: asmlinkage: Add explicit __visible to drivers/*, lib/*, kernel/* As requested by Linus add explicit __visible to the asmlinkage users. 
This marks functions visible to assembler. Tree sweep for rest of tree. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1398984278-29319-4-git-send-email-andi@firstfloor.org Signed-off-by: H. Peter Anvin --- kernel/sched/core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45e..d9d8ece 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq) * schedule_tail - first thing a freshly forked thread must call. * @prev: the thread we just switched away from. */ -asmlinkage void schedule_tail(struct task_struct *prev) +asmlinkage __visible void schedule_tail(struct task_struct *prev) __releases(rq->lock) { struct rq *rq = this_rq(); @@ -2741,7 +2741,7 @@ static inline void sched_submit_work(struct task_struct *tsk) blk_schedule_flush_plug(tsk); } -asmlinkage void __sched schedule(void) +asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; @@ -2751,7 +2751,7 @@ asmlinkage void __sched schedule(void) EXPORT_SYMBOL(schedule); #ifdef CONFIG_CONTEXT_TRACKING -asmlinkage void __sched schedule_user(void) +asmlinkage __visible void __sched schedule_user(void) { /* * If we come here after a random call to set_need_resched(), @@ -2783,7 +2783,7 @@ void __sched schedule_preempt_disabled(void) * off of preempt_enable. Kernel preemptions off return from interrupt * occur there and call schedule directly. */ -asmlinkage void __sched notrace preempt_schedule(void) +asmlinkage __visible void __sched notrace preempt_schedule(void) { /* * If there is a non-zero preempt_count or interrupts are disabled, @@ -2813,7 +2813,7 @@ EXPORT_SYMBOL(preempt_schedule); * Note, that this is called and return with irqs disabled. This will * protect us against recursive calling from irq. 
*/ -asmlinkage void __sched preempt_schedule_irq(void) +asmlinkage __visible void __sched preempt_schedule_irq(void) { enum ctx_state prev_state; -- cgit v1.1 From 5bfd126e80dca70431aef8fdbc1cf14535f3c338 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 15 Apr 2014 13:49:04 +0200 Subject: sched/deadline: Fix sched_yield() behavior yield_task_dl() is broken: o it forces current to be throttled by setting its runtime to zero; o it sets current's dl_se->dl_new to one, expecting that dl_task_timer() will queue it back with proper parameters at replenish time. Unfortunately, dl_task_timer() has this check at the very beginning: if (!dl_task(p) || dl_se->dl_new) goto unlock; So, it just bails out and the task is never replenished. It actually yielded forever. To fix this, introduce a new flag indicating that the task properly yielded the CPU before its current runtime expired. While this is somewhat overkill at the moment, the flag would be useful in the future to discriminate between "good" jobs (whose remaining runtime could be reclaimed, i.e. recycled) and "bad" jobs (for which dl_throttled has been set) that needed to be stopped. 
Reported-by: yjay.kim Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140429103953.e68eba1b2ac3309214e3dc5a@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9fe2190..e62c65a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3124,6 +3124,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); dl_se->dl_throttled = 0; dl_se->dl_new = 1; + dl_se->dl_yielded = 0; } static void __setscheduler_params(struct task_struct *p, -- cgit v1.1 From 6ccdc84b81a0a6c09a7f0427761d2f8cecfc2218 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Apr 2014 12:00:47 +0200 Subject: sched: Skip double execution of pick_next_task_fair() Tim wrote: "The current code will call pick_next_task_fair a second time in the slow path if we did not pull any task in our first try. This is really unnecessary as we already know no task can be pulled and it doubles the delay for the cpu to enter idle. We instrumented some network workloads and that saw that pick_next_task_fair is frequently called twice before a cpu enters idle. The call to pick_next_task_fair can add non trivial latency as it calls load_balance which runs find_busiest_group on an hierarchy of sched domains spanning the cpus for a large system. For some 4 socket systems, we saw almost 0.25 msec spent per call of pick_next_task_fair before a cpu can be idled." Optimize the second call away for the common case and document the dependency. 
Reported-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Len Brown Link: http://lkml.kernel.org/r/20140424100047.GP11096@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e62c65a..28921ec 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2592,8 +2592,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev) if (likely(prev->sched_class == class && rq->nr_running == rq->cfs.h_nr_running)) { p = fair_sched_class.pick_next_task(rq, prev); - if (likely(p && p != RETRY_TASK)) - return p; + if (unlikely(p == RETRY_TASK)) + goto again; + + /* assumes fair_sched_class->next == idle_sched_class */ + if (unlikely(!p)) + p = idle_sched_class.pick_next_task(rq, prev); + + return p; } again: -- cgit v1.1 From 2b4cfe64dee0d84506b951d81bf55d9891744d25 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Wed, 23 Apr 2014 18:30:34 -0700 Subject: sched/numa: Initialize newidle balance stats in sd_numa_init() Also initialize the per-sd variables for newidle load balancing in sd_numa_init(). 
Signed-off-by: Jason Low Acked-by: morten.rasmussen@arm.com Cc: daniel.lezcano@linaro.org Cc: alex.shi@linaro.org Cc: preeti@linux.vnet.ibm.com Cc: efault@gmx.de Cc: vincent.guittot@linaro.org Cc: aswin@hp.com Cc: chegu_vinod@hp.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1398303035-18255-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 28921ec..13584f1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6026,6 +6026,8 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu) , .last_balance = jiffies, .balance_interval = sd_weight, + .max_newidle_lb_cost = 0, + .next_decay_max_lb_cost = jiffies, }; SD_INIT_NAME(sd, NUMA); sd->private = &tl->data; -- cgit v1.1 From 143cf23df25b7082cd706c3c53188e741e7881c3 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Fri, 9 May 2014 16:54:15 +0200 Subject: sched: Make sched_setattr() correctly return -EFBIG The documented[1] behavior of sched_attr() in the proposed man page text is: sched_attr::size must be set to the size of the structure, as in sizeof(struct sched_attr), if the provided structure is smaller than the kernel structure, any additional fields are assumed '0'. If the provided structure is larger than the kernel structure, the kernel verifies all additional fields are '0' if not the syscall will fail with -E2BIG. As currently implemented, sched_copy_attr() returns -EFBIG for this case, but the logic in sys_sched_setattr() converts that error to -EFAULT. This patch fixes the behavior. 
[1] http://thread.gmane.org/gmane.linux.kernel/1615615/focus=1697760 Signed-off-by: Michael Kerrisk Signed-off-by: Peter Zijlstra Cc: Cc: Linus Torvalds Link: http://lkml.kernel.org/r/536CEC17.9070903@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 13584f1..f2205f0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3658,8 +3658,9 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, if (!uattr || pid < 0 || flags) return -EINVAL; - if (sched_copy_attr(uattr, &attr)) - return -EFAULT; + retval = sched_copy_attr(uattr, &attr); + if (retval) + return retval; rcu_read_lock(); retval = -ESRCH; -- cgit v1.1 From dbdb22754fde671dc93d2fae06f8be113d47f2fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 May 2014 10:49:03 +0200 Subject: sched: Disallow sched_attr::sched_policy < 0 The scheduler uses policy=-1 to preserve the current policy state to implement sys_sched_setparam(), this got exposed to userspace by accident through sys_sched_setattr(), cure this. 
Reported-by: Michael Kerrisk Signed-off-by: Peter Zijlstra Acked-by: Michael Kerrisk Cc: Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140509085311.GJ30445@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f2205f0..cdefcf7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3662,6 +3662,9 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, if (retval) return retval; + if (attr.sched_policy < 0) + return -EINVAL; + rcu_read_lock(); retval = -ESRCH; p = find_process_by_pid(pid); -- cgit v1.1 From ce5f7f8200ca2504f6f290044393d73ca314965a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 May 2014 22:50:34 +0200 Subject: sched/deadline: Change sched_getparam() behaviour vs SCHED_DEADLINE The way we read POSIX one should only call sched_getparam() when sched_getscheduler() returns either SCHED_FIFO or SCHED_RR. Given that we currently return sched_param::sched_priority=0 for all others, extend the same behaviour to SCHED_DEADLINE. 
Requested-by: Michael Kerrisk Signed-off-by: Peter Zijlstra Acked-by: Michael Kerrisk Cc: Dario Faggioli Cc: linux-man Cc: "Michael Kerrisk (man-pages)" Cc: Juri Lelli Cc: Linus Torvalds Cc: Link: http://lkml.kernel.org/r/20140512205034.GH13467@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cdefcf7..f3f08bf 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3713,7 +3713,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) */ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) { - struct sched_param lp; + struct sched_param lp = { .sched_priority = 0 }; struct task_struct *p; int retval; @@ -3730,11 +3730,8 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) if (retval) goto out_unlock; - if (task_has_dl_policy(p)) { - retval = -EINVAL; - goto out_unlock; - } - lp.sched_priority = p->rt_priority; + if (task_has_rt_policy(p)) + lp.sched_priority = p->rt_priority; rcu_read_unlock(); /* -- cgit v1.1 From b0827819b0da4acfbc1df1e05edcf50efd07cbd1 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 13 May 2014 14:11:31 +0200 Subject: sched/deadline: Restrict user params max value to 2^63 ns Michael Kerrisk noticed that creating SCHED_DEADLINE reservations with certain parameters (e.g, a runtime of something near 2^64 ns) can cause a system freeze for some amount of time. The problem is that in the interface we have u64 sched_runtime; while internally we need to have a signed runtime (to cope with budget overruns) s64 runtime; At the time we setup a new dl_entity we copy the first value in the second. The cast turns out with negative values when sched_runtime is too big, and this causes the scheduler to go crazy right from the start. 
Moreover, considering how we deal with deadlines wraparound (s64)(a - b) < 0 we also have to restrict acceptable values for sched_{deadline,period}. This patch fixes the thing checking that user parameters are always below 2^63 ns (still large enough for everyone). It also rewrites other conditions that we check, since in __checkparam_dl we don't have to deal with deadline wraparounds and what we have now erroneously fails when the difference between values is too big. Reported-by: Michael Kerrisk Suggested-by: Peter Zijlstra Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra Cc: Cc: Dario Faggioli Cc: Dave Jones Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140513141131.20d944f81633ee937f256385@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f3f08bf..44e00ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3195,17 +3195,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr) * We ask for the deadline not being zero, and greater or equal * than the runtime, as well as the period of being zero or * greater than deadline. Furthermore, we have to be sure that - * user parameters are above the internal resolution (1us); we - * check sched_runtime only since it is always the smaller one. + * user parameters are above the internal resolution of 1us (we + * check sched_runtime only since it is always the smaller one) and + * below 2^63 ns (we have to check both sched_deadline and + * sched_period, as the latter can be zero). 
*/ static bool __checkparam_dl(const struct sched_attr *attr) { - return attr && attr->sched_deadline != 0 && - (attr->sched_period == 0 || - (s64)(attr->sched_period - attr->sched_deadline) >= 0) && - (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 && - attr->sched_runtime >= (2 << (DL_SCALE - 1)); + /* deadline != 0 */ + if (attr->sched_deadline == 0) + return false; + + /* + * Since we truncate DL_SCALE bits, make sure we're at least + * that big. + */ + if (attr->sched_runtime < (1ULL << DL_SCALE)) + return false; + + /* + * Since we use the MSB for wrap-around and sign issues, make + * sure it's not set (mind that period can be equal to zero). + */ + if (attr->sched_deadline & (1ULL << 63) || + attr->sched_period & (1ULL << 63)) + return false; + + /* runtime <= deadline <= period (if period != 0) */ + if ((attr->sched_period != 0 && + attr->sched_period < attr->sched_deadline) || + attr->sched_deadline < attr->sched_runtime) + return false; + + return true; } /* -- cgit v1.1 From 6acbfb96976fc3350e30d964acb1dbbdf876d55e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 16 May 2014 11:50:42 +0800 Subject: sched: Fix hotplug vs. set_cpus_allowed_ptr() Lai found that: WARNING: CPU: 1 PID: 13 at arch/x86/kernel/smp.c:124 native_smp_send_reschedule+0x2d/0x4b() ... migration_cpu_stop+0x1d/0x22 was caused by set_cpus_allowed_ptr() assuming that cpu_active_mask is always a sub-set of cpu_online_mask. This isn't true since 5fbd036b552f ("sched: Cleanup cpu_active madness"). So set active and online at the same time to avoid this particular problem. Fixes: 5fbd036b552f ("sched: Cleanup cpu_active madness") Signed-off-by: Lai Jiangshan Signed-off-by: Peter Zijlstra Cc: Andrew Morton Cc: Gautham R. Shenoy Cc: Linus Torvalds Cc: Michael wang Cc: Paul Gortmaker Cc: Rafael J. Wysocki Cc: Srivatsa S. 
Bhat Cc: Toshi Kani Link: http://lkml.kernel.org/r/53758B12.8060609@cn.fujitsu.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 44e00ab..86f3890 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5076,7 +5076,6 @@ static int sched_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: case CPU_DOWN_FAILED: set_cpu_active((long)hcpu, true); return NOTIFY_OK; -- cgit v1.1