summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cpuset.c 35
-rw-r--r-- kernel/exit.c 14
-rw-r--r-- kernel/posix-timers.c 8
-rw-r--r-- kernel/sched.c 64
-rw-r--r-- kernel/signal.c 4
-rw-r--r-- kernel/sysctl.c 2
-rw-r--r-- kernel/workqueue.c 2
7 files changed, 86 insertions, 43 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2a75e44..fe2f71f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1554,7 +1554,7 @@ struct ctr_struct {
* when reading out p->cpuset, as we don't really care if it changes
* on the next cycle, and we are not going to try to dereference it.
*/
-static inline int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
+static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
{
int n = 0;
struct task_struct *g, *p;
@@ -2150,6 +2150,33 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
}
/**
+ * cpuset_lock - lock out any changes to cpuset structures
+ *
+ * The out of memory (oom) code must keep cpusets from changing
+ * while it scans the tasklist for a task in an overlapping cpuset.
+ * This routine exposes callback_sem so the oom code can take it
+ * before locking the task list.  Lock ordering: tasklist_lock is
+ * a spinlock, so it must be taken inside (after) callback_sem.
+ * Release with cpuset_unlock().
+ */
+
+void cpuset_lock(void)
+{
+ down(&callback_sem);
+}
+
+/**
+ * cpuset_unlock - release lock on cpuset changes
+ *
+ * Releases callback_sem, undoing a previous cpuset_lock() call.
+ */
+
+void cpuset_unlock(void)
+{
+ up(&callback_sem);
+}
+
+/**
* cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
* @p: pointer to task_struct of some other task.
*
@@ -2158,7 +2185,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
* determine if task @p's memory usage might impact the memory
* available to the current task.
*
- * Acquires callback_sem - not suitable for calling from a fast path.
+ * Call while holding callback_sem.
**/
int cpuset_excl_nodes_overlap(const struct task_struct *p)
@@ -2166,8 +2193,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
int overlap = 0; /* do cpusets overlap? */
- down(&callback_sem);
-
task_lock(current);
if (current->flags & PF_EXITING) {
task_unlock(current);
@@ -2186,8 +2211,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
done:
- up(&callback_sem);
-
return overlap;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index f8e609f..93cee36 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -193,7 +193,7 @@ int is_orphaned_pgrp(int pgrp)
return retval;
}
-static inline int has_stopped_jobs(int pgrp)
+static int has_stopped_jobs(int pgrp)
{
int retval = 0;
struct task_struct *p;
@@ -230,7 +230,7 @@ static inline int has_stopped_jobs(int pgrp)
*
* NOTE that reparent_to_init() gives the caller full capabilities.
*/
-static inline void reparent_to_init(void)
+static void reparent_to_init(void)
{
write_lock_irq(&tasklist_lock);
@@ -244,7 +244,9 @@ static inline void reparent_to_init(void)
/* Set the exit signal to SIGCHLD so we signal init on exit */
current->exit_signal = SIGCHLD;
- if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
+ if ((current->policy == SCHED_NORMAL ||
+ current->policy == SCHED_BATCH)
+ && (task_nice(current) < 0))
set_user_nice(current, 0);
/* cpus_allowed? */
/* rt_priority? */
@@ -367,7 +369,7 @@ void daemonize(const char *name, ...)
EXPORT_SYMBOL(daemonize);
-static inline void close_files(struct files_struct * files)
+static void close_files(struct files_struct * files)
{
int i, j;
struct fdtable *fdt;
@@ -541,7 +543,7 @@ static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_re
p->real_parent = reaper;
}
-static inline void reparent_thread(task_t *p, task_t *father, int traced)
+static void reparent_thread(task_t *p, task_t *father, int traced)
{
/* We don't want people slaying init. */
if (p->exit_signal != -1)
@@ -605,7 +607,7 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced)
* group, and if no such member exists, give it to
* the global child reaper process (ie "init")
*/
-static inline void forget_original_parent(struct task_struct * father,
+static void forget_original_parent(struct task_struct * father,
struct list_head *to_release)
{
struct task_struct *p, *reaper = father;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 9e66e61..197208b 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -192,7 +192,7 @@ static inline int common_clock_set(const clockid_t which_clock,
return do_sys_settimeofday(tp, NULL);
}
-static inline int common_timer_create(struct k_itimer *new_timer)
+static int common_timer_create(struct k_itimer *new_timer)
{
hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock);
new_timer->it.real.timer.data = new_timer;
@@ -361,7 +361,7 @@ static int posix_timer_fn(void *data)
return ret;
}
-static inline struct task_struct * good_sigevent(sigevent_t * event)
+static struct task_struct * good_sigevent(sigevent_t * event)
{
struct task_struct *rtn = current->group_leader;
@@ -687,7 +687,7 @@ sys_timer_getoverrun(timer_t timer_id)
/* Set a POSIX.1b interval timer. */
/* timr->it_lock is taken. */
-static inline int
+static int
common_timer_set(struct k_itimer *timr, int flags,
struct itimerspec *new_setting, struct itimerspec *old_setting)
{
@@ -829,7 +829,7 @@ retry_delete:
/*
* return timer owned by the process, used by exit_itimers
*/
-static inline void itimer_delete(struct k_itimer *timer)
+static void itimer_delete(struct k_itimer *timer)
{
unsigned long flags;
diff --git a/kernel/sched.c b/kernel/sched.c
index c9dec2a..788ecce 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -521,7 +521,7 @@ static inline void sched_info_dequeued(task_t *t)
* long it was waiting to run. We also note when it began so that we
* can keep stats on how long its timeslice is.
*/
-static inline void sched_info_arrive(task_t *t)
+static void sched_info_arrive(task_t *t)
{
unsigned long now = jiffies, diff = 0;
struct runqueue *rq = task_rq(t);
@@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
unsigned long long __sleep_time = now - p->timestamp;
unsigned long sleep_time;
- if (__sleep_time > NS_MAX_SLEEP_AVG)
- sleep_time = NS_MAX_SLEEP_AVG;
- else
- sleep_time = (unsigned long)__sleep_time;
+ if (unlikely(p->policy == SCHED_BATCH))
+ sleep_time = 0;
+ else {
+ if (__sleep_time > NS_MAX_SLEEP_AVG)
+ sleep_time = NS_MAX_SLEEP_AVG;
+ else
+ sleep_time = (unsigned long)__sleep_time;
+ }
if (likely(sleep_time > 0)) {
/*
@@ -1003,7 +1007,7 @@ void kick_process(task_t *p)
* We want to under-estimate the load of migration sources, to
* balance conservatively.
*/
-static inline unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static unsigned long __source_load(int cpu, int type, enum idle_type idle)
{
runqueue_t *rq = cpu_rq(cpu);
unsigned long running = rq->nr_running;
@@ -1866,7 +1870,7 @@ void sched_exec(void)
* pull_task - move a task from a remote runqueue to the local runqueue.
* Both runqueues must be locked.
*/
-static inline
+static
void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
{
@@ -1888,7 +1892,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
/*
* can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
*/
-static inline
+static
int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
struct sched_domain *sd, enum idle_type idle,
int *all_pinned)
@@ -2374,7 +2378,7 @@ out_balanced:
* idle_balance is called by schedule() if this_cpu is about to become
* idle. Attempts to pull tasks from other CPUs.
*/
-static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
+static void idle_balance(int this_cpu, runqueue_t *this_rq)
{
struct sched_domain *sd;
@@ -2758,7 +2762,7 @@ static inline void wakeup_busy_runqueue(runqueue_t *rq)
resched_task(rq->idle);
}
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
{
struct sched_domain *tmp, *sd = NULL;
cpumask_t sibling_map;
@@ -2812,7 +2816,7 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
return p->time_slice * (100 - sd->per_cpu_gain) / 100;
}
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
{
struct sched_domain *tmp, *sd = NULL;
cpumask_t sibling_map;
@@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice)
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is
- * not SCHED_NORMAL:
+ * not SCHED_NORMAL/SCHED_BATCH:
*/
if (rt_task(p)) {
p->static_prio = NICE_TO_PRIO(nice);
@@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
BUG_ON(p->array);
p->policy = policy;
p->rt_priority = prio;
- if (policy != SCHED_NORMAL)
+ if (policy != SCHED_NORMAL && policy != SCHED_BATCH) {
p->prio = MAX_RT_PRIO-1 - p->rt_priority;
- else
+ } else {
p->prio = p->static_prio;
+ /*
+ * SCHED_BATCH tasks are treated as perpetual CPU hogs:
+ */
+ if (policy == SCHED_BATCH)
+ p->sleep_avg = 0;
+ }
}
/**
@@ -3733,29 +3743,35 @@ recheck:
if (policy < 0)
policy = oldpolicy = p->policy;
else if (policy != SCHED_FIFO && policy != SCHED_RR &&
- policy != SCHED_NORMAL)
- return -EINVAL;
+ policy != SCHED_NORMAL && policy != SCHED_BATCH)
+ return -EINVAL;
/*
* Valid priorities for SCHED_FIFO and SCHED_RR are
- * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
+ * SCHED_BATCH is 0.
*/
if (param->sched_priority < 0 ||
(p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
(!p->mm && param->sched_priority > MAX_RT_PRIO-1))
return -EINVAL;
- if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
+ if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
+ != (param->sched_priority == 0))
return -EINVAL;
/*
* Allow unprivileged RT tasks to decrease priority:
*/
if (!capable(CAP_SYS_NICE)) {
- /* can't change policy */
- if (policy != p->policy &&
- !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
+ /*
+ * can't change policy, except between SCHED_NORMAL
+ * and SCHED_BATCH:
+ */
+ if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
+ (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
+ !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
return -EPERM;
/* can't increase priority */
- if (policy != SCHED_NORMAL &&
+ if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
param->sched_priority > p->rt_priority &&
param->sched_priority >
p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
@@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
ret = MAX_USER_RT_PRIO-1;
break;
case SCHED_NORMAL:
+ case SCHED_BATCH:
ret = 0;
break;
}
@@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy)
ret = 1;
break;
case SCHED_NORMAL:
+ case SCHED_BATCH:
ret = 0;
}
return ret;
@@ -5990,7 +6008,7 @@ next_sg:
* Detach sched domains from a group of cpus specified in cpu_map
* These cpus will now be attached to the NULL domain
*/
-static inline void detach_destroy_domains(const cpumask_t *cpu_map)
+static void detach_destroy_domains(const cpumask_t *cpu_map)
{
int i;
diff --git a/kernel/signal.c b/kernel/signal.c
index 1da2e74..5dafbd3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -476,7 +476,7 @@ unblock_all_signals(void)
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
-static inline int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
{
struct sigqueue *q, *first = NULL;
int still_pending = 0;
@@ -1881,7 +1881,7 @@ do_signal_stop(int signr)
* We return zero if we still hold the siglock and should look
* for another signal without checking group_stop_count again.
*/
-static inline int handle_group_stop(void)
+static int handle_group_stop(void)
{
int stop_count;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62d4d95..f5d69b6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -648,7 +648,7 @@ static ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
-#if defined(CONFIG_S390)
+#if defined(CONFIG_S390) && defined(CONFIG_SMP)
{
.ctl_name = KERN_SPIN_RETRY,
.procname = "spin_retry",
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 82c4fa7..b052e2c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -147,7 +147,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
return ret;
}
-static inline void run_workqueue(struct cpu_workqueue_struct *cwq)
+static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
unsigned long flags;
OpenPOWER on IntegriCloud