summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c41
-rw-r--r--kernel/irq/dummychip.c1
-rw-r--r--kernel/locking/rtmutex.c12
-rw-r--r--kernel/sched/core.c59
-rw-r--r--kernel/time/clockevents.c6
-rw-r--r--kernel/time/hrtimer.c14
-rw-r--r--kernel/trace/trace_output.c3
-rw-r--r--kernel/watchdog.c20
8 files changed, 94 insertions, 62 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81aa3a4..1a3bf48 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx)
* Those places that change perf_event::ctx will hold both
* perf_event_ctx::mutex of the 'old' and 'new' ctx value.
*
- * Lock ordering is by mutex address. There is one other site where
- * perf_event_context::mutex nests and that is put_event(). But remember that
- * that is a parent<->child context relation, and migration does not affect
- * children, therefore these two orderings should not interact.
+ * Lock ordering is by mutex address. There are two other sites where
+ * perf_event_context::mutex nests and those are:
+ *
+ * - perf_event_exit_task_context() [ child , 0 ]
+ * __perf_event_exit_task()
+ * sync_child_event()
+ * put_event() [ parent, 1 ]
+ *
+ * - perf_event_init_context() [ parent, 0 ]
+ * inherit_task_group()
+ * inherit_group()
+ * inherit_event()
+ * perf_event_alloc()
+ * perf_init_event()
+ * perf_try_init_event() [ child , 1 ]
+ *
+ * While it appears there is an obvious deadlock here -- the parent and child
+ * nesting levels are inverted between the two. This is in fact safe because
+ * life-time rules separate them. That is an exiting task cannot fork, and a
+ * spawning task cannot (yet) exit.
+ *
+ * But remember that that these are parent<->child context relations, and
+ * migration does not affect children, therefore these two orderings should not
+ * interact.
*
* The change in perf_event::ctx does not affect children (as claimed above)
* because the sys_perf_event_open() case will install a new event and break
@@ -3657,9 +3677,6 @@ static void perf_remove_from_owner(struct perf_event *event)
}
}
-/*
- * Called when the last reference to the file is gone.
- */
static void put_event(struct perf_event *event)
{
struct perf_event_context *ctx;
@@ -3697,6 +3714,9 @@ int perf_event_release_kernel(struct perf_event *event)
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+/*
+ * Called when the last reference to the file is gone.
+ */
static int perf_release(struct inode *inode, struct file *file)
{
put_event(file->private_data);
@@ -7364,7 +7384,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
return -ENODEV;
if (event->group_leader != event) {
- ctx = perf_event_ctx_lock(event->group_leader);
+ /*
+ * This ctx->mutex can nest when we're called through
+ * inheritance. See the perf_event_ctx_lock_nested() comment.
+ */
+ ctx = perf_event_ctx_lock_nested(event->group_leader,
+ SINGLE_DEPTH_NESTING);
BUG_ON(!ctx);
}
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
index 988dc58..2feb6fe 100644
--- a/kernel/irq/dummychip.c
+++ b/kernel/irq/dummychip.c
@@ -57,5 +57,6 @@ struct irq_chip dummy_irq_chip = {
.irq_ack = noop,
.irq_mask = noop,
.irq_unmask = noop,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
EXPORT_SYMBOL_GPL(dummy_irq_chip);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b732793..b025295 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -265,15 +265,17 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
}
/*
- * Called by sched_setscheduler() to check whether the priority change
- * is overruled by a possible priority boosting.
+ * Called by sched_setscheduler() to get the priority which will be
+ * effective after the change.
*/
-int rt_mutex_check_prio(struct task_struct *task, int newprio)
+int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
{
if (!task_has_pi_waiters(task))
- return 0;
+ return newprio;
- return task_top_pi_waiter(task)->task->prio <= newprio;
+ if (task_top_pi_waiter(task)->task->prio <= newprio)
+ return task_top_pi_waiter(task)->task->prio;
+ return newprio;
}
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe22f75..1236732 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3300,15 +3300,18 @@ static void __setscheduler_params(struct task_struct *p,
/* Actually do priority change: must hold pi & rq lock. */
static void __setscheduler(struct rq *rq, struct task_struct *p,
- const struct sched_attr *attr)
+ const struct sched_attr *attr, bool keep_boost)
{
__setscheduler_params(p, attr);
/*
- * If we get here, there was no pi waiters boosting the
- * task. It is safe to use the normal prio.
+ * Keep a potential priority boosting if called from
+ * sched_setscheduler().
*/
- p->prio = normal_prio(p);
+ if (keep_boost)
+ p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
+ else
+ p->prio = normal_prio(p);
if (dl_prio(p->prio))
p->sched_class = &dl_sched_class;
@@ -3408,7 +3411,7 @@ static int __sched_setscheduler(struct task_struct *p,
int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
MAX_RT_PRIO - 1 - attr->sched_priority;
int retval, oldprio, oldpolicy = -1, queued, running;
- int policy = attr->sched_policy;
+ int new_effective_prio, policy = attr->sched_policy;
unsigned long flags;
const struct sched_class *prev_class;
struct rq *rq;
@@ -3590,15 +3593,14 @@ change:
oldprio = p->prio;
/*
- * Special case for priority boosted tasks.
- *
- * If the new priority is lower or equal (user space view)
- * than the current (boosted) priority, we just store the new
+ * Take priority boosted tasks into account. If the new
+ * effective priority is unchanged, we just store the new
* normal parameters and do not touch the scheduler class and
* the runqueue. This will be done when the task deboost
* itself.
*/
- if (rt_mutex_check_prio(p, newprio)) {
+ new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+ if (new_effective_prio == oldprio) {
__setscheduler_params(p, attr);
task_rq_unlock(rq, p, &flags);
return 0;
@@ -3612,7 +3614,7 @@ change:
put_prev_task(rq, p);
prev_class = p->sched_class;
- __setscheduler(rq, p, attr);
+ __setscheduler(rq, p, attr, true);
if (running)
p->sched_class->set_curr_task(rq);
@@ -4387,10 +4389,7 @@ long __sched io_schedule_timeout(long timeout)
long ret;
current->in_iowait = 1;
- if (old_iowait)
- blk_schedule_flush_plug(current);
- else
- blk_flush_plug(current);
+ blk_schedule_flush_plug(current);
delayacct_blkio_start();
rq = raw_rq();
@@ -6997,27 +6996,23 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
unsigned long flags;
long cpu = (long)hcpu;
struct dl_bw *dl_b;
+ bool overflow;
+ int cpus;
- switch (action & ~CPU_TASKS_FROZEN) {
+ switch (action) {
case CPU_DOWN_PREPARE:
- /* explicitly allow suspend */
- if (!(action & CPU_TASKS_FROZEN)) {
- bool overflow;
- int cpus;
-
- rcu_read_lock_sched();
- dl_b = dl_bw_of(cpu);
+ rcu_read_lock_sched();
+ dl_b = dl_bw_of(cpu);
- raw_spin_lock_irqsave(&dl_b->lock, flags);
- cpus = dl_bw_cpus(cpu);
- overflow = __dl_overflow(dl_b, cpus, 0, 0);
- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+ raw_spin_lock_irqsave(&dl_b->lock, flags);
+ cpus = dl_bw_cpus(cpu);
+ overflow = __dl_overflow(dl_b, cpus, 0, 0);
+ raw_spin_unlock_irqrestore(&dl_b->lock, flags);
- rcu_read_unlock_sched();
+ rcu_read_unlock_sched();
- if (overflow)
- return notifier_from_errno(-EBUSY);
- }
+ if (overflow)
+ return notifier_from_errno(-EBUSY);
cpuset_update_active_cpus(false);
break;
case CPU_DOWN_PREPARE_FROZEN:
@@ -7346,7 +7341,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
queued = task_on_rq_queued(p);
if (queued)
dequeue_task(rq, p, 0);
- __setscheduler(rq, p, &attr);
+ __setscheduler(rq, p, &attr, false);
if (queued) {
enqueue_task(rq, p, 0);
resched_curr(rq);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 11dc22a..637a094 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -117,11 +117,7 @@ static int __clockevents_set_state(struct clock_event_device *dev,
/* Transition with new state-specific callbacks */
switch (state) {
case CLOCK_EVT_STATE_DETACHED:
- /*
- * This is an internal state, which is guaranteed to go from
- * SHUTDOWN to DETACHED. No driver interaction required.
- */
- return 0;
+ /* The clockevent device is getting replaced. Shut it down. */
case CLOCK_EVT_STATE_SHUTDOWN:
return dev->set_state_shutdown(dev);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 76d4bd9..93ef7190 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -266,21 +266,23 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
/*
* Divide a ktime value by a nanosecond value
*/
-u64 __ktime_divns(const ktime_t kt, s64 div)
+s64 __ktime_divns(const ktime_t kt, s64 div)
{
- u64 dclc;
int sft = 0;
+ s64 dclc;
+ u64 tmp;
dclc = ktime_to_ns(kt);
+ tmp = dclc < 0 ? -dclc : dclc;
+
/* Make sure the divisor is less than 2^32: */
while (div >> 32) {
sft++;
div >>= 1;
}
- dclc >>= sft;
- do_div(dclc, (unsigned long) div);
-
- return dclc;
+ tmp >>= sft;
+ do_div(tmp, (unsigned long) div);
+ return dclc < 0 ? -tmp : tmp;
}
EXPORT_SYMBOL_GPL(__ktime_divns);
#endif /* BITS_PER_LONG >= 64 */
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 692bf71..25a086b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
EXPORT_SYMBOL(ftrace_print_hex_seq);
const char *
-ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len,
+ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count,
size_t el_size)
{
const char *ret = trace_seq_buffer_ptr(p);
const char *prefix = "";
void *ptr = (void *)buf;
+ size_t buf_len = count * el_size;
trace_seq_putc(p, '{');
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 2316f50..581a68a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -41,6 +41,8 @@
#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
+static DEFINE_MUTEX(watchdog_proc_mutex);
+
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
@@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void)
{
int cpu;
- if (!watchdog_user_enabled)
- return;
+ mutex_lock(&watchdog_proc_mutex);
+
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ goto unlock;
get_online_cpus();
for_each_online_cpu(cpu)
watchdog_nmi_enable(cpu);
put_online_cpus();
+
+unlock:
+ mutex_unlock(&watchdog_proc_mutex);
}
void watchdog_nmi_disable_all(void)
{
int cpu;
+ mutex_lock(&watchdog_proc_mutex);
+
if (!watchdog_running)
- return;
+ goto unlock;
get_online_cpus();
for_each_online_cpu(cpu)
watchdog_nmi_disable(cpu);
put_online_cpus();
+
+unlock:
+ mutex_unlock(&watchdog_proc_mutex);
}
#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
@@ -744,8 +756,6 @@ static int proc_watchdog_update(void)
}
-static DEFINE_MUTEX(watchdog_proc_mutex);
-
/*
* common function for watchdog, nmi_watchdog and soft_watchdog parameter
*
OpenPOWER on IntegriCloud