Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c       | 114
-rw-r--r--  kernel/irq/handle.c        |   2
-rw-r--r--  kernel/irq/proc.c          |  19
-rw-r--r--  kernel/locking/lockdep.c   |  10
-rw-r--r--  kernel/rcu/tree.c          |   5
-rw-r--r--  kernel/sched/core.c        |  24
-rw-r--r--  kernel/sched/sched.h       |   5
-rw-r--r--  kernel/time/clocksource.c  |   2
8 files changed, 129 insertions, 52 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f548f69..b11756f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event)
PERF_EVENT_STATE_INACTIVE;
}
-/*
- * Called at perf_event creation and when events are attached/detached from a
- * group.
- */
-static void perf_event__read_size(struct perf_event *event)
+static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
{
int entry = sizeof(u64); /* value */
int size = 0;
@@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event)
entry += sizeof(u64);
if (event->attr.read_format & PERF_FORMAT_GROUP) {
- nr += event->group_leader->nr_siblings;
+ nr += nr_siblings;
size += sizeof(u64);
}
@@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event)
event->read_size = size;
}
-static void perf_event__header_size(struct perf_event *event)
+static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
{
struct perf_sample_data *data;
- u64 sample_type = event->attr.sample_type;
u16 size = 0;
- perf_event__read_size(event);
-
if (sample_type & PERF_SAMPLE_IP)
size += sizeof(data->ip);
@@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event)
event->header_size = size;
}
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__header_size(struct perf_event *event)
+{
+ __perf_event_read_size(event,
+ event->group_leader->nr_siblings);
+ __perf_event_header_size(event, event->attr.sample_type);
+}
+
static void perf_event__id_header_size(struct perf_event *event)
{
struct perf_sample_data *data;
@@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event)
event->id_header_size = size;
}
+static bool perf_event_validate_size(struct perf_event *event)
+{
+ /*
+ * The values computed here will be over-written when we actually
+ * attach the event.
+ */
+ __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
+ __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
+ perf_event__id_header_size(event);
+
+ /*
+ * Sum the lot; should not exceed the 64k limit we have on records.
+ * Conservative limit to allow for callchains and other variable fields.
+ */
+ if (event->read_size + event->header_size +
+ event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
+ return false;
+
+ return true;
+}
+
static void perf_group_attach(struct perf_event *event)
{
struct perf_event *group_leader = event->group_leader, *pos;
@@ -8297,13 +8322,35 @@ SYSCALL_DEFINE5(perf_event_open,
if (move_group) {
gctx = group_leader->ctx;
+ mutex_lock_double(&gctx->mutex, &ctx->mutex);
+ } else {
+ mutex_lock(&ctx->mutex);
+ }
+ if (!perf_event_validate_size(event)) {
+ err = -E2BIG;
+ goto err_locked;
+ }
+
+ /*
+ * Must be under the same ctx::mutex as perf_install_in_context(),
+ * because we need to serialize with concurrent event creation.
+ */
+ if (!exclusive_event_installable(event, ctx)) {
+ /* exclusive and group stuff are assumed mutually exclusive */
+ WARN_ON_ONCE(move_group);
+
+ err = -EBUSY;
+ goto err_locked;
+ }
+
+ WARN_ON_ONCE(ctx->parent_ctx);
+
+ if (move_group) {
/*
* See perf_event_ctx_lock() for comments on the details
* of swizzling perf_event::ctx.
*/
- mutex_lock_double(&gctx->mutex, &ctx->mutex);
-
perf_remove_from_context(group_leader, false);
list_for_each_entry(sibling, &group_leader->sibling_list,
@@ -8311,13 +8358,7 @@ SYSCALL_DEFINE5(perf_event_open,
perf_remove_from_context(sibling, false);
put_ctx(gctx);
}
- } else {
- mutex_lock(&ctx->mutex);
- }
- WARN_ON_ONCE(ctx->parent_ctx);
-
- if (move_group) {
/*
* Wait for everybody to stop referencing the events through
* the old lists, before installing it on new lists.
@@ -8349,22 +8390,29 @@ SYSCALL_DEFINE5(perf_event_open,
perf_event__state_init(group_leader);
perf_install_in_context(ctx, group_leader, group_leader->cpu);
get_ctx(ctx);
- }
- if (!exclusive_event_installable(event, ctx)) {
- err = -EBUSY;
- mutex_unlock(&ctx->mutex);
- fput(event_file);
- goto err_context;
+ /*
+ * Now that all events are installed in @ctx, nothing
+ * references @gctx anymore, so drop the last reference we have
+ * on it.
+ */
+ put_ctx(gctx);
}
+ /*
+ * Precalculate sample_data sizes; do while holding ctx::mutex such
+ * that we're serialized against further additions and before
+ * perf_install_in_context() which is the point the event is active and
+ * can use these values.
+ */
+ perf_event__header_size(event);
+ perf_event__id_header_size(event);
+
perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
- if (move_group) {
+ if (move_group)
mutex_unlock(&gctx->mutex);
- put_ctx(gctx);
- }
mutex_unlock(&ctx->mutex);
put_online_cpus();
@@ -8376,12 +8424,6 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_unlock(&current->perf_event_mutex);
/*
- * Precalculate sample_data sizes
- */
- perf_event__header_size(event);
- perf_event__id_header_size(event);
-
- /*
* Drop the reference on the group_event after placing the
* new event on the sibling_list. This ensures destruction
* of the group leader will find the pointer to itself in
@@ -8391,6 +8433,12 @@ SYSCALL_DEFINE5(perf_event_open,
fd_install(event_fd, event_file);
return event_fd;
+err_locked:
+ if (move_group)
+ mutex_unlock(&gctx->mutex);
+ mutex_unlock(&ctx->mutex);
+/* err_file: */
+ fput(event_file);
err_context:
perf_unpin_context(ctx);
put_ctx(ctx);
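
For reference, the read_size computed above is what decides how many bytes a read() on the event fd returns, and it is what perf_event_validate_size() sums against the record-size limit. A minimal user-space sketch that mirrors the __perf_event_read_size() arithmetic (illustrative only; perf_read_size() is an invented helper, while the PERF_FORMAT_* flags are the real ones from <linux/perf_event.h>):

#include <stdio.h>
#include <linux/perf_event.h>

/* Mirror of the kernel's __perf_event_read_size() calculation: one u64
 * per counter value, plus optional u64s selected by read_format, and
 * with PERF_FORMAT_GROUP one entry per event in the group plus a count. */
static size_t perf_read_size(unsigned long long read_format, int nr_siblings)
{
	size_t entry = sizeof(unsigned long long);	/* value */
	size_t size = 0;
	int nr = 1;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		size += sizeof(unsigned long long);
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		size += sizeof(unsigned long long);
	if (read_format & PERF_FORMAT_ID)
		entry += sizeof(unsigned long long);
	if (read_format & PERF_FORMAT_GROUP) {
		nr += nr_siblings;
		size += sizeof(unsigned long long);	/* the 'nr' field */
	}
	return size + nr * entry;
}

int main(void)
{
	printf("group leader + 3 siblings, with ids: %zu bytes\n",
	       perf_read_size(PERF_FORMAT_GROUP | PERF_FORMAT_ID, 3));
	return 0;
}

For a leader with three siblings and PERF_FORMAT_GROUP | PERF_FORMAT_ID this prints 72 bytes: one u64 for nr plus four {value, id} pairs.
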
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index de41a68..e25a83b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -22,7 +22,6 @@
/**
* handle_bad_irq - handle spurious and unhandled irqs
- * @irq: the interrupt number
* @desc: description of the interrupt
*
* Handles spurious and unhandled IRQs. It also prints a debug message.
@@ -35,6 +34,7 @@ void handle_bad_irq(struct irq_desc *desc)
kstat_incr_irqs_this_cpu(desc);
ack_bad_irq(irq);
}
+EXPORT_SYMBOL_GPL(handle_bad_irq);
/*
* Special, empty irq handler:
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index e3a8c95..a50ddc9 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -12,6 +12,7 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/mutex.h>
#include "internals.h"
@@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
void register_irq_proc(unsigned int irq, struct irq_desc *desc)
{
+ static DEFINE_MUTEX(register_lock);
char name [MAX_NAMELEN];
- if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir)
+ if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip))
return;
+ /*
+ * irq directories are registered only when a handler is
+ * added, not when the descriptor is created, so multiple
+ * tasks might try to register at the same time.
+ */
+ mutex_lock(&register_lock);
+
+ if (desc->dir)
+ goto out_unlock;
+
memset(name, 0, MAX_NAMELEN);
sprintf(name, "%d", irq);
/* create /proc/irq/1234 */
desc->dir = proc_mkdir(name, root_irq_dir);
if (!desc->dir)
- return;
+ goto out_unlock;
#ifdef CONFIG_SMP
/* create /proc/irq/<irq>/smp_affinity */
@@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
proc_create_data("spurious", 0444, desc->dir,
&irq_spurious_proc_fops, (void *)(long)irq);
+
+out_unlock:
+ mutex_unlock(&register_lock);
}
void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
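
The register_irq_proc() change above is a classic check-then-create race fix: the /proc/irq/NNN directory is created lazily when a handler is added, so two tasks can race to create it, and the new static mutex plus the re-check of desc->dir makes the registration idempotent. A small user-space analogue of the same pattern, assuming pthreads (all names here are invented):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static bool registered;				/* analogue of desc->dir */

static void register_once(void)
{
	/* Analogue of register_irq_proc(): registration happens lazily,
	 * so several threads may race to do it; serialize and re-check. */
	static pthread_mutex_t register_lock = PTHREAD_MUTEX_INITIALIZER;

	pthread_mutex_lock(&register_lock);
	if (registered)
		goto out_unlock;		/* someone else won the race */

	/* ... create the resource exactly once ... */
	registered = true;
	puts("registered");

out_unlock:
	pthread_mutex_unlock(&register_lock);
}

int main(void)
{
	register_once();
	register_once();			/* second call is a no-op */
	return 0;
}
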
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 8acfbf7..4e49cc4 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3068,7 +3068,7 @@ static int __lock_is_held(struct lockdep_map *lock);
static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int trylock, int read, int check, int hardirqs_off,
struct lockdep_map *nest_lock, unsigned long ip,
- int references)
+ int references, int pin_count)
{
struct task_struct *curr = current;
struct lock_class *class = NULL;
@@ -3157,7 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->waittime_stamp = 0;
hlock->holdtime_stamp = lockstat_clock();
#endif
- hlock->pin_count = 0;
+ hlock->pin_count = pin_count;
if (check && !mark_irqflags(curr, hlock))
return 0;
@@ -3343,7 +3343,7 @@ found_it:
hlock_class(hlock)->subclass, hlock->trylock,
hlock->read, hlock->check, hlock->hardirqs_off,
hlock->nest_lock, hlock->acquire_ip,
- hlock->references))
+ hlock->references, hlock->pin_count))
return 0;
}
@@ -3433,7 +3433,7 @@ found_it:
hlock_class(hlock)->subclass, hlock->trylock,
hlock->read, hlock->check, hlock->hardirqs_off,
hlock->nest_lock, hlock->acquire_ip,
- hlock->references))
+ hlock->references, hlock->pin_count))
return 0;
}
@@ -3583,7 +3583,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
current->lockdep_recursion = 1;
trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
__lock_acquire(lock, subclass, trylock, read, check,
- irqs_disabled_flags(flags), nest_lock, ip, 0);
+ irqs_disabled_flags(flags), nest_lock, ip, 0, 0);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9f75f25..775d36c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3868,6 +3868,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
static void __init
rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
{
+ static struct lock_class_key rcu_exp_sched_rdp_class;
unsigned long flags;
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp = rcu_get_root(rsp);
@@ -3883,6 +3884,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
mutex_init(&rdp->exp_funnel_mutex);
rcu_boot_init_nocb_percpu_data(rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ if (rsp == &rcu_sched_state)
+ lockdep_set_class_and_name(&rdp->exp_funnel_mutex,
+ &rcu_exp_sched_rdp_class,
+ "rcu_data_exp_sched");
}
/*
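
The lockdep_set_class_and_name() call above gives rcu_sched's per-CPU exp_funnel_mutex a lock class of its own; by default every rdp->exp_funnel_mutex shares one class, because they are all initialized from the same mutex_init() call site, which invites false-positive lockdep reports. A hypothetical module-style sketch of the same technique (names and module boilerplate are invented):

#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/lockdep.h>

static struct mutex level[2];

static int __init lockclass_demo_init(void)
{
	static struct lock_class_key level1_key;
	int i;

	/* Both mutexes are initialized from the same mutex_init() site,
	 * so by default they share a single lockdep class. */
	for (i = 0; i < 2; i++)
		mutex_init(&level[i]);

	/* Give the inner one its own class so the nesting below is not
	 * reported as recursive locking of one class. */
	lockdep_set_class_and_name(&level[1], &level1_key, "demo_level1");

	mutex_lock(&level[0]);
	mutex_lock(&level[1]);
	mutex_unlock(&level[1]);
	mutex_unlock(&level[0]);
	return 0;
}

static void __exit lockclass_demo_exit(void)
{
}

module_init(lockclass_demo_init);
module_exit(lockclass_demo_exit);
MODULE_LICENSE("GPL");

Without the lockdep_set_class_and_name() call, the nested mutex_lock() pair would be flagged as possible recursive locking of a single class even though two distinct mutexes are involved.
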
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2f9c928..10a8faa 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2517,11 +2517,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
* If a task dies, then it sets TASK_DEAD in tsk->state and calls
* schedule one last time. The schedule call will never return, and
* the scheduled task must drop that reference.
- * The test for TASK_DEAD must occur while the runqueue locks are
- * still held, otherwise prev could be scheduled on another cpu, die
- * there before we look at prev->state, and then the reference would
- * be dropped twice.
- * Manfred Spraul <manfred@colorfullife.com>
+ *
+ * We must observe prev->state before clearing prev->on_cpu (in
+ * finish_lock_switch), otherwise a concurrent wakeup can get prev
+ * running on another CPU and we could race with its RUNNING -> DEAD
+ * transition, resulting in a double drop.
*/
prev_state = prev->state;
vtime_task_switch(prev);
@@ -4934,7 +4934,15 @@ void init_idle(struct task_struct *idle, int cpu)
idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
- do_set_cpus_allowed(idle, cpumask_of(cpu));
+#ifdef CONFIG_SMP
+ /*
+ * It's possible that init_idle() gets called multiple times on a task,
+ * in that case do_set_cpus_allowed() will not do the right thing.
+ *
+ * And since this is boot we can forgo the serialization.
+ */
+ set_cpus_allowed_common(idle, cpumask_of(cpu));
+#endif
/*
* We're having a chicken and egg problem, even though we are
* holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -4951,7 +4959,7 @@ void init_idle(struct task_struct *idle, int cpu)
rq->curr = rq->idle = idle;
idle->on_rq = TASK_ON_RQ_QUEUED;
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
idle->on_cpu = 1;
#endif
raw_spin_unlock(&rq->lock);
@@ -4966,7 +4974,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->sched_class = &idle_sched_class;
ftrace_graph_init_idle_task(idle, cpu);
vtime_init_idle(idle, cpu);
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
#endif
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 68cda11..6d2a119 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1078,9 +1078,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
* After ->on_cpu is cleared, the task can be moved to a different CPU.
* We must ensure this doesn't happen until the switch is completely
* finished.
+ *
+ * Pairs with the control dependency and rmb in try_to_wake_up().
*/
- smp_wmb();
- prev->on_cpu = 0;
+ smp_store_release(&prev->on_cpu, 0);
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
/* this is a valid case when another task releases the spinlock */
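
smp_store_release() guarantees that everything written to prev before the store is visible to whoever observes prev->on_cpu == 0 with acquire (or equivalent) ordering, which is what the waker side relies on. A rough user-space analogue of that release/acquire handoff, using C11 atomics rather than the kernel primitives (all names invented):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static int task_state;				/* analogue of prev->state */
static atomic_int on_cpu = 1;			/* analogue of prev->on_cpu */

static void *switch_out(void *arg)
{
	task_state = 42;			/* work done before the handoff */
	/* Release: everything above is published before on_cpu reads 0. */
	atomic_store_explicit(&on_cpu, 0, memory_order_release);
	return NULL;
}

static void *waker(void *arg)
{
	/* Acquire: once we see on_cpu == 0, task_state = 42 is visible too. */
	while (atomic_load_explicit(&on_cpu, memory_order_acquire))
		;
	printf("task_state = %d\n", task_state);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, waker, NULL);
	pthread_create(&b, NULL, switch_out, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}
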
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 841b72f..3a38775 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data)
continue;
/* Check the deviation from the watchdog clocksource. */
- if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
+ if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
cs->name);
pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
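
The abs() to abs64() change matters because cs_nsec - wd_nsec is a 64-bit (s64) difference; an abs() that truncates its argument to int can make a large skew look tiny and defeat the watchdog check. A quick user-space illustration with a hypothetical skew value (on common platforms the int conversion simply drops the high bits):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	/* Hypothetical skew between clocksource and watchdog: ~4.3 seconds. */
	int64_t delta = 4294967297LL;		/* 2^32 + 1 nanoseconds */

	/* Truncating the 64-bit delta to int first (what a 32-bit abs()
	 * effectively does) loses the high bits: the skew looks like 1 ns. */
	printf("abs((int)delta) = %d\n", abs((int)delta));

	/* A 64-bit absolute value keeps the full magnitude. */
	printf("llabs(delta)    = %" PRId64 "\n", (int64_t)llabs(delta));
	return 0;
}
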