Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c          | 241 |
-rw-r--r-- | kernel/events/core.c  | 106 |
-rw-r--r-- | kernel/jump_label.c   |  20 |
-rw-r--r-- | kernel/kprobes.c      |  59 |
-rw-r--r-- | kernel/padata.c       |  43 |
-rw-r--r-- | kernel/stop_machine.c |  11 |
6 files changed, 244 insertions(+), 236 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c index b86b32e..b03a325 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -27,6 +27,7 @@ #include <linux/smpboot.h> #include <linux/relay.h> #include <linux/slab.h> +#include <linux/percpu-rwsem.h> #include <trace/events/power.h> #define CREATE_TRACE_POINTS @@ -65,6 +66,12 @@ struct cpuhp_cpu_state { static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state); +#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) +static struct lock_class_key cpuhp_state_key; +static struct lockdep_map cpuhp_state_lock_map = + STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key); +#endif + /** * cpuhp_step - Hotplug state machine step * @name: Name of the step @@ -196,121 +203,41 @@ void cpu_maps_update_done(void) mutex_unlock(&cpu_add_remove_lock); } -/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. +/* + * If set, cpu_up and cpu_down will return -EBUSY and do nothing. * Should always be manipulated under cpu_add_remove_lock */ static int cpu_hotplug_disabled; #ifdef CONFIG_HOTPLUG_CPU -static struct { - struct task_struct *active_writer; - /* wait queue to wake up the active_writer */ - wait_queue_head_t wq; - /* verifies that no writer will get active while readers are active */ - struct mutex lock; - /* - * Also blocks the new readers during - * an ongoing cpu hotplug operation. - */ - atomic_t refcount; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -} cpu_hotplug = { - .active_writer = NULL, - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), - .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), -#ifdef CONFIG_DEBUG_LOCK_ALLOC - .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map), -#endif -}; - -/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */ -#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map) -#define cpuhp_lock_acquire_tryread() \ - lock_map_acquire_tryread(&cpu_hotplug.dep_map) -#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) -#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) +DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); - -void get_online_cpus(void) +void cpus_read_lock(void) { - might_sleep(); - if (cpu_hotplug.active_writer == current) - return; - cpuhp_lock_acquire_read(); - mutex_lock(&cpu_hotplug.lock); - atomic_inc(&cpu_hotplug.refcount); - mutex_unlock(&cpu_hotplug.lock); + percpu_down_read(&cpu_hotplug_lock); } -EXPORT_SYMBOL_GPL(get_online_cpus); +EXPORT_SYMBOL_GPL(cpus_read_lock); -void put_online_cpus(void) +void cpus_read_unlock(void) { - int refcount; - - if (cpu_hotplug.active_writer == current) - return; - - refcount = atomic_dec_return(&cpu_hotplug.refcount); - if (WARN_ON(refcount < 0)) /* try to fix things up */ - atomic_inc(&cpu_hotplug.refcount); - - if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq)) - wake_up(&cpu_hotplug.wq); - - cpuhp_lock_release(); - + percpu_up_read(&cpu_hotplug_lock); } -EXPORT_SYMBOL_GPL(put_online_cpus); +EXPORT_SYMBOL_GPL(cpus_read_unlock); -/* - * This ensures that the hotplug operation can begin only when the - * refcount goes to zero. - * - * Note that during a cpu-hotplug operation, the new readers, if any, - * will be blocked by the cpu_hotplug.lock - * - * Since cpu_hotplug_begin() is always called after invoking - * cpu_maps_update_begin(), we can be sure that only one writer is active. - * - * Note that theoretically, there is a possibility of a livelock: - * - Refcount goes to zero, last reader wakes up the sleeping - * writer. 
- * - Last reader unlocks the cpu_hotplug.lock. - * - A new reader arrives at this moment, bumps up the refcount. - * - The writer acquires the cpu_hotplug.lock finds the refcount - * non zero and goes to sleep again. - * - * However, this is very difficult to achieve in practice since - * get_online_cpus() not an api which is called all that often. - * - */ -void cpu_hotplug_begin(void) +void cpus_write_lock(void) { - DEFINE_WAIT(wait); - - cpu_hotplug.active_writer = current; - cpuhp_lock_acquire(); + percpu_down_write(&cpu_hotplug_lock); +} - for (;;) { - mutex_lock(&cpu_hotplug.lock); - prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); - if (likely(!atomic_read(&cpu_hotplug.refcount))) - break; - mutex_unlock(&cpu_hotplug.lock); - schedule(); - } - finish_wait(&cpu_hotplug.wq, &wait); +void cpus_write_unlock(void) +{ + percpu_up_write(&cpu_hotplug_lock); } -void cpu_hotplug_done(void) +void lockdep_assert_cpus_held(void) { - cpu_hotplug.active_writer = NULL; - mutex_unlock(&cpu_hotplug.lock); - cpuhp_lock_release(); + percpu_rwsem_assert_held(&cpu_hotplug_lock); } /* @@ -344,8 +271,6 @@ void cpu_hotplug_enable(void) EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #endif /* CONFIG_HOTPLUG_CPU */ -/* Notifier wrappers for transitioning to state machine */ - static int bringup_wait_for_ap(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); @@ -484,6 +409,7 @@ static void cpuhp_thread_fun(unsigned int cpu) st->should_run = false; + lock_map_acquire(&cpuhp_state_lock_map); /* Single callback invocation for [un]install ? */ if (st->single) { if (st->cb_state < CPUHP_AP_ONLINE) { @@ -510,6 +436,7 @@ static void cpuhp_thread_fun(unsigned int cpu) else if (st->state > st->target) ret = cpuhp_ap_offline(cpu, st); } + lock_map_release(&cpuhp_state_lock_map); st->result = ret; complete(&st->done); } @@ -524,6 +451,9 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup, if (!cpu_online(cpu)) return 0; + lock_map_acquire(&cpuhp_state_lock_map); + lock_map_release(&cpuhp_state_lock_map); + /* * If we are up and running, use the hotplug thread. For early calls * we invoke the thread function directly. @@ -567,6 +497,8 @@ static int cpuhp_kick_ap_work(unsigned int cpu) enum cpuhp_state state = st->state; trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work); + lock_map_acquire(&cpuhp_state_lock_map); + lock_map_release(&cpuhp_state_lock_map); __cpuhp_kick_ap_work(st); wait_for_completion(&st->done); trace_cpuhp_exit(cpu, st->state, state, st->result); @@ -630,30 +562,6 @@ void clear_tasks_mm_cpumask(int cpu) rcu_read_unlock(); } -static inline void check_for_tasks(int dead_cpu) -{ - struct task_struct *g, *p; - - read_lock(&tasklist_lock); - for_each_process_thread(g, p) { - if (!p->on_rq) - continue; - /* - * We do the check with unlocked task_rq(p)->lock. - * Order the reading to do not warn about a task, - * which was running on this cpu in the past, and - * it's just been woken on another cpu. - */ - rmb(); - if (task_cpu(p) != dead_cpu) - continue; - - pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n", - p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags); - } - read_unlock(&tasklist_lock); -} - /* Take this CPU down. */ static int take_cpu_down(void *_param) { @@ -701,7 +609,7 @@ static int takedown_cpu(unsigned int cpu) /* * So now all preempt/rcu users must observe !cpu_active(). 
*/ - err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu)); + err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); if (err) { /* CPU refused to die */ irq_unlock_sparse(); @@ -773,7 +681,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, if (!cpu_present(cpu)) return -EINVAL; - cpu_hotplug_begin(); + cpus_write_lock(); cpuhp_tasks_frozen = tasks_frozen; @@ -811,7 +719,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, } out: - cpu_hotplug_done(); + cpus_write_unlock(); return ret; } @@ -893,7 +801,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) struct task_struct *idle; int ret = 0; - cpu_hotplug_begin(); + cpus_write_lock(); if (!cpu_present(cpu)) { ret = -EINVAL; @@ -941,7 +849,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) target = min((int)target, CPUHP_BRINGUP_CPU); ret = cpuhp_up_callbacks(cpu, st, target); out: - cpu_hotplug_done(); + cpus_write_unlock(); return ret; } @@ -1418,18 +1326,20 @@ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, } } -int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, - bool invoke) +int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, + struct hlist_node *node, + bool invoke) { struct cpuhp_step *sp; int cpu; int ret; + lockdep_assert_cpus_held(); + sp = cpuhp_get_step(state); if (sp->multi_instance == false) return -EINVAL; - get_online_cpus(); mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) @@ -1458,13 +1368,23 @@ add_node: hlist_add_head(node, &sp->list); unlock: mutex_unlock(&cpuhp_state_mutex); - put_online_cpus(); + return ret; +} + +int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, + bool invoke) +{ + int ret; + + cpus_read_lock(); + ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke); + cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); /** - * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state + * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state * @state: The state to setup * @invoke: If true, the startup function is invoked for cpus where * cpu state >= @state @@ -1473,25 +1393,27 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); * @multi_instance: State is set up for multiple instances which get * added afterwards. * + * The caller needs to hold cpus read locked while calling this function. * Returns: * On success: * Positive state number if @state is CPUHP_AP_ONLINE_DYN * 0 for all other states * On failure: proper (negative) error code */ -int __cpuhp_setup_state(enum cpuhp_state state, - const char *name, bool invoke, - int (*startup)(unsigned int cpu), - int (*teardown)(unsigned int cpu), - bool multi_instance) +int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, + const char *name, bool invoke, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu), + bool multi_instance) { int cpu, ret = 0; bool dynstate; + lockdep_assert_cpus_held(); + if (cpuhp_cb_check(state) || !name) return -EINVAL; - get_online_cpus(); mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, @@ -1527,7 +1449,6 @@ int __cpuhp_setup_state(enum cpuhp_state state, } out: mutex_unlock(&cpuhp_state_mutex); - put_online_cpus(); /* * If the requested state is CPUHP_AP_ONLINE_DYN, return the * dynamically allocated state in case of success. 
@@ -1536,6 +1457,22 @@ out: return state; return ret; } +EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked); + +int __cpuhp_setup_state(enum cpuhp_state state, + const char *name, bool invoke, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu), + bool multi_instance) +{ + int ret; + + cpus_read_lock(); + ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup, + teardown, multi_instance); + cpus_read_unlock(); + return ret; +} EXPORT_SYMBOL(__cpuhp_setup_state); int __cpuhp_state_remove_instance(enum cpuhp_state state, @@ -1549,7 +1486,7 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, if (!sp->multi_instance) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&cpuhp_state_mutex); if (!invoke || !cpuhp_get_teardown_cb(state)) @@ -1570,29 +1507,30 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, remove: hlist_del(node); mutex_unlock(&cpuhp_state_mutex); - put_online_cpus(); + cpus_read_unlock(); return 0; } EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance); /** - * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state + * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state * @state: The state to remove * @invoke: If true, the teardown function is invoked for cpus where * cpu state >= @state * + * The caller needs to hold cpus read locked while calling this function. * The teardown callback is currently not allowed to fail. Think * about module removal! */ -void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) +void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke) { struct cpuhp_step *sp = cpuhp_get_step(state); int cpu; BUG_ON(cpuhp_cb_check(state)); - get_online_cpus(); + lockdep_assert_cpus_held(); mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { @@ -1620,7 +1558,14 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); mutex_unlock(&cpuhp_state_mutex); - put_online_cpus(); +} +EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked); + +void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) +{ + cpus_read_lock(); + __cpuhp_remove_state_cpuslocked(state, invoke); + cpus_read_unlock(); } EXPORT_SYMBOL(__cpuhp_remove_state); @@ -1689,7 +1634,7 @@ static struct attribute *cpuhp_cpu_attrs[] = { NULL }; -static struct attribute_group cpuhp_cpu_attr_group = { +static const struct attribute_group cpuhp_cpu_attr_group = { .attrs = cpuhp_cpu_attrs, .name = "hotplug", NULL @@ -1721,7 +1666,7 @@ static struct attribute *cpuhp_cpu_root_attrs[] = { NULL }; -static struct attribute_group cpuhp_cpu_root_attr_group = { +static const struct attribute_group cpuhp_cpu_root_attr_group = { .attrs = cpuhp_cpu_root_attrs, .name = "hotplug", NULL diff --git a/kernel/events/core.c b/kernel/events/core.c index bc63f8d..4d2c32f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); static struct srcu_struct pmus_srcu; +static cpumask_var_t perf_online_mask; /* * perf event paranoia level: @@ -3807,14 +3808,6 @@ find_get_context(struct pmu *pmu, struct task_struct *task, if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); - /* - * We could be clever and allow to attach a event to an - * offline CPU and activate it when the CPU comes up, but - * that's for later. 
- */ - if (!cpu_online(cpu)) - return ERR_PTR(-ENODEV); - cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; get_ctx(ctx); @@ -7723,7 +7716,8 @@ static int swevent_hlist_get_cpu(int cpu) int err = 0; mutex_lock(&swhash->hlist_mutex); - if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { + if (!swevent_hlist_deref(swhash) && + cpumask_test_cpu(cpu, perf_online_mask)) { struct swevent_hlist *hlist; hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); @@ -7744,7 +7738,7 @@ static int swevent_hlist_get(void) { int err, cpu, failed_cpu; - get_online_cpus(); + mutex_lock(&pmus_lock); for_each_possible_cpu(cpu) { err = swevent_hlist_get_cpu(cpu); if (err) { @@ -7752,8 +7746,7 @@ static int swevent_hlist_get(void) goto fail; } } - put_online_cpus(); - + mutex_unlock(&pmus_lock); return 0; fail: for_each_possible_cpu(cpu) { @@ -7761,8 +7754,7 @@ fail: break; swevent_hlist_put_cpu(cpu); } - - put_online_cpus(); + mutex_unlock(&pmus_lock); return err; } @@ -8940,7 +8932,7 @@ perf_event_mux_interval_ms_store(struct device *dev, pmu->hrtimer_interval_ms = timer; /* update all cpuctx for this PMU */ - get_online_cpus(); + cpus_read_lock(); for_each_online_cpu(cpu) { struct perf_cpu_context *cpuctx; cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); @@ -8949,7 +8941,7 @@ perf_event_mux_interval_ms_store(struct device *dev, cpu_function_call(cpu, (remote_function_f)perf_mux_hrtimer_restart, cpuctx); } - put_online_cpus(); + cpus_read_unlock(); mutex_unlock(&mux_interval_mutex); return count; @@ -9079,6 +9071,7 @@ skip_type: lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); cpuctx->ctx.pmu = pmu; + cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask); __perf_mux_hrtimer_init(cpuctx, cpu); } @@ -9903,12 +9896,10 @@ SYSCALL_DEFINE5(perf_event_open, goto err_task; } - get_online_cpus(); - if (task) { err = mutex_lock_interruptible(&task->signal->cred_guard_mutex); if (err) - goto err_cpus; + goto err_task; /* * Reuse ptrace permission checks for now. @@ -10094,6 +10085,23 @@ SYSCALL_DEFINE5(perf_event_open, goto err_locked; } + if (!task) { + /* + * Check if the @cpu we're creating an event for is online. + * + * We use the perf_cpu_context::ctx::mutex to serialize against + * the hotplug notifiers. See perf_event_{init,exit}_cpu(). + */ + struct perf_cpu_context *cpuctx = + container_of(ctx, struct perf_cpu_context, ctx); + + if (!cpuctx->online) { + err = -ENODEV; + goto err_locked; + } + } + + /* * Must be under the same ctx::mutex as perf_install_in_context(), * because we need to serialize with concurrent event creation. @@ -10183,8 +10191,6 @@ SYSCALL_DEFINE5(perf_event_open, put_task_struct(task); } - put_online_cpus(); - mutex_lock(¤t->perf_event_mutex); list_add_tail(&event->owner_entry, ¤t->perf_event_list); mutex_unlock(¤t->perf_event_mutex); @@ -10218,8 +10224,6 @@ err_alloc: err_cred: if (task) mutex_unlock(&task->signal->cred_guard_mutex); -err_cpus: - put_online_cpus(); err_task: if (task) put_task_struct(task); @@ -10274,6 +10278,21 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, goto err_unlock; } + if (!task) { + /* + * Check if the @cpu we're creating an event for is online. + * + * We use the perf_cpu_context::ctx::mutex to serialize against + * the hotplug notifiers. See perf_event_{init,exit}_cpu(). 
+ */ + struct perf_cpu_context *cpuctx = + container_of(ctx, struct perf_cpu_context, ctx); + if (!cpuctx->online) { + err = -ENODEV; + goto err_unlock; + } + } + if (!exclusive_event_installable(event, ctx)) { err = -EBUSY; goto err_unlock; @@ -10941,6 +10960,8 @@ static void __init perf_event_init_all_cpus(void) struct swevent_htable *swhash; int cpu; + zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL); + for_each_possible_cpu(cpu) { swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); @@ -10956,7 +10977,7 @@ static void __init perf_event_init_all_cpus(void) } } -int perf_event_init_cpu(unsigned int cpu) +void perf_swevent_init_cpu(unsigned int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); @@ -10969,7 +10990,6 @@ int perf_event_init_cpu(unsigned int cpu) rcu_assign_pointer(swhash->swevent_hlist, hlist); } mutex_unlock(&swhash->hlist_mutex); - return 0; } #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE @@ -10987,19 +11007,22 @@ static void __perf_event_exit_context(void *__info) static void perf_event_exit_cpu_context(int cpu) { + struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; struct pmu *pmu; - int idx; - idx = srcu_read_lock(&pmus_srcu); - list_for_each_entry_rcu(pmu, &pmus, entry) { - ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx; + mutex_lock(&pmus_lock); + list_for_each_entry(pmu, &pmus, entry) { + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + ctx = &cpuctx->ctx; mutex_lock(&ctx->mutex); smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); + cpuctx->online = 0; mutex_unlock(&ctx->mutex); } - srcu_read_unlock(&pmus_srcu, idx); + cpumask_clear_cpu(cpu, perf_online_mask); + mutex_unlock(&pmus_lock); } #else @@ -11007,6 +11030,29 @@ static void perf_event_exit_cpu_context(int cpu) { } #endif +int perf_event_init_cpu(unsigned int cpu) +{ + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; + struct pmu *pmu; + + perf_swevent_init_cpu(cpu); + + mutex_lock(&pmus_lock); + cpumask_set_cpu(cpu, perf_online_mask); + list_for_each_entry(pmu, &pmus, entry) { + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + ctx = &cpuctx->ctx; + + mutex_lock(&ctx->mutex); + cpuctx->online = 1; + mutex_unlock(&ctx->mutex); + } + mutex_unlock(&pmus_lock); + + return 0; +} + int perf_event_exit_cpu(unsigned int cpu) { perf_event_exit_cpu_context(cpu); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 6c9cb20..d11c506 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -15,6 +15,7 @@ #include <linux/static_key.h> #include <linux/jump_label_ratelimit.h> #include <linux/bug.h> +#include <linux/cpu.h> #ifdef HAVE_JUMP_LABEL @@ -124,6 +125,7 @@ void static_key_slow_inc(struct static_key *key) return; } + cpus_read_lock(); jump_label_lock(); if (atomic_read(&key->enabled) == 0) { atomic_set(&key->enabled, -1); @@ -133,12 +135,14 @@ void static_key_slow_inc(struct static_key *key) atomic_inc(&key->enabled); } jump_label_unlock(); + cpus_read_unlock(); } EXPORT_SYMBOL_GPL(static_key_slow_inc); static void __static_key_slow_dec(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { + cpus_read_lock(); /* * The negative count check is valid even when a negative * key->enabled is in use by static_key_slow_inc(); a @@ -149,6 +153,7 @@ static void __static_key_slow_dec(struct static_key *key, if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { WARN(atomic_read(&key->enabled) < 0, "jump label: negative count!\n"); + cpus_read_unlock(); return; } @@ -159,6 
+164,7 @@ static void __static_key_slow_dec(struct static_key *key, jump_label_update(key); } jump_label_unlock(); + cpus_read_unlock(); } static void jump_label_update_timeout(struct work_struct *work) @@ -334,6 +340,7 @@ void __init jump_label_init(void) if (static_key_initialized) return; + cpus_read_lock(); jump_label_lock(); jump_label_sort_entries(iter_start, iter_stop); @@ -353,6 +360,7 @@ void __init jump_label_init(void) } static_key_initialized = true; jump_label_unlock(); + cpus_read_unlock(); } #ifdef CONFIG_MODULES @@ -590,28 +598,28 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, struct module *mod = data; int ret = 0; + cpus_read_lock(); + jump_label_lock(); + switch (val) { case MODULE_STATE_COMING: - jump_label_lock(); ret = jump_label_add_module(mod); if (ret) { WARN(1, "Failed to allocatote memory: jump_label may not work properly.\n"); jump_label_del_module(mod); } - jump_label_unlock(); break; case MODULE_STATE_GOING: - jump_label_lock(); jump_label_del_module(mod); - jump_label_unlock(); break; case MODULE_STATE_LIVE: - jump_label_lock(); jump_label_invalidate_module_init(mod); - jump_label_unlock(); break; } + jump_label_unlock(); + cpus_read_unlock(); + return notifier_from_errno(ret); } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index adfe3b4..6756d75 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -483,11 +483,6 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); */ static void do_optimize_kprobes(void) { - /* Optimization never be done when disarmed */ - if (kprobes_all_disarmed || !kprobes_allow_optimization || - list_empty(&optimizing_list)) - return; - /* * The optimization/unoptimization refers online_cpus via * stop_machine() and cpu-hotplug modifies online_cpus. @@ -495,14 +490,19 @@ static void do_optimize_kprobes(void) * This combination can cause a deadlock (cpu-hotplug try to lock * text_mutex but stop_machine can not be done because online_cpus * has been changed) - * To avoid this deadlock, we need to call get_online_cpus() + * To avoid this deadlock, caller must have locked cpu hotplug * for preventing cpu-hotplug outside of text_mutex locking. 
*/ - get_online_cpus(); + lockdep_assert_cpus_held(); + + /* Optimization never be done when disarmed */ + if (kprobes_all_disarmed || !kprobes_allow_optimization || + list_empty(&optimizing_list)) + return; + mutex_lock(&text_mutex); arch_optimize_kprobes(&optimizing_list); mutex_unlock(&text_mutex); - put_online_cpus(); } /* @@ -513,12 +513,13 @@ static void do_unoptimize_kprobes(void) { struct optimized_kprobe *op, *tmp; + /* See comment in do_optimize_kprobes() */ + lockdep_assert_cpus_held(); + /* Unoptimization must be done anytime */ if (list_empty(&unoptimizing_list)) return; - /* Ditto to do_optimize_kprobes */ - get_online_cpus(); mutex_lock(&text_mutex); arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); /* Loop free_list for disarming */ @@ -537,7 +538,6 @@ static void do_unoptimize_kprobes(void) list_del_init(&op->list); } mutex_unlock(&text_mutex); - put_online_cpus(); } /* Reclaim all kprobes on the free_list */ @@ -562,6 +562,7 @@ static void kick_kprobe_optimizer(void) static void kprobe_optimizer(struct work_struct *work) { mutex_lock(&kprobe_mutex); + cpus_read_lock(); /* Lock modules while optimizing kprobes */ mutex_lock(&module_mutex); @@ -587,6 +588,7 @@ static void kprobe_optimizer(struct work_struct *work) do_free_cleaned_kprobes(); mutex_unlock(&module_mutex); + cpus_read_unlock(); mutex_unlock(&kprobe_mutex); /* Step 5: Kick optimizer again if needed */ @@ -650,9 +652,8 @@ static void optimize_kprobe(struct kprobe *p) /* Short cut to direct unoptimizing */ static void force_unoptimize_kprobe(struct optimized_kprobe *op) { - get_online_cpus(); + lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); - put_online_cpus(); if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); } @@ -791,6 +792,7 @@ static void try_to_optimize_kprobe(struct kprobe *p) return; /* For preparing optimization, jump_label_text_reserved() is called */ + cpus_read_lock(); jump_label_lock(); mutex_lock(&text_mutex); @@ -812,6 +814,7 @@ static void try_to_optimize_kprobe(struct kprobe *p) out: mutex_unlock(&text_mutex); jump_label_unlock(); + cpus_read_unlock(); } #ifdef CONFIG_SYSCTL @@ -826,6 +829,7 @@ static void optimize_all_kprobes(void) if (kprobes_allow_optimization) goto out; + cpus_read_lock(); kprobes_allow_optimization = true; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; @@ -833,6 +837,7 @@ static void optimize_all_kprobes(void) if (!kprobe_disabled(p)) optimize_kprobe(p); } + cpus_read_unlock(); printk(KERN_INFO "Kprobes globally optimized\n"); out: mutex_unlock(&kprobe_mutex); @@ -851,6 +856,7 @@ static void unoptimize_all_kprobes(void) return; } + cpus_read_lock(); kprobes_allow_optimization = false; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; @@ -859,6 +865,7 @@ static void unoptimize_all_kprobes(void) unoptimize_kprobe(p, false); } } + cpus_read_unlock(); mutex_unlock(&kprobe_mutex); /* Wait for unoptimizing completion */ @@ -1010,14 +1017,11 @@ static void arm_kprobe(struct kprobe *kp) arm_kprobe_ftrace(kp); return; } - /* - * Here, since __arm_kprobe() doesn't use stop_machine(), - * this doesn't cause deadlock on text_mutex. So, we don't - * need get_online_cpus(). 
- */ + cpus_read_lock(); mutex_lock(&text_mutex); __arm_kprobe(kp); mutex_unlock(&text_mutex); + cpus_read_unlock(); } /* Disarm a kprobe with text_mutex */ @@ -1027,10 +1031,12 @@ static void disarm_kprobe(struct kprobe *kp, bool reopt) disarm_kprobe_ftrace(kp); return; } - /* Ditto */ + + cpus_read_lock(); mutex_lock(&text_mutex); __disarm_kprobe(kp, reopt); mutex_unlock(&text_mutex); + cpus_read_unlock(); } /* @@ -1298,13 +1304,10 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) int ret = 0; struct kprobe *ap = orig_p; + cpus_read_lock(); + /* For preparing optimization, jump_label_text_reserved() is called */ jump_label_lock(); - /* - * Get online CPUs to avoid text_mutex deadlock.with stop machine, - * which is invoked by unoptimize_kprobe() in add_new_kprobe() - */ - get_online_cpus(); mutex_lock(&text_mutex); if (!kprobe_aggrprobe(orig_p)) { @@ -1352,8 +1355,8 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) out: mutex_unlock(&text_mutex); - put_online_cpus(); jump_label_unlock(); + cpus_read_unlock(); if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { ap->flags &= ~KPROBE_FLAG_DISABLED; @@ -1555,9 +1558,12 @@ int register_kprobe(struct kprobe *p) goto out; } - mutex_lock(&text_mutex); /* Avoiding text modification */ + cpus_read_lock(); + /* Prevent text modification */ + mutex_lock(&text_mutex); ret = prepare_kprobe(p); mutex_unlock(&text_mutex); + cpus_read_unlock(); if (ret) goto out; @@ -1570,7 +1576,6 @@ int register_kprobe(struct kprobe *p) /* Try to optimize kprobe */ try_to_optimize_kprobe(p); - out: mutex_unlock(&kprobe_mutex); diff --git a/kernel/padata.c b/kernel/padata.c index ac8f1e5..868f947 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -934,29 +934,18 @@ static struct kobj_type padata_attr_type = { }; /** - * padata_alloc_possible - Allocate and initialize padata instance. - * Use the cpu_possible_mask for serial and - * parallel workers. - * - * @wq: workqueue to use for the allocated padata instance - */ -struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq) -{ - return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask); -} -EXPORT_SYMBOL(padata_alloc_possible); - -/** * padata_alloc - allocate and initialize a padata instance and specify * cpumasks for serial and parallel workers. 
* * @wq: workqueue to use for the allocated padata instance * @pcpumask: cpumask that will be used for padata parallelization * @cbcpumask: cpumask that will be used for padata serialization + * + * Must be called from a cpus_read_lock() protected region */ -struct padata_instance *padata_alloc(struct workqueue_struct *wq, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask) +static struct padata_instance *padata_alloc(struct workqueue_struct *wq, + const struct cpumask *pcpumask, + const struct cpumask *cbcpumask) { struct padata_instance *pinst; struct parallel_data *pd = NULL; @@ -965,7 +954,6 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq, if (!pinst) goto err; - get_online_cpus(); if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL)) goto err_free_inst; if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) { @@ -989,14 +977,12 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq, pinst->flags = 0; - put_online_cpus(); - BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier); kobject_init(&pinst->kobj, &padata_attr_type); mutex_init(&pinst->lock); #ifdef CONFIG_HOTPLUG_CPU - cpuhp_state_add_instance_nocalls(hp_online, &pinst->node); + cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node); #endif return pinst; @@ -1005,12 +991,27 @@ err_free_masks: free_cpumask_var(pinst->cpumask.cbcpu); err_free_inst: kfree(pinst); - put_online_cpus(); err: return NULL; } /** + * padata_alloc_possible - Allocate and initialize padata instance. + * Use the cpu_possible_mask for serial and + * parallel workers. + * + * @wq: workqueue to use for the allocated padata instance + * + * Must be called from a cpus_read_lock() protected region + */ +struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq) +{ + lockdep_assert_cpus_held(); + return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask); +} +EXPORT_SYMBOL(padata_alloc_possible); + +/** * padata_free - free a padata instance * * @padata_inst: padata instance to free diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 1eb8266..b759126 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -552,7 +552,8 @@ static int __init cpu_stop_init(void) } early_initcall(cpu_stop_init); -static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) +int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) { struct multi_stop_data msdata = { .fn = fn, @@ -561,6 +562,8 @@ static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cp .active_cpus = cpus, }; + lockdep_assert_cpus_held(); + if (!stop_machine_initialized) { /* * Handle the case where stop_machine() is called @@ -590,9 +593,9 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) int ret; /* No CPUs can come up or down during this. */ - get_online_cpus(); - ret = __stop_machine(fn, data, cpus); - put_online_cpus(); + cpus_read_lock(); + ret = stop_machine_cpuslocked(fn, data, cpus); + cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(stop_machine); |
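
The core of the kernel/cpu.c change above is the switch from the open-coded refcount/waitqueue construct to a static percpu rw-semaphore. A minimal sketch of that primitive, with a made-up example_lock standing in for cpu_hotplug_lock (not part of the patch):

/*
 * Minimal sketch of the primitive kernel/cpu.c switches to, with a
 * made-up "example_lock".  Readers take a cheap per-CPU fast path;
 * percpu_down_write() waits for all of them to drain.
 */
#include <linux/percpu-rwsem.h>

static DEFINE_STATIC_PERCPU_RWSEM(example_lock);

static void example_read_side(void)
{
        percpu_down_read(&example_lock);        /* what cpus_read_lock() does */
        /* ... read state the writer may change ... */
        percpu_up_read(&example_lock);
}

static void example_write_side(void)
{
        percpu_down_write(&example_lock);       /* what cpus_write_lock() does */
        /* ... exclusive update ... */
        percpu_up_write(&example_lock);
}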
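
From a caller's point of view the conversion is mechanical: get_online_cpus()/put_online_cpus() become cpus_read_lock()/cpus_read_unlock(), as in this sketch; the function name is hypothetical and assumes the matching declarations from this series in <linux/cpu.h>:

/*
 * Minimal sketch (not part of the patch): a caller converted from the
 * old get_online_cpus()/put_online_cpus() pair to the new API.  The
 * function name is hypothetical.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/printk.h>

static void example_walk_online_cpus(void)
{
        unsigned int cpu;

        cpus_read_lock();               /* was: get_online_cpus() */
        for_each_online_cpu(cpu)
                pr_info("cpu%u is online\n", cpu);
        cpus_read_unlock();             /* was: put_online_cpus() */
}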
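
The diff also introduces _cpuslocked variants (for example __cpuhp_setup_state_cpuslocked() and the cpuhp_state_add_instance_nocalls_cpuslocked() call in padata) for callers that already hold the hotplug lock, with lockdep_assert_cpus_held() documenting the requirement. A sketch of that convention, with a hypothetical "subsys" and callbacks:

/*
 * Sketch only: "subsys" and both callbacks are hypothetical.  Because the
 * caller already holds the hotplug lock, it must use the _cpuslocked
 * variant; the plain __cpuhp_setup_state() wrapper takes cpus_read_lock()
 * itself.
 */
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>

static int subsys_online(unsigned int cpu)
{
        return 0;
}

static int subsys_offline(unsigned int cpu)
{
        return 0;
}

static int subsys_init(void)
{
        int ret;

        cpus_read_lock();
        /* ... setup that must not race with CPU hotplug ... */
        lockdep_assert_cpus_held();
        ret = __cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
                                             "subsys:online", true,
                                             subsys_online, subsys_offline,
                                             false);
        cpus_read_unlock();

        /* A positive return is the dynamically allocated state number. */
        return ret < 0 ? ret : 0;
}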
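
Likewise, stop_machine_cpuslocked() replaces the old internal __stop_machine() and is callable from paths that already hold the hotplug lock, while stop_machine() keeps its old behaviour by wrapping it, as sketched here with a hypothetical callback:

/*
 * Sketch mirroring the reworked stop_machine() wrapper at the end of the
 * diff: a path that already holds the hotplug lock calls
 * stop_machine_cpuslocked() directly.  example_cb() is hypothetical.
 */
#include <linux/cpu.h>
#include <linux/stop_machine.h>

static int example_cb(void *data)
{
        /* Runs with the other online CPUs spinning, interrupts disabled. */
        return 0;
}

static int example_sync_all_cpus(void)
{
        int ret;

        cpus_read_lock();
        /* NULL cpumask: run example_cb() on one CPU, the rest just spin. */
        ret = stop_machine_cpuslocked(example_cb, NULL, NULL);
        cpus_read_unlock();
        return ret;
}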