summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c33
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/cpumap.c2
-rw-r--r--kernel/bpf/lpm_trie.c14
-rw-r--r--kernel/bpf/sockmap.c3
-rw-r--r--kernel/bpf/verifier.c42
-rw-r--r--kernel/compat.c19
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/extable.c2
-rw-r--r--kernel/fork.c15
-rw-r--r--kernel/irq/irqdomain.c18
-rw-r--r--kernel/irq/matrix.c23
-rw-r--r--kernel/jump_label.c27
-rw-r--r--kernel/kprobes.c178
-rw-r--r--kernel/locking/qspinlock.c21
-rw-r--r--kernel/locking/rtmutex.c5
-rw-r--r--kernel/memremap.c15
-rw-r--r--kernel/panic.c2
-rw-r--r--kernel/printk/printk.c3
-rw-r--r--kernel/relay.c2
-rw-r--r--kernel/sched/core.c27
-rw-r--r--kernel/sched/cpufreq_schedutil.c2
-rw-r--r--kernel/sched/deadline.c6
-rw-r--r--kernel/sched/rt.c3
-rw-r--r--kernel/seccomp.c6
-rw-r--r--kernel/time/timer.c6
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/user.c3
-rw-r--r--kernel/workqueue.c16
29 files changed, 317 insertions, 184 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b1f6648..14750e7 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array)
{
int i;
- for (i = 0; i < array->map.max_entries; i++)
+ for (i = 0; i < array->map.max_entries; i++) {
free_percpu(array->pptrs[i]);
+ cond_resched();
+ }
}
static int bpf_array_alloc_percpu(struct bpf_array *array)
@@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
return -ENOMEM;
}
array->pptrs[i] = ptr;
+ cond_resched();
}
return 0;
@@ -73,11 +76,11 @@ static int array_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
- int numa_node = bpf_map_attr_numa_node(attr);
+ int ret, numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
+ u64 cost, array_size, mask64;
struct bpf_array *array;
- u64 array_size, mask64;
elem_size = round_up(attr->value_size, 8);
@@ -109,8 +112,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
- if (array_size >= U32_MAX - PAGE_SIZE)
+ cost = array_size;
+ if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
+ if (percpu) {
+ cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
+ if (cost >= U32_MAX - PAGE_SIZE)
+ return ERR_PTR(-ENOMEM);
+ }
+ cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ ret = bpf_map_precharge_memlock(cost);
+ if (ret < 0)
+ return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
@@ -121,20 +135,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
+ array->map.pages = cost;
array->elem_size = elem_size;
- if (!percpu)
- goto out;
-
- array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
-
- if (array_size >= U32_MAX - PAGE_SIZE ||
- bpf_array_alloc_percpu(array)) {
+ if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
-out:
- array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
return &array->map;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 29ca920..d315b39 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
* so always copy 'cnt' prog_ids to the user.
* In a rare race the user will see zero prog_ids
*/
- ids = kcalloc(cnt, sizeof(u32), GFP_USER);
+ ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
if (!ids)
return -ENOMEM;
rcu_read_lock();
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index fbfdada6..a4bb0b3 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data)
static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
int map_id)
{
- gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
int numa, err;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 7b469d1..b4b5b81 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map)
struct lpm_trie_node __rcu **slot;
struct lpm_trie_node *node;
- raw_spin_lock(&trie->lock);
+ /* Wait for outstanding programs to complete
+ * update/lookup/delete/get_next_key and free the trie.
+ */
+ synchronize_rcu();
/* Always start at the root and walk down to a node that has no
* children. Then free that node, nullify its reference in the parent
@@ -566,10 +569,9 @@ static void trie_free(struct bpf_map *map)
slot = &trie->root;
for (;;) {
- node = rcu_dereference_protected(*slot,
- lockdep_is_held(&trie->lock));
+ node = rcu_dereference_protected(*slot, 1);
if (!node)
- goto unlock;
+ goto out;
if (rcu_access_pointer(node->child[0])) {
slot = &node->child[0];
@@ -587,8 +589,8 @@ static void trie_free(struct bpf_map *map)
}
}
-unlock:
- raw_spin_unlock(&trie->lock);
+out:
+ kfree(trie);
}
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 48c3341..a927e89 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -521,8 +521,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
struct bpf_stab *stab;
- int err = -EINVAL;
u64 cost;
+ int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -547,6 +547,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+ err = -EINVAL;
if (cost >= U32_MAX - PAGE_SIZE)
goto free_stab;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5fb69a8..c6eff10 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1356,6 +1356,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_CTX;
}
+static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
+{
+ const struct bpf_reg_state *reg = cur_regs(env) + regno;
+
+ return type_is_pkt_pointer(reg->type);
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -1416,10 +1423,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
}
static int check_ptr_alignment(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg,
- int off, int size)
+ const struct bpf_reg_state *reg, int off,
+ int size, bool strict_alignment_once)
{
- bool strict = env->strict_alignment;
+ bool strict = env->strict_alignment || strict_alignment_once;
const char *pointer_desc = "";
switch (reg->type) {
@@ -1576,9 +1583,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
-static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
- int bpf_size, enum bpf_access_type t,
- int value_regno)
+static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
+ int off, int bpf_size, enum bpf_access_type t,
+ int value_regno, bool strict_alignment_once)
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
@@ -1590,7 +1597,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return size;
/* alignment checks will add in reg->off themselves */
- err = check_ptr_alignment(env, reg, off, size);
+ err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
if (err)
return err;
@@ -1735,21 +1742,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
return -EACCES;
}
- if (is_ctx_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_XADD stores into R%d context is not allowed\n",
- insn->dst_reg);
+ if (is_ctx_reg(env, insn->dst_reg) ||
+ is_pkt_reg(env, insn->dst_reg)) {
+ verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
+ insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
+ "context" : "packet");
return -EACCES;
}
/* check whether atomic_add can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_READ, -1);
+ BPF_SIZE(insn->code), BPF_READ, -1, true);
if (err)
return err;
/* check whether atomic_add can write into the same memory */
return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_WRITE, -1);
+ BPF_SIZE(insn->code), BPF_WRITE, -1, true);
}
/* when register 'regno' is passed into function that will read 'access_size'
@@ -2388,7 +2397,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
* is inferred from register state.
*/
for (i = 0; i < meta.access_size; i++) {
- err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
+ err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
+ BPF_WRITE, -1, false);
if (err)
return err;
}
@@ -4632,7 +4642,7 @@ static int do_check(struct bpf_verifier_env *env)
*/
err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ,
- insn->dst_reg);
+ insn->dst_reg, false);
if (err)
return err;
@@ -4684,7 +4694,7 @@ static int do_check(struct bpf_verifier_env *env)
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
- insn->src_reg);
+ insn->src_reg, false);
if (err)
return err;
@@ -4719,7 +4729,7 @@ static int do_check(struct bpf_verifier_env *env)
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
- -1);
+ -1, false);
if (err)
return err;
diff --git a/kernel/compat.c b/kernel/compat.c
index 3247fe7..3f5fa89 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -488,25 +488,6 @@ get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat)
}
EXPORT_SYMBOL_GPL(get_compat_sigset);
-int
-put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
- unsigned int size)
-{
- /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */
-#ifdef __BIG_ENDIAN
- compat_sigset_t v;
- switch (_NSIG_WORDS) {
- case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
- case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2];
- case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1];
- case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0];
- }
- return copy_to_user(compat, &v, size) ? -EFAULT : 0;
-#else
- return copy_to_user(compat, set, size) ? -EFAULT : 0;
-#endif
-}
-
#ifdef CONFIG_NUMA
COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
compat_uptr_t __user *, pages32,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 96db9ae..4b83847 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2246,7 +2246,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
struct perf_event_context *task_ctx,
enum event_type_t event_type)
{
- enum event_type_t ctx_event_type = event_type & EVENT_ALL;
+ enum event_type_t ctx_event_type;
bool cpu_event = !!(event_type & EVENT_CPU);
/*
@@ -2256,6 +2256,8 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
if (event_type & EVENT_PINNED)
event_type |= EVENT_FLEXIBLE;
+ ctx_event_type = event_type & EVENT_ALL;
+
perf_pmu_disable(cpuctx->ctx.pmu);
if (task_ctx)
task_ctx_sched_out(cpuctx, task_ctx, event_type);
diff --git a/kernel/extable.c b/kernel/extable.c
index a17fdb6..6a5b61e 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
return e;
}
-static inline int init_kernel_text(unsigned long addr)
+int init_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_sinittext &&
addr < (unsigned long)_einittext)
diff --git a/kernel/fork.c b/kernel/fork.c
index be8aa5b..e5d9d40 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -592,7 +592,7 @@ static void check_mm(struct mm_struct *mm)
* is dropped: either by a lazy thread or by
* mmput. Free the page directory and the mm.
*/
-static void __mmdrop(struct mm_struct *mm)
+void __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
mm_free_pgd(mm);
@@ -603,18 +603,7 @@ static void __mmdrop(struct mm_struct *mm)
put_user_ns(mm->user_ns);
free_mm(mm);
}
-
-void mmdrop(struct mm_struct *mm)
-{
- /*
- * The implicit full barrier implied by atomic_dec_and_test() is
- * required by the membarrier system call before returning to
- * user-space, after storing to rq->curr.
- */
- if (unlikely(atomic_dec_and_test(&mm->mm_count)))
- __mmdrop(mm);
-}
-EXPORT_SYMBOL_GPL(mmdrop);
+EXPORT_SYMBOL_GPL(__mmdrop);
static void mmdrop_async_fn(struct work_struct *work)
{
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e6a9c36..82b8b18 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1726,25 +1726,14 @@ static int irq_domain_debug_show(struct seq_file *m, void *p)
irq_domain_debug_show_one(m, d, 0);
return 0;
}
-
-static int irq_domain_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, irq_domain_debug_show, inode->i_private);
-}
-
-static const struct file_operations dfs_domain_ops = {
- .open = irq_domain_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
static void debugfs_add_domain_dir(struct irq_domain *d)
{
if (!d->name || !domain_dir || d->debugfs_file)
return;
d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
- &dfs_domain_ops);
+ &irq_domain_debug_fops);
}
static void debugfs_remove_domain_dir(struct irq_domain *d)
@@ -1760,7 +1749,8 @@ void __init irq_domain_debugfs_init(struct dentry *root)
if (!domain_dir)
return;
- debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops);
+ debugfs_create_file("default", 0444, domain_dir, NULL,
+ &irq_domain_debug_fops);
mutex_lock(&irq_domain_mutex);
list_for_each_entry(d, &irq_domain_list, link)
debugfs_add_domain_dir(d);
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 5187dfe..4c57704 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -16,6 +16,7 @@ struct cpumap {
unsigned int available;
unsigned int allocated;
unsigned int managed;
+ bool initialized;
bool online;
unsigned long alloc_map[IRQ_MATRIX_SIZE];
unsigned long managed_map[IRQ_MATRIX_SIZE];
@@ -81,9 +82,11 @@ void irq_matrix_online(struct irq_matrix *m)
BUG_ON(cm->online);
- bitmap_zero(cm->alloc_map, m->matrix_bits);
- cm->available = m->alloc_size - (cm->managed + m->systembits_inalloc);
- cm->allocated = 0;
+ if (!cm->initialized) {
+ cm->available = m->alloc_size;
+ cm->available -= cm->managed + m->systembits_inalloc;
+ cm->initialized = true;
+ }
m->global_available += cm->available;
cm->online = true;
m->online_maps++;
@@ -370,14 +373,16 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu,
if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end))
return;
- if (cm->online) {
- clear_bit(bit, cm->alloc_map);
- cm->allocated--;
+ clear_bit(bit, cm->alloc_map);
+ cm->allocated--;
+
+ if (cm->online)
m->total_allocated--;
- if (!managed) {
- cm->available++;
+
+ if (!managed) {
+ cm->available++;
+ if (cm->online)
m->global_available++;
- }
}
trace_irq_matrix_free(bit, cpu, m, cm);
}
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index b451709..52a0a7a 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -366,12 +366,15 @@ static void __jump_label_update(struct static_key *key,
{
for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
/*
- * entry->code set to 0 invalidates module init text sections
- * kernel_text_address() verifies we are not in core kernel
- * init code, see jump_label_invalidate_module_init().
+ * An entry->code of 0 indicates an entry which has been
+ * disabled because it was in an init text area.
*/
- if (entry->code && kernel_text_address(entry->code))
- arch_jump_label_transform(entry, jump_label_type(entry));
+ if (entry->code) {
+ if (kernel_text_address(entry->code))
+ arch_jump_label_transform(entry, jump_label_type(entry));
+ else
+ WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code);
+ }
}
}
@@ -417,6 +420,19 @@ void __init jump_label_init(void)
cpus_read_unlock();
}
+/* Disable any jump label entries in __init code */
+void __init jump_label_invalidate_init(void)
+{
+ struct jump_entry *iter_start = __start___jump_table;
+ struct jump_entry *iter_stop = __stop___jump_table;
+ struct jump_entry *iter;
+
+ for (iter = iter_start; iter < iter_stop; iter++) {
+ if (init_kernel_text(iter->code))
+ iter->code = 0;
+ }
+}
+
#ifdef CONFIG_MODULES
static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
@@ -633,6 +649,7 @@ static void jump_label_del_module(struct module *mod)
}
}
+/* Disable any jump label entries in module init code */
static void jump_label_invalidate_module_init(struct module *mod)
{
struct jump_entry *iter_start = mod->jump_entries;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index da2ccf1..102160f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -978,67 +978,90 @@ static int prepare_kprobe(struct kprobe *p)
}
/* Caller must lock kprobe_mutex */
-static void arm_kprobe_ftrace(struct kprobe *p)
+static int arm_kprobe_ftrace(struct kprobe *p)
{
- int ret;
+ int ret = 0;
ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
(unsigned long)p->addr, 0, 0);
- WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
- kprobe_ftrace_enabled++;
- if (kprobe_ftrace_enabled == 1) {
+ if (ret) {
+ pr_debug("Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ return ret;
+ }
+
+ if (kprobe_ftrace_enabled == 0) {
ret = register_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ if (ret) {
+ pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
+ goto err_ftrace;
+ }
}
+
+ kprobe_ftrace_enabled++;
+ return ret;
+
+err_ftrace:
+ /*
+ * Note: Since kprobe_ftrace_ops has IPMODIFY set, and ftrace requires a
+ * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
+ * empty filter_hash which would undesirably trace all functions.
+ */
+ ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
+ return ret;
}
/* Caller must lock kprobe_mutex */
-static void disarm_kprobe_ftrace(struct kprobe *p)
+static int disarm_kprobe_ftrace(struct kprobe *p)
{
- int ret;
+ int ret = 0;
- kprobe_ftrace_enabled--;
- if (kprobe_ftrace_enabled == 0) {
+ if (kprobe_ftrace_enabled == 1) {
ret = unregister_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
+ return ret;
}
+
+ kprobe_ftrace_enabled--;
+
ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
(unsigned long)p->addr, 1, 0);
WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ return ret;
}
#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
-#define arm_kprobe_ftrace(p) do {} while (0)
-#define disarm_kprobe_ftrace(p) do {} while (0)
+#define arm_kprobe_ftrace(p) (-ENODEV)
+#define disarm_kprobe_ftrace(p) (-ENODEV)
#endif
/* Arm a kprobe with text_mutex */
-static void arm_kprobe(struct kprobe *kp)
+static int arm_kprobe(struct kprobe *kp)
{
- if (unlikely(kprobe_ftrace(kp))) {
- arm_kprobe_ftrace(kp);
- return;
- }
+ if (unlikely(kprobe_ftrace(kp)))
+ return arm_kprobe_ftrace(kp);
+
cpus_read_lock();
mutex_lock(&text_mutex);
__arm_kprobe(kp);
mutex_unlock(&text_mutex);
cpus_read_unlock();
+
+ return 0;
}
/* Disarm a kprobe with text_mutex */
-static void disarm_kprobe(struct kprobe *kp, bool reopt)
+static int disarm_kprobe(struct kprobe *kp, bool reopt)
{
- if (unlikely(kprobe_ftrace(kp))) {
- disarm_kprobe_ftrace(kp);
- return;
- }
+ if (unlikely(kprobe_ftrace(kp)))
+ return disarm_kprobe_ftrace(kp);
cpus_read_lock();
mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
cpus_read_unlock();
+
+ return 0;
}
/*
@@ -1362,9 +1385,15 @@ out:
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED;
- if (!kprobes_all_disarmed)
+ if (!kprobes_all_disarmed) {
/* Arm the breakpoint again. */
- arm_kprobe(ap);
+ ret = arm_kprobe(ap);
+ if (ret) {
+ ap->flags |= KPROBE_FLAG_DISABLED;
+ list_del_rcu(&p->list);
+ synchronize_sched();
+ }
+ }
}
return ret;
}
@@ -1573,8 +1602,14 @@ int register_kprobe(struct kprobe *p)
hlist_add_head_rcu(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
- if (!kprobes_all_disarmed && !kprobe_disabled(p))
- arm_kprobe(p);
+ if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
+ ret = arm_kprobe(p);
+ if (ret) {
+ hlist_del_rcu(&p->hlist);
+ synchronize_sched();
+ goto out;
+ }
+ }
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
@@ -1608,11 +1643,12 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
static struct kprobe *__disable_kprobe(struct kprobe *p)
{
struct kprobe *orig_p;
+ int ret;
/* Get an original kprobe for return */
orig_p = __get_valid_kprobe(p);
if (unlikely(orig_p == NULL))
- return NULL;
+ return ERR_PTR(-EINVAL);
if (!kprobe_disabled(p)) {
/* Disable probe if it is a child probe */
@@ -1626,8 +1662,13 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
* should have already been disarmed, so
* skip unneed disarming process.
*/
- if (!kprobes_all_disarmed)
- disarm_kprobe(orig_p, true);
+ if (!kprobes_all_disarmed) {
+ ret = disarm_kprobe(orig_p, true);
+ if (ret) {
+ p->flags &= ~KPROBE_FLAG_DISABLED;
+ return ERR_PTR(ret);
+ }
+ }
orig_p->flags |= KPROBE_FLAG_DISABLED;
}
}
@@ -1644,8 +1685,8 @@ static int __unregister_kprobe_top(struct kprobe *p)
/* Disable kprobe. This will disarm it if needed. */
ap = __disable_kprobe(p);
- if (ap == NULL)
- return -EINVAL;
+ if (IS_ERR(ap))
+ return PTR_ERR(ap);
if (ap == p)
/*
@@ -2078,12 +2119,14 @@ static void kill_kprobe(struct kprobe *p)
int disable_kprobe(struct kprobe *kp)
{
int ret = 0;
+ struct kprobe *p;
mutex_lock(&kprobe_mutex);
/* Disable this kprobe */
- if (__disable_kprobe(kp) == NULL)
- ret = -EINVAL;
+ p = __disable_kprobe(kp);
+ if (IS_ERR(p))
+ ret = PTR_ERR(p);
mutex_unlock(&kprobe_mutex);
return ret;
@@ -2116,7 +2159,9 @@ int enable_kprobe(struct kprobe *kp)
if (!kprobes_all_disarmed && kprobe_disabled(p)) {
p->flags &= ~KPROBE_FLAG_DISABLED;
- arm_kprobe(p);
+ ret = arm_kprobe(p);
+ if (ret)
+ p->flags |= KPROBE_FLAG_DISABLED;
}
out:
mutex_unlock(&kprobe_mutex);
@@ -2407,11 +2452,12 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
.release = seq_release,
};
-static void arm_all_kprobes(void)
+static int arm_all_kprobes(void)
{
struct hlist_head *head;
struct kprobe *p;
- unsigned int i;
+ unsigned int i, total = 0, errors = 0;
+ int err, ret = 0;
mutex_lock(&kprobe_mutex);
@@ -2428,46 +2474,74 @@ static void arm_all_kprobes(void)
/* Arming kprobes doesn't optimize kprobe itself */
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
- if (!kprobe_disabled(p))
- arm_kprobe(p);
+ /* Arm all kprobes on a best-effort basis */
+ hlist_for_each_entry_rcu(p, head, hlist) {
+ if (!kprobe_disabled(p)) {
+ err = arm_kprobe(p);
+ if (err) {
+ errors++;
+ ret = err;
+ }
+ total++;
+ }
+ }
}
- printk(KERN_INFO "Kprobes globally enabled\n");
+ if (errors)
+ pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n",
+ errors, total);
+ else
+ pr_info("Kprobes globally enabled\n");
already_enabled:
mutex_unlock(&kprobe_mutex);
- return;
+ return ret;
}
-static void disarm_all_kprobes(void)
+static int disarm_all_kprobes(void)
{
struct hlist_head *head;
struct kprobe *p;
- unsigned int i;
+ unsigned int i, total = 0, errors = 0;
+ int err, ret = 0;
mutex_lock(&kprobe_mutex);
/* If kprobes are already disarmed, just return */
if (kprobes_all_disarmed) {
mutex_unlock(&kprobe_mutex);
- return;
+ return 0;
}
kprobes_all_disarmed = true;
- printk(KERN_INFO "Kprobes globally disabled\n");
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
+ /* Disarm all kprobes on a best-effort basis */
hlist_for_each_entry_rcu(p, head, hlist) {
- if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
- disarm_kprobe(p, false);
+ if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
+ err = disarm_kprobe(p, false);
+ if (err) {
+ errors++;
+ ret = err;
+ }
+ total++;
+ }
}
}
+
+ if (errors)
+ pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n",
+ errors, total);
+ else
+ pr_info("Kprobes globally disabled\n");
+
mutex_unlock(&kprobe_mutex);
/* Wait for disarming all kprobes by optimizer */
wait_for_kprobe_optimizer();
+
+ return ret;
}
/*
@@ -2494,6 +2568,7 @@ static ssize_t write_enabled_file_bool(struct file *file,
{
char buf[32];
size_t buf_size;
+ int ret = 0;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
@@ -2504,17 +2579,20 @@ static ssize_t write_enabled_file_bool(struct file *file,
case 'y':
case 'Y':
case '1':
- arm_all_kprobes();
+ ret = arm_all_kprobes();
break;
case 'n':
case 'N':
case '0':
- disarm_all_kprobes();
+ ret = disarm_all_kprobes();
break;
default:
return -EINVAL;
}
+ if (ret)
+ return ret;
+
return count;
}
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 38ece03..d880296 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -379,6 +379,14 @@ queue:
tail = encode_tail(smp_processor_id(), idx);
node += idx;
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
node->locked = 0;
node->next = NULL;
pv_init_node(node);
@@ -408,14 +416,15 @@ queue:
*/
if (old & _Q_TAIL_MASK) {
prev = decode_tail(old);
+
/*
- * The above xchg_tail() is also a load of @lock which
- * generates, through decode_tail(), a pointer. The address
- * dependency matches the RELEASE of xchg_tail() such that
- * the subsequent access to @prev happens after.
+ * We must ensure that the stores to @node are observed before
+ * the write to prev->next. The address dependency from
+ * xchg_tail is not sufficient to ensure this because the read
+ * component of xchg_tail is unordered with respect to the
+ * initialisation of @node.
*/
-
- WRITE_ONCE(prev->next, node);
+ smp_store_release(&prev->next, node);
pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(&node->locked);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 65cc0cb..940633c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1616,11 +1616,12 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
{
DEFINE_WAKE_Q(wake_q);
+ unsigned long flags;
bool postunlock;
- raw_spin_lock_irq(&lock->wait_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
- raw_spin_unlock_irq(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
if (postunlock)
rt_mutex_postunlock(&wake_q);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 4849be5..4dd4274 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -275,8 +275,15 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap)
return (res->start + resource_size(res)) >> PAGE_SHIFT;
}
+static unsigned long pfn_next(unsigned long pfn)
+{
+ if (pfn % 1024 == 0)
+ cond_resched();
+ return pfn + 1;
+}
+
#define for_each_device_pfn(pfn, map) \
- for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
+ for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
static void devm_memremap_pages_release(void *data)
{
@@ -337,10 +344,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
resource_size_t align_start, align_size, align_end;
struct vmem_altmap *altmap = pgmap->altmap_valid ?
&pgmap->altmap : NULL;
+ struct resource *res = &pgmap->res;
unsigned long pfn, pgoff, order;
pgprot_t pgprot = PAGE_KERNEL;
- int error, nid, is_ram, i = 0;
- struct resource *res = &pgmap->res;
+ int error, nid, is_ram;
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -409,8 +416,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
list_del(&page->lru);
page->pgmap = pgmap;
percpu_ref_get(pgmap->ref);
- if (!(++i % 1024))
- cond_resched();
}
devm_add_action(dev, devm_memremap_pages_release, pgmap);
diff --git a/kernel/panic.c b/kernel/panic.c
index 2cfef40..4b794f1 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -640,7 +640,7 @@ device_initcall(register_warn_debugfs);
*/
__visible void __stack_chk_fail(void)
{
- panic("stack-protector: Kernel stack is corrupted in: %p\n",
+ panic("stack-protector: Kernel stack is corrupted in: %pB\n",
__builtin_return_address(0));
}
EXPORT_SYMBOL(__stack_chk_fail);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index fc11235..f274fbe 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2397,7 +2397,7 @@ skip:
if (console_lock_spinning_disable_and_check()) {
printk_safe_exit_irqrestore(flags);
- return;
+ goto out;
}
printk_safe_exit_irqrestore(flags);
@@ -2430,6 +2430,7 @@ skip:
if (retry && console_trylock())
goto again;
+out:
if (wake_klogd)
wake_up_klogd();
}
diff --git a/kernel/relay.c b/kernel/relay.c
index c302940..c955b10 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -163,7 +163,7 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan)
{
struct rchan_buf *buf;
- if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
+ if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *))
return NULL;
buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bf724c1..e7c535e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2601,19 +2601,31 @@ static inline void finish_task(struct task_struct *prev)
#endif
}
-static inline void finish_lock_switch(struct rq *rq)
+static inline void
+prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
{
+ /*
+ * Since the runqueue lock will be released by the next
+ * task (which is an invalid locking op but in the case
+ * of the scheduler it's an obvious special-case), so we
+ * do an early lockdep release here:
+ */
+ rq_unpin_lock(rq, rf);
+ spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
#ifdef CONFIG_DEBUG_SPINLOCK
/* this is a valid case when another task releases the spinlock */
- rq->lock.owner = current;
+ rq->lock.owner = next;
#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq)
+{
/*
* If we are tracking spinlock dependencies then we have to
* fix up the runqueue lock - which gets 'carried over' from
* prev into current:
*/
spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-
raw_spin_unlock_irq(&rq->lock);
}
@@ -2844,14 +2856,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
- /*
- * Since the runqueue lock will be released by the next
- * task (which is an invalid locking op but in the case
- * of the scheduler it's an obvious special-case), so we
- * do an early lockdep release here:
- */
- rq_unpin_lock(rq, rf);
- spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+ prepare_lock_switch(rq, next, rf);
/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index dd062a1..7936f54 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -19,8 +19,6 @@
#include "sched.h"
-#define SUGOV_KTHREAD_PRIORITY 50
-
struct sugov_tunables {
struct gov_attr_set attr_set;
unsigned int rate_limit_us;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9bb0e0c..9df0978 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1153,6 +1153,7 @@ static void update_curr_dl(struct rq *rq)
struct sched_dl_entity *dl_se = &curr->dl;
u64 delta_exec, scaled_delta_exec;
int cpu = cpu_of(rq);
+ u64 now;
if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
@@ -1165,7 +1166,8 @@ static void update_curr_dl(struct rq *rq)
* natural solution, but the full ramifications of this
* approach need further study.
*/
- delta_exec = rq_clock_task(rq) - curr->se.exec_start;
+ now = rq_clock_task(rq);
+ delta_exec = now - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0)) {
if (unlikely(dl_se->dl_yielded))
goto throttle;
@@ -1178,7 +1180,7 @@ static void update_curr_dl(struct rq *rq)
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
- curr->se.exec_start = rq_clock_task(rq);
+ curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
sched_rt_avg_update(rq, delta_exec);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 663b235..aad49451 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -950,12 +950,13 @@ static void update_curr_rt(struct rq *rq)
{
struct task_struct *curr = rq->curr;
struct sched_rt_entity *rt_se = &curr->rt;
- u64 now = rq_clock_task(rq);
u64 delta_exec;
+ u64 now;
if (curr->sched_class != &rt_sched_class)
return;
+ now = rq_clock_task(rq);
delta_exec = now - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
return;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 940fa40..dc77548 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1076,14 +1076,16 @@ long seccomp_get_metadata(struct task_struct *task,
size = min_t(unsigned long, size, sizeof(kmd));
- if (copy_from_user(&kmd, data, size))
+ if (size < sizeof(kmd.filter_off))
+ return -EINVAL;
+
+ if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
return -EFAULT;
filter = get_nth_filter(task, kmd.filter_off);
if (IS_ERR(filter))
return PTR_ERR(filter);
- memset(&kmd, 0, sizeof(kmd));
if (filter->log)
kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 48150ab..4a4fd56 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1894,6 +1894,12 @@ int timers_dead_cpu(unsigned int cpu)
raw_spin_lock_irq(&new_base->lock);
raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+ /*
+ * The current CPUs base clock might be stale. Update it
+ * before moving the timers over.
+ */
+ forward_timer_base(new_base);
+
BUG_ON(old_base->running_timer);
for (i = 0; i < WHEEL_SIZE; i++)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fc2838a..c0a9e31 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -872,6 +872,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
return -EINVAL;
if (copy_from_user(&query, uquery, sizeof(query)))
return -EFAULT;
+ if (query.ids_len > BPF_TRACE_MAX_PROGS)
+ return -E2BIG;
mutex_lock(&bpf_event_mutex);
ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
diff --git a/kernel/user.c b/kernel/user.c
index 9a20acc..36288d8 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -101,6 +101,7 @@ struct user_struct root_user = {
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
.uid = GLOBAL_ROOT_UID,
+ .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
};
/*
@@ -191,6 +192,8 @@ struct user_struct *alloc_uid(kuid_t uid)
new->uid = uid;
atomic_set(&new->__count, 1);
+ ratelimit_state_init(&new->ratelimit, HZ, 100);
+ ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
/*
* Before adding this, check whether we raced
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 017044c..bb9a519 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4180,6 +4180,22 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
EXPORT_SYMBOL_GPL(workqueue_set_max_active);
/**
+ * current_work - retrieve %current task's work struct
+ *
+ * Determine if %current task is a workqueue worker and what it's working on.
+ * Useful to find out the context that the %current task is running in.
+ *
+ * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
+ */
+struct work_struct *current_work(void)
+{
+ struct worker *worker = current_wq_worker();
+
+ return worker ? worker->current_work : NULL;
+}
+EXPORT_SYMBOL(current_work);
+
+/**
* current_is_workqueue_rescuer - is %current workqueue rescuer?
*
* Determine whether %current is a workqueue rescuer. Can be used from
OpenPOWER on IntegriCloud