Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                   |    2
-rw-r--r--  kernel/audit.c                    |    1
-rw-r--r--  kernel/bpf/inode.c                |   16
-rw-r--r--  kernel/bpf/verifier.c             |  108
-rw-r--r--  kernel/cpu.c                      |   38
-rw-r--r--  kernel/crash_core.c               |   44
-rw-r--r--  kernel/events/core.c              |   33
-rw-r--r--  kernel/exit.c                     |   21
-rw-r--r--  kernel/extable.c                  |    3
-rw-r--r--  kernel/fork.c                     |   28
-rw-r--r--  kernel/futex.c                    |    4
-rw-r--r--  kernel/groups.c                   |   35
-rw-r--r--  kernel/irq/affinity.c             |   13
-rw-r--r--  kernel/irq/chip.c                 |   12
-rw-r--r--  kernel/irq/cpuhotplug.c           |    9
-rw-r--r--  kernel/irq/internals.h            |   14
-rw-r--r--  kernel/irq/irqdesc.c              |    1
-rw-r--r--  kernel/irq/irqdomain.c            |   19
-rw-r--r--  kernel/irq/manage.c               |  111
-rw-r--r--  kernel/irq/pm.c                   |    2
-rw-r--r--  kernel/kallsyms.c                 |   10
-rw-r--r--  kernel/kcmp.c                     |   57
-rw-r--r--  kernel/kexec.c                    |    8
-rw-r--r--  kernel/kexec_core.c               |   39
-rw-r--r--  kernel/kexec_file.c               |   15
-rw-r--r--  kernel/kexec_internal.h           |    2
-rw-r--r--  kernel/kmod.c                     |   56
-rw-r--r--  kernel/kprobes.c                  |   42
-rw-r--r--  kernel/ksysfs.c                   |    4
-rw-r--r--  kernel/locking/qrwlock.c          |    1
-rw-r--r--  kernel/locking/qspinlock.c        |    1
-rw-r--r--  kernel/locking/rtmutex.c          |    1
-rw-r--r--  kernel/locking/rwsem-spinlock.c   |    4
-rw-r--r--  kernel/module.c                   |   87
-rw-r--r--  kernel/power/main.c               |    2
-rw-r--r--  kernel/sched/core.c               |    2
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  |    5
-rw-r--r--  kernel/sched/cputime.c            |  180
-rw-r--r--  kernel/sched/deadline.c           |   14
-rw-r--r--  kernel/sched/fair.c               |   32
-rw-r--r--  kernel/signal.c                   |    4
-rw-r--r--  kernel/sys.c                      |    8
-rw-r--r--  kernel/sysctl.c                   |  335
-rw-r--r--  kernel/sysctl_binary.c            |    2
-rw-r--r--  kernel/trace/ftrace.c             |   47
-rw-r--r--  kernel/trace/ring_buffer.c        |   10
-rw-r--r--  kernel/trace/trace.c              |  143
-rw-r--r--  kernel/trace/trace.h              |    6
-rw-r--r--  kernel/trace/trace_kprobe.c       |   11
-rw-r--r--  kernel/trace/trace_stack.c        |    6
-rw-r--r--  kernel/watchdog.c                 |  289
-rw-r--r--  kernel/watchdog_hld.c             |   37
-rw-r--r--  kernel/workqueue.c                |   30
53 files changed, 1439 insertions(+), 565 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 72aa080..4cb8e8b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -82,7 +82,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
diff --git a/kernel/audit.c b/kernel/audit.c
index 833267b..6dd5569 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -641,6 +641,7 @@ static int auditd_send_unicast_skb(struct sk_buff *skb)
ac = rcu_dereference(auditd_conn);
if (!ac) {
rcu_read_unlock();
+ kfree_skb(skb);
rc = -ECONNREFUSED;
goto err;
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 9bbd334..e833ed9 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -377,10 +377,22 @@ static void bpf_evict_inode(struct inode *inode)
bpf_any_put(inode->i_private, type);
}
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int bpf_show_options(struct seq_file *m, struct dentry *root)
+{
+ umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX;
+
+ if (mode != S_IRWXUGO)
+ seq_printf(m, ",mode=%o", mode);
+ return 0;
+}
+
static const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
- .show_options = generic_show_options,
+ .show_options = bpf_show_options,
.evict_inode = bpf_evict_inode,
};
@@ -434,8 +446,6 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent)
struct inode *inode;
int ret;
- save_mount_options(sb, data);
-
ret = bpf_parse_options(data, &opts);
if (ret)
return ret;
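The new bpf_show_options() only emits a mode= option when the bpffs root's permission bits differ from the 0777 default, with the sticky bit masked out as in the hunk. A standalone C sketch of the same masking, with S_IALLUGO redefined here since it is kernel-internal:

#include <stdio.h>
#include <sys/stat.h>

/* kernel-internal macro, not in <sys/stat.h> */
#define S_IALLUGO (S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO)

int main(void)
{
	/* pretend the bpffs root was mounted with -o mode=0700 */
	mode_t mode = (S_IFDIR | S_IRWXU) & S_IALLUGO & ~S_ISVTX;

	if (mode != (S_IRWXU | S_IRWXG | S_IRWXO))	/* != 0777 default */
		printf(",mode=%o\n", mode);		/* -> ,mode=700 */
	return 0;
}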
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6a86723..af9e84a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -504,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
{
regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+ regs[regno].value_from_signed = false;
regs[regno].min_align = 0;
}
@@ -777,12 +778,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
return -EACCES;
}
-static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
+static bool __is_pointer_value(bool allow_ptr_leaks,
+ const struct bpf_reg_state *reg)
{
- if (env->allow_ptr_leaks)
+ if (allow_ptr_leaks)
return false;
- switch (env->cur_state.regs[regno].type) {
+ switch (reg->type) {
case UNKNOWN_VALUE:
case CONST_IMM:
return false;
@@ -791,6 +793,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
}
}
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
+{
+ return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
+}
+
static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
int off, int size, bool strict)
{
@@ -1832,10 +1839,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
dst_align = dst_reg->min_align;
/* We don't know anything about what was done to this register, mark it
- * as unknown.
+ * as unknown. Also, if both derived bounds came from signed/unsigned
+ * mixed compares and one side is unbounded, we cannot really do anything
+ * with them as boundaries cannot be trusted. Thus, arithmetic of two
+ * regs of such kind will get invalidated bounds on the dst side.
*/
- if (min_val == BPF_REGISTER_MIN_RANGE &&
- max_val == BPF_REGISTER_MAX_RANGE) {
+ if ((min_val == BPF_REGISTER_MIN_RANGE &&
+ max_val == BPF_REGISTER_MAX_RANGE) ||
+ (BPF_SRC(insn->code) == BPF_X &&
+ ((min_val != BPF_REGISTER_MIN_RANGE &&
+ max_val == BPF_REGISTER_MAX_RANGE) ||
+ (min_val == BPF_REGISTER_MIN_RANGE &&
+ max_val != BPF_REGISTER_MAX_RANGE) ||
+ (dst_reg->min_value != BPF_REGISTER_MIN_RANGE &&
+ dst_reg->max_value == BPF_REGISTER_MAX_RANGE) ||
+ (dst_reg->min_value == BPF_REGISTER_MIN_RANGE &&
+ dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) &&
+ regs[insn->dst_reg].value_from_signed !=
+ regs[insn->src_reg].value_from_signed)) {
reset_reg_range_values(regs, insn->dst_reg);
return;
}
@@ -2023,6 +2044,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
regs[insn->dst_reg].max_value = insn->imm;
regs[insn->dst_reg].min_value = insn->imm;
regs[insn->dst_reg].min_align = calc_align(insn->imm);
+ regs[insn->dst_reg].value_from_signed = false;
}
} else if (opcode > BPF_END) {
@@ -2198,40 +2220,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
struct bpf_reg_state *false_reg, u64 val,
u8 opcode)
{
+ bool value_from_signed = true;
+ bool is_range = true;
+
switch (opcode) {
case BPF_JEQ:
/* If this is false then we know nothing Jon Snow, but if it is
* true then we know for sure.
*/
true_reg->max_value = true_reg->min_value = val;
+ is_range = false;
break;
case BPF_JNE:
/* If this is true we know nothing Jon Snow, but if it is false
* we know the value for sure;
*/
false_reg->max_value = false_reg->min_value = val;
+ is_range = false;
break;
case BPF_JGT:
- /* Unsigned comparison, the minimum value is 0. */
- false_reg->min_value = 0;
+ value_from_signed = false;
/* fallthrough */
case BPF_JSGT:
+ if (true_reg->value_from_signed != value_from_signed)
+ reset_reg_range_values(true_reg, 0);
+ if (false_reg->value_from_signed != value_from_signed)
+ reset_reg_range_values(false_reg, 0);
+ if (opcode == BPF_JGT) {
+ /* Unsigned comparison, the minimum value is 0. */
+ false_reg->min_value = 0;
+ }
/* If this is false then we know the maximum val is val,
* otherwise we know the min val is val+1.
*/
false_reg->max_value = val;
+ false_reg->value_from_signed = value_from_signed;
true_reg->min_value = val + 1;
+ true_reg->value_from_signed = value_from_signed;
break;
case BPF_JGE:
- /* Unsigned comparison, the minimum value is 0. */
- false_reg->min_value = 0;
+ value_from_signed = false;
/* fallthrough */
case BPF_JSGE:
+ if (true_reg->value_from_signed != value_from_signed)
+ reset_reg_range_values(true_reg, 0);
+ if (false_reg->value_from_signed != value_from_signed)
+ reset_reg_range_values(false_reg, 0);
+ if (opcode == BPF_JGE) {
+ /* Unsigned comparison, the minimum value is 0. */
+ false_reg->min_value = 0;
+ }
/* If this is false then we know the maximum value is val - 1,
* otherwise we know the minimum value is val.
*/
false_reg->max_value = val - 1;
+ false_reg->value_from_signed = value_from_signed;
true_reg->min_value = val;
+ true_reg->value_from_signed = value_from_signed;
break;
default:
break;
@@ -2239,6 +2284,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
check_reg_overflow(false_reg);
check_reg_overflow(true_reg);
+ if (is_range) {
+ if (__is_pointer_value(false, false_reg))
+ reset_reg_range_values(false_reg, 0);
+ if (__is_pointer_value(false, true_reg))
+ reset_reg_range_values(true_reg, 0);
+ }
}
/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
@@ -2248,41 +2299,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
struct bpf_reg_state *false_reg, u64 val,
u8 opcode)
{
+ bool value_from_signed = true;
+ bool is_range = true;
+
switch (opcode) {
case BPF_JEQ:
/* If this is false then we know nothing Jon Snow, but if it is
* true then we know for sure.
*/
true_reg->max_value = true_reg->min_value = val;
+ is_range = false;
break;
case BPF_JNE:
/* If this is true we know nothing Jon Snow, but if it is false
* we know the value for sure;
*/
false_reg->max_value = false_reg->min_value = val;
+ is_range = false;
break;
case BPF_JGT:
- /* Unsigned comparison, the minimum value is 0. */
- true_reg->min_value = 0;
+ value_from_signed = false;
/* fallthrough */
case BPF_JSGT:
+ if (true_reg->value_from_signed != value_from_signed)
+ reset_reg_range_values(true_reg, 0);
+ if (false_reg->value_from_signed != value_from_signed)
+ reset_reg_range_values(false_reg, 0);
+ if (opcode == BPF_JGT) {
+ /* Unsigned comparison, the minimum value is 0. */
+ true_reg->min_value = 0;
+ }
/*
* If this is false, then the val is <= the register, if it is
* true the register <= to the val.
*/
false_reg->min_value = val;
+ false_reg->value_from_signed = value_from_signed;
true_reg->max_value = val - 1;
+ true_reg->value_from_signed = value_from_signed;
break;
case BPF_JGE:
- /* Unsigned comparison, the minimum value is 0. */
- true_reg->min_value = 0;
+ value_from_signed = false;
/* fallthrough */
case BPF_JSGE:
+ if (true_reg->value_from_signed != value_from_signed)
+ reset_reg_range_values(true_reg, 0);
+ if (false_reg->value_from_signed != value_from_signed)
+ reset_reg_range_values(false_reg, 0);
+ if (opcode == BPF_JGE) {
+ /* Unsigned comparison, the minimum value is 0. */
+ true_reg->min_value = 0;
+ }
/* If this is false then constant < register, if it is true then
* the register < constant.
*/
false_reg->min_value = val + 1;
+ false_reg->value_from_signed = value_from_signed;
true_reg->max_value = val;
+ true_reg->value_from_signed = value_from_signed;
break;
default:
break;
@@ -2290,6 +2364,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
check_reg_overflow(false_reg);
check_reg_overflow(true_reg);
+ if (is_range) {
+ if (__is_pointer_value(false, false_reg))
+ reset_reg_range_values(false_reg, 0);
+ if (__is_pointer_value(false, true_reg))
+ reset_reg_range_values(true_reg, 0);
+ }
}
static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
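The value_from_signed bookkeeping above exists because a bound learned from a signed compare says nothing about the value's unsigned range (and vice versa), so the verifier now invalidates bounds when the two are mixed. A tiny userspace illustration of the mismatch:

#include <stdio.h>

int main(void)
{
	long long v = -1;	/* think: attacker-controlled scalar */

	/* a signed upper-bound check (BPF_JSGT style) passes... */
	if (v < 64)
		/* ...yet viewed unsigned the "bounded" value is huge */
		printf("%llu\n", (unsigned long long)v);
	return 0;	/* prints 18446744073709551615 */
}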
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b03a325..eee0331 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -271,11 +271,26 @@ void cpu_hotplug_enable(void)
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif /* CONFIG_HOTPLUG_CPU */
+static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st);
+
static int bringup_wait_for_ap(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
wait_for_completion(&st->done);
+ if (WARN_ON_ONCE((!cpu_online(cpu))))
+ return -ECANCELED;
+
+ /* Unpark the stopper thread and the hotplug thread of the target cpu */
+ stop_machine_unpark(cpu);
+ kthread_unpark(st->thread);
+
+ /* Should we go further up ? */
+ if (st->target > CPUHP_AP_ONLINE_IDLE) {
+ __cpuhp_kick_ap_work(st);
+ wait_for_completion(&st->done);
+ }
return st->result;
}
@@ -296,9 +311,7 @@ static int bringup_cpu(unsigned int cpu)
irq_unlock_sparse();
if (ret)
return ret;
- ret = bringup_wait_for_ap(cpu);
- BUG_ON(!cpu_online(cpu));
- return ret;
+ return bringup_wait_for_ap(cpu);
}
/*
@@ -767,31 +780,20 @@ void notify_cpu_starting(unsigned int cpu)
}
/*
- * Called from the idle task. We need to set active here, so we can kick off
- * the stopper thread and unpark the smpboot threads. If the target state is
- * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
- * cpu further.
+ * Called from the idle task. Wake up the controlling task which brings the
+ * stopper and the hotplug thread of the upcoming CPU up and then delegates
+ * the rest of the online bringup to the hotplug thread.
*/
void cpuhp_online_idle(enum cpuhp_state state)
{
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
- unsigned int cpu = smp_processor_id();
/* Happens for the boot cpu */
if (state != CPUHP_AP_ONLINE_IDLE)
return;
st->state = CPUHP_AP_ONLINE_IDLE;
-
- /* Unpark the stopper thread and the hotplug thread of this cpu */
- stop_machine_unpark(cpu);
- kthread_unpark(st->thread);
-
- /* Should we go further up ? */
- if (st->target > CPUHP_AP_ONLINE_IDLE)
- __cpuhp_kick_ap_work(st);
- else
- complete(&st->done);
+ complete(&st->done);
}
/* Requires cpu_add_remove_lock to be held */
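With this change the upcoming CPU's idle task only completes st->done; bringup_wait_for_ap() on the control side does the unparking and any further kick. A rough pthreads model of that completion handshake (names illustrative, not the kernel API):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t done;			/* plays the role of st->done */

static void *ap_idle(void *arg)		/* cpuhp_online_idle() side */
{
	sem_post(&done);		/* "reached CPUHP_AP_ONLINE_IDLE" */
	return NULL;
}

int main(void)				/* bringup_wait_for_ap() side */
{
	pthread_t ap;

	sem_init(&done, 0, 0);
	pthread_create(&ap, NULL, ap_idle, NULL);
	sem_wait(&done);		/* wait for the AP, then unpark
					 * stopper/hotplug threads here */
	printf("control side continues the bringup\n");
	pthread_join(ap, NULL);
	return 0;
}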
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index fcbd568..6db80fc 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,10 +14,12 @@
#include <asm/sections.h>
/* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+static unsigned char *vmcoreinfo_data;
+static size_t vmcoreinfo_size;
+u32 *vmcoreinfo_note;
+
+/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
/*
* parsing the "crashkernel" commandline
@@ -324,8 +326,23 @@ static void update_vmcoreinfo_note(void)
final_note(buf);
}
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+ if (ptr)
+ memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+
+ vmcoreinfo_data_safecopy = ptr;
+}
+
void crash_save_vmcoreinfo(void)
{
+ if (!vmcoreinfo_note)
+ return;
+
+ /* Use the safe copy to generate the vmcoreinfo note if we have one */
+ if (vmcoreinfo_data_safecopy)
+ vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
update_vmcoreinfo_note();
}
@@ -340,7 +357,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
r = vscnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
- r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+ r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
@@ -356,11 +373,26 @@ void __weak arch_crash_save_vmcoreinfo(void)
phys_addr_t __weak paddr_vmcoreinfo_note(void)
{
- return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
+ return __pa(vmcoreinfo_note);
}
static int __init crash_save_vmcoreinfo_init(void)
{
+ vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+ if (!vmcoreinfo_data) {
+ pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+ return -ENOMEM;
+ }
+
+ vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vmcoreinfo_note) {
+ free_page((unsigned long)vmcoreinfo_data);
+ vmcoreinfo_data = NULL;
+ pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+ return -ENOMEM;
+ }
+
VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
VMCOREINFO_PAGESIZE(PAGE_SIZE);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1538df9..426c2ff 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1452,6 +1452,13 @@ static enum event_type_t get_event_type(struct perf_event *event)
lockdep_assert_held(&ctx->lock);
+ /*
+ * It's 'group type', really, because if our group leader is
+ * pinned, so are we.
+ */
+ if (event->group_leader != event)
+ event = event->group_leader;
+
event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
if (!ctx->task)
event_type |= EVENT_CPU;
@@ -4378,7 +4385,9 @@ EXPORT_SYMBOL_GPL(perf_event_read_value);
static int __perf_read_group_add(struct perf_event *leader,
u64 read_format, u64 *values)
{
+ struct perf_event_context *ctx = leader->ctx;
struct perf_event *sub;
+ unsigned long flags;
int n = 1; /* skip @nr */
int ret;
@@ -4408,12 +4417,15 @@ static int __perf_read_group_add(struct perf_event *leader,
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ raw_spin_lock_irqsave(&ctx->lock, flags);
+
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
values[n++] += perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
}
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
return 0;
}
@@ -7321,21 +7333,6 @@ int perf_event_account_interrupt(struct perf_event *event)
return __perf_event_account_interrupt(event, 1);
}
-static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
-{
- /*
- * Due to interrupt latency (AKA "skid"), we may enter the
- * kernel before taking an overflow, even if the PMU is only
- * counting user events.
- * To avoid leaking information to userspace, we must always
- * reject kernel samples when exclude_kernel is set.
- */
- if (event->attr.exclude_kernel && !user_mode(regs))
- return false;
-
- return true;
-}
-
/*
* Generic event overflow handling, sampling.
*/
@@ -7357,12 +7354,6 @@ static int __perf_event_overflow(struct perf_event *event,
ret = __perf_event_account_interrupt(event, throttle);
/*
- * For security, drop the skid kernel samples if necessary.
- */
- if (!sample_is_allowed(event, regs))
- return ret;
-
- /*
* XXX event_limit might not quite work as expected on inherited
* events
*/
diff --git a/kernel/exit.c b/kernel/exit.c
index 2bbc232..c5548fa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1590,9 +1590,6 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
wo.wo_rusage = ru;
ret = do_wait(&wo);
- if (ret > 0)
- ret = 0;
-
put_pid(pid);
return ret;
}
@@ -1603,6 +1600,11 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
struct rusage r;
struct waitid_info info = {.status = 0};
long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
+ int signo = 0;
+ if (err > 0) {
+ signo = SIGCHLD;
+ err = 0;
+ }
if (!err) {
if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
@@ -1612,7 +1614,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
return err;
user_access_begin();
- unsafe_put_user(err ? 0 : SIGCHLD, &infop->si_signo, Efault);
+ unsafe_put_user(signo, &infop->si_signo, Efault);
unsafe_put_user(0, &infop->si_errno, Efault);
unsafe_put_user((short)info.cause, &infop->si_code, Efault);
unsafe_put_user(info.pid, &infop->si_pid, Efault);
@@ -1637,6 +1639,10 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
__WNOTHREAD|__WCLONE|__WALL))
return -EINVAL;
+ /* -INT_MIN is not defined */
+ if (upid == INT_MIN)
+ return -ESRCH;
+
if (upid == -1)
type = PIDTYPE_MAX;
else if (upid < 0) {
@@ -1714,6 +1720,11 @@ COMPAT_SYSCALL_DEFINE5(waitid,
struct rusage ru;
struct waitid_info info = {.status = 0};
long err = kernel_waitid(which, pid, &info, options, uru ? &ru : NULL);
+ int signo = 0;
+ if (err > 0) {
+ signo = SIGCHLD;
+ err = 0;
+ }
if (!err && uru) {
/* kernel_waitid() overwrites everything in ru */
@@ -1729,7 +1740,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
return err;
user_access_begin();
- unsafe_put_user(err ? 0 : SIGCHLD, &infop->si_signo, Efault);
+ unsafe_put_user(signo, &infop->si_signo, Efault);
unsafe_put_user(0, &infop->si_errno, Efault);
unsafe_put_user((short)info.cause, &infop->si_code, Efault);
unsafe_put_user(info.pid, &infop->si_pid, Efault);
diff --git a/kernel/extable.c b/kernel/extable.c
index 223df4a..38c2412 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -55,7 +55,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
{
const struct exception_table_entry *e;
- e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
+ e = search_extable(__start___ex_table,
+ __stop___ex_table - __start___ex_table, addr);
if (!e)
e = search_module_extables(addr);
return e;
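Both extable call sites now follow search_extable()'s new calling convention: base pointer plus element count instead of first/last pointers, the same shape as libc bsearch(). A minimal count-based lookup for illustration (the entry layout here is hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct entry { unsigned long addr; };

static int cmp(const void *key, const void *elem)
{
	unsigned long a = *(const unsigned long *)key;
	unsigned long b = ((const struct entry *)elem)->addr;

	return (a > b) - (a < b);
}

int main(void)
{
	struct entry table[] = { { 0x10 }, { 0x20 }, { 0x30 } };
	unsigned long addr = 0x20;
	struct entry *e;

	/* (base, nmemb, ...) rather than (first, last) */
	e = bsearch(&addr, table, sizeof(table) / sizeof(table[0]),
		    sizeof(table[0]), cmp);
	printf("%s\n", e ? "found" : "not found");
	return 0;
}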
diff --git a/kernel/fork.c b/kernel/fork.c
index aa01b81..17921b0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -205,19 +205,17 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
void *stack;
int i;
- local_irq_disable();
for (i = 0; i < NR_CACHED_STACKS; i++) {
- struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+ struct vm_struct *s;
+
+ s = this_cpu_xchg(cached_stacks[i], NULL);
if (!s)
continue;
- this_cpu_write(cached_stacks[i], NULL);
tsk->stack_vm_area = s;
- local_irq_enable();
return s->addr;
}
- local_irq_enable();
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
VMALLOC_START, VMALLOC_END,
@@ -245,19 +243,15 @@ static inline void free_thread_stack(struct task_struct *tsk)
{
#ifdef CONFIG_VMAP_STACK
if (task_stack_vm_area(tsk)) {
- unsigned long flags;
int i;
- local_irq_save(flags);
for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_read(cached_stacks[i]))
+ if (this_cpu_cmpxchg(cached_stacks[i],
+ NULL, tsk->stack_vm_area) != NULL)
continue;
- this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
- local_irq_restore(flags);
return;
}
- local_irq_restore(flags);
vfree_atomic(tsk->stack);
return;
@@ -560,7 +554,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
set_task_stack_end_magic(tsk);
#ifdef CONFIG_CC_STACKPROTECTOR
- tsk->stack_canary = get_random_long();
+ tsk->stack_canary = get_random_canary();
#endif
/*
@@ -579,6 +573,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
kcov_task_init(tsk);
+#ifdef CONFIG_FAULT_INJECTION
+ tsk->fail_nth = 0;
+#endif
+
return tsk;
free_stack:
@@ -1637,9 +1635,9 @@ static __latent_entropy struct task_struct *copy_process(
prev_cputime_init(&p->prev_cputime);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
- seqcount_init(&p->vtime_seqcount);
- p->vtime_snap = 0;
- p->vtime_snap_whence = VTIME_INACTIVE;
+ seqcount_init(&p->vtime.seqcount);
+ p->vtime.starttime = 0;
+ p->vtime.state = VTIME_INACTIVE;
#endif
#if defined(SPLIT_RSS_COUNTING)
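The fork.c hunks replace the irq-disabled read/write pairs with single atomic operations: this_cpu_xchg() claims a cached stack and this_cpu_cmpxchg() returns one only into an empty slot, so no interrupt can observe a half-updated cache. A userspace sketch of the same claim/return idiom with C11 atomics (names illustrative):

#include <stdatomic.h>
#include <stddef.h>

#define NR_CACHED 2
static _Atomic(void *) cached[NR_CACHED];

static void *cache_get(void)		/* alloc_thread_stack_node() side */
{
	for (int i = 0; i < NR_CACHED; i++) {
		void *s = atomic_exchange(&cached[i], NULL); /* claim slot */
		if (s)
			return s;
	}
	return NULL;	/* caller falls back to a fresh allocation */
}

static int cache_put(void *p)		/* free_thread_stack() side */
{
	for (int i = 0; i < NR_CACHED; i++) {
		void *expected = NULL;
		/* install only into a still-empty slot */
		if (atomic_compare_exchange_strong(&cached[i], &expected, p))
			return 0;
	}
	return -1;	/* cache full, caller really frees */
}

int main(void)
{
	static char stack[1];

	cache_put(stack);
	return cache_get() == stack ? 0 : 1;
}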
diff --git a/kernel/futex.c b/kernel/futex.c
index c934689..16dbe4c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -212,7 +212,7 @@ struct futex_pi_state {
atomic_t refcount;
union futex_key key;
-};
+} __randomize_layout;
/**
* struct futex_q - The hashed futex queue entry, one per waiting task
@@ -246,7 +246,7 @@ struct futex_q {
struct rt_mutex_waiter *rt_waiter;
union futex_key *requeue_pi_key;
u32 bitset;
-};
+} __randomize_layout;
static const struct futex_q futex_q_init = {
/* list gets initialized in queue_me()*/
diff --git a/kernel/groups.c b/kernel/groups.c
index d097276..434f666 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/security.h>
+#include <linux/sort.h>
#include <linux/syscalls.h>
#include <linux/user_namespace.h>
#include <linux/vmalloc.h>
@@ -76,32 +77,18 @@ static int groups_from_user(struct group_info *group_info,
return 0;
}
-/* a simple Shell sort */
+static int gid_cmp(const void *_a, const void *_b)
+{
+ kgid_t a = *(kgid_t *)_a;
+ kgid_t b = *(kgid_t *)_b;
+
+ return gid_gt(a, b) - gid_lt(a, b);
+}
+
static void groups_sort(struct group_info *group_info)
{
- int base, max, stride;
- int gidsetsize = group_info->ngroups;
-
- for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
- ; /* nothing */
- stride /= 3;
-
- while (stride) {
- max = gidsetsize - stride;
- for (base = 0; base < max; base++) {
- int left = base;
- int right = left + stride;
- kgid_t tmp = group_info->gid[right];
-
- while (left >= 0 && gid_gt(group_info->gid[left], tmp)) {
- group_info->gid[right] = group_info->gid[left];
- right = left;
- left -= stride;
- }
- group_info->gid[right] = tmp;
- }
- stride /= 3;
- }
+ sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
+ gid_cmp, NULL);
}
/* a simple bsearch */
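gid_cmp() above uses the branch-free three-way comparison idiom, gid_gt() minus gid_lt(), i.e. (a > b) - (a < b), which avoids the overflow a naive a - b comparator can hit. The same shape works with libc qsort(); a minimal sketch:

#include <stdio.h>
#include <stdlib.h>

static int gid_cmp(const void *_a, const void *_b)
{
	unsigned int a = *(const unsigned int *)_a;
	unsigned int b = *(const unsigned int *)_b;

	return (a > b) - (a < b);	/* -1, 0 or 1, no overflow */
}

int main(void)
{
	unsigned int gids[] = { 1000, 4, 20, 1000, 7 };
	size_t i, n = sizeof(gids) / sizeof(gids[0]);

	qsort(gids, n, sizeof(gids[0]), gid_cmp);
	for (i = 0; i < n; i++)
		printf("%u ", gids[i]);	/* 4 7 20 1000 1000 */
	printf("\n");
	return 0;
}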
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index d2747f9..d69bd772 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -110,6 +110,13 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
struct cpumask *masks;
cpumask_var_t nmsk, *node_to_present_cpumask;
+ /*
+ * If there aren't any vectors left after applying the pre/post
+ * vectors don't bother with assigning affinity.
+ */
+ if (!affv)
+ return NULL;
+
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return NULL;
@@ -192,15 +199,19 @@ out:
/**
* irq_calc_affinity_vectors - Calculate the optimal number of vectors
+ * @minvec: The minimum number of vectors available
* @maxvec: The maximum number of vectors available
* @affd: Description of the affinity requirements
*/
-int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
+int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd)
{
int resv = affd->pre_vectors + affd->post_vectors;
int vecs = maxvec - resv;
int ret;
+ if (resv > minvec)
+ return 0;
+
get_online_cpus();
ret = min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv;
put_online_cpus();
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ad43468..a3cc37c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -170,21 +170,11 @@ static void irq_state_clr_disabled(struct irq_desc *desc)
irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED);
}
-static void irq_state_set_disabled(struct irq_desc *desc)
-{
- irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
-}
-
static void irq_state_clr_masked(struct irq_desc *desc)
{
irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED);
}
-static void irq_state_set_masked(struct irq_desc *desc)
-{
- irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
-}
-
static void irq_state_clr_started(struct irq_desc *desc)
{
irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED);
@@ -234,7 +224,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
return IRQ_STARTUP_MANAGED;
}
#else
-static int
+static __always_inline int
__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
{
return IRQ_STARTUP_NORMAL;
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index aee8f7e..638eb9c 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -95,8 +95,13 @@ static bool migrate_one_irq(struct irq_desc *desc)
affinity = cpu_online_mask;
brokeaff = true;
}
-
- err = irq_do_set_affinity(d, affinity, true);
+ /*
+ * Do not set the force argument of irq_do_set_affinity() as this
+ * disables the masking of offline CPUs from the supplied affinity
+ * mask and therefore might keep/reassign the irq to the outgoing
+ * CPU.
+ */
+ err = irq_do_set_affinity(d, affinity, false);
if (err) {
pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
d->irq, err);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 9da14d1..a2c4805 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -227,6 +227,16 @@ static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
return __irqd_to_state(d) & mask;
}
+static inline void irq_state_set_disabled(struct irq_desc *desc)
+{
+ irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
+}
+
+static inline void irq_state_set_masked(struct irq_desc *desc)
+{
+ irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
+}
+
#undef __irqd_to_state
static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
@@ -437,7 +447,9 @@ static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
# ifdef CONFIG_IRQ_DOMAIN
void irq_domain_debugfs_init(struct dentry *root);
# else
-static inline void irq_domain_debugfs_init(struct dentry *root);
+static inline void irq_domain_debugfs_init(struct dentry *root)
+{
+}
# endif
#else /* CONFIG_GENERIC_IRQ_DEBUGFS */
static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 8bbd064..73be2b3 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -373,6 +373,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
raw_spin_lock_init(&desc->lock);
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ mutex_init(&desc->request_mutex);
init_rcu_head(&desc->rcu);
desc_set_defaults(irq, desc, node, affinity, owner);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 14fe862..f1f2514 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1,5 +1,6 @@
#define pr_fmt(fmt) "irq: " fmt
+#include <linux/acpi.h>
#include <linux/debugfs.h>
#include <linux/hardirq.h>
#include <linux/interrupt.h>
@@ -155,6 +156,21 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
domain->name = fwid->name;
break;
}
+#ifdef CONFIG_ACPI
+ } else if (is_acpi_device_node(fwnode)) {
+ struct acpi_buffer buf = {
+ .length = ACPI_ALLOCATE_BUFFER,
+ };
+ acpi_handle handle;
+
+ handle = acpi_device_handle(to_acpi_device_node(fwnode));
+ if (acpi_get_name(handle, ACPI_FULL_PATHNAME, &buf) == AE_OK) {
+ domain->name = buf.pointer;
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+ }
+
+ domain->fwnode = fwnode;
+#endif
} else if (of_node) {
char *name;
@@ -1667,8 +1683,7 @@ static void debugfs_add_domain_dir(struct irq_domain *d)
static void debugfs_remove_domain_dir(struct irq_domain *d)
{
- if (d->debugfs_file)
- debugfs_remove(d->debugfs_file);
+ debugfs_remove(d->debugfs_file);
}
void __init irq_domain_debugfs_init(struct dentry *root)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5c11c17..1d1a5b9 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1090,6 +1090,16 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
+ *
+ * Locking rules:
+ *
+ * desc->request_mutex Provides serialization against a concurrent free_irq()
+ * chip_bus_lock Provides serialization for slow bus operations
+ * desc->lock Provides serialization against hard interrupts
+ *
+ * chip_bus_lock and desc->lock are sufficient for all other management and
+ * interrupt related functions. desc->request_mutex solely serializes
+ * request/free_irq().
*/
static int
__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
@@ -1168,7 +1178,34 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
new->flags &= ~IRQF_ONESHOT;
/*
+ * Protects against a concurrent __free_irq() call which might wait
+ * for synchronize_irq() to complete without holding the optional
+ * chip bus lock and desc->lock.
+ */
+ mutex_lock(&desc->request_mutex);
+
+ /*
+ * Acquire bus lock as the irq_request_resources() callback below
+ * might rely on the serialization or the magic power management
+ * functions which are abusing the irq_bus_lock() callback,
+ */
+ chip_bus_lock(desc);
+
+ /* First installed action requests resources. */
+ if (!desc->action) {
+ ret = irq_request_resources(desc);
+ if (ret) {
+ pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n",
+ new->name, irq, desc->irq_data.chip->name);
+ goto out_bus_unlock;
+ }
+ }
+
+ /*
* The following block of code has to be executed atomically
+ * protected against a concurrent interrupt and any of the other
+ * management calls which are not serialized via
+ * desc->request_mutex or the optional bus lock.
*/
raw_spin_lock_irqsave(&desc->lock, flags);
old_ptr = &desc->action;
@@ -1267,13 +1304,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
if (!shared) {
- ret = irq_request_resources(desc);
- if (ret) {
- pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n",
- new->name, irq, desc->irq_data.chip->name);
- goto out_unlock;
- }
-
init_waitqueue_head(&desc->wait_for_threads);
/* Setup the type (level, edge polarity) if configured: */
@@ -1281,10 +1311,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
ret = __irq_set_trigger(desc,
new->flags & IRQF_TRIGGER_MASK);
- if (ret) {
- irq_release_resources(desc);
+ if (ret)
goto out_unlock;
- }
}
desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
@@ -1347,6 +1375,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ chip_bus_sync_unlock(desc);
+ mutex_unlock(&desc->request_mutex);
irq_setup_timings(desc, new);
@@ -1378,6 +1408,12 @@ mismatch:
out_unlock:
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ if (!desc->action)
+ irq_release_resources(desc);
+out_bus_unlock:
+ chip_bus_sync_unlock(desc);
+ mutex_unlock(&desc->request_mutex);
+
out_thread:
if (new->thread) {
struct task_struct *t = new->thread;
@@ -1417,9 +1453,7 @@ int setup_irq(unsigned int irq, struct irqaction *act)
if (retval < 0)
return retval;
- chip_bus_lock(desc);
retval = __setup_irq(irq, desc, act);
- chip_bus_sync_unlock(desc);
if (retval)
irq_chip_pm_put(&desc->irq_data);
@@ -1443,6 +1477,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!desc)
return NULL;
+ mutex_lock(&desc->request_mutex);
chip_bus_lock(desc);
raw_spin_lock_irqsave(&desc->lock, flags);
@@ -1458,6 +1493,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
WARN(1, "Trying to free already-free IRQ %d\n", irq);
raw_spin_unlock_irqrestore(&desc->lock, flags);
chip_bus_sync_unlock(desc);
+ mutex_unlock(&desc->request_mutex);
return NULL;
}
@@ -1475,8 +1511,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!desc->action) {
irq_settings_clr_disable_unlazy(desc);
irq_shutdown(desc);
- irq_release_resources(desc);
- irq_remove_timings(desc);
}
#ifdef CONFIG_SMP
@@ -1486,6 +1520,20 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
#endif
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ /*
+ * Drop bus_lock here so the changes which were done in the chip
+ * callbacks above are synced out to the irq chips which hang
+ * behind a slow bus (I2C, SPI) before calling synchronize_irq().
+ *
+ * Aside of that the bus_lock can also be taken from the threaded
+ * handler in irq_finalize_oneshot() which results in a deadlock
+ * because synchronize_irq() would wait forever for the thread to
+ * complete, which is blocked on the bus lock.
+ *
+ * The still held desc->request_mutex protects against a
+ * concurrent request_irq() of this irq so the release of resources
+ * and timing data is properly serialized.
+ */
chip_bus_sync_unlock(desc);
unregister_handler_proc(irq, action);
@@ -1518,6 +1566,20 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
}
}
+ /* Last action releases resources */
+ if (!desc->action) {
+ /*
+ * Reacquire bus lock as irq_release_resources() might
+ * require it to deallocate resources over the slow bus.
+ */
+ chip_bus_lock(desc);
+ irq_release_resources(desc);
+ chip_bus_sync_unlock(desc);
+ irq_remove_timings(desc);
+ }
+
+ mutex_unlock(&desc->request_mutex);
+
irq_chip_pm_put(&desc->irq_data);
module_put(desc->owner);
kfree(action->secondary);
@@ -1674,9 +1736,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
return retval;
}
- chip_bus_lock(desc);
retval = __setup_irq(irq, desc, action);
- chip_bus_sync_unlock(desc);
if (retval) {
irq_chip_pm_put(&desc->irq_data);
@@ -1924,9 +1984,7 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act)
if (retval < 0)
return retval;
- chip_bus_lock(desc);
retval = __setup_irq(irq, desc, act);
- chip_bus_sync_unlock(desc);
if (retval)
irq_chip_pm_put(&desc->irq_data);
@@ -1935,9 +1993,10 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act)
}
/**
- * request_percpu_irq - allocate a percpu interrupt line
+ * __request_percpu_irq - allocate a percpu interrupt line
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs.
+ * @flags: Interrupt type flags (IRQF_TIMER only)
* @devname: An ascii name for the claiming device
* @dev_id: A percpu cookie passed back to the handler function
*
@@ -1950,8 +2009,9 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act)
* the handler gets called with the interrupted CPU's instance of
* that variable.
*/
-int request_percpu_irq(unsigned int irq, irq_handler_t handler,
- const char *devname, void __percpu *dev_id)
+int __request_percpu_irq(unsigned int irq, irq_handler_t handler,
+ unsigned long flags, const char *devname,
+ void __percpu *dev_id)
{
struct irqaction *action;
struct irq_desc *desc;
@@ -1965,12 +2025,15 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
!irq_settings_is_per_cpu_devid(desc))
return -EINVAL;
+ if (flags && flags != IRQF_TIMER)
+ return -EINVAL;
+
action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
if (!action)
return -ENOMEM;
action->handler = handler;
- action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND;
+ action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND;
action->name = devname;
action->percpu_dev_id = dev_id;
@@ -1980,9 +2043,7 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
return retval;
}
- chip_bus_lock(desc);
retval = __setup_irq(irq, desc, action);
- chip_bus_sync_unlock(desc);
if (retval) {
irq_chip_pm_put(&desc->irq_data);
@@ -1991,7 +2052,7 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
return retval;
}
-EXPORT_SYMBOL_GPL(request_percpu_irq);
+EXPORT_SYMBOL_GPL(__request_percpu_irq);
/**
* irq_get_irqchip_state - returns the irqchip state of an interrupt.
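The locking rules documented in __setup_irq() nest three levels: desc->request_mutex against a concurrent free_irq(), then the optional chip bus lock for slow-bus operations, then the hard-irq-safe desc->lock, released in reverse order. A pthreads caricature of that ordering (stubs, not the kernel API):

#include <pthread.h>

static pthread_mutex_t request_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t bus_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t desc_lock = PTHREAD_MUTEX_INITIALIZER;

static void setup_irq_path(void)
{
	pthread_mutex_lock(&request_mutex);	/* vs. concurrent free_irq() */
	pthread_mutex_lock(&bus_lock);		/* slow-bus (I2C/SPI) chip ops */
	pthread_mutex_lock(&desc_lock);		/* hard-irq critical section */
	/* ... install or remove the irqaction ... */
	pthread_mutex_unlock(&desc_lock);
	pthread_mutex_unlock(&bus_lock);
	pthread_mutex_unlock(&request_mutex);
}

int main(void)
{
	setup_irq_path();
	return 0;
}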
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index cea1de0..6bd9b58 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -149,6 +149,8 @@ static void resume_irq(struct irq_desc *desc)
/* Pretend that it got disabled ! */
desc->depth++;
+ irq_state_set_disabled(desc);
+ irq_state_set_masked(desc);
resume:
desc->istate &= ~IRQS_SUSPENDED;
__enable_irq(desc);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6a3b249..127e7cf 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -28,12 +28,6 @@
#include <asm/sections.h>
-#ifdef CONFIG_KALLSYMS_ALL
-#define all_var 1
-#else
-#define all_var 0
-#endif
-
/*
* These will be re-linked against their real values
* during the second link stage.
@@ -82,7 +76,7 @@ static inline int is_kernel(unsigned long addr)
static int is_ksym_addr(unsigned long addr)
{
- if (all_var)
+ if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
return is_kernel(addr);
return is_kernel_text(addr) || is_kernel_inittext(addr);
@@ -280,7 +274,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
if (!symbol_end) {
if (is_kernel_inittext(addr))
symbol_end = (unsigned long)_einittext;
- else if (all_var)
+ else if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
symbol_end = (unsigned long)_end;
else
symbol_end = (unsigned long)_etext;
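The switch from the all_var constant to IS_ENABLED() keeps both branches visible to the compiler (so they stay type-checked) while the dead one is still optimized out. The preprocessor trick behind it, simplified from include/linux/kconfig.h (this sketch ignores the =m module case):

#include <stdio.h>

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) __is_defined(option)

#define CONFIG_KALLSYMS_ALL 1	/* pretend .config set it */

int main(void)
{
	if (IS_ENABLED(CONFIG_KALLSYMS_ALL))	/* plain C, not #ifdef */
		printf("all symbols\n");
	else
		printf("text symbols only\n");
	return 0;
}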
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 3a47fa9..ea34ed8 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -11,6 +11,10 @@
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/kcmp.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/eventpoll.h>
+#include <linux/file.h>
#include <asm/unistd.h>
@@ -94,6 +98,56 @@ static int kcmp_lock(struct mutex *m1, struct mutex *m2)
return err;
}
+#ifdef CONFIG_EPOLL
+static int kcmp_epoll_target(struct task_struct *task1,
+ struct task_struct *task2,
+ unsigned long idx1,
+ struct kcmp_epoll_slot __user *uslot)
+{
+ struct file *filp, *filp_epoll, *filp_tgt;
+ struct kcmp_epoll_slot slot;
+ struct files_struct *files;
+
+ if (copy_from_user(&slot, uslot, sizeof(slot)))
+ return -EFAULT;
+
+ filp = get_file_raw_ptr(task1, idx1);
+ if (!filp)
+ return -EBADF;
+
+ files = get_files_struct(task2);
+ if (!files)
+ return -EBADF;
+
+ spin_lock(&files->file_lock);
+ filp_epoll = fcheck_files(files, slot.efd);
+ if (filp_epoll)
+ get_file(filp_epoll);
+ else
+ filp_tgt = ERR_PTR(-EBADF);
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+ if (filp_epoll) {
+ filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+ fput(filp_epoll);
+ }
+
+ if (IS_ERR(filp_tgt))
+ return PTR_ERR(filp_tgt);
+
+ return kcmp_ptr(filp, filp_tgt, KCMP_FILE);
+}
+#else
+static int kcmp_epoll_target(struct task_struct *task1,
+ struct task_struct *task2,
+ unsigned long idx1,
+ struct kcmp_epoll_slot __user *uslot)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
unsigned long, idx1, unsigned long, idx2)
{
@@ -165,6 +219,9 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
ret = -EOPNOTSUPP;
#endif
break;
+ case KCMP_EPOLL_TFD:
+ ret = kcmp_epoll_target(task1, task2, idx1, (void *)idx2);
+ break;
default:
ret = -EINVAL;
break;
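The new KCMP_EPOLL_TFD mode passes its extra parameters through a user-supplied struct kcmp_epoll_slot (efd/tfd/toff, from the uapi header added alongside this patch). A sketch of calling it from userspace, here comparing an fd against itself via the current task's own epoll instance:

#include <linux/kcmp.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <sys/syscall.h>
#include <unistd.h>

static long kcmp_epoll(pid_t pid1, pid_t pid2, int fd,
		       int efd, int tfd, unsigned int toff)
{
	struct kcmp_epoll_slot slot = {
		.efd  = efd,	/* epoll fd in pid2 */
		.tfd  = tfd,	/* target fd number registered in it */
		.toff = toff,	/* ordinal if tfd was registered repeatedly */
	};

	return syscall(SYS_kcmp, pid1, pid2, KCMP_EPOLL_TFD,
		       fd, (unsigned long)&slot);
}

int main(void)
{
	int efd = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN };

	epoll_ctl(efd, EPOLL_CTL_ADD, 0, &ev);	/* register stdin */

	/* 0 means both fds resolve to the same struct file */
	printf("kcmp: %ld\n", kcmp_epoll(getpid(), getpid(), 0, efd, 0, 0));
	return 0;
}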
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 980936a..e62ec4d 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
if (ret)
goto out;
+ /*
+ * Some architectures (like s390) may touch the crash memory before
+ * machine_kexec_prepare(); we must copy the vmcoreinfo data after it.
+ */
+ ret = kimage_crash_copy_vmcoreinfo(image);
+ if (ret)
+ goto out;
+
for (i = 0; i < nr_segments; i++) {
ret = kimage_load_segment(image, &image->segment[i]);
if (ret)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 154ffb4..1ae7c41 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -482,6 +482,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
return pages;
}
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
+{
+ struct page *vmcoreinfo_page;
+ void *safecopy;
+
+ if (image->type != KEXEC_TYPE_CRASH)
+ return 0;
+
+ /*
+ * For kdump, allocate one vmcoreinfo safe copy from the
+ * crash memory. As we have arch_kexec_protect_crashkres()
+ * after the kexec syscall, the copy is naturally protected from
+ * write (and even read) access under the kernel direct mapping.
+ * On the other hand, we still need to write it when a crash
+ * happens in order to generate the vmcoreinfo note, so we rely
+ * on vmap for this purpose.
+ */
+ vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+ if (!vmcoreinfo_page) {
+ pr_warn("Could not allocate vmcoreinfo buffer\n");
+ return -ENOMEM;
+ }
+ safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+ if (!safecopy) {
+ pr_warn("Could not vmap vmcoreinfo buffer\n");
+ return -ENOMEM;
+ }
+
+ image->vmcoreinfo_data_copy = safecopy;
+ crash_update_vmcoreinfo_safecopy(safecopy);
+
+ return 0;
+}
+
static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
if (*image->entry != 0)
@@ -569,6 +603,11 @@ void kimage_free(struct kimage *image)
if (!image)
return;
+ if (image->vmcoreinfo_data_copy) {
+ crash_update_vmcoreinfo_safecopy(NULL);
+ vunmap(image->vmcoreinfo_data_copy);
+ }
+
kimage_free_extra_pages(image);
for_each_kimage_entry(image, ptr, entry) {
if (entry & IND_INDIRECTION) {
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 766e7e4..9f48f44 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -26,13 +26,6 @@
#include <linux/vmalloc.h>
#include "kexec_internal.h"
-/*
- * Declare these symbols weak so that if architecture provides a purgatory,
- * these will be overridden.
- */
-char __weak kexec_purgatory[0];
-size_t __weak kexec_purgatory_size = 0;
-
static int kexec_calculate_store_digests(struct kimage *image);
/* Architectures can provide this probe function */
@@ -298,6 +291,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
if (ret)
goto out;
+ /*
+ * Some architectures (like s390) may touch the crash memory before
+ * machine_kexec_prepare(); we must copy the vmcoreinfo data after it.
+ */
+ ret = kimage_crash_copy_vmcoreinfo(image);
+ if (ret)
+ goto out;
+
ret = kexec_calculate_store_digests(image);
if (ret)
goto out;
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 799a8a4..50dfcb0 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -17,6 +17,8 @@ extern struct mutex kexec_mutex;
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
void kimage_file_post_load_cleanup(struct kimage *image);
+extern char kexec_purgatory[];
+extern size_t kexec_purgatory_size;
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
#endif /* CONFIG_KEXEC_FILE */
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 563f97e..6d016c5 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -45,8 +45,6 @@
#include <trace/events/module.h>
-extern int max_threads;
-
#define CAP_BSET (void *)1
#define CAP_PI (void *)2
@@ -56,6 +54,21 @@ static DEFINE_SPINLOCK(umh_sysctl_lock);
static DECLARE_RWSEM(umhelper_sem);
#ifdef CONFIG_MODULES
+/*
+ * Assuming:
+ *
+ * threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
+ * (u64) THREAD_SIZE * 8UL);
+ *
+ * Needing fewer than 50 threads would mean we're dealing with systems
+ * smaller than 3200 pages. This assumes you are capable of having ~13M of
+ * memory, and this would only be an upper limit, after which the OOM killer
+ * would take effect. Systems like these are very unlikely if modules are
+ * enabled.
+ */
+#define MAX_KMOD_CONCURRENT 50
+static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
+static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
/*
modprobe_path is set via /proc/sys.
@@ -127,11 +140,7 @@ int __request_module(bool wait, const char *fmt, ...)
{
va_list args;
char module_name[MODULE_NAME_LEN];
- unsigned int max_modprobes;
int ret;
- static atomic_t kmod_concurrent = ATOMIC_INIT(0);
-#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
- static int kmod_loop_msg;
/*
* We don't allow synchronous module loading from async. Module
@@ -154,40 +163,25 @@ int __request_module(bool wait, const char *fmt, ...)
if (ret)
return ret;
- /* If modprobe needs a service that is in a module, we get a recursive
- * loop. Limit the number of running kmod threads to max_threads/2 or
- * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
- * would be to run the parents of this process, counting how many times
- * kmod was invoked. That would mean accessing the internals of the
- * process tables to get the command line, proc_pid_cmdline is static
- * and it is not worth changing the proc code just to handle this case.
- * KAO.
- *
- * "trace the ppid" is simple, but will fail if someone's
- * parent exits. I think this is as good as it gets. --RR
- */
- max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
- atomic_inc(&kmod_concurrent);
- if (atomic_read(&kmod_concurrent) > max_modprobes) {
- /* We may be blaming an innocent here, but unlikely */
- if (kmod_loop_msg < 5) {
- printk(KERN_ERR
- "request_module: runaway loop modprobe %s\n",
- module_name);
- kmod_loop_msg++;
- }
- atomic_dec(&kmod_concurrent);
- return -ENOMEM;
+ if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
+ pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
+ atomic_read(&kmod_concurrent_max),
+ MAX_KMOD_CONCURRENT, module_name);
+ wait_event_interruptible(kmod_wq,
+ atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
}
trace_module_request(module_name, wait, _RET_IP_);
ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
- atomic_dec(&kmod_concurrent);
+ atomic_inc(&kmod_concurrent_max);
+ wake_up(&kmod_wq);
+
return ret;
}
EXPORT_SYMBOL(__request_module);
+
#endif /* CONFIG_MODULES */
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
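The rewritten throttle replaces the racy read-then-compare pair with atomic_dec_if_positive(): a slot is taken only while the counter is still positive, otherwise the caller sleeps on kmod_wq until a finished modprobe increments the counter and wakes it. A C11 sketch of that primitive and the ticket pattern (it returns instead of sleeping; names illustrative):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int concurrent_max = 50;	/* MAX_KMOD_CONCURRENT */

/* Decrement only while positive; returns the new value, which is
 * negative when no slot was available (atomic_dec_if_positive-like). */
static int dec_if_positive(atomic_int *v)
{
	int old = atomic_load(v);

	do {
		if (old <= 0)
			return old - 1;
	} while (!atomic_compare_exchange_weak(v, &old, old - 1));

	return old - 1;
}

int main(void)
{
	if (dec_if_positive(&concurrent_max) < 0) {
		/* the kernel would wait_event_interruptible() here */
		printf("throttled\n");
		return 1;
	}
	/* ... run modprobe ... */
	atomic_fetch_add(&concurrent_max, 1);	/* release the slot... */
	/* ...the kernel follows with wake_up(&kmod_wq) */
	return 0;
}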
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6756d75..a1606a4 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1771,24 +1771,13 @@ unsigned long __weak arch_deref_entry_point(void *entry)
int register_jprobes(struct jprobe **jps, int num)
{
- struct jprobe *jp;
int ret = 0, i;
if (num <= 0)
return -EINVAL;
+
for (i = 0; i < num; i++) {
- unsigned long addr, offset;
- jp = jps[i];
- addr = arch_deref_entry_point(jp->entry);
-
- /* Verify probepoint is a function entry point */
- if (kallsyms_lookup_size_offset(addr, NULL, &offset) &&
- offset == 0) {
- jp->kp.pre_handler = setjmp_pre_handler;
- jp->kp.break_handler = longjmp_break_handler;
- ret = register_kprobe(&jp->kp);
- } else
- ret = -EINVAL;
+ ret = register_jprobe(jps[i]);
if (ret < 0) {
if (i > 0)
@@ -1796,13 +1785,30 @@ int register_jprobes(struct jprobe **jps, int num)
break;
}
}
+
return ret;
}
EXPORT_SYMBOL_GPL(register_jprobes);
int register_jprobe(struct jprobe *jp)
{
- return register_jprobes(&jp, 1);
+ unsigned long addr, offset;
+ struct kprobe *kp = &jp->kp;
+
+ /*
+ * Verify probepoint as well as the jprobe handler are
+ * valid function entry points.
+ */
+ addr = arch_deref_entry_point(jp->entry);
+
+ if (kallsyms_lookup_size_offset(addr, NULL, &offset) && offset == 0 &&
+ kprobe_on_func_entry(kp->addr, kp->symbol_name, kp->offset)) {
+ kp->pre_handler = setjmp_pre_handler;
+ kp->break_handler = longjmp_break_handler;
+ return register_kprobe(kp);
+ }
+
+ return -EINVAL;
}
EXPORT_SYMBOL_GPL(register_jprobe);
@@ -1888,12 +1894,12 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
-bool __weak arch_function_offset_within_entry(unsigned long offset)
+bool __weak arch_kprobe_on_func_entry(unsigned long offset)
{
return !offset;
}
-bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
+bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
{
kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
@@ -1901,7 +1907,7 @@ bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsign
return false;
if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
- !arch_function_offset_within_entry(offset))
+ !arch_kprobe_on_func_entry(offset))
return false;
return true;
@@ -1914,7 +1920,7 @@ int register_kretprobe(struct kretprobe *rp)
int i;
void *addr;
- if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
+ if (!kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
return -EINVAL;
if (kretprobe_blacklist_size) {
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 23cd706..46ba853 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
{
phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
return sprintf(buf, "%pa %x\n", &vmcore_base,
- (unsigned int)sizeof(vmcoreinfo_note));
+ (unsigned int)VMCOREINFO_NOTE_SIZE);
}
KERNEL_ATTR_RO(vmcoreinfo);
@@ -234,7 +234,7 @@ static struct attribute * kernel_attrs[] = {
NULL
};
-static struct attribute_group kernel_attr_group = {
+static const struct attribute_group kernel_attr_group = {
.attrs = kernel_attrs,
};
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index cc3ed0c..2655f26 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -20,6 +20,7 @@
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/spinlock.h>
#include <asm/qrwlock.h>
/*
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index b2caec7..fd24153 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -28,6 +28,7 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
+#include <linux/prefetch.h>
#include <asm/byteorder.h>
#include <asm/qspinlock.h>
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 7806989..649dc9d 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -963,7 +963,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
return -EDEADLK;
raw_spin_lock(&task->pi_lock);
- rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
waiter->prio = task->prio;
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index c65f798..20819df 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -231,8 +231,8 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
out_nolock:
list_del(&waiter.list);
- if (!list_empty(&sem->wait_list))
- __rwsem_do_wake(sem, 1);
+ if (!list_empty(&sem->wait_list) && sem->count >= 0)
+ __rwsem_do_wake(sem, 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return -EINTR;
diff --git a/kernel/module.c b/kernel/module.c
index b3dbdde..40f983c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -300,6 +300,7 @@ int unregister_module_notifier(struct notifier_block *nb)
EXPORT_SYMBOL(unregister_module_notifier);
struct load_info {
+ const char *name;
Elf_Ehdr *hdr;
unsigned long len;
Elf_Shdr *sechdrs;
@@ -600,7 +601,7 @@ static struct module *find_module_all(const char *name, size_t len,
module_assert_mutex_or_preempt();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
if (strlen(mod->name) == len && !memcmp(mod->name, name, len))
@@ -1273,12 +1274,13 @@ static u32 resolve_rel_crc(const s32 *crc)
return *(u32 *)((void *)crc + *crc);
}
-static int check_version(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static int check_version(const struct load_info *info,
const char *symname,
struct module *mod,
const s32 *crc)
{
+ Elf_Shdr *sechdrs = info->sechdrs;
+ unsigned int versindex = info->index.vers;
unsigned int i, num_versions;
struct modversion_info *versions;
@@ -1312,17 +1314,16 @@ static int check_version(Elf_Shdr *sechdrs,
}
/* Broken toolchain. Warn once, then let it go.. */
- pr_warn_once("%s: no symbol version for %s\n", mod->name, symname);
+ pr_warn_once("%s: no symbol version for %s\n", info->name, symname);
return 1;
bad_version:
pr_warn("%s: disagrees about version of symbol %s\n",
- mod->name, symname);
+ info->name, symname);
return 0;
}
-static inline int check_modstruct_version(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static inline int check_modstruct_version(const struct load_info *info,
struct module *mod)
{
const s32 *crc;
@@ -1338,8 +1339,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
BUG();
}
preempt_enable();
- return check_version(sechdrs, versindex,
- VMLINUX_SYMBOL_STR(module_layout), mod, crc);
+ return check_version(info, VMLINUX_SYMBOL_STR(module_layout),
+ mod, crc);
}
/* First part is kernel version, which we ignore if module has crcs. */
@@ -1353,8 +1354,7 @@ static inline int same_magic(const char *amagic, const char *bmagic,
return strcmp(amagic, bmagic) == 0;
}
#else
-static inline int check_version(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static inline int check_version(const struct load_info *info,
const char *symname,
struct module *mod,
const s32 *crc)
@@ -1362,8 +1362,7 @@ static inline int check_version(Elf_Shdr *sechdrs,
return 1;
}
-static inline int check_modstruct_version(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static inline int check_modstruct_version(const struct load_info *info,
struct module *mod)
{
return 1;
@@ -1399,7 +1398,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
if (!sym)
goto unlock;
- if (!check_version(info->sechdrs, info->index.vers, name, mod, crc)) {
+ if (!check_version(info, name, mod, crc)) {
sym = ERR_PTR(-EINVAL);
goto getname;
}
@@ -1662,31 +1661,36 @@ static inline void remove_notes_attrs(struct module *mod)
}
#endif /* CONFIG_KALLSYMS */
-static void add_usage_links(struct module *mod)
+static void del_usage_links(struct module *mod)
{
#ifdef CONFIG_MODULE_UNLOAD
struct module_use *use;
- int nowarn;
mutex_lock(&module_mutex);
- list_for_each_entry(use, &mod->target_list, target_list) {
- nowarn = sysfs_create_link(use->target->holders_dir,
- &mod->mkobj.kobj, mod->name);
- }
+ list_for_each_entry(use, &mod->target_list, target_list)
+ sysfs_remove_link(use->target->holders_dir, mod->name);
mutex_unlock(&module_mutex);
#endif
}
-static void del_usage_links(struct module *mod)
+static int add_usage_links(struct module *mod)
{
+ int ret = 0;
#ifdef CONFIG_MODULE_UNLOAD
struct module_use *use;
mutex_lock(&module_mutex);
- list_for_each_entry(use, &mod->target_list, target_list)
- sysfs_remove_link(use->target->holders_dir, mod->name);
+ list_for_each_entry(use, &mod->target_list, target_list) {
+ ret = sysfs_create_link(use->target->holders_dir,
+ &mod->mkobj.kobj, mod->name);
+ if (ret)
+ break;
+ }
mutex_unlock(&module_mutex);
+ if (ret)
+ del_usage_links(mod);
#endif
+ return ret;
}
static int module_add_modinfo_attrs(struct module *mod)
@@ -1797,13 +1801,18 @@ static int mod_sysfs_setup(struct module *mod,
if (err)
goto out_unreg_param;
- add_usage_links(mod);
+ err = add_usage_links(mod);
+ if (err)
+ goto out_unreg_modinfo_attrs;
+
add_sect_attrs(mod, info);
add_notes_attrs(mod, info);
kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
return 0;
+out_unreg_modinfo_attrs:
+ module_remove_modinfo_attrs(mod);
out_unreg_param:
module_param_sysfs_remove(mod);
out_unreg_holders:
@@ -2910,9 +2919,15 @@ static int rewrite_section_headers(struct load_info *info, int flags)
info->index.vers = 0; /* Pretend no __versions section! */
else
info->index.vers = find_sec(info, "__versions");
+ info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+
info->index.info = find_sec(info, ".modinfo");
+ if (!info->index.info)
+ info->name = "(missing .modinfo section)";
+ else
+ info->name = get_modinfo(info, "name");
info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
- info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+
return 0;
}
@@ -2952,21 +2967,29 @@ static struct module *setup_load_info(struct load_info *info, int flags)
info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
if (!info->index.mod) {
- pr_warn("No module found in object\n");
+ pr_warn("%s: No module found in object\n",
+ info->name ?: "(missing .modinfo name field)");
return ERR_PTR(-ENOEXEC);
}
/* This is temporary: point mod into copy of data. */
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
+ /*
+ * If we didn't load the .modinfo 'name' field, fall back to
+ * on-disk struct mod 'name' field.
+ */
+ if (!info->name)
+ info->name = mod->name;
+
if (info->index.sym == 0) {
- pr_warn("%s: module has no symbols (stripped?)\n", mod->name);
+ pr_warn("%s: module has no symbols (stripped?)\n", info->name);
return ERR_PTR(-ENOEXEC);
}
info->index.pcpu = find_pcpusec(info);
/* Check module struct version now, before we try to use module. */
- if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
+ if (!check_modstruct_version(info, mod))
return ERR_PTR(-ENOEXEC);
return mod;
@@ -2987,7 +3010,7 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
return err;
} else if (!same_magic(modmagic, vermagic, info->index.vers)) {
pr_err("%s: version magic '%s' should be '%s'\n",
- mod->name, modmagic, vermagic);
+ info->name, modmagic, vermagic);
return -ENOEXEC;
}
@@ -3237,7 +3260,7 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
/* module_blacklist is a comma-separated list of module names */
static char *module_blacklist;
-static bool blacklisted(char *module_name)
+static bool blacklisted(const char *module_name)
{
const char *p;
size_t len;
@@ -3267,7 +3290,7 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
if (IS_ERR(mod))
return mod;
- if (blacklisted(mod->name))
+ if (blacklisted(info->name))
return ERR_PTR(-EPERM);
err = check_modinfo(mod, info, flags);
@@ -4196,7 +4219,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
goto out;
e = search_extable(mod->extable,
- mod->extable + mod->num_exentries - 1,
+ mod->num_exentries,
addr);
out:
preempt_enable();
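
The add_usage_links() change above stops ignoring sysfs_create_link() failures: links are created until the first error, everything already created is torn down through del_usage_links(), and mod_sysfs_setup() now unwinds its own state when that error is propagated. A minimal userspace sketch of the same create-or-unwind pattern, with purely hypothetical helpers (not kernel code):

#include <stdio.h>

/* Hypothetical link creation/removal, used only for illustration. */
static int create_link(int i)
{
	return (i == 3) ? -1 : 0;	/* pretend the 4th link cannot be created */
}

static void remove_link(int i)
{
	printf("removed link %d\n", i);
}

/* Create n links; on the first failure, undo everything done so far. */
static int add_links(int n)
{
	int i, err = 0;

	for (i = 0; i < n; i++) {
		err = create_link(i);
		if (err)
			break;
	}
	if (err) {
		while (i-- > 0)
			remove_link(i);
	}
	return err;
}

int main(void)
{
	if (add_links(5))
		fprintf(stderr, "setup failed, partial state was unwound\n");
	return 0;
}
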
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d401c21..42bd800 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -705,7 +705,7 @@ static struct attribute * g[] = {
NULL,
};
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
.attrs = g,
};
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 17c667b..0869b20 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2069,7 +2069,7 @@ out:
/**
* try_to_wake_up_local - try to wake up a local task with rq lock held
* @p: the thread to be awakened
- * @cookie: context's cookie for pinning
+ * @rf: request-queue flags for pinning
*
* Put @p on the run-queue if it's not already there. The caller must
* ensure that this_rq() is locked, @p is bound to this_rq() and not
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 076a2e3..29a3970 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -610,6 +610,11 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_cpu->sg_policy = sg_policy;
sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ }
+
+ for_each_cpu(cpu, policy->cpus) {
+ struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
policy_is_shared(policy) ?
sugov_update_shared :
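
The sugov_start() fix splits one loop into two passes so that every per-CPU sugov_cpu is fully initialized before any cpufreq update hook is published; a hook may fire as soon as it is registered and, for shared policies, read a sibling CPU's state. A simplified, single-threaded sketch of that initialize-then-publish ordering (hypothetical names; the real code additionally relies on the hook-registration API for safe publication):

#include <stddef.h>
#include <string.h>

struct demo_cpu {
	int ready_data;
	void (*hook)(struct demo_cpu *);	/* published last */
};

static void demo_hook(struct demo_cpu *c)
{
	/* may run as soon as it is visible; relies on ready_data being set */
	(void)c->ready_data;
}

static void demo_start(struct demo_cpu *cpus, size_t n)
{
	size_t i;

	/* pass 1: initialize all state the hook might look at */
	for (i = 0; i < n; i++)
		cpus[i].ready_data = 42;

	/* pass 2: only now make the callback reachable */
	for (i = 0; i < n; i++)
		cpus[i].hook = demo_hook;
}

int main(void)
{
	struct demo_cpu cpus[4];

	memset(cpus, 0, sizeof(cpus));
	demo_start(cpus, 4);
	cpus[2].hook(&cpus[2]);		/* safe: all state was set before publication */
	return 0;
}
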
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 67c70e2..14d2dbf 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -611,17 +611,23 @@ static void cputime_adjust(struct task_cputime *curr,
utime = curr->utime;
/*
- * If either stime or both stime and utime are 0, assume all runtime is
- * userspace. Once a task gets some ticks, the monotonicy code at
- * 'update' will ensure things converge to the observed ratio.
+ * If either stime or utime are 0, assume all runtime is userspace.
+	 * Once a task gets some ticks, the monotonicity code at 'update:'
+ * will ensure things converge to the observed ratio.
*/
- if (stime != 0) {
- if (utime == 0)
- stime = rtime;
- else
- stime = scale_stime(stime, rtime, stime + utime);
+ if (stime == 0) {
+ utime = rtime;
+ goto update;
}
+ if (utime == 0) {
+ stime = rtime;
+ goto update;
+ }
+
+ stime = scale_stime(stime, rtime, stime + utime);
+
+update:
/*
* Make sure stime doesn't go backwards; this preserves monotonicity
* for utime because rtime is monotonic.
@@ -673,20 +679,21 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static u64 vtime_delta(struct task_struct *tsk)
+static u64 vtime_delta(struct vtime *vtime)
{
- unsigned long now = READ_ONCE(jiffies);
+ unsigned long long clock;
- if (time_before(now, (unsigned long)tsk->vtime_snap))
+ clock = sched_clock();
+ if (clock < vtime->starttime)
return 0;
- return jiffies_to_nsecs(now - tsk->vtime_snap);
+ return clock - vtime->starttime;
}
-static u64 get_vtime_delta(struct task_struct *tsk)
+static u64 get_vtime_delta(struct vtime *vtime)
{
- unsigned long now = READ_ONCE(jiffies);
- u64 delta, other;
+ u64 delta = vtime_delta(vtime);
+ u64 other;
/*
* Unlike tick based timing, vtime based timing never has lost
@@ -695,104 +702,138 @@ static u64 get_vtime_delta(struct task_struct *tsk)
* elapsed time. Limit account_other_time to prevent rounding
* errors from causing elapsed vtime to go negative.
*/
- delta = jiffies_to_nsecs(now - tsk->vtime_snap);
other = account_other_time(delta);
- WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
- tsk->vtime_snap = now;
+ WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
+ vtime->starttime += delta;
return delta - other;
}
-static void __vtime_account_system(struct task_struct *tsk)
+static void __vtime_account_system(struct task_struct *tsk,
+ struct vtime *vtime)
+{
+ vtime->stime += get_vtime_delta(vtime);
+ if (vtime->stime >= TICK_NSEC) {
+ account_system_time(tsk, irq_count(), vtime->stime);
+ vtime->stime = 0;
+ }
+}
+
+static void vtime_account_guest(struct task_struct *tsk,
+ struct vtime *vtime)
{
- account_system_time(tsk, irq_count(), get_vtime_delta(tsk));
+ vtime->gtime += get_vtime_delta(vtime);
+ if (vtime->gtime >= TICK_NSEC) {
+ account_guest_time(tsk, vtime->gtime);
+ vtime->gtime = 0;
+ }
}
void vtime_account_system(struct task_struct *tsk)
{
- if (!vtime_delta(tsk))
+ struct vtime *vtime = &tsk->vtime;
+
+ if (!vtime_delta(vtime))
return;
- write_seqcount_begin(&tsk->vtime_seqcount);
- __vtime_account_system(tsk);
- write_seqcount_end(&tsk->vtime_seqcount);
+ write_seqcount_begin(&vtime->seqcount);
+ /* We might have scheduled out from guest path */
+ if (current->flags & PF_VCPU)
+ vtime_account_guest(tsk, vtime);
+ else
+ __vtime_account_system(tsk, vtime);
+ write_seqcount_end(&vtime->seqcount);
}
-void vtime_account_user(struct task_struct *tsk)
+void vtime_user_enter(struct task_struct *tsk)
{
- write_seqcount_begin(&tsk->vtime_seqcount);
- tsk->vtime_snap_whence = VTIME_SYS;
- if (vtime_delta(tsk))
- account_user_time(tsk, get_vtime_delta(tsk));
- write_seqcount_end(&tsk->vtime_seqcount);
+ struct vtime *vtime = &tsk->vtime;
+
+ write_seqcount_begin(&vtime->seqcount);
+ __vtime_account_system(tsk, vtime);
+ vtime->state = VTIME_USER;
+ write_seqcount_end(&vtime->seqcount);
}
-void vtime_user_enter(struct task_struct *tsk)
+void vtime_user_exit(struct task_struct *tsk)
{
- write_seqcount_begin(&tsk->vtime_seqcount);
- if (vtime_delta(tsk))
- __vtime_account_system(tsk);
- tsk->vtime_snap_whence = VTIME_USER;
- write_seqcount_end(&tsk->vtime_seqcount);
+ struct vtime *vtime = &tsk->vtime;
+
+ write_seqcount_begin(&vtime->seqcount);
+ vtime->utime += get_vtime_delta(vtime);
+ if (vtime->utime >= TICK_NSEC) {
+ account_user_time(tsk, vtime->utime);
+ vtime->utime = 0;
+ }
+ vtime->state = VTIME_SYS;
+ write_seqcount_end(&vtime->seqcount);
}
void vtime_guest_enter(struct task_struct *tsk)
{
+ struct vtime *vtime = &tsk->vtime;
/*
* The flags must be updated under the lock with
- * the vtime_snap flush and update.
+ * the vtime_starttime flush and update.
* That enforces a right ordering and update sequence
* synchronization against the reader (task_gtime())
* that can thus safely catch up with a tickless delta.
*/
- write_seqcount_begin(&tsk->vtime_seqcount);
- if (vtime_delta(tsk))
- __vtime_account_system(tsk);
+ write_seqcount_begin(&vtime->seqcount);
+ __vtime_account_system(tsk, vtime);
current->flags |= PF_VCPU;
- write_seqcount_end(&tsk->vtime_seqcount);
+ write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);
void vtime_guest_exit(struct task_struct *tsk)
{
- write_seqcount_begin(&tsk->vtime_seqcount);
- __vtime_account_system(tsk);
+ struct vtime *vtime = &tsk->vtime;
+
+ write_seqcount_begin(&vtime->seqcount);
+ vtime_account_guest(tsk, vtime);
current->flags &= ~PF_VCPU;
- write_seqcount_end(&tsk->vtime_seqcount);
+ write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);
void vtime_account_idle(struct task_struct *tsk)
{
- account_idle_time(get_vtime_delta(tsk));
+ account_idle_time(get_vtime_delta(&tsk->vtime));
}
void arch_vtime_task_switch(struct task_struct *prev)
{
- write_seqcount_begin(&prev->vtime_seqcount);
- prev->vtime_snap_whence = VTIME_INACTIVE;
- write_seqcount_end(&prev->vtime_seqcount);
+ struct vtime *vtime = &prev->vtime;
+
+ write_seqcount_begin(&vtime->seqcount);
+ vtime->state = VTIME_INACTIVE;
+ write_seqcount_end(&vtime->seqcount);
+
+ vtime = &current->vtime;
- write_seqcount_begin(&current->vtime_seqcount);
- current->vtime_snap_whence = VTIME_SYS;
- current->vtime_snap = jiffies;
- write_seqcount_end(&current->vtime_seqcount);
+ write_seqcount_begin(&vtime->seqcount);
+ vtime->state = VTIME_SYS;
+ vtime->starttime = sched_clock();
+ write_seqcount_end(&vtime->seqcount);
}
void vtime_init_idle(struct task_struct *t, int cpu)
{
+ struct vtime *vtime = &t->vtime;
unsigned long flags;
local_irq_save(flags);
- write_seqcount_begin(&t->vtime_seqcount);
- t->vtime_snap_whence = VTIME_SYS;
- t->vtime_snap = jiffies;
- write_seqcount_end(&t->vtime_seqcount);
+ write_seqcount_begin(&vtime->seqcount);
+ vtime->state = VTIME_SYS;
+ vtime->starttime = sched_clock();
+ write_seqcount_end(&vtime->seqcount);
local_irq_restore(flags);
}
u64 task_gtime(struct task_struct *t)
{
+ struct vtime *vtime = &t->vtime;
unsigned int seq;
u64 gtime;
@@ -800,13 +841,13 @@ u64 task_gtime(struct task_struct *t)
return t->gtime;
do {
- seq = read_seqcount_begin(&t->vtime_seqcount);
+ seq = read_seqcount_begin(&vtime->seqcount);
gtime = t->gtime;
- if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
- gtime += vtime_delta(t);
+ if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
+ gtime += vtime->gtime + vtime_delta(vtime);
- } while (read_seqcount_retry(&t->vtime_seqcount, seq));
+ } while (read_seqcount_retry(&vtime->seqcount, seq));
return gtime;
}
@@ -818,8 +859,9 @@ u64 task_gtime(struct task_struct *t)
*/
void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
{
- u64 delta;
+ struct vtime *vtime = &t->vtime;
unsigned int seq;
+ u64 delta;
if (!vtime_accounting_enabled()) {
*utime = t->utime;
@@ -828,25 +870,25 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
}
do {
- seq = read_seqcount_begin(&t->vtime_seqcount);
+ seq = read_seqcount_begin(&vtime->seqcount);
*utime = t->utime;
*stime = t->stime;
/* Task is sleeping, nothing to add */
- if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
+ if (vtime->state == VTIME_INACTIVE || is_idle_task(t))
continue;
- delta = vtime_delta(t);
+ delta = vtime_delta(vtime);
/*
* Task runs either in user or kernel space, add pending nohz time to
* the right place.
*/
- if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
- *utime += delta;
- else if (t->vtime_snap_whence == VTIME_SYS)
- *stime += delta;
- } while (read_seqcount_retry(&t->vtime_seqcount, seq));
+ if (vtime->state == VTIME_USER || t->flags & PF_VCPU)
+ *utime += vtime->utime + delta;
+ else if (vtime->state == VTIME_SYS)
+ *stime += vtime->stime + delta;
+ } while (read_seqcount_retry(&vtime->seqcount, seq));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
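
The cputime.c hunks replace the jiffies-based vtime snapshot with a nanosecond sched_clock() start time and accumulate utime/stime/gtime in the new struct vtime, flushing into the accounting functions only once at least TICK_NSEC has built up. A standalone userspace sketch of that accumulate-and-flush idea, with hypothetical names and CLOCK_MONOTONIC standing in for sched_clock():

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define TICK_NSEC 1000000ULL		/* assumed 1 ms "tick" for the demo */

struct demo_vtime {
	uint64_t starttime;		/* ns timestamp of last accounted delta */
	uint64_t stime;			/* accumulated, not yet flushed */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static uint64_t vtime_delta(struct demo_vtime *vt)
{
	uint64_t clock = now_ns();

	return clock < vt->starttime ? 0 : clock - vt->starttime;
}

/* Mirrors the shape of __vtime_account_system(): accumulate, flush on a full tick. */
static void account_system(struct demo_vtime *vt)
{
	uint64_t delta = vtime_delta(vt);

	vt->starttime += delta;
	vt->stime += delta;
	if (vt->stime >= TICK_NSEC) {
		printf("flush %llu ns of system time\n",
		       (unsigned long long)vt->stime);
		vt->stime = 0;
	}
}

int main(void)
{
	struct demo_vtime vt = { .starttime = now_ns() };
	int i;

	for (i = 0; i < 5; i++) {
		struct timespec d = { 0, 400000 };	/* ~0.4 ms of "work" */

		nanosleep(&d, NULL);
		account_system(&vt);
	}
	return 0;
}
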
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a84299f..755bd3f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1392,17 +1392,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
struct sched_dl_entity *pi_se = &p->dl;
/*
- * Use the scheduling parameters of the top pi-waiter
- * task if we have one and its (absolute) deadline is
- * smaller than our one... OTW we keep our runtime and
- * deadline.
+ * Use the scheduling parameters of the top pi-waiter task if:
+ * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
+ * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
+ * smaller than our deadline OR we are a !SCHED_DEADLINE task getting
+ * boosted due to a SCHED_DEADLINE pi-waiter).
+ * Otherwise we keep our runtime and deadline.
*/
- if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
+ if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
pi_se = &pi_task->dl;
} else if (!dl_prio(p->normal_prio)) {
/*
* Special case in which we have a !SCHED_DEADLINE task
- * that is going to be deboosted, but exceedes its
+ * that is going to be deboosted, but exceeds its
* runtime while doing so. No point in replenishing
* it, as it's going to return back to its original
* scheduling class after this.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 008c514..c95880e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6646,10 +6646,10 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
* our sched_group. We may want to revisit it if we couldn't
* meet load balance goals by pulling other tasks on src_cpu.
*
- * Also avoid computing new_dst_cpu if we have already computed
- * one in current iteration.
+ * Avoid computing new_dst_cpu for NEWLY_IDLE or if we have
+ * already computed one in current iteration.
*/
- if (!env->dst_grpmask || (env->flags & LBF_DST_PINNED))
+ if (env->idle == CPU_NEWLY_IDLE || (env->flags & LBF_DST_PINNED))
return 0;
/* Prevent to re-select dst_cpu via env's cpus */
@@ -8022,14 +8022,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.tasks = LIST_HEAD_INIT(env.tasks),
};
- /*
- * For NEWLY_IDLE load_balancing, we don't need to consider
- * other cpus in our group
- */
- if (idle == CPU_NEWLY_IDLE)
- env.dst_grpmask = NULL;
-
- cpumask_copy(cpus, cpu_active_mask);
+ cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask);
schedstat_inc(sd->lb_count[idle]);
@@ -8151,7 +8144,15 @@ more_balance:
/* All tasks on this runqueue were pinned by CPU affinity */
if (unlikely(env.flags & LBF_ALL_PINNED)) {
cpumask_clear_cpu(cpu_of(busiest), cpus);
- if (!cpumask_empty(cpus)) {
+ /*
+ * Attempting to continue load balancing at the current
+ * sched_domain level only makes sense if there are
+ * active CPUs remaining as possible busiest CPUs to
+ * pull load from which are not contained within the
+ * destination group that is receiving any migrated
+ * load.
+ */
+ if (!cpumask_subset(cpus, env.dst_grpmask)) {
env.loop = 0;
env.loop_break = sched_nr_migrate_break;
goto redo;
@@ -8447,6 +8448,13 @@ static int active_load_balance_cpu_stop(void *data)
.src_cpu = busiest_rq->cpu,
.src_rq = busiest_rq,
.idle = CPU_IDLE,
+ /*
+ * can_migrate_task() doesn't need to compute new_dst_cpu
+ * for active balancing. Since we have CPU_IDLE, but no
+ * @dst_grpmask we need to make that test go away with lying
+ * about DST_PINNED.
+ */
+ .flags = LBF_DST_PINNED,
};
schedstat_inc(sd->alb_count);
diff --git a/kernel/signal.c b/kernel/signal.c
index 48a59ee..caed913 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1402,6 +1402,10 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
return ret;
}
+ /* -INT_MIN is undefined. Exclude this case to avoid a UBSAN warning */
+ if (pid == INT_MIN)
+ return -ESRCH;
+
read_lock(&tasklist_lock);
if (pid != -1) {
ret = __kill_pgrp_info(sig, info,
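
The kill_something_info() hunk filters out pid == INT_MIN before the kill(-pgrp) path negates the pid: -INT_MIN cannot be represented in an int, so the negation is undefined behaviour (the UBSAN warning the comment refers to). A tiny userspace illustration of the same guard, using a hypothetical helper:

#include <limits.h>
#include <stdio.h>

/* Returns the process-group id encoded as a negative pid, or -1 if invalid. */
static int pgrp_from_pid(int pid)
{
	/* -INT_MIN does not fit in an int: the overflow is undefined behaviour */
	if (pid == INT_MIN)
		return -1;
	if (pid < -1)
		return -pid;	/* safe: pid > INT_MIN here */
	return -1;
}

int main(void)
{
	printf("%d\n", pgrp_from_pid(-4242));	/* 4242 */
	printf("%d\n", pgrp_from_pid(INT_MIN));	/* -1, rejected */
	return 0;
}
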
diff --git a/kernel/sys.c b/kernel/sys.c
index 47d9015..2855ee7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1362,7 +1362,7 @@ COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
ret = do_prlimit(current, resource, NULL, &r);
if (!ret) {
- struct rlimit r32;
+ struct compat_rlimit r32;
if (r.rlim_cur > COMPAT_RLIM_INFINITY)
r32.rlim_cur = COMPAT_RLIM_INFINITY;
else
@@ -2360,7 +2360,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_THP_DISABLE:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
- error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+ error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
break;
case PR_SET_THP_DISABLE:
if (arg3 || arg4 || arg5)
@@ -2368,9 +2368,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (down_write_killable(&me->mm->mmap_sem))
return -EINTR;
if (arg2)
- me->mm->def_flags |= VM_NOHUGEPAGE;
+ set_bit(MMF_DISABLE_THP, &me->mm->flags);
else
- me->mm->def_flags &= ~VM_NOHUGEPAGE;
+ clear_bit(MMF_DISABLE_THP, &me->mm->flags);
up_write(&me->mm->mmap_sem);
break;
case PR_MPX_ENABLE_MANAGEMENT:
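
The prctl() hunks move the per-process THP opt-out from the VM_NOHUGEPAGE bit in mm->def_flags to the MMF_DISABLE_THP mm flag; the userspace interface is unchanged. A small program exercising the round trip (the fallback #defines are only for older headers):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_THP_DISABLE
#define PR_SET_THP_DISABLE 41
#endif
#ifndef PR_GET_THP_DISABLE
#define PR_GET_THP_DISABLE 42
#endif

int main(void)
{
	if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
		perror("PR_SET_THP_DISABLE");
		return 1;
	}
	/* with the patch this reads back the MMF_DISABLE_THP bit */
	printf("THP disabled for this mm: %d\n",
	       prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0));
	return 0;
}
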
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dfba1a..6648fbb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -174,11 +174,32 @@ extern int no_unaligned_warning;
#ifdef CONFIG_PROC_SYSCTL
-#define SYSCTL_WRITES_LEGACY -1
-#define SYSCTL_WRITES_WARN 0
-#define SYSCTL_WRITES_STRICT 1
+/**
+ * enum sysctl_writes_mode - supported sysctl write modes
+ *
+ * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
+ * to be written, and multiple writes on the same sysctl file descriptor
+ * will rewrite the sysctl value, regardless of file position. No warning
+ * is issued when the initial position is not 0.
+ * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
+ * not 0.
+ * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
+ * file position 0 and the value must be fully contained in the buffer
+ * sent to the write syscall. If dealing with strings respect the file
+ * position, but restrict this to the max length of the buffer, anything
+ *	past the max length will be ignored. Multiple writes will append
+ * to the buffer.
+ *
+ * These write modes control how current file position affects the behavior of
+ * updating sysctl values through the proc interface on each write.
+ */
+enum sysctl_writes_mode {
+ SYSCTL_WRITES_LEGACY = -1,
+ SYSCTL_WRITES_WARN = 0,
+ SYSCTL_WRITES_STRICT = 1,
+};
-static int sysctl_writes_strict = SYSCTL_WRITES_STRICT;
+static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -880,6 +901,14 @@ static struct ctl_table kern_table[] = {
#endif
},
{
+ .procname = "watchdog_cpumask",
+ .data = &watchdog_cpumask_bits,
+ .maxlen = NR_CPUS,
+ .mode = 0644,
+ .proc_handler = proc_watchdog_cpumask,
+ },
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+ {
.procname = "soft_watchdog",
.data = &soft_watchdog_enabled,
.maxlen = sizeof (int),
@@ -889,13 +918,6 @@ static struct ctl_table kern_table[] = {
.extra2 = &one,
},
{
- .procname = "watchdog_cpumask",
- .data = &watchdog_cpumask_bits,
- .maxlen = NR_CPUS,
- .mode = 0644,
- .proc_handler = proc_watchdog_cpumask,
- },
- {
.procname = "softlockup_panic",
.data = &softlockup_panic,
.maxlen = sizeof(int),
@@ -904,27 +926,29 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_SMP
{
- .procname = "hardlockup_panic",
- .data = &hardlockup_panic,
+ .procname = "softlockup_all_cpu_backtrace",
+ .data = &sysctl_softlockup_all_cpu_backtrace,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
+#endif /* CONFIG_SMP */
#endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
{
- .procname = "softlockup_all_cpu_backtrace",
- .data = &sysctl_softlockup_all_cpu_backtrace,
+ .procname = "hardlockup_panic",
+ .data = &hardlockup_panic,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
+#ifdef CONFIG_SMP
{
.procname = "hardlockup_all_cpu_backtrace",
.data = &sysctl_hardlockup_all_cpu_backtrace,
@@ -936,6 +960,8 @@ static struct ctl_table kern_table[] = {
},
#endif /* CONFIG_SMP */
#endif
+#endif
+
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.procname = "unknown_nmi_panic",
@@ -1950,6 +1976,32 @@ static void warn_sysctl_write(struct ctl_table *table)
}
/**
+ * proc_first_pos_non_zero_ignore - check if first position is allowed
+ * @ppos: file position
+ * @table: the sysctl table
+ *
+ * Returns true if the first position is non-zero and the sysctl_writes_strict
+ * mode indicates this is not allowed for numeric input types. String proc
+ * handlers can ignore the return value.
+ */
+static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
+ struct ctl_table *table)
+{
+ if (!*ppos)
+ return false;
+
+ switch (sysctl_writes_strict) {
+ case SYSCTL_WRITES_STRICT:
+ return true;
+ case SYSCTL_WRITES_WARN:
+ warn_sysctl_write(table);
+ return false;
+ default:
+ return false;
+ }
+}
+
+/**
* proc_dostring - read a string sysctl
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
@@ -1969,8 +2021,8 @@ static void warn_sysctl_write(struct ctl_table *table)
int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
- warn_sysctl_write(table);
+ if (write)
+ proc_first_pos_non_zero_ignore(ppos, table);
return _proc_do_string((char *)(table->data), table->maxlen, write,
(char __user *)buffer, lenp, ppos);
@@ -2128,19 +2180,18 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
return 0;
}
-static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
- int *valp,
- int write, void *data)
+static int do_proc_douintvec_conv(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data)
{
if (write) {
- if (*negp)
+ if (*lvalp > UINT_MAX)
return -EINVAL;
if (*lvalp > UINT_MAX)
return -EINVAL;
*valp = *lvalp;
} else {
unsigned int val = *valp;
- *negp = false;
*lvalp = (unsigned long)val;
}
return 0;
@@ -2172,17 +2223,8 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
conv = do_proc_dointvec_conv;
if (write) {
- if (*ppos) {
- switch (sysctl_writes_strict) {
- case SYSCTL_WRITES_STRICT:
- goto out;
- case SYSCTL_WRITES_WARN:
- warn_sysctl_write(table);
- break;
- default:
- break;
- }
- }
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto out;
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
@@ -2249,6 +2291,146 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
buffer, lenp, ppos, conv, data);
}
+static int do_proc_douintvec_w(unsigned int *tbl_data,
+ struct ctl_table *table,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ unsigned long lval;
+ int err = 0;
+ size_t left;
+ bool neg;
+ char *kbuf = NULL, *p;
+
+ left = *lenp;
+
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto bail_early;
+
+ if (left > PAGE_SIZE - 1)
+ left = PAGE_SIZE - 1;
+
+ p = kbuf = memdup_user_nul(buffer, left);
+ if (IS_ERR(kbuf))
+ return -EINVAL;
+
+ left -= proc_skip_spaces(&p);
+ if (!left) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ err = proc_get_long(&p, &left, &lval, &neg,
+ proc_wspace_sep,
+ sizeof(proc_wspace_sep), NULL);
+ if (err || neg) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ if (conv(&lval, tbl_data, 1, data)) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ if (!err && left)
+ left -= proc_skip_spaces(&p);
+
+out_free:
+ kfree(kbuf);
+ if (err)
+ return -EINVAL;
+
+ return 0;
+
+ /* This is in keeping with old __do_proc_dointvec() */
+bail_early:
+ *ppos += *lenp;
+ return err;
+}
+
+static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ unsigned long lval;
+ int err = 0;
+ size_t left;
+
+ left = *lenp;
+
+ if (conv(&lval, tbl_data, 0, data)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = proc_put_long(&buffer, &left, lval, false);
+ if (err || !left)
+ goto out;
+
+ err = proc_put_char(&buffer, &left, '\n');
+
+out:
+ *lenp -= left;
+ *ppos += *lenp;
+
+ return err;
+}
+
+static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
+ int write, void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ unsigned int *i, vleft;
+
+ if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (unsigned int *) tbl_data;
+ vleft = table->maxlen / sizeof(*i);
+
+ /*
+ * Arrays are not supported, keep this simple. *Do not* add
+ * support for them.
+ */
+ if (vleft != 1) {
+ *lenp = 0;
+ return -EINVAL;
+ }
+
+ if (!conv)
+ conv = do_proc_douintvec_conv;
+
+ if (write)
+ return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
+ conv, data);
+ return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
+}
+
+static int do_proc_douintvec(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ return __do_proc_douintvec(table->data, table, write,
+ buffer, lenp, ppos, conv, data);
+}
+
/**
* proc_dointvec - read a vector of integers
* @table: the sysctl table
@@ -2284,8 +2466,8 @@ int proc_dointvec(struct ctl_table *table, int write,
int proc_douintvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table, write, buffer, lenp, ppos,
- do_proc_douintvec_conv, NULL);
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_douintvec_conv, NULL);
}
/*
@@ -2390,6 +2572,65 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
do_proc_dointvec_minmax_conv, &param);
}
+struct do_proc_douintvec_minmax_conv_param {
+ unsigned int *min;
+ unsigned int *max;
+};
+
+static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data)
+{
+ struct do_proc_douintvec_minmax_conv_param *param = data;
+
+ if (write) {
+ unsigned int val = *lvalp;
+
+ if ((param->min && *param->min > val) ||
+ (param->max && *param->max < val))
+ return -ERANGE;
+
+ if (*lvalp > UINT_MAX)
+ return -EINVAL;
+ *valp = val;
+ } else {
+ unsigned int val = *valp;
+ *lvalp = (unsigned long) val;
+ }
+
+ return 0;
+}
+
+/**
+ * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string. Negative
+ * strings are not allowed.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max). There is a final sanity
+ * check for UINT_MAX to avoid having to support wrap around uses from
+ * userspace.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct do_proc_douintvec_minmax_conv_param param = {
+ .min = (unsigned int *) table->extra1,
+ .max = (unsigned int *) table->extra2,
+ };
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_douintvec_minmax_conv, &param);
+}
+
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
@@ -2447,17 +2688,8 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
left = *lenp;
if (write) {
- if (*ppos) {
- switch (sysctl_writes_strict) {
- case SYSCTL_WRITES_STRICT:
- goto out;
- case SYSCTL_WRITES_WARN:
- warn_sysctl_write(table);
- break;
- default:
- break;
- }
- }
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto out;
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
@@ -2898,6 +3130,12 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
return -ENOSYS;
}
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
int proc_dointvec_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -2940,6 +3178,7 @@ EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
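
Alongside the refactored strict-writes handling, the sysctl changes add a dedicated unsigned-int path (__do_proc_douintvec() and helpers) and the exported proc_douintvec_minmax(), which rejects negative input and range-checks against unsigned extra1/extra2 bounds. A sketch of how a module might use the new handler; the knob name, bounds and module boilerplate here are illustrative, not part of the patch:

#include <linux/module.h>
#include <linux/sysctl.h>

static unsigned int demo_value = 10;
static unsigned int demo_min = 1;	/* illustrative bounds */
static unsigned int demo_max = 1000;

static struct ctl_table demo_table[] = {
	{
		.procname	= "demo_value",
		.data		= &demo_value,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= &demo_min,
		.extra2		= &demo_max,
	},
	{ }
};

static struct ctl_table_header *demo_header;

static int __init demo_sysctl_init(void)
{
	/* exposes /proc/sys/kernel/demo_value; out-of-range writes get -ERANGE */
	demo_header = register_sysctl("kernel", demo_table);
	return demo_header ? 0 : -ENOMEM;
}

static void __exit demo_sysctl_exit(void)
{
	unregister_sysctl_table(demo_header);
}

module_init(demo_sysctl_init);
module_exit(demo_sysctl_exit);
MODULE_LICENSE("GPL");
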
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 939a158..02e1859 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1346,7 +1346,7 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
* CTL_KERN/KERN_VERSION is used by older glibc and cannot
* ever go away.
*/
- if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
+ if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
return;
if (printk_ratelimit()) {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2953d55..02004ae9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -113,7 +113,7 @@ static int ftrace_disabled __read_mostly;
static DEFINE_MUTEX(ftrace_lock);
-static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
static struct ftrace_ops global_ops;
@@ -169,8 +169,11 @@ int ftrace_nr_registered_ops(void)
mutex_lock(&ftrace_lock);
- for (ops = ftrace_ops_list;
- ops != &ftrace_list_end; ops = ops->next)
+ for (ops = rcu_dereference_protected(ftrace_ops_list,
+ lockdep_is_held(&ftrace_lock));
+ ops != &ftrace_list_end;
+ ops = rcu_dereference_protected(ops->next,
+ lockdep_is_held(&ftrace_lock)))
cnt++;
mutex_unlock(&ftrace_lock);
@@ -275,10 +278,11 @@ static void update_ftrace_function(void)
* If there's only one ftrace_ops registered, the ftrace_ops_list
* will point to the ops we want.
*/
- set_function_trace_op = ftrace_ops_list;
+ set_function_trace_op = rcu_dereference_protected(ftrace_ops_list,
+ lockdep_is_held(&ftrace_lock));
/* If there's no ftrace_ops registered, just call the stub function */
- if (ftrace_ops_list == &ftrace_list_end) {
+ if (set_function_trace_op == &ftrace_list_end) {
func = ftrace_stub;
/*
@@ -286,7 +290,8 @@ static void update_ftrace_function(void)
* recursion safe and not dynamic and the arch supports passing ops,
* then have the mcount trampoline call the function directly.
*/
- } else if (ftrace_ops_list->next == &ftrace_list_end) {
+ } else if (rcu_dereference_protected(ftrace_ops_list->next,
+ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
func = ftrace_ops_get_list_func(ftrace_ops_list);
} else {
@@ -348,9 +353,11 @@ int using_ftrace_ops_list_func(void)
return ftrace_trace_function == ftrace_ops_list_func;
}
-static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+static void add_ftrace_ops(struct ftrace_ops __rcu **list,
+ struct ftrace_ops *ops)
{
- ops->next = *list;
+ rcu_assign_pointer(ops->next, *list);
+
/*
* We are entering ops into the list but another
* CPU might be walking that list. We need to make sure
@@ -360,7 +367,8 @@ static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
rcu_assign_pointer(*list, ops);
}
-static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+static int remove_ftrace_ops(struct ftrace_ops __rcu **list,
+ struct ftrace_ops *ops)
{
struct ftrace_ops **p;
@@ -368,7 +376,10 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
* If we are removing the last function, then simply point
* to the ftrace_stub.
*/
- if (*list == ops && ops->next == &ftrace_list_end) {
+ if (rcu_dereference_protected(*list,
+ lockdep_is_held(&ftrace_lock)) == ops &&
+ rcu_dereference_protected(ops->next,
+ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
*list = &ftrace_list_end;
return 0;
}
@@ -1569,8 +1580,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
return 0;
#endif
- hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash);
- hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash);
+ rcu_assign_pointer(hash.filter_hash, ops->func_hash->filter_hash);
+ rcu_assign_pointer(hash.notrace_hash, ops->func_hash->notrace_hash);
if (hash_contains_ip(ip, &hash))
ret = 1;
@@ -2840,7 +2851,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
* If there's no more ops registered with ftrace, run a
* sanity check to make sure all rec flags are cleared.
*/
- if (ftrace_ops_list == &ftrace_list_end) {
+ if (rcu_dereference_protected(ftrace_ops_list,
+ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
struct ftrace_page *pg;
struct dyn_ftrace *rec;
@@ -3816,7 +3828,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
int exclude_mod = 0;
int found = 0;
int ret;
- int clear_filter;
+ int clear_filter = 0;
if (func) {
func_g.type = filter_parse_regex(func, len, &func_g.search,
@@ -3950,7 +3962,7 @@ static int cache_mod(struct trace_array *tr,
continue;
/* no func matches all */
- if (!func || strcmp(func, "*") == 0 ||
+ if (strcmp(func, "*") == 0 ||
(ftrace_mod->func &&
strcmp(ftrace_mod->func, func) == 0)) {
ret = 0;
@@ -3978,6 +3990,7 @@ static int
ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
int reset, int enable);
+#ifdef CONFIG_MODULES
static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
char *mod, bool enable)
{
@@ -4068,6 +4081,7 @@ static void process_cached_mods(const char *mod_name)
kfree(mod);
}
+#endif
/*
* We register the module command as a template to show others how
@@ -6451,7 +6465,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
if (ftrace_enabled) {
/* we are starting ftrace again */
- if (ftrace_ops_list != &ftrace_list_end)
+ if (rcu_dereference_protected(ftrace_ops_list,
+ lockdep_is_held(&ftrace_lock)) != &ftrace_list_end)
update_ftrace_function();
ftrace_startup_sysctl();
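
The ftrace hunks annotate ftrace_ops_list and ops->next as __rcu and convert every traversal done under ftrace_lock to rcu_dereference_protected(..., lockdep_is_held(&ftrace_lock)), so sparse and lockdep can check the locking instead of the pointers being dereferenced directly. A hedged fragment showing the same annotations on an unrelated list (hypothetical names, not a complete module):

#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct demo_node {
	int val;
	struct demo_node __rcu *next;
};

static struct demo_node __rcu *demo_list;
static DEFINE_MUTEX(demo_lock);

/* Writer: publish a new head while holding demo_lock. */
static void demo_add(struct demo_node *node)
{
	mutex_lock(&demo_lock);
	rcu_assign_pointer(node->next,
			   rcu_dereference_protected(demo_list,
						     lockdep_is_held(&demo_lock)));
	rcu_assign_pointer(demo_list, node);
	mutex_unlock(&demo_lock);
}

/* Update-side walk: the lock, not rcu_read_lock(), is what makes this legal. */
static int demo_count(void)
{
	struct demo_node *n;
	int cnt = 0;

	mutex_lock(&demo_lock);
	for (n = rcu_dereference_protected(demo_list,
					   lockdep_is_held(&demo_lock));
	     n;
	     n = rcu_dereference_protected(n->next,
					   lockdep_is_held(&demo_lock)))
		cnt++;
	mutex_unlock(&demo_lock);
	return cnt;
}
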
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 4ae268e..529cc50 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1136,12 +1136,12 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
for (i = 0; i < nr_pages; i++) {
struct page *page;
/*
- * __GFP_NORETRY flag makes sure that the allocation fails
- * gracefully without invoking oom-killer and the system is
- * not destabilized.
+ * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
+ * gracefully without invoking oom-killer and the system is not
+ * destabilized.
*/
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- GFP_KERNEL | __GFP_NORETRY,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL,
cpu_to_node(cpu));
if (!bpage)
goto free_pages;
@@ -1149,7 +1149,7 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
list_add(&bpage->list, pages);
page = alloc_pages_node(cpu_to_node(cpu),
- GFP_KERNEL | __GFP_NORETRY, 0);
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0);
if (!page)
goto free_pages;
bpage->page = page_address(page);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 948ec32..42b9355 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1916,7 +1916,11 @@ static int trace_save_cmdline(struct task_struct *tsk)
{
unsigned pid, idx;
- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ /* treat recording of idle task as a success */
+ if (!tsk->pid)
+ return 1;
+
+ if (unlikely(tsk->pid > PID_MAX_DEFAULT))
return 0;
/*
@@ -2002,7 +2006,11 @@ int trace_find_tgid(int pid)
static int trace_save_tgid(struct task_struct *tsk)
{
- if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
+ /* treat recording of idle task as a success */
+ if (!tsk->pid)
+ return 1;
+
+ if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
return 0;
tgid_map[tsk->pid] = tsk->tgid;
@@ -2029,11 +2037,20 @@ static bool tracing_record_taskinfo_skip(int flags)
*/
void tracing_record_taskinfo(struct task_struct *task, int flags)
{
+ bool done;
+
if (tracing_record_taskinfo_skip(flags))
return;
- if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
- return;
- if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
+
+ /*
+ * Record as much task information as possible. If some fail, continue
+ * to try to record the others.
+ */
+ done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
+
+ /* If recording any information failed, retry again soon. */
+ if (!done)
return;
__this_cpu_write(trace_taskinfo_save, false);
@@ -2050,15 +2067,22 @@ void tracing_record_taskinfo(struct task_struct *task, int flags)
void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
struct task_struct *next, int flags)
{
+ bool done;
+
if (tracing_record_taskinfo_skip(flags))
return;
- if ((flags & TRACE_RECORD_CMDLINE) &&
- (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
- return;
+ /*
+ * Record as much task information as possible. If some fail, continue
+ * to try to record the others.
+ */
+ done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
+ done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
- if ((flags & TRACE_RECORD_TGID) &&
- (!trace_save_tgid(prev) || !trace_save_tgid(next)))
+ /* If recording any information failed, retry again soon. */
+ if (!done)
return;
__this_cpu_write(trace_taskinfo_save, false);
@@ -3334,14 +3358,23 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
unsigned int flags)
{
bool tgid = flags & TRACE_ITER_RECORD_TGID;
-
- seq_printf(m, "# %s _-----=> irqs-off\n", tgid ? " " : "");
- seq_printf(m, "# %s / _----=> need-resched\n", tgid ? " " : "");
- seq_printf(m, "# %s| / _---=> hardirq/softirq\n", tgid ? " " : "");
- seq_printf(m, "# %s|| / _--=> preempt-depth\n", tgid ? " " : "");
- seq_printf(m, "# %s||| / delay\n", tgid ? " " : "");
- seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
- seq_printf(m, "# | | | %s|||| | |\n", tgid ? " | " : "");
+ const char tgid_space[] = " ";
+ const char space[] = " ";
+
+ seq_printf(m, "# %s _-----=> irqs-off\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# %s / _----=> need-resched\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# %s| / _---=> hardirq/softirq\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# %s|| / _--=> preempt-depth\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# %s||| / delay\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n",
+ tgid ? " TGID " : space);
+ seq_printf(m, "# | | | %s|||| | |\n",
+ tgid ? " | " : space);
}
void
@@ -4689,6 +4722,76 @@ static const struct file_operations tracing_readme_fops = {
.llseek = generic_file_llseek,
};
+static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ int *ptr = v;
+
+ if (*pos || m->count)
+ ptr++;
+
+ (*pos)++;
+
+ for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
+ if (trace_find_tgid(*ptr))
+ return ptr;
+ }
+
+ return NULL;
+}
+
+static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
+{
+ void *v;
+ loff_t l = 0;
+
+ if (!tgid_map)
+ return NULL;
+
+ v = &tgid_map[0];
+ while (l <= *pos) {
+ v = saved_tgids_next(m, v, &l);
+ if (!v)
+ return NULL;
+ }
+
+ return v;
+}
+
+static void saved_tgids_stop(struct seq_file *m, void *v)
+{
+}
+
+static int saved_tgids_show(struct seq_file *m, void *v)
+{
+ int pid = (int *)v - tgid_map;
+
+ seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
+ return 0;
+}
+
+static const struct seq_operations tracing_saved_tgids_seq_ops = {
+ .start = saved_tgids_start,
+ .stop = saved_tgids_stop,
+ .next = saved_tgids_next,
+ .show = saved_tgids_show,
+};
+
+static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
+{
+ if (tracing_disabled)
+ return -ENODEV;
+
+ return seq_open(filp, &tracing_saved_tgids_seq_ops);
+}
+
+
+static const struct file_operations tracing_saved_tgids_fops = {
+ .open = tracing_saved_tgids_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
unsigned int *ptr = v;
@@ -7671,6 +7774,7 @@ static int instance_rmdir(const char *name)
}
kfree(tr->topts);
+ free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
@@ -7921,6 +8025,9 @@ static __init int tracer_init_tracefs(void)
trace_create_file("saved_cmdlines_size", 0644, d_tracer,
NULL, &tracing_saved_cmdlines_size_fops);
+ trace_create_file("saved_tgids", 0444, d_tracer,
+ NULL, &tracing_saved_tgids_fops);
+
trace_eval_init();
trace_create_eval_file(d_tracer);
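
Besides the cmdline/tgid recording fixes, trace.c gains a saved_tgids file that walks tgid_map through a seq_file and prints one "pid tgid" pair per recorded task, mirroring saved_cmdlines. A small userspace reader; the tracefs path is an assumption (it may also be /sys/kernel/tracing), and entries only appear for tasks recorded while the record-tgid trace option from earlier in this series is enabled:

#include <stdio.h>

int main(void)
{
	/* assumed mount point; adjust if tracefs lives elsewhere */
	const char *path = "/sys/kernel/debug/tracing/saved_tgids";
	char line[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	/* each line is "<pid> <tgid>" */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
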
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6ade1c5..490ba22 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1210,9 +1210,9 @@ struct ftrace_event_field {
struct event_filter {
int n_preds; /* Number assigned */
int a_preds; /* allocated */
- struct filter_pred *preds;
- struct filter_pred *root;
- char *filter_string;
+ struct filter_pred __rcu *preds;
+ struct filter_pred __rcu *root;
+ char *filter_string;
};
struct event_subsystem {
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b53c8d3..c9b5aa1 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -598,6 +598,14 @@ static struct notifier_block trace_kprobe_module_nb = {
.priority = 1 /* Invoked after kprobe module callback */
};
+/* Convert certain expected symbols into '_' when generating event names */
+static inline void sanitize_event_name(char *name)
+{
+ while (*name++ != '\0')
+ if (*name == ':' || *name == '.')
+ *name = '_';
+}
+
static int create_trace_kprobe(int argc, char **argv)
{
/*
@@ -720,7 +728,7 @@ static int create_trace_kprobe(int argc, char **argv)
return ret;
}
if (offset && is_return &&
- !function_offset_within_entry(NULL, symbol, offset)) {
+ !kprobe_on_func_entry(NULL, symbol, offset)) {
pr_info("Given offset is not valid for return probe.\n");
return -EINVAL;
}
@@ -736,6 +744,7 @@ static int create_trace_kprobe(int argc, char **argv)
else
snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
is_return ? 'r' : 'p', addr);
+ sanitize_event_name(buf);
event = buf;
}
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
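
sanitize_event_name() rewrites ':' and '.' to '_' in auto-generated kprobe event names, so probing a GCC-cloned symbol such as something.constprop.N no longer yields an event name the tracing core rejects. A userspace copy of the helper to show the transformation (the example name is made up):

#include <stdio.h>

/* Same logic as the new kernel helper: map ':' and '.' to '_'. */
static void sanitize_event_name(char *name)
{
	while (*name++ != '\0')
		if (*name == ':' || *name == '.')
			*name = '_';
}

int main(void)
{
	char buf[] = "p_cpu_up.constprop.6_0";

	sanitize_event_name(buf);
	printf("%s\n", buf);	/* p_cpu_up_constprop_6_0 */
	return 0;
}
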
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b4a751e..a4df67c 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -406,6 +406,8 @@ static const struct file_operations stack_trace_fops = {
.release = seq_release,
};
+#ifdef CONFIG_DYNAMIC_FTRACE
+
static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
@@ -423,6 +425,8 @@ static const struct file_operations stack_trace_filter_fops = {
.release = ftrace_regex_release,
};
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
int
stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -477,8 +481,10 @@ static __init int stack_trace_init(void)
trace_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops);
+#ifdef CONFIG_DYNAMIC_FTRACE
trace_create_file("stack_trace_filter", 0444, d_tracer,
&trace_ops, &stack_trace_filter_fops);
+#endif
if (stack_trace_filter_buf[0])
ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 03e0b69..06d3389 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -9,7 +9,7 @@
* to those contributors as well.
*/
-#define pr_fmt(fmt) "NMI watchdog: " fmt
+#define pr_fmt(fmt) "watchdog: " fmt
#include <linux/mm.h>
#include <linux/cpu.h>
@@ -29,15 +29,58 @@
#include <linux/kvm_para.h>
#include <linux/kthread.h>
+/* Watchdog configuration */
static DEFINE_MUTEX(watchdog_proc_mutex);
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+int __read_mostly nmi_watchdog_enabled;
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
+ NMI_WATCHDOG_ENABLED;
#else
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
-int __read_mostly nmi_watchdog_enabled;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+ if (!strncmp(str, "panic", 5))
+ hardlockup_panic = 1;
+ else if (!strncmp(str, "nopanic", 7))
+ hardlockup_panic = 0;
+ else if (!strncmp(str, "0", 1))
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+ else if (!strncmp(str, "1", 1))
+ watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+ return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
int __read_mostly soft_watchdog_enabled;
+#endif
+
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;
@@ -45,15 +88,9 @@ int __read_mostly watchdog_thresh = 10;
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#endif
-static struct cpumask watchdog_cpumask __read_mostly;
+struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
- for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
-
/*
* The 'watchdog_running' variable is set to 1 when the watchdog threads
* are registered/started and is set to 0 when the watchdog threads are
@@ -72,7 +109,47 @@ static int __read_mostly watchdog_running;
* of 'watchdog_running' cannot change while the watchdog is deactivated
* temporarily (see related code in 'proc' handlers).
*/
-static int __read_mostly watchdog_suspended;
+int __read_mostly watchdog_suspended;
+
+/*
+ * These functions can be overridden if an architecture implements its
+ * own hardlockup detector.
+ *
+ * watchdog_nmi_enable/disable can be implemented to start and stop when
+ * softlockup watchdog threads start and stop. The arch must select the
+ * SOFTLOCKUP_DETECTOR Kconfig.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+ return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
+
+/*
+ * watchdog_nmi_reconfigure can be implemented to be notified after any
+ * watchdog configuration change. The arch hardlockup watchdog should
+ * respond to the following variables:
+ * - nmi_watchdog_enabled
+ * - watchdog_thresh
+ * - watchdog_cpumask
+ * - sysctl_hardlockup_all_cpu_backtrace
+ * - hardlockup_panic
+ * - watchdog_suspended
+ */
+void __weak watchdog_nmi_reconfigure(void)
+{
+}
+
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
static u64 __read_mostly sample_period;
@@ -120,6 +197,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
sysctl_hardlockup_all_cpu_backtrace =
@@ -128,6 +206,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str)
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif
+#endif
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -213,18 +292,6 @@ void touch_softlockup_watchdog_sync(void)
__this_cpu_write(watchdog_touch_ts, 0);
}
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
- unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
- if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
- return true;
-
- __this_cpu_write(hrtimer_interrupts_saved, hrint);
- return false;
-}
-
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
@@ -237,21 +304,21 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
-static void watchdog_interrupt_count(void)
+/* watchdog detector functions */
+bool is_hardlockup(void)
{
- __this_cpu_inc(hrtimer_interrupts);
-}
+ unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-/*
- * These two functions are mostly architecture specific
- * defining them as weak here.
- */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
- return 0;
+ if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+ return true;
+
+ __this_cpu_write(hrtimer_interrupts_saved, hrint);
+ return false;
}
-void __weak watchdog_nmi_disable(unsigned int cpu)
+
+static void watchdog_interrupt_count(void)
{
+ __this_cpu_inc(hrtimer_interrupts);
}
static int watchdog_enable_all_cpus(void);
@@ -502,57 +569,6 @@ static void watchdog_unpark_threads(void)
kthread_unpark(per_cpu(softlockup_watchdog, cpu));
}
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
- */
-int lockup_detector_suspend(void)
-{
- int ret = 0;
-
- get_online_cpus();
- mutex_lock(&watchdog_proc_mutex);
- /*
- * Multiple suspend requests can be active in parallel (counted by
- * the 'watchdog_suspended' variable). If the watchdog threads are
- * running, the first caller takes care that they will be parked.
- * The state of 'watchdog_running' cannot change while a suspend
- * request is active (see related code in 'proc' handlers).
- */
- if (watchdog_running && !watchdog_suspended)
- ret = watchdog_park_threads();
-
- if (ret == 0)
- watchdog_suspended++;
- else {
- watchdog_disable_all_cpus();
- pr_err("Failed to suspend lockup detectors, disabled\n");
- watchdog_enabled = 0;
- }
-
- mutex_unlock(&watchdog_proc_mutex);
-
- return ret;
-}
-
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
- */
-void lockup_detector_resume(void)
-{
- mutex_lock(&watchdog_proc_mutex);
-
- watchdog_suspended--;
- /*
- * The watchdog threads are unparked if they were previously running
- * and if there is no more active suspend request.
- */
- if (watchdog_running && !watchdog_suspended)
- watchdog_unpark_threads();
-
- mutex_unlock(&watchdog_proc_mutex);
- put_online_cpus();
-}
-
static int update_watchdog_all_cpus(void)
{
int ret;
@@ -605,6 +621,100 @@ static void watchdog_disable_all_cpus(void)
}
#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+ return smpboot_update_cpumask_percpu_thread(
+ &watchdog_threads, &watchdog_cpumask);
+}
+#endif
+
+#else /* SOFTLOCKUP */
+static int watchdog_park_threads(void)
+{
+ return 0;
+}
+
+static void watchdog_unpark_threads(void)
+{
+}
+
+static int watchdog_enable_all_cpus(void)
+{
+ return 0;
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+}
+
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+ return 0;
+}
+#endif
+
+static void set_sample_period(void)
+{
+}
+#endif /* SOFTLOCKUP */
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+ int ret = 0;
+
+ get_online_cpus();
+ mutex_lock(&watchdog_proc_mutex);
+ /*
+ * Multiple suspend requests can be active in parallel (counted by
+ * the 'watchdog_suspended' variable). If the watchdog threads are
+ * running, the first caller takes care that they will be parked.
+ * The state of 'watchdog_running' cannot change while a suspend
+ * request is active (see related code in 'proc' handlers).
+ */
+ if (watchdog_running && !watchdog_suspended)
+ ret = watchdog_park_threads();
+
+ if (ret == 0)
+ watchdog_suspended++;
+ else {
+ watchdog_disable_all_cpus();
+ pr_err("Failed to suspend lockup detectors, disabled\n");
+ watchdog_enabled = 0;
+ }
+
+ watchdog_nmi_reconfigure();
+
+ mutex_unlock(&watchdog_proc_mutex);
+
+ return ret;
+}
+
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
+{
+ mutex_lock(&watchdog_proc_mutex);
+
+ watchdog_suspended--;
+ /*
+ * The watchdog threads are unparked if they were previously running
+ * and if there is no more active suspend request.
+ */
+ if (watchdog_running && !watchdog_suspended)
+ watchdog_unpark_threads();
+
+ watchdog_nmi_reconfigure();
+
+ mutex_unlock(&watchdog_proc_mutex);
+ put_online_cpus();
+}
+
+#ifdef CONFIG_SYSCTL
/*
* Update the run state of the lockup detectors.
@@ -625,6 +735,8 @@ static int proc_watchdog_update(void)
else
watchdog_disable_all_cpus();
+ watchdog_nmi_reconfigure();
+
return err;
}
@@ -810,10 +922,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
* a temporary cpumask, so we are likely not in a
* position to do much else to make things better.
*/
- if (smpboot_update_cpumask_percpu_thread(
- &watchdog_threads, &watchdog_cpumask) != 0)
+ if (watchdog_update_cpus() != 0)
pr_err("cpumask update failed\n");
}
+
+ watchdog_nmi_reconfigure();
}
out:
mutex_unlock(&watchdog_proc_mutex);
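
The watchdog.c reshuffle hides the soft-lockup machinery behind CONFIG_SOFTLOCKUP_DETECTOR and leaves watchdog_nmi_enable()/watchdog_nmi_disable()/watchdog_nmi_reconfigure() as weak hooks an architecture can override to drive its own hard-lockup detector from the shared knobs. A hedged sketch of such an override; my_arch_hw_watchdog_start/stop are hypothetical, and the extern declarations for watchdog_enabled, watchdog_thresh, watchdog_cpumask and watchdog_suspended are assumed to come from the include/linux/nmi.h side of this series:

#include <linux/cpumask.h>
#include <linux/nmi.h>

/* Hypothetical arch-specific primitives, assumed to exist for this sketch. */
extern void my_arch_hw_watchdog_start(unsigned int cpu, int thresh);
extern void my_arch_hw_watchdog_stop(unsigned int cpu);

int watchdog_nmi_enable(unsigned int cpu)
{
	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		return 0;
	my_arch_hw_watchdog_start(cpu, watchdog_thresh);
	return 0;
}

void watchdog_nmi_disable(unsigned int cpu)
{
	my_arch_hw_watchdog_stop(cpu);
}

/* Called after any watchdog sysctl/suspend change; re-read the knobs. */
void watchdog_nmi_reconfigure(void)
{
	unsigned int cpu;

	for_each_cpu(cpu, &watchdog_cpumask) {
		my_arch_hw_watchdog_stop(cpu);
		if ((watchdog_enabled & NMI_WATCHDOG_ENABLED) &&
		    !watchdog_suspended)
			my_arch_hw_watchdog_start(cpu, watchdog_thresh);
	}
}
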
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 54a427d..295a0d8 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -22,41 +22,9 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-unsigned int __read_mostly hardlockup_panic =
- CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
- if (!strncmp(str, "panic", 5))
- hardlockup_panic = 1;
- else if (!strncmp(str, "nopanic", 7))
- hardlockup_panic = 0;
- else if (!strncmp(str, "0", 1))
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
- else if (!strncmp(str, "1", 1))
- watchdog_enabled |= NMI_WATCHDOG_ENABLED;
- return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
{
/*
* Using __raw here because some code paths have
@@ -66,9 +34,8 @@ void touch_nmi_watchdog(void)
* going off.
*/
raw_cpu_write(watchdog_nmi_touch, true);
- touch_softlockup_watchdog();
}
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a86688f..ca937b0 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3577,6 +3577,13 @@ static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
/* yeap, return possible CPUs in @node that @attrs wants */
cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
+
+ if (cpumask_empty(cpumask)) {
+ pr_warn_once("WARNING: workqueue cpumask: online intersect > "
+ "possible intersect\n");
+ return false;
+ }
+
return !cpumask_equal(cpumask, attrs->cpumask);
use_dfl:
@@ -3744,8 +3751,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
return -EINVAL;
/* creating multiple pwqs breaks ordering guarantee */
- if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
- return -EINVAL;
+ if (!list_empty(&wq->pwqs)) {
+ if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+ return -EINVAL;
+
+ wq->flags &= ~__WQ_ORDERED;
+ }
ctx = apply_wqattrs_prepare(wq, attrs);
if (!ctx)
@@ -3929,6 +3940,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
+ /*
+ * Unbound && max_active == 1 used to imply ordered, which is no
+ * longer the case on NUMA machines due to per-node pools. While
+ * alloc_ordered_workqueue() is the right way to create an ordered
+ * workqueue, keep the previous behavior to avoid subtle breakages
+ * on NUMA.
+ */
+ if ((flags & WQ_UNBOUND) && max_active == 1)
+ flags |= __WQ_ORDERED;
+
/* see the comment above the definition of WQ_POWER_EFFICIENT */
if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
flags |= WQ_UNBOUND;
@@ -4119,13 +4140,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
struct pool_workqueue *pwq;
/* disallow meddling with max_active for ordered workqueues */
- if (WARN_ON(wq->flags & __WQ_ORDERED))
+ if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
return;
max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
mutex_lock(&wq->mutex);
+ wq->flags &= ~__WQ_ORDERED;
wq->saved_max_active = max_active;
for_each_pwq(pwq, wq)
@@ -5253,7 +5275,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
* attributes breaks ordering guarantee. Disallow exposing ordered
* workqueues.
*/
- if (WARN_ON(wq->flags & __WQ_ORDERED))
+ if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
return -EINVAL;
wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
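
The workqueue hunks split "ordered" into two cases: queues made the old way with alloc_workqueue(name, WQ_UNBOUND, 1) keep their implicit ordering for compatibility but quietly drop it when max_active or attributes are changed, while only queues carrying __WQ_ORDERED_EXPLICIT (i.e. created via alloc_ordered_workqueue()) still refuse such changes. A minimal sketch of the explicit form, with a hypothetical work function and module boilerplate:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;

static void demo_fn(struct work_struct *work)
{
	pr_info("ordered work item ran\n");
}

static DECLARE_WORK(demo_work, demo_fn);

static int __init demo_wq_init(void)
{
	/* explicitly ordered: one item at a time, in queueing order */
	demo_wq = alloc_ordered_workqueue("demo_ordered", 0);
	if (!demo_wq)
		return -ENOMEM;
	queue_work(demo_wq, &demo_work);
	return 0;
}

static void __exit demo_wq_exit(void)
{
	destroy_workqueue(demo_wq);	/* flushes pending work first */
}

module_init(demo_wq_init);
module_exit(demo_wq_exit);
MODULE_LICENSE("GPL");
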