summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c9
-rw-r--r--kernel/debug/debug_core.c2
-rw-r--r--kernel/debug/kdb/kdb_main.c2
-rw-r--r--kernel/debug/kdb/kdb_private.h7
-rw-r--r--kernel/debug/kdb/kdb_support.c4
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c17
-rw-r--r--kernel/kfifo.c9
-rw-r--r--kernel/kmod.c4
-rw-r--r--kernel/pm_qos_params.c12
-rw-r--r--kernel/power/poweroff.c2
-rw-r--r--kernel/sched.c10
-rw-r--r--kernel/sched_fair.c2
-rw-r--r--kernel/trace/ring_buffer.c3
-rw-r--r--kernel/trace/trace.c11
-rw-r--r--kernel/trace/trace_events.c207
-rw-r--r--kernel/trace/trace_functions_graph.c10
-rw-r--r--kernel/trace/trace_stack.c2
-rw-r--r--kernel/watchdog.c3
-rw-r--r--kernel/workqueue.c62
20 files changed, 273 insertions, 110 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 192f88c..ed19afd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1791,10 +1791,11 @@ out:
}
/**
- * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup
+ * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
+ * @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
-int cgroup_attach_task_current_cg(struct task_struct *tsk)
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroupfs_root *root;
struct cgroup *cur_cg;
@@ -1802,7 +1803,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk)
cgroup_lock();
for_each_active_root(root) {
- cur_cg = task_cgroup_from_root(current, root);
+ cur_cg = task_cgroup_from_root(from, root);
retval = cgroup_attach_task(cur_cg, tsk);
if (retval)
break;
@@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk)
return retval;
}
-EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg);
+EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
/*
* Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 3c2d497..de407c7 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -741,7 +741,7 @@ static struct console kgdbcons = {
};
#ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handle_dbg(int key, struct tty_struct *tty)
+static void sysrq_handle_dbg(int key)
{
if (!dbg_io_ops) {
printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 28b8441..caf057a 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1929,7 +1929,7 @@ static int kdb_sr(int argc, const char **argv)
if (argc != 1)
return KDB_ARGCOUNT;
kdb_trap_printk++;
- __handle_sysrq(*argv[1], NULL, 0);
+ __handle_sysrq(*argv[1], false);
kdb_trap_printk--;
return 0;
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index c438f54..be775f7 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -255,7 +255,14 @@ extern void kdb_ps1(const struct task_struct *p);
extern void kdb_print_nameval(const char *name, unsigned long val);
extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
extern void kdb_meminfo_proc_show(void);
+#ifdef CONFIG_KALLSYMS
extern const char *kdb_walk_kallsyms(loff_t *pos);
+#else /* ! CONFIG_KALLSYMS */
+static inline const char *kdb_walk_kallsyms(loff_t *pos)
+{
+ return NULL;
+}
+#endif /* ! CONFIG_KALLSYMS */
extern char *kdb_getstr(char *, size_t, char *);
/* Defines for kdb_symbol_print */
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 45344d5c..6b2485d 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -82,8 +82,8 @@ static char *kdb_name_table[100]; /* arbitrary size */
int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
{
int ret = 0;
- unsigned long symbolsize;
- unsigned long offset;
+ unsigned long symbolsize = 0;
+ unsigned long offset = 0;
#define knt1_size 128 /* must be >= kallsyms table size */
char *knt1 = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 671ed56..0312022 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1386,8 +1386,7 @@ static int wait_task_stopped(struct wait_opts *wo,
if (!unlikely(wo->wo_flags & WNOWAIT))
*p_code = 0;
- /* don't need the RCU readlock here as we're holding a spinlock */
- uid = __task_cred(p)->uid;
+ uid = task_uid(p);
unlock_sig:
spin_unlock_irq(&p->sighand->siglock);
if (!exit_code)
@@ -1460,7 +1459,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
}
if (!unlikely(wo->wo_flags & WNOWAIT))
p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
- uid = __task_cred(p)->uid;
+ uid = task_uid(p);
spin_unlock_irq(&p->sighand->siglock);
pid = task_pid_vnr(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 98b4508..b7e9d60 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -300,7 +300,7 @@ out:
#ifdef CONFIG_MMU
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
- struct vm_area_struct *mpnt, *tmp, **pprev;
+ struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
@@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (retval)
goto out;
+ prev = NULL;
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
struct file *file;
@@ -359,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
goto fail_nomem_anon_vma_fork;
tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm;
- tmp->vm_next = NULL;
+ tmp->vm_next = tmp->vm_prev = NULL;
file = tmp->vm_file;
if (file) {
struct inode *inode = file->f_path.dentry->d_inode;
@@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
*pprev = tmp;
pprev = &tmp->vm_next;
+ tmp->vm_prev = prev;
+ prev = tmp;
__vma_link_rb(mm, tmp, rb_link, rb_parent);
rb_link = &tmp->vm_rb.rb_right;
@@ -752,13 +755,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
struct fs_struct *fs = current->fs;
if (clone_flags & CLONE_FS) {
/* tsk->fs is already what we want */
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
if (fs->in_exec) {
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
return -EAGAIN;
}
fs->users++;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
return 0;
}
tsk->fs = copy_fs_struct(fs);
@@ -1676,13 +1679,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
if (new_fs) {
fs = current->fs;
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
current->fs = new_fs;
if (--fs->users)
new_fs = NULL;
else
new_fs = fs;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
}
if (new_mm) {
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 4502604..6b5580c 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -503,6 +503,15 @@ unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf,
}
EXPORT_SYMBOL(__kfifo_out_r);
+void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize)
+{
+ unsigned int n;
+
+ n = __kfifo_peek_n(fifo, recsize);
+ fifo->out += n + recsize;
+}
+EXPORT_SYMBOL(__kfifo_skip_r);
+
int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from,
unsigned long len, unsigned int *copied, size_t recsize)
{
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6e9b196..9cd0591 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data)
goto fail;
}
- retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
+ retval = kernel_execve(sub_info->path,
+ (const char *const *)sub_info->argv,
+ (const char *const *)sub_info->envp);
/* Exec failed? */
fail:
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 996a4de..b7e4c36 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -212,15 +212,17 @@ EXPORT_SYMBOL_GPL(pm_qos_request_active);
/**
* pm_qos_add_request - inserts new qos request into the list
- * @pm_qos_class: identifies which list of qos request to us
+ * @dep: pointer to a preallocated handle
+ * @pm_qos_class: identifies which list of qos request to use
* @value: defines the qos request
*
* This function inserts a new entry in the pm_qos_class list of requested qos
* performance characteristics. It recomputes the aggregate QoS expectations
- * for the pm_qos_class of parameters, and returns the pm_qos_request list
- * element as a handle for use in updating and removal. Call needs to save
- * this handle for later use.
+ * for the pm_qos_class of parameters and initializes the pm_qos_request_list
+ * handle. Caller needs to save this handle for later use in updates and
+ * removal.
*/
+
void pm_qos_add_request(struct pm_qos_request_list *dep,
int pm_qos_class, s32 value)
{
@@ -348,7 +350,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
if (pm_qos_class >= 0) {
- struct pm_qos_request_list *req = kzalloc(GFP_KERNEL, sizeof(*req));
+ struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return -ENOMEM;
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index e8b3370..d523593 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -24,7 +24,7 @@ static void do_poweroff(struct work_struct *dummy)
static DECLARE_WORK(poweroff_work, do_poweroff);
-static void handle_poweroff(int key, struct tty_struct *tty)
+static void handle_poweroff(int key)
{
/* run sysrq poweroff on boot cpu */
schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work);
diff --git a/kernel/sched.c b/kernel/sched.c
index 41541d7..09b574e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3865,8 +3865,16 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
/*
* Owner changed, break to re-assess state.
*/
- if (lock->owner != owner)
+ if (lock->owner != owner) {
+ /*
+ * If the lock has switched to a different owner,
+ * we likely have heavy contention. Return 0 to quit
+ * optimistic spinning and not contend further:
+ */
+ if (lock->owner)
+ return 0;
break;
+ }
/*
* Is that owner really running on that cpu?
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 806d1b2..ab661eb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3752,6 +3752,8 @@ static void task_fork_fair(struct task_struct *p)
raw_spin_lock_irqsave(&rq->lock, flags);
+ update_rq_clock(rq);
+
if (unlikely(task_cpu(p) != this_cpu))
__set_task_cpu(p, this_cpu);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3632ce8..19cccc3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3846,6 +3846,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
rpos = reader->read;
pos += size;
+ if (rpos >= commit)
+ break;
+
event = rb_reader_event(cpu_buffer);
size = rb_event_length(event);
} while (len > size);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ba14a22..9ec59f5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3463,6 +3463,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
char *buf;
+ size_t written;
if (tracing_disabled)
return -EINVAL;
@@ -3484,11 +3485,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
} else
buf[cnt] = '\0';
- cnt = mark_printk("%s", buf);
+ written = mark_printk("%s", buf);
kfree(buf);
- *fpos += cnt;
+ *fpos += written;
- return cnt;
+ /* don't tell userspace we wrote more - it might confuse them */
+ if (written > cnt)
+ written = cnt;
+
+ return written;
}
static int tracing_clock_show(struct seq_file *m, void *v)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 09b4fa6..4c758f1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -598,88 +598,165 @@ out:
return ret;
}
-static void print_event_fields(struct trace_seq *s, struct list_head *head)
+enum {
+ FORMAT_HEADER = 1,
+ FORMAT_PRINTFMT = 2,
+};
+
+static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
+ struct ftrace_event_call *call = m->private;
struct ftrace_event_field *field;
+ struct list_head *head;
- list_for_each_entry_reverse(field, head, link) {
- /*
- * Smartly shows the array type(except dynamic array).
- * Normal:
- * field:TYPE VAR
- * If TYPE := TYPE[LEN], it is shown:
- * field:TYPE VAR[LEN]
- */
- const char *array_descriptor = strchr(field->type, '[');
+ (*pos)++;
- if (!strncmp(field->type, "__data_loc", 10))
- array_descriptor = NULL;
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ head = &ftrace_common_fields;
- if (!array_descriptor) {
- trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- field->type, field->name, field->offset,
- field->size, !!field->is_signed);
- } else {
- trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- (int)(array_descriptor - field->type),
- field->type, field->name,
- array_descriptor, field->offset,
- field->size, !!field->is_signed);
- }
+ if (unlikely(list_empty(head)))
+ return NULL;
+
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ return field;
+
+ case FORMAT_PRINTFMT:
+ /* all done */
+ return NULL;
+ }
+
+ head = trace_get_fields(call);
+
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it in case.
+ */
+ v = (void *)((unsigned long)v & ~1L);
+
+ field = v;
+ /*
+ * If this is a common field, and at the end of the list, then
+ * continue with main list.
+ */
+ if (field->link.prev == &ftrace_common_fields) {
+ if (unlikely(list_empty(head)))
+ return NULL;
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ /* Set the LSB to notify f_show to print an extra newline */
+ field = (struct ftrace_event_field *)
+ ((unsigned long)field | 1);
+ return field;
}
+
+ /* If we are done tell f_show to print the format */
+ if (field->link.prev == head)
+ return (void *)FORMAT_PRINTFMT;
+
+ field = list_entry(field->link.prev, struct ftrace_event_field, link);
+
+ return field;
}
-static ssize_t
-event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+static void *f_start(struct seq_file *m, loff_t *pos)
{
- struct ftrace_event_call *call = filp->private_data;
- struct list_head *head;
- struct trace_seq *s;
- char *buf;
- int r;
+ loff_t l = 0;
+ void *p;
- if (*ppos)
+ /* Start by showing the header */
+ if (!*pos)
+ return (void *)FORMAT_HEADER;
+
+ p = (void *)FORMAT_HEADER;
+ do {
+ p = f_next(m, p, &l);
+ } while (p && l < *pos);
+
+ return p;
+}
+
+static int f_show(struct seq_file *m, void *v)
+{
+ struct ftrace_event_call *call = m->private;
+ struct ftrace_event_field *field;
+ const char *array_descriptor;
+
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ seq_printf(m, "name: %s\n", call->name);
+ seq_printf(m, "ID: %d\n", call->event.type);
+ seq_printf(m, "format:\n");
return 0;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
+ case FORMAT_PRINTFMT:
+ seq_printf(m, "\nprint fmt: %s\n",
+ call->print_fmt);
+ return 0;
+ }
- trace_seq_init(s);
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it and
+ * print a newline if it is set.
+ */
+ if ((unsigned long)v & 1) {
+ seq_putc(m, '\n');
+ v = (void *)((unsigned long)v & ~1L);
+ }
- trace_seq_printf(s, "name: %s\n", call->name);
- trace_seq_printf(s, "ID: %d\n", call->event.type);
- trace_seq_printf(s, "format:\n");
+ field = v;
- /* print common fields */
- print_event_fields(s, &ftrace_common_fields);
+ /*
+ * Smartly shows the array type(except dynamic array).
+ * Normal:
+ * field:TYPE VAR
+ * If TYPE := TYPE[LEN], it is shown:
+ * field:TYPE VAR[LEN]
+ */
+ array_descriptor = strchr(field->type, '[');
- trace_seq_putc(s, '\n');
+ if (!strncmp(field->type, "__data_loc", 10))
+ array_descriptor = NULL;
- /* print event specific fields */
- head = trace_get_fields(call);
- print_event_fields(s, head);
+ if (!array_descriptor)
+ seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, field->offset,
+ field->size, !!field->is_signed);
+ else
+ seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ array_descriptor, field->offset,
+ field->size, !!field->is_signed);
- r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt);
+ return 0;
+}
- if (!r) {
- /*
- * ug! The format output is bigger than a PAGE!!
- */
- buf = "FORMAT TOO BIG\n";
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- buf, strlen(buf));
- goto out;
- }
+static void f_stop(struct seq_file *m, void *p)
+{
+}
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- s->buffer, s->len);
- out:
- kfree(s);
- return r;
+static const struct seq_operations trace_format_seq_ops = {
+ .start = f_start,
+ .next = f_next,
+ .stop = f_stop,
+ .show = f_show,
+};
+
+static int trace_format_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_event_call *call = inode->i_private;
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &trace_format_seq_ops);
+ if (ret < 0)
+ return ret;
+
+ m = file->private_data;
+ m->private = call;
+
+ return 0;
}
static ssize_t
@@ -877,8 +954,10 @@ static const struct file_operations ftrace_enable_fops = {
};
static const struct file_operations ftrace_event_format_fops = {
- .open = tracing_open_generic,
- .read = event_format_read,
+ .open = trace_format_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
};
static const struct file_operations ftrace_event_id_fops = {
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6bff236..6f23369 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -507,7 +507,15 @@ get_return_for_leaf(struct trace_iterator *iter,
* if the output fails.
*/
data->ent = *curr;
- data->ret = *next;
+ /*
+ * If the next event is not a return type, then
+ * we only care about what type it is. Otherwise we can
+ * safely copy the entire event.
+ */
+ if (next->ent.type == TRACE_GRAPH_RET)
+ data->ret = *next;
+ else
+ data->ret.ent.type = next->ent.type;
}
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 056468e..a6b7e0e 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -249,7 +249,7 @@ static int trace_lookup_stack(struct seq_file *m, long i)
{
unsigned long addr = stack_dump_trace[i];
- return seq_printf(m, "%pF\n", (void *)addr);
+ return seq_printf(m, "%pS\n", (void *)addr);
}
static void print_disabled(struct seq_file *m)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 613bc1f..0d53c8e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -206,6 +206,9 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
struct perf_sample_data *data,
struct pt_regs *regs)
{
+ /* Ensure the watchdog never gets throttled */
+ event->hw.interrupts = 0;
+
if (__get_cpu_var(watchdog_nmi_touch) == true) {
__get_cpu_var(watchdog_nmi_touch) = false;
return;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2994a0e..727f24e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -35,6 +35,9 @@
#include <linux/lockdep.h>
#include <linux/idr.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/workqueue.h>
+
#include "workqueue_sched.h"
enum {
@@ -87,7 +90,8 @@ enum {
/*
* Structure fields follow one of the following exclusion rules.
*
- * I: Set during initialization and read-only afterwards.
+ * I: Modifiable by initialization/destruction paths and read-only for
+ * everyone else.
*
* P: Preemption protected. Disabling preemption is enough and should
* only be modified and accessed from the local cpu.
@@ -195,7 +199,7 @@ typedef cpumask_var_t mayday_mask_t;
cpumask_test_and_set_cpu((cpu), (mask))
#define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask))
#define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask))
-#define alloc_mayday_mask(maskp, gfp) alloc_cpumask_var((maskp), (gfp))
+#define alloc_mayday_mask(maskp, gfp) zalloc_cpumask_var((maskp), (gfp))
#define free_mayday_mask(mask) free_cpumask_var((mask))
#else
typedef unsigned long mayday_mask_t;
@@ -940,10 +944,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
struct global_cwq *gcwq;
struct cpu_workqueue_struct *cwq;
struct list_head *worklist;
+ unsigned int work_flags;
unsigned long flags;
debug_work_activate(work);
+ if (WARN_ON_ONCE(wq->flags & WQ_DYING))
+ return;
+
/* determine gcwq to use */
if (!(wq->flags & WQ_UNBOUND)) {
struct global_cwq *last_gcwq;
@@ -986,14 +994,17 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
BUG_ON(!list_empty(&work->entry));
cwq->nr_in_flight[cwq->work_color]++;
+ work_flags = work_color_to_flags(cwq->work_color);
if (likely(cwq->nr_active < cwq->max_active)) {
cwq->nr_active++;
worklist = gcwq_determine_ins_pos(gcwq, cwq);
- } else
+ } else {
+ work_flags |= WORK_STRUCT_DELAYED;
worklist = &cwq->delayed_works;
+ }
- insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));
+ insert_work(cwq, work, worklist, work_flags);
spin_unlock_irqrestore(&gcwq->lock, flags);
}
@@ -1212,6 +1223,7 @@ static void worker_leave_idle(struct worker *worker)
* bound), %false if offline.
*/
static bool worker_maybe_bind_and_lock(struct worker *worker)
+__acquires(&gcwq->lock)
{
struct global_cwq *gcwq = worker->gcwq;
struct task_struct *task = worker->task;
@@ -1485,6 +1497,8 @@ static void gcwq_mayday_timeout(unsigned long __gcwq)
* otherwise.
*/
static bool maybe_create_worker(struct global_cwq *gcwq)
+__releases(&gcwq->lock)
+__acquires(&gcwq->lock)
{
if (!need_to_create_worker(gcwq))
return false;
@@ -1659,6 +1673,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
move_linked_works(work, pos, NULL);
+ __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
cwq->nr_active++;
}
@@ -1666,6 +1681,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
* cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
* @cwq: cwq of interest
* @color: color of work which left the queue
+ * @delayed: for a delayed work
*
* A work either has completed or is removed from pending queue,
* decrement nr_in_flight of its cwq and handle workqueue flushing.
@@ -1673,19 +1689,22 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
* CONTEXT:
* spin_lock_irq(gcwq->lock).
*/
-static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
+static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
+ bool delayed)
{
/* ignore uncolored works */
if (color == WORK_NO_COLOR)
return;
cwq->nr_in_flight[color]--;
- cwq->nr_active--;
- if (!list_empty(&cwq->delayed_works)) {
- /* one down, submit a delayed one */
- if (cwq->nr_active < cwq->max_active)
- cwq_activate_first_delayed(cwq);
+ if (!delayed) {
+ cwq->nr_active--;
+ if (!list_empty(&cwq->delayed_works)) {
+ /* one down, submit a delayed one */
+ if (cwq->nr_active < cwq->max_active)
+ cwq_activate_first_delayed(cwq);
+ }
}
/* is flush in progress and are we at the flushing tip? */
@@ -1722,6 +1741,8 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
* spin_lock_irq(gcwq->lock) which is released and regrabbed.
*/
static void process_one_work(struct worker *worker, struct work_struct *work)
+__releases(&gcwq->lock)
+__acquires(&gcwq->lock)
{
struct cpu_workqueue_struct *cwq = get_work_cwq(work);
struct global_cwq *gcwq = cwq->gcwq;
@@ -1790,7 +1811,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
work_clear_pending(work);
lock_map_acquire(&cwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
+ trace_workqueue_execute_start(work);
f(work);
+ /*
+ * While we must be careful to not use "work" after this, the trace
+ * point will only record its address.
+ */
+ trace_workqueue_execute_end(work);
lock_map_release(&lockdep_map);
lock_map_release(&cwq->wq->lockdep_map);
@@ -1814,7 +1841,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
hlist_del_init(&worker->hentry);
worker->current_work = NULL;
worker->current_cwq = NULL;
- cwq_dec_nr_in_flight(cwq, work_color);
+ cwq_dec_nr_in_flight(cwq, work_color, false);
}
/**
@@ -2379,7 +2406,8 @@ static int try_to_grab_pending(struct work_struct *work)
debug_work_deactivate(work);
list_del_init(&work->entry);
cwq_dec_nr_in_flight(get_work_cwq(work),
- get_work_color(work));
+ get_work_color(work),
+ *work_data_bits(work) & WORK_STRUCT_DELAYED);
ret = 1;
}
}
@@ -2782,7 +2810,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
if (IS_ERR(rescuer->task))
goto err;
- wq->rescuer = rescuer;
rescuer->task->flags |= PF_THREAD_BOUND;
wake_up_process(rescuer->task);
}
@@ -2824,6 +2851,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
{
unsigned int cpu;
+ wq->flags |= WQ_DYING;
flush_workqueue(wq);
/*
@@ -2848,6 +2876,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
if (wq->flags & WQ_RESCUER) {
kthread_stop(wq->rescuer->task);
free_mayday_mask(wq->mayday_mask);
+ kfree(wq->rescuer);
}
free_cwqs(wq);
@@ -3230,6 +3259,8 @@ static int __cpuinit trustee_thread(void *__gcwq)
* multiple times. To be used by cpu_callback.
*/
static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
+__releases(&gcwq->lock)
+__acquires(&gcwq->lock)
{
if (!(gcwq->trustee_state == state ||
gcwq->trustee_state == TRUSTEE_DONE)) {
@@ -3536,8 +3567,7 @@ static int __init init_workqueues(void)
spin_lock_init(&gcwq->lock);
INIT_LIST_HEAD(&gcwq->worklist);
gcwq->cpu = cpu;
- if (cpu == WORK_CPU_UNBOUND)
- gcwq->flags |= GCWQ_DISASSOCIATED;
+ gcwq->flags |= GCWQ_DISASSOCIATED;
INIT_LIST_HEAD(&gcwq->idle_list);
for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
@@ -3561,6 +3591,8 @@ static int __init init_workqueues(void)
struct global_cwq *gcwq = get_gcwq(cpu);
struct worker *worker;
+ if (cpu != WORK_CPU_UNBOUND)
+ gcwq->flags &= ~GCWQ_DISASSOCIATED;
worker = create_worker(gcwq, true);
BUG_ON(!worker);
spin_lock_irq(&gcwq->lock);
OpenPOWER on IntegriCloud