From cd64647f043e3fd3569bcf068f47f030198ff93a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 23 Sep 2013 16:43:58 +0800 Subject: hung_task: Change sysctl_hung_task_check_count to 'int' As 'sysctl_hung_task_check_count' is 'unsigned long' when this value is assigned to max_count in check_hung_uninterruptible_tasks(), it's truncated to 'int' type. This causes a minor artifact: if we write 2^32 to sysctl.hung_task_check_count, hung task detection will be effectively disabled. With this fix, it will still truncate the user input to 32 bits, but reading sysctl.hung_task_check_count reflects the actual truncated value. Signed-off-by: Li Zefan Acked-by: Ingo Molnar Link: http://lkml.kernel.org/r/523FFF4E.9050401@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index bf8086b..9552afa 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -2,8 +2,8 @@ #define _SCHED_SYSCTL_H #ifdef CONFIG_DETECT_HUNG_TASK +extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; -extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, -- cgit v1.1 From 6a716c90a51338009c3bc1f460829afaed8f922d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Oct 2013 18:18:28 +0200 Subject: hung_task debugging: Add tracepoint to report the hang Currently check_hung_task() prints a warning if it detects the problem, but it is not convenient to watch the system logs if user-space wants to be notified about the hang. Add the new trace_sched_process_hang() into check_hung_task(), this way a user-space monitor can easily wait for the hang and potentially resolve a problem. Signed-off-by: Oleg Nesterov Cc: Dave Sullivan Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20131019161828.GA7439@redhat.com Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 2e7d994..2a652d1 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -424,6 +424,25 @@ TRACE_EVENT(sched_pi_setprio, __entry->oldprio, __entry->newprio) ); +#ifdef CONFIG_DETECT_HUNG_TASK +TRACE_EVENT(sched_process_hang, + TP_PROTO(struct task_struct *tsk), + TP_ARGS(tsk), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + ), + + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) +); +#endif /* CONFIG_DETECT_HUNG_TASK */ + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v1.1 From 827da44c61419f29ae3be198c342e2147f1a10cb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:58 -0700 Subject: net: Explicitly initialize u64_stats_sync structures for lockdep In order to enable lockdep on seqcount/seqlock structures, we must explicitly initialize any locks. The u64_stats_sync structure, uses a seqcount, and thus we need to introduce a u64_stats_init() function and use it to initialize the structure. This unfortunately adds a lot of fairly trivial initialization code to a number of drivers. But the benefit of ensuring correctness makes this worth while. Because these changes are required for lockdep to be enabled, and the changes are quite trivial, I've not yet split this patch out into 30-some separate patches, as I figured it would be better to get the various maintainers thoughts on how to best merge this change along with the seqcount lockdep enablement. Feedback would be appreciated! Signed-off-by: John Stultz Acked-by: Julian Anastasov Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Eric Dumazet Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Jesse Gross Cc: Mathieu Desnoyers Cc: "Michael S. Tsirkin" Cc: Mirko Lindner Cc: Patrick McHardy Cc: Roger Luethi Cc: Rusty Russell Cc: Simon Horman Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Thomas Petazzoni Cc: Wensong Zhang Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/u64_stats_sync.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 8da8c4e..7bfabd2 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -67,6 +67,13 @@ struct u64_stats_sync { #endif }; + +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +# define u64_stats_init(syncp) seqcount_init(syncp.seq) +#else +# define u64_stats_init(syncp) do { } while (0) +#endif + static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) -- cgit v1.1 From 1ca7d67cf5d5a2aef26a8d9afd789006fa098347 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:59 -0700 Subject: seqcount: Add lockdep functionality to seqcount/seqlock structures Currently seqlocks and seqcounts don't support lockdep. After running across a seqcount related deadlock in the timekeeping code, I used a less-refined and more focused variant of this patch to narrow down the cause of the issue. This is a first-pass attempt to properly enable lockdep functionality on seqlocks and seqcounts. Since seqcounts are used in the vdso gettimeofday code, I've provided non-lockdep accessors for those needs. I've also handled one case where there were nested seqlock writers and there may be more edge cases. Comments and feedback would be appreciated! Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Cc: Eric Dumazet Cc: Li Zefan Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: "David S. Miller" Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 8 ++--- include/linux/lockdep.h | 8 +++-- include/linux/seqlock.h | 79 ++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5cd0f09..b0ed422 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -32,10 +32,10 @@ extern struct fs_struct init_fs; #endif #ifdef CONFIG_CPUSETS -#define INIT_CPUSET_SEQ \ - .mems_allowed_seq = SEQCNT_ZERO, +#define INIT_CPUSET_SEQ(tsk) \ + .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), #else -#define INIT_CPUSET_SEQ +#define INIT_CPUSET_SEQ(tsk) #endif #define INIT_SIGNALS(sig) { \ @@ -220,7 +220,7 @@ extern struct task_group root_task_group; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ - INIT_CPUSET_SEQ \ + INIT_CPUSET_SEQ(tsk) \ INIT_VTIME(tsk) \ } diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index cfc2f11..92b1bfc 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -497,6 +497,10 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) #define rwlock_release(l, n, i) lock_release(l, n, i) +#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) +#define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define seqcount_release(l, n, i) lock_release(l, n, i) + #define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define mutex_release(l, n, i) lock_release(l, n, i) @@ -504,11 +508,11 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) -# define rwsem_release(l, n, i) lock_release(l, n, i) +#define rwsem_release(l, n, i) lock_release(l, n, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) -# define lock_map_release(l) lock_release(l, 1, _THIS_IP_) +#define lock_map_release(l) lock_release(l, 1, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 21a2093..1e8a8b6 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -34,6 +34,7 @@ #include #include +#include #include /* @@ -44,10 +45,50 @@ */ typedef struct seqcount { unsigned sequence; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } seqcount_t; -#define SEQCNT_ZERO { 0 } -#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0) +static inline void __seqcount_init(seqcount_t *s, const char *name, + struct lock_class_key *key) +{ + /* + * Make sure we are not reinitializing a held lock: + */ + lockdep_init_map(&s->dep_map, name, key, 0); + s->sequence = 0; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define SEQCOUNT_DEP_MAP_INIT(lockname) \ + .dep_map = { .name = #lockname } \ + +# define seqcount_init(s) \ + do { \ + static struct lock_class_key __key; \ + __seqcount_init((s), #s, &__key); \ + } while (0) + +static inline void seqcount_lockdep_reader_access(const seqcount_t *s) +{ + seqcount_t *l = (seqcount_t *)s; + unsigned long flags; + + local_irq_save(flags); + seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); + seqcount_release(&l->dep_map, 1, _RET_IP_); + local_irq_restore(flags); +} + +#else +# define SEQCOUNT_DEP_MAP_INIT(lockname) +# define seqcount_init(s) __seqcount_init(s, NULL, NULL) +# define seqcount_lockdep_reader_access(x) +#endif + +#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} + /** * __read_seqcount_begin - begin a seq-read critical section (without barrier) @@ -76,6 +117,22 @@ repeat: } /** + * read_seqcount_begin_no_lockdep - start seq-read critical section w/o lockdep + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * read_seqcount_begin_no_lockdep opens a read critical section of the given + * seqcount, but without any lockdep checking. Validity of the critical + * section is tested by checking read_seqcount_retry function. + */ +static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s) +{ + unsigned ret = __read_seqcount_begin(s); + smp_rmb(); + return ret; +} + +/** * read_seqcount_begin - begin a seq-read critical section * @s: pointer to seqcount_t * Returns: count to be passed to read_seqcount_retry @@ -86,9 +143,8 @@ repeat: */ static inline unsigned read_seqcount_begin(const seqcount_t *s) { - unsigned ret = __read_seqcount_begin(s); - smp_rmb(); - return ret; + seqcount_lockdep_reader_access(s); + return read_seqcount_begin_no_lockdep(s); } /** @@ -108,6 +164,8 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) static inline unsigned raw_seqcount_begin(const seqcount_t *s) { unsigned ret = ACCESS_ONCE(s->sequence); + + seqcount_lockdep_reader_access(s); smp_rmb(); return ret & ~1; } @@ -152,14 +210,21 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) * Sequence counter only version assumes that callers are using their * own mutexing. */ -static inline void write_seqcount_begin(seqcount_t *s) +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { s->sequence++; smp_wmb(); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); } static inline void write_seqcount_end(seqcount_t *s) { + seqcount_release(&s->dep_map, 1, _RET_IP_); smp_wmb(); s->sequence++; } @@ -188,7 +253,7 @@ typedef struct { */ #define __SEQLOCK_UNLOCKED(lockname) \ { \ - .seqcount = SEQCNT_ZERO, \ + .seqcount = SEQCNT_ZERO(lockname), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } -- cgit v1.1 From db751fe3ea6880ff5ac5abe60cb7b80deb5a4140 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:52:00 -0700 Subject: cpuset: Fix potential deadlock w/ set_mems_allowed After adding lockdep support to seqlock/seqcount structures, I started seeing the following warning: [ 1.070907] ====================================================== [ 1.072015] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] [ 1.073181] 3.11.0+ #67 Not tainted [ 1.073801] ------------------------------------------------------ [ 1.074882] kworker/u4:2/708 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 1.076088] (&p->mems_allowed_seq){+.+...}, at: [] new_slab+0x5f/0x280 [ 1.077572] [ 1.077572] and this task is already holding: [ 1.078593] (&(&q->__queue_lock)->rlock){..-...}, at: [] blk_execute_rq_nowait+0x53/0xf0 [ 1.080042] which would create a new lock dependency: [ 1.080042] (&(&q->__queue_lock)->rlock){..-...} -> (&p->mems_allowed_seq){+.+...} [ 1.080042] [ 1.080042] but this new dependency connects a SOFTIRQ-irq-safe lock: [ 1.080042] (&(&q->__queue_lock)->rlock){..-...} [ 1.080042] ... which became SOFTIRQ-irq-safe at: [ 1.080042] [] __lock_acquire+0x5b9/0x1db0 [ 1.080042] [] lock_acquire+0x95/0x130 [ 1.080042] [] _raw_spin_lock+0x41/0x80 [ 1.080042] [] scsi_device_unbusy+0x7e/0xd0 [ 1.080042] [] scsi_finish_command+0x32/0xf0 [ 1.080042] [] scsi_softirq_done+0xa1/0x130 [ 1.080042] [] blk_done_softirq+0x73/0x90 [ 1.080042] [] __do_softirq+0x110/0x2f0 [ 1.080042] [] run_ksoftirqd+0x2d/0x60 [ 1.080042] [] smpboot_thread_fn+0x156/0x1e0 [ 1.080042] [] kthread+0xd6/0xe0 [ 1.080042] [] ret_from_fork+0x7c/0xb0 [ 1.080042] [ 1.080042] to a SOFTIRQ-irq-unsafe lock: [ 1.080042] (&p->mems_allowed_seq){+.+...} [ 1.080042] ... which became SOFTIRQ-irq-unsafe at: [ 1.080042] ... [] __lock_acquire+0x613/0x1db0 [ 1.080042] [] lock_acquire+0x95/0x130 [ 1.080042] [] kthreadd+0x82/0x180 [ 1.080042] [] ret_from_fork+0x7c/0xb0 [ 1.080042] [ 1.080042] other info that might help us debug this: [ 1.080042] [ 1.080042] Possible interrupt unsafe locking scenario: [ 1.080042] [ 1.080042] CPU0 CPU1 [ 1.080042] ---- ---- [ 1.080042] lock(&p->mems_allowed_seq); [ 1.080042] local_irq_disable(); [ 1.080042] lock(&(&q->__queue_lock)->rlock); [ 1.080042] lock(&p->mems_allowed_seq); [ 1.080042] [ 1.080042] lock(&(&q->__queue_lock)->rlock); [ 1.080042] [ 1.080042] *** DEADLOCK *** The issue stems from the kthreadd() function calling set_mems_allowed with irqs enabled. While its possibly unlikely for the actual deadlock to trigger, a fix is fairly simple: disable irqs before taking the mems_allowed_seq lock. Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Acked-by: Li Zefan Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: "David S. Miller" Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index cc1b01c..3fe661f 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -110,10 +110,14 @@ static inline bool put_mems_allowed(unsigned int seq) static inline void set_mems_allowed(nodemask_t nodemask) { + unsigned long flags; + task_lock(current); + local_irq_save(flags); write_seqcount_begin(¤t->mems_allowed_seq); current->mems_allowed = nodemask; write_seqcount_end(¤t->mems_allowed_seq); + local_irq_restore(flags); task_unlock(current); } -- cgit v1.1 From 67a6de49bf545c34eb8dee99abbb92d9ea268200 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Nov 2013 08:26:39 +0100 Subject: locking/doc: Update references to kernel/mutex.c Fix this docbook error: >> docproc: kernel/mutex.c: No such file or directory by updating the stale references to kernel/mutex.c. Reported-by: fengguang.wu@intel.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-34pikw1tlsskj65rrt5iusrq@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bab49da..d318193 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -131,7 +131,7 @@ static inline int mutex_is_locked(struct mutex *lock) } /* - * See kernel/mutex.c for detailed documentation of these APIs. + * See kernel/locking/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -- cgit v1.1