From cd64647f043e3fd3569bcf068f47f030198ff93a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 23 Sep 2013 16:43:58 +0800 Subject: hung_task: Change sysctl_hung_task_check_count to 'int' As 'sysctl_hung_task_check_count' is 'unsigned long' when this value is assigned to max_count in check_hung_uninterruptible_tasks(), it's truncated to 'int' type. This causes a minor artifact: if we write 2^32 to sysctl.hung_task_check_count, hung task detection will be effectively disabled. With this fix, it will still truncate the user input to 32 bits, but reading sysctl.hung_task_check_count reflects the actual truncated value. Signed-off-by: Li Zefan Acked-by: Ingo Molnar Link: http://lkml.kernel.org/r/523FFF4E.9050401@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 2 +- kernel/hung_task.c | 2 +- kernel/sysctl.c | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index bf8086b..9552afa 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -2,8 +2,8 @@ #define _SCHED_SYSCTL_H #ifdef CONFIG_DETECT_HUNG_TASK +extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; -extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 3e97fb1..0422523 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -20,7 +20,7 @@ /* * The number of tasks checked: */ -unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; +int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; /* * Limit number of tasks checked in a batch. diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b2f06f3..b24ed7f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -962,9 +962,10 @@ static struct ctl_table kern_table[] = { { .procname = "hung_task_check_count", .data = &sysctl_hung_task_check_count, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "hung_task_timeout_secs", -- cgit v1.1 From 2a929242ee50db84c1a561c81897bb0551f2c32f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 27 Sep 2013 16:34:42 +0200 Subject: lockdep, x86/alternatives: Drop ancient lockdep fixup message It messes up the output of the nodes/cores bootup table and it is obsolete anyway, see 17abecfe651c x86: fix up alternatives with lockdep enabled Signed-off-by: Borislav Petkov Cc: huawei.libin@huawei.com Cc: wangyijing@huawei.com Cc: fenghua.yu@intel.com Cc: guohanjun@huawei.com Cc: paul.gortmaker@windriver.com Link: http://lkml.kernel.org/r/20130927143442.GE4422@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 15e8563..df94598 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -402,17 +402,6 @@ void alternatives_enable_smp(void) { struct smp_alt_module *mod; -#ifdef CONFIG_LOCKDEP - /* - * Older binutils section handling bug prevented - * alternatives-replacement from working reliably. - * - * If this still occurs then you should see a hang - * or crash shortly after this line: - */ - pr_info("lockdep: fixing up alternatives\n"); -#endif - /* Why bother if there are no other CPUs? */ BUG_ON(num_possible_cpus() == 1); -- cgit v1.1 From 1232e3807f597748d437ab8680873af21fa81da9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 8 Oct 2013 20:37:16 -0700 Subject: lockstat: Report avg wait and hold times While both the nr and total times are showed, having the avg lock hold and wait times show in the report is quite useful when working on performance related issues. Furthermore, I find myself constantly doing the calculations manually. In addition, some of the documentation examples were changed to easily update them to show the two new columns. No textual change otherwise, as descriptions match the lockstat output. Signed-off-by: Davidlohr Bueso Cc: aswin@hp.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1380746928.2313.14.camel@buesod1.americas.hpqcorp.net [ Fixlets: changed a seq_printf() to seq_puts(), converted spaces to tabs. ] Signed-off-by: Ingo Molnar --- Documentation/lockstat.txt | 123 ++++++++++++++++++++++----------------------- kernel/lockdep_proc.c | 15 +++--- 2 files changed, 70 insertions(+), 68 deletions(-) diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index dd2f7b2..72d0106 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -46,16 +46,14 @@ With these hooks we provide the following statistics: contentions - number of lock acquisitions that had to wait wait time min - shortest (non-0) time we ever had to wait for a lock max - longest time we ever had to wait for a lock - total - total time we spend waiting on this lock + total - total time we spend waiting on this lock + avg - average time spent waiting on this lock acq-bounces - number of lock acquisitions that involved x-cpu data acquisitions - number of times we took the lock hold time min - shortest (non-0) time we ever held the lock - max - longest time we ever held the lock - total - total time this lock was held - -From these number various other statistics can be derived, such as: - - hold time average = hold time total / acquisitions + max - longest time we ever held the lock + total - total time this lock was held + avg - average time this lock was held These numbers are gathered per lock class, per read/write state (when applicable). @@ -84,37 +82,38 @@ Look at the current lock statistics: # less /proc/lock_stat -01 lock_stat version 0.3 -02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total -04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +01 lock_stat version 0.4 +02----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg +04----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 05 -06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34 -07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88 -08 --------------- -09 &mm->mmap_sem 487 [] do_page_fault+0x466/0x928 -10 &mm->mmap_sem 179 [] sys_mprotect+0xcd/0x21d -11 &mm->mmap_sem 279 [] sys_mmap+0x75/0xce -12 &mm->mmap_sem 76 [] sys_munmap+0x32/0x59 -13 --------------- -14 &mm->mmap_sem 270 [] sys_mmap+0x75/0xce -15 &mm->mmap_sem 431 [] do_page_fault+0x466/0x928 -16 &mm->mmap_sem 138 [] sys_munmap+0x32/0x59 -17 &mm->mmap_sem 145 [] sys_mprotect+0xcd/0x21d +06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99 +07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77 +08 --------------- +09 &mm->mmap_sem 1 [] khugepaged_scan_mm_slot+0x57/0x280 +19 &mm->mmap_sem 96 [] __do_page_fault+0x1d4/0x510 +11 &mm->mmap_sem 34 [] vm_mmap_pgoff+0x87/0xd0 +12 &mm->mmap_sem 17 [] vm_munmap+0x41/0x80 +13 --------------- +14 &mm->mmap_sem 1 [] dup_mmap+0x2a/0x3f0 +15 &mm->mmap_sem 60 [] SyS_mprotect+0xe9/0x250 +16 &mm->mmap_sem 41 [] __do_page_fault+0x1d4/0x510 +17 &mm->mmap_sem 68 [] vm_mmap_pgoff+0x87/0xd0 18 -19 ............................................................................................................................................................................................... +19............................................................................................................................................................................................................................. 20 -21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41 -22 ----------- -23 dcache_lock 179 [] _atomic_dec_and_lock+0x34/0x54 -24 dcache_lock 113 [] d_alloc+0x19a/0x1eb -25 dcache_lock 99 [] d_rehash+0x1b/0x44 -26 dcache_lock 104 [] d_instantiate+0x36/0x8a -27 ----------- -28 dcache_lock 192 [] _atomic_dec_and_lock+0x34/0x54 -29 dcache_lock 98 [] d_rehash+0x1b/0x44 -30 dcache_lock 72 [] d_alloc+0x19a/0x1eb -31 dcache_lock 112 [] d_instantiate+0x36/0x8a +21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48 +22 --------------- +23 unix_table_lock 45 [] unix_create1+0x16e/0x1b0 +24 unix_table_lock 47 [] unix_release_sock+0x31/0x250 +25 unix_table_lock 15 [] unix_find_other+0x117/0x230 +26 unix_table_lock 5 [] unix_autobind+0x11f/0x1b0 +27 --------------- +28 unix_table_lock 39 [] unix_release_sock+0x31/0x250 +29 unix_table_lock 49 [] unix_create1+0x16e/0x1b0 +30 unix_table_lock 20 [] unix_find_other+0x117/0x230 +31 unix_table_lock 4 [] unix_autobind+0x11f/0x1b0 + This excerpt shows the first two lock class statistics. Line 01 shows the output version - each time the format changes this will be updated. Line 02-04 @@ -131,30 +130,30 @@ The integer part of the time values is in us. Dealing with nested locks, subclasses may appear: -32............................................................................................................................................................................................... +32........................................................................................................................................................................................................................... 33 -34 &rq->lock: 13128 13128 0.43 190.53 103881.26 97454 3453404 0.00 401.11 13224683.11 +34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82 35 --------- -36 &rq->lock 645 [] task_rq_lock+0x43/0x75 -37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a -38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a -39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb +36 &rq->lock 645 [] task_rq_lock+0x43/0x75 +37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a +38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a +39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb 40 --------- -41 &rq->lock 77 [] task_rq_lock+0x43/0x75 -42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a -43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 -44 &rq->lock 893 [] schedule+0x157/0x7b8 +41 &rq->lock 77 [] task_rq_lock+0x43/0x75 +42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a +43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 +44 &rq->lock 893 [] schedule+0x157/0x7b8 45 -46............................................................................................................................................................................................... +46........................................................................................................................................................................................................................... 47 -48 &rq->lock/1: 11526 11488 0.33 388.73 136294.31 21461 38404 0.00 37.93 109388.53 +48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84 49 ----------- -50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 +50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 51 ----------- -52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 -53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 -54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 -55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a +52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 +53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 +54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 +55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a Line 48 shows statistics for the second subclass (/1) of &rq->lock class (subclass starts from 0), since in this case, as line 50 suggests, @@ -163,16 +162,16 @@ double_rq_lock actually acquires a nested lock of two spinlocks. View the top contending locks: # grep : /proc/lock_stat | head - &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 - &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 - dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 - &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 - &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 - &inode->i_data.i_mmap_mutex: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 - &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 - &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 - &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63 - tasklist_lock-W: 15 15 1.45 10.87 32.70 1201 7390 0.58 62.55 13648.47 + clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71 + tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59 + &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59 + &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69 + &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93 + tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72 + tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89 + rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89 + &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27 + rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76 Clear the statistics: diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index b2c71c5..0922065 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -421,6 +421,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt) seq_time(m, lt->min); seq_time(m, lt->max); seq_time(m, lt->total); + seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); } static void seq_stats(struct seq_file *m, struct lock_stat_data *data) @@ -518,20 +519,20 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) } if (i) { seq_puts(m, "\n"); - seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); + seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1)); seq_puts(m, "\n"); } } static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.3\n"); + seq_puts(m, "lock_stat version 0.4\n"); if (unlikely(!debug_locks)) seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n"); - seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); - seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " + seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); + seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", "class name", "con-bounces", @@ -539,12 +540,14 @@ static void seq_header(struct seq_file *m) "waittime-min", "waittime-max", "waittime-total", + "waittime-avg", "acq-bounces", "acquisitions", "holdtime-min", "holdtime-max", - "holdtime-total"); - seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); + "holdtime-total", + "holdtime-avg"); + seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); seq_printf(m, "\n"); } -- cgit v1.1 From 4c4e4f61368164f326fe59e2156c70d7faa72c17 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Mon, 21 Oct 2013 21:35:08 +0530 Subject: x86/locking/kconfig: Update paravirt spinlock Kconfig description Since the paravirt spinlock optimizations went into the v3.12 kernel we have a very good performance benefit for paravirtualized KVM / Xen kernels. Also we no longer suffer from 5% side effect on native kernel that is mentioned in the Kconfig entry. So update the Kconfig entry accordingly. pvspinlock benefit on KVM link: https://lkml.org/lkml/2013/8/6/178 Attilio's tests on native kernel impact: http://blog.xen.org/index.php/2012/05/11/benchmarking-the-new-pv-ticketlock-implementation/ Signed-off-by: Raghavendra K T Cc: Cc: Cc: Cc: Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1382371508-3843-1-git-send-email-raghavendra.kt@linux.vnet.ibm.com [ Updated the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee2fb9d..a92572d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -638,10 +638,10 @@ config PARAVIRT_SPINLOCKS spinlock implementation with something virtualization-friendly (for example, block the virtual CPU rather than spinning). - Unfortunately the downside is an up to 5% performance hit on - native kernels, with various workloads. + It has a minimal impact on native kernels and gives a nice performance + benefit on paravirtualized KVM / Xen kernels. - If you are unsure how to answer this question, answer N. + If you are unsure how to answer this question, answer Y. source "arch/x86/xen/Kconfig" -- cgit v1.1 From 6a716c90a51338009c3bc1f460829afaed8f922d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Oct 2013 18:18:28 +0200 Subject: hung_task debugging: Add tracepoint to report the hang Currently check_hung_task() prints a warning if it detects the problem, but it is not convenient to watch the system logs if user-space wants to be notified about the hang. Add the new trace_sched_process_hang() into check_hung_task(), this way a user-space monitor can easily wait for the hang and potentially resolve a problem. Signed-off-by: Oleg Nesterov Cc: Dave Sullivan Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20131019161828.GA7439@redhat.com Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 19 +++++++++++++++++++ kernel/hung_task.c | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 2e7d994..2a652d1 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -424,6 +424,25 @@ TRACE_EVENT(sched_pi_setprio, __entry->oldprio, __entry->newprio) ); +#ifdef CONFIG_DETECT_HUNG_TASK +TRACE_EVENT(sched_process_hang, + TP_PROTO(struct task_struct *tsk), + TP_ARGS(tsk), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + ), + + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) +); +#endif /* CONFIG_DETECT_HUNG_TASK */ + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 0422523..8807061 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * The number of tasks checked: @@ -92,6 +93,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) t->last_switch_count = switch_count; return; } + + trace_sched_process_hang(t); + if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; -- cgit v1.1 From 01768b42dc97a67b4fb33a2535c49fc1969880df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:11:53 +0100 Subject: locking: Move the mutex code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-1ditvncg30dgbpvrz2bxfmke@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 6 +- kernel/locking/Makefile | 9 + kernel/locking/mutex-debug.c | 110 +++++ kernel/locking/mutex-debug.h | 55 +++ kernel/locking/mutex.c | 960 +++++++++++++++++++++++++++++++++++++++++++ kernel/locking/mutex.h | 48 +++ kernel/mutex-debug.c | 110 ----- kernel/mutex-debug.h | 55 --- kernel/mutex.c | 960 ------------------------------------------- kernel/mutex.h | 48 --- 10 files changed, 1184 insertions(+), 1177 deletions(-) create mode 100644 kernel/locking/Makefile create mode 100644 kernel/locking/mutex-debug.c create mode 100644 kernel/locking/mutex-debug.h create mode 100644 kernel/locking/mutex.c create mode 100644 kernel/locking/mutex.h delete mode 100644 kernel/mutex-debug.c delete mode 100644 kernel/mutex-debug.h delete mode 100644 kernel/mutex.c delete mode 100644 kernel/mutex.h diff --git a/kernel/Makefile b/kernel/Makefile index a4d1aa8..330b146 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,7 @@ obj-y = fork.o exec_domain.o panic.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ extable.o params.o posix-timers.o \ - kthread.o sys_ni.o posix-cpu-timers.o mutex.o \ + kthread.o sys_ni.o posix-cpu-timers.o \ hrtimer.o rwsem.o nsproxy.o semaphore.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o @@ -16,13 +16,12 @@ ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files CFLAGS_REMOVE_lockdep.o = -pg CFLAGS_REMOVE_lockdep_proc.o = -pg -CFLAGS_REMOVE_mutex-debug.o = -pg -CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_irq_work.o = -pg endif obj-y += sched/ +obj-y += locking/ obj-y += power/ obj-y += printk/ obj-y += cpu/ @@ -34,7 +33,6 @@ obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ -obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile new file mode 100644 index 0000000..fe8bd58 --- /dev/null +++ b/kernel/locking/Makefile @@ -0,0 +1,9 @@ + +obj-y += mutex.o + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_mutex-debug.o = -pg +CFLAGS_REMOVE_rtmutex-debug.o = -pg +endif + +obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c new file mode 100644 index 0000000..7e3443f --- /dev/null +++ b/kernel/locking/mutex-debug.c @@ -0,0 +1,110 @@ +/* + * kernel/mutex-debug.c + * + * Debugging code for mutexes + * + * Started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * lock debugging, locking tree, deadlock detection started by: + * + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey + * Released under the General Public License (GPL). + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mutex-debug.h" + +/* + * Must be called with lock->wait_lock held. + */ +void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) +{ + memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); + waiter->magic = waiter; + INIT_LIST_HEAD(&waiter->list); +} + +void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) +{ + SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); + DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); +} + +void debug_mutex_free_waiter(struct mutex_waiter *waiter) +{ + DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list)); + memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); +} + +void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti) +{ + SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + + /* Mark the current thread as blocked on the lock: */ + ti->task->blocked_on = waiter; +} + +void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti) +{ + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); + DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); + DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter); + ti->task->blocked_on = NULL; + + list_del_init(&waiter->list); + waiter->task = NULL; +} + +void debug_mutex_unlock(struct mutex *lock) +{ + if (unlikely(!debug_locks)) + return; + + DEBUG_LOCKS_WARN_ON(lock->magic != lock); + DEBUG_LOCKS_WARN_ON(lock->owner != current); + DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); + mutex_clear_owner(lock); +} + +void debug_mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif + lock->magic = lock; +} + +/*** + * mutex_destroy - mark a mutex unusable + * @lock: the mutex to be destroyed + * + * This function marks the mutex uninitialized, and any subsequent + * use of the mutex is forbidden. The mutex must not be locked when + * this function is called. + */ +void mutex_destroy(struct mutex *lock) +{ + DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); + lock->magic = NULL; +} + +EXPORT_SYMBOL_GPL(mutex_destroy); diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h new file mode 100644 index 0000000..0799fd3 --- /dev/null +++ b/kernel/locking/mutex-debug.h @@ -0,0 +1,55 @@ +/* + * Mutexes: blocking mutual exclusion locks + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * This file contains mutex debugging related internal declarations, + * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case. + * More details are in kernel/mutex-debug.c. + */ + +/* + * This must be called with lock->wait_lock held. + */ +extern void debug_mutex_lock_common(struct mutex *lock, + struct mutex_waiter *waiter); +extern void debug_mutex_wake_waiter(struct mutex *lock, + struct mutex_waiter *waiter); +extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); +extern void debug_mutex_add_waiter(struct mutex *lock, + struct mutex_waiter *waiter, + struct thread_info *ti); +extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti); +extern void debug_mutex_unlock(struct mutex *lock); +extern void debug_mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key); + +static inline void mutex_set_owner(struct mutex *lock) +{ + lock->owner = current; +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ + lock->owner = NULL; +} + +#define spin_lock_mutex(lock, flags) \ + do { \ + struct mutex *l = container_of(lock, struct mutex, wait_lock); \ + \ + DEBUG_LOCKS_WARN_ON(in_interrupt()); \ + local_irq_save(flags); \ + arch_spin_lock(&(lock)->rlock.raw_lock);\ + DEBUG_LOCKS_WARN_ON(l->magic != l); \ + } while (0) + +#define spin_unlock_mutex(lock, flags) \ + do { \ + arch_spin_unlock(&(lock)->rlock.raw_lock); \ + local_irq_restore(flags); \ + preempt_check_resched(); \ + } while (0) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c new file mode 100644 index 0000000..d24105b --- /dev/null +++ b/kernel/locking/mutex.c @@ -0,0 +1,960 @@ +/* + * kernel/mutex.c + * + * Mutexes: blocking mutual exclusion locks + * + * Started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and + * David Howells for suggestions and improvements. + * + * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline + * from the -rt tree, where it was originally implemented for rtmutexes + * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale + * and Sven Dietrich. + * + * Also see Documentation/mutex-design.txt. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * In the DEBUG case we are using the "NULL fastpath" for mutexes, + * which forces all calls into the slowpath: + */ +#ifdef CONFIG_DEBUG_MUTEXES +# include "mutex-debug.h" +# include +#else +# include "mutex.h" +# include +#endif + +/* + * A negative mutex count indicates that waiters are sleeping waiting for the + * mutex. + */ +#define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0) + +void +__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) +{ + atomic_set(&lock->count, 1); + spin_lock_init(&lock->wait_lock); + INIT_LIST_HEAD(&lock->wait_list); + mutex_clear_owner(lock); +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + lock->spin_mlock = NULL; +#endif + + debug_mutex_init(lock, name, key); +} + +EXPORT_SYMBOL(__mutex_init); + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +/* + * We split the mutex lock/unlock logic into separate fastpath and + * slowpath functions, to reduce the register pressure on the fastpath. + * We also put the fastpath first in the kernel image, to make sure the + * branch is predicted by the CPU as default-untaken. + */ +static __used noinline void __sched +__mutex_lock_slowpath(atomic_t *lock_count); + +/** + * mutex_lock - acquire the mutex + * @lock: the mutex to be acquired + * + * Lock the mutex exclusively for this task. If the mutex is not + * available right now, it will sleep until it can get it. + * + * The mutex must later on be released by the same task that + * acquired it. Recursive locking is not allowed. The task + * may not exit without first unlocking the mutex. Also, kernel + * memory where the mutex resides mutex must not be freed with + * the mutex still locked. The mutex must first be initialized + * (or statically defined) before it can be locked. memset()-ing + * the mutex to 0 is not allowed. + * + * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging + * checks that will enforce the restrictions and will also do + * deadlock debugging. ) + * + * This function is similar to (but not equivalent to) down(). + */ +void __sched mutex_lock(struct mutex *lock) +{ + might_sleep(); + /* + * The locking fastpath is the 1->0 transition from + * 'unlocked' into 'locked' state. + */ + __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); + mutex_set_owner(lock); +} + +EXPORT_SYMBOL(mutex_lock); +#endif + +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER +/* + * In order to avoid a stampede of mutex spinners from acquiring the mutex + * more or less simultaneously, the spinners need to acquire a MCS lock + * first before spinning on the owner field. + * + * We don't inline mspin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ +struct mspin_node { + struct mspin_node *next ; + int locked; /* 1 if lock acquired */ +}; +#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) + +static noinline +void mspin_lock(struct mspin_node **lock, struct mspin_node *node) +{ + struct mspin_node *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + smp_wmb(); + /* Wait until the lock holder passes the lock down */ + while (!ACCESS_ONCE(node->locked)) + arch_mutex_cpu_relax(); +} + +static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) +{ + struct mspin_node *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + ACCESS_ONCE(next->locked) = 1; + smp_wmb(); +} + +/* + * Mutex spinning code migrated from kernel/sched/core.c + */ + +static inline bool owner_running(struct mutex *lock, struct task_struct *owner) +{ + if (lock->owner != owner) + return false; + + /* + * Ensure we emit the owner->on_cpu, dereference _after_ checking + * lock->owner still matches owner, if that fails, owner might + * point to free()d memory, if it still matches, the rcu_read_lock() + * ensures the memory stays valid. + */ + barrier(); + + return owner->on_cpu; +} + +/* + * Look out! "owner" is an entirely speculative pointer + * access and not reliable. + */ +static noinline +int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +{ + rcu_read_lock(); + while (owner_running(lock, owner)) { + if (need_resched()) + break; + + arch_mutex_cpu_relax(); + } + rcu_read_unlock(); + + /* + * We break out the loop above on need_resched() and when the + * owner changed, which is a sign for heavy contention. Return + * success only when lock->owner is NULL. + */ + return lock->owner == NULL; +} + +/* + * Initial check for entering the mutex spinning loop + */ +static inline int mutex_can_spin_on_owner(struct mutex *lock) +{ + struct task_struct *owner; + int retval = 1; + + rcu_read_lock(); + owner = ACCESS_ONCE(lock->owner); + if (owner) + retval = owner->on_cpu; + rcu_read_unlock(); + /* + * if lock->owner is not set, the mutex owner may have just acquired + * it and not set the owner yet or the mutex has been released. + */ + return retval; +} +#endif + +static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); + +/** + * mutex_unlock - release the mutex + * @lock: the mutex to be released + * + * Unlock a mutex that has been locked by this task previously. + * + * This function must not be used in interrupt context. Unlocking + * of a not locked mutex is not allowed. + * + * This function is similar to (but not equivalent to) up(). + */ +void __sched mutex_unlock(struct mutex *lock) +{ + /* + * The unlocking fastpath is the 0->1 transition from 'locked' + * into 'unlocked' state: + */ +#ifndef CONFIG_DEBUG_MUTEXES + /* + * When debugging is enabled we must not clear the owner before time, + * the slow path will always be taken, and that clears the owner field + * after verifying that it was indeed current. + */ + mutex_clear_owner(lock); +#endif + __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); +} + +EXPORT_SYMBOL(mutex_unlock); + +/** + * ww_mutex_unlock - release the w/w mutex + * @lock: the mutex to be released + * + * Unlock a mutex that has been locked by this task previously with any of the + * ww_mutex_lock* functions (with or without an acquire context). It is + * forbidden to release the locks after releasing the acquire context. + * + * This function must not be used in interrupt context. Unlocking + * of a unlocked mutex is not allowed. + */ +void __sched ww_mutex_unlock(struct ww_mutex *lock) +{ + /* + * The unlocking fastpath is the 0->1 transition from 'locked' + * into 'unlocked' state: + */ + if (lock->ctx) { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); +#endif + if (lock->ctx->acquired > 0) + lock->ctx->acquired--; + lock->ctx = NULL; + } + +#ifndef CONFIG_DEBUG_MUTEXES + /* + * When debugging is enabled we must not clear the owner before time, + * the slow path will always be taken, and that clears the owner field + * after verifying that it was indeed current. + */ + mutex_clear_owner(&lock->base); +#endif + __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); +} +EXPORT_SYMBOL(ww_mutex_unlock); + +static inline int __sched +__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) +{ + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); + + if (!hold_ctx) + return 0; + + if (unlikely(ctx == hold_ctx)) + return -EALREADY; + + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); + ctx->contending_lock = ww; +#endif + return -EDEADLK; + } + + return 0; +} + +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + /* + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, + * but released with a normal mutex_unlock in this call. + * + * This should never happen, always use ww_mutex_unlock. + */ + DEBUG_LOCKS_WARN_ON(ww->ctx); + + /* + * Not quite done after calling ww_acquire_done() ? + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); + + if (ww_ctx->contending_lock) { + /* + * After -EDEADLK you tried to + * acquire a different ww_mutex? Bad! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); + + /* + * You called ww_mutex_lock after receiving -EDEADLK, + * but 'forgot' to unlock everything else first? + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); + ww_ctx->contending_lock = NULL; + } + + /* + * Naughty, using a different class will lead to undefined behavior! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); +#endif + ww_ctx->acquired++; +} + +/* + * after acquiring lock with fastpath or when we lost out in contested + * slowpath, set ctx and wake up any waiters so they can recheck. + * + * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, + * as the fastpath and opportunistic spinning are disabled in that case. + */ +static __always_inline void +ww_mutex_set_context_fastpath(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx) +{ + unsigned long flags; + struct mutex_waiter *cur; + + ww_mutex_lock_acquired(lock, ctx); + + lock->ctx = ctx; + + /* + * The lock->ctx update should be visible on all cores before + * the atomic read is done, otherwise contended waiters might be + * missed. The contended waiters will either see ww_ctx == NULL + * and keep spinning, or it will acquire wait_lock, add itself + * to waiter list and sleep. + */ + smp_mb(); /* ^^^ */ + + /* + * Check if lock is contended, if not there is nobody to wake up + */ + if (likely(atomic_read(&lock->base.count) == 0)) + return; + + /* + * Uh oh, we raced in fastpath, wake up everyone in this case, + * so they can see the new lock->ctx. + */ + spin_lock_mutex(&lock->base.wait_lock, flags); + list_for_each_entry(cur, &lock->base.wait_list, list) { + debug_mutex_wake_waiter(&lock->base, cur); + wake_up_process(cur->task); + } + spin_unlock_mutex(&lock->base.wait_lock, flags); +} + +/* + * Lock a mutex (possibly interruptible), slowpath: + */ +static __always_inline int __sched +__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip, + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) +{ + struct task_struct *task = current; + struct mutex_waiter waiter; + unsigned long flags; + int ret; + + preempt_disable(); + mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); + +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + /* + * Optimistic spinning. + * + * We try to spin for acquisition when we find that there are no + * pending waiters and the lock owner is currently running on a + * (different) CPU. + * + * The rationale is that if the lock owner is running, it is likely to + * release the lock soon. + * + * Since this needs the lock owner, and this mutex implementation + * doesn't track the owner atomically in the lock field, we need to + * track it non-atomically. + * + * We can't do this for DEBUG_MUTEXES because that relies on wait_lock + * to serialize everything. + * + * The mutex spinners are queued up using MCS lock so that only one + * spinner can compete for the mutex. However, if mutex spinning isn't + * going to happen, there is no point in going through the lock/unlock + * overhead. + */ + if (!mutex_can_spin_on_owner(lock)) + goto slowpath; + + for (;;) { + struct task_struct *owner; + struct mspin_node node; + + if (use_ww_ctx && ww_ctx->acquired > 0) { + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base); + /* + * If ww->ctx is set the contents are undefined, only + * by acquiring wait_lock there is a guarantee that + * they are not invalid when reading. + * + * As such, when deadlock detection needs to be + * performed the optimistic spinning cannot be done. + */ + if (ACCESS_ONCE(ww->ctx)) + goto slowpath; + } + + /* + * If there's an owner, wait for it to either + * release the lock or go to sleep. + */ + mspin_lock(MLOCK(lock), &node); + owner = ACCESS_ONCE(lock->owner); + if (owner && !mutex_spin_on_owner(lock, owner)) { + mspin_unlock(MLOCK(lock), &node); + goto slowpath; + } + + if ((atomic_read(&lock->count) == 1) && + (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { + lock_acquired(&lock->dep_map, ip); + if (use_ww_ctx) { + struct ww_mutex *ww; + ww = container_of(lock, struct ww_mutex, base); + + ww_mutex_set_context_fastpath(ww, ww_ctx); + } + + mutex_set_owner(lock); + mspin_unlock(MLOCK(lock), &node); + preempt_enable(); + return 0; + } + mspin_unlock(MLOCK(lock), &node); + + /* + * When there's no owner, we might have preempted between the + * owner acquiring the lock and setting the owner field. If + * we're an RT task that will live-lock because we won't let + * the owner complete. + */ + if (!owner && (need_resched() || rt_task(task))) + goto slowpath; + + /* + * The cpu_relax() call is a compiler barrier which forces + * everything in this loop to be re-loaded. We don't need + * memory barriers as we'll eventually observe the right + * values at the cost of a few extra spins. + */ + arch_mutex_cpu_relax(); + } +slowpath: +#endif + spin_lock_mutex(&lock->wait_lock, flags); + + /* once more, can we acquire the lock? */ + if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1)) + goto skip_wait; + + debug_mutex_lock_common(lock, &waiter); + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + waiter.task = task; + + lock_contended(&lock->dep_map, ip); + + for (;;) { + /* + * Lets try to take the lock again - this is needed even if + * we get here for the first time (shortly after failing to + * acquire the lock), to make sure that we get a wakeup once + * it's unlocked. Later on, if we sleep, this is the + * operation that gives us the lock. We xchg it to -1, so + * that when we release the lock, we properly wake up the + * other waiters: + */ + if (MUTEX_SHOW_NO_WAITER(lock) && + (atomic_xchg(&lock->count, -1) == 1)) + break; + + /* + * got a signal? (This code gets eliminated in the + * TASK_UNINTERRUPTIBLE case.) + */ + if (unlikely(signal_pending_state(state, task))) { + ret = -EINTR; + goto err; + } + + if (use_ww_ctx && ww_ctx->acquired > 0) { + ret = __mutex_lock_check_stamp(lock, ww_ctx); + if (ret) + goto err; + } + + __set_task_state(task, state); + + /* didn't get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock, flags); + schedule_preempt_disabled(); + spin_lock_mutex(&lock->wait_lock, flags); + } + mutex_remove_waiter(lock, &waiter, current_thread_info()); + /* set it to 0 if there are no waiters left: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + debug_mutex_free_waiter(&waiter); + +skip_wait: + /* got the lock - cleanup and rejoice! */ + lock_acquired(&lock->dep_map, ip); + mutex_set_owner(lock); + + if (use_ww_ctx) { + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + struct mutex_waiter *cur; + + /* + * This branch gets optimized out for the common case, + * and is only important for ww_mutex_lock. + */ + ww_mutex_lock_acquired(ww, ww_ctx); + ww->ctx = ww_ctx; + + /* + * Give any possible sleeping processes the chance to wake up, + * so they can recheck if they have to back off. + */ + list_for_each_entry(cur, &lock->wait_list, list) { + debug_mutex_wake_waiter(lock, cur); + wake_up_process(cur->task); + } + } + + spin_unlock_mutex(&lock->wait_lock, flags); + preempt_enable(); + return 0; + +err: + mutex_remove_waiter(lock, &waiter, task_thread_info(task)); + spin_unlock_mutex(&lock->wait_lock, flags); + debug_mutex_free_waiter(&waiter); + mutex_release(&lock->dep_map, 1, ip); + preempt_enable(); + return ret; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __sched +mutex_lock_nested(struct mutex *lock, unsigned int subclass) +{ + might_sleep(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, + subclass, NULL, _RET_IP_, NULL, 0); +} + +EXPORT_SYMBOL_GPL(mutex_lock_nested); + +void __sched +_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) +{ + might_sleep(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, + 0, nest, _RET_IP_, NULL, 0); +} + +EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); + +int __sched +mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) +{ + might_sleep(); + return __mutex_lock_common(lock, TASK_KILLABLE, + subclass, NULL, _RET_IP_, NULL, 0); +} +EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); + +int __sched +mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) +{ + might_sleep(); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, + subclass, NULL, _RET_IP_, NULL, 0); +} + +EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); + +static inline int +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + unsigned tmp; + + if (ctx->deadlock_inject_countdown-- == 0) { + tmp = ctx->deadlock_inject_interval; + if (tmp > UINT_MAX/4) + tmp = UINT_MAX; + else + tmp = tmp*2 + tmp + tmp/2; + + ctx->deadlock_inject_interval = tmp; + ctx->deadlock_inject_countdown = tmp; + ctx->contending_lock = lock; + + ww_mutex_unlock(lock); + + return -EDEADLK; + } +#endif + + return 0; +} + +int __sched +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, + 0, &ctx->dep_map, _RET_IP_, ctx, 1); + if (!ret && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(__ww_mutex_lock); + +int __sched +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, + 0, &ctx->dep_map, _RET_IP_, ctx, 1); + + if (!ret && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); + +#endif + +/* + * Release the lock, slowpath: + */ +static inline void +__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + unsigned long flags; + + spin_lock_mutex(&lock->wait_lock, flags); + mutex_release(&lock->dep_map, nested, _RET_IP_); + debug_mutex_unlock(lock); + + /* + * some architectures leave the lock unlocked in the fastpath failure + * case, others need to leave it locked. In the later case we have to + * unlock it here + */ + if (__mutex_slowpath_needs_to_unlock()) + atomic_set(&lock->count, 1); + + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter = + list_entry(lock->wait_list.next, + struct mutex_waiter, list); + + debug_mutex_wake_waiter(lock, waiter); + + wake_up_process(waiter->task); + } + + spin_unlock_mutex(&lock->wait_lock, flags); +} + +/* + * Release the lock, slowpath: + */ +static __used noinline void +__mutex_unlock_slowpath(atomic_t *lock_count) +{ + __mutex_unlock_common_slowpath(lock_count, 1); +} + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +/* + * Here come the less common (and hence less performance-critical) APIs: + * mutex_lock_interruptible() and mutex_trylock(). + */ +static noinline int __sched +__mutex_lock_killable_slowpath(struct mutex *lock); + +static noinline int __sched +__mutex_lock_interruptible_slowpath(struct mutex *lock); + +/** + * mutex_lock_interruptible - acquire the mutex, interruptible + * @lock: the mutex to be acquired + * + * Lock the mutex like mutex_lock(), and return 0 if the mutex has + * been acquired or sleep until the mutex becomes available. If a + * signal arrives while waiting for the lock then this function + * returns -EINTR. + * + * This function is similar to (but not equivalent to) down_interruptible(). + */ +int __sched mutex_lock_interruptible(struct mutex *lock) +{ + int ret; + + might_sleep(); + ret = __mutex_fastpath_lock_retval(&lock->count); + if (likely(!ret)) { + mutex_set_owner(lock); + return 0; + } else + return __mutex_lock_interruptible_slowpath(lock); +} + +EXPORT_SYMBOL(mutex_lock_interruptible); + +int __sched mutex_lock_killable(struct mutex *lock) +{ + int ret; + + might_sleep(); + ret = __mutex_fastpath_lock_retval(&lock->count); + if (likely(!ret)) { + mutex_set_owner(lock); + return 0; + } else + return __mutex_lock_killable_slowpath(lock); +} +EXPORT_SYMBOL(mutex_lock_killable); + +static __used noinline void __sched +__mutex_lock_slowpath(atomic_t *lock_count) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, + NULL, _RET_IP_, NULL, 0); +} + +static noinline int __sched +__mutex_lock_killable_slowpath(struct mutex *lock) +{ + return __mutex_lock_common(lock, TASK_KILLABLE, 0, + NULL, _RET_IP_, NULL, 0); +} + +static noinline int __sched +__mutex_lock_interruptible_slowpath(struct mutex *lock) +{ + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, + NULL, _RET_IP_, NULL, 0); +} + +static noinline int __sched +__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, + NULL, _RET_IP_, ctx, 1); +} + +static noinline int __sched +__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx) +{ + return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, + NULL, _RET_IP_, ctx, 1); +} + +#endif + +/* + * Spinlock based trylock, we take the spinlock and check whether we + * can get the lock: + */ +static inline int __mutex_trylock_slowpath(atomic_t *lock_count) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + unsigned long flags; + int prev; + + spin_lock_mutex(&lock->wait_lock, flags); + + prev = atomic_xchg(&lock->count, -1); + if (likely(prev == 1)) { + mutex_set_owner(lock); + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + } + + /* Set it back to 0 if there are no waiters: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + + spin_unlock_mutex(&lock->wait_lock, flags); + + return prev == 1; +} + +/** + * mutex_trylock - try to acquire the mutex, without waiting + * @lock: the mutex to be acquired + * + * Try to acquire the mutex atomically. Returns 1 if the mutex + * has been acquired successfully, and 0 on contention. + * + * NOTE: this function follows the spin_trylock() convention, so + * it is negated from the down_trylock() return values! Be careful + * about this when converting semaphore users to mutexes. + * + * This function must not be used in interrupt context. The + * mutex must be released by the same task that acquired it. + */ +int __sched mutex_trylock(struct mutex *lock) +{ + int ret; + + ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); + if (ret) + mutex_set_owner(lock); + + return ret; +} +EXPORT_SYMBOL(mutex_trylock); + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +int __sched +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + + ret = __mutex_fastpath_lock_retval(&lock->base.count); + + if (likely(!ret)) { + ww_mutex_set_context_fastpath(lock, ctx); + mutex_set_owner(&lock->base); + } else + ret = __ww_mutex_lock_slowpath(lock, ctx); + return ret; +} +EXPORT_SYMBOL(__ww_mutex_lock); + +int __sched +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + + ret = __mutex_fastpath_lock_retval(&lock->base.count); + + if (likely(!ret)) { + ww_mutex_set_context_fastpath(lock, ctx); + mutex_set_owner(&lock->base); + } else + ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx); + return ret; +} +EXPORT_SYMBOL(__ww_mutex_lock_interruptible); + +#endif + +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h new file mode 100644 index 0000000..4115fbf --- /dev/null +++ b/kernel/locking/mutex.h @@ -0,0 +1,48 @@ +/* + * Mutexes: blocking mutual exclusion locks + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * This file contains mutex debugging related internal prototypes, for the + * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: + */ + +#define spin_lock_mutex(lock, flags) \ + do { spin_lock(lock); (void)(flags); } while (0) +#define spin_unlock_mutex(lock, flags) \ + do { spin_unlock(lock); (void)(flags); } while (0) +#define mutex_remove_waiter(lock, waiter, ti) \ + __list_del((waiter)->list.prev, (waiter)->list.next) + +#ifdef CONFIG_SMP +static inline void mutex_set_owner(struct mutex *lock) +{ + lock->owner = current; +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ + lock->owner = NULL; +} +#else +static inline void mutex_set_owner(struct mutex *lock) +{ +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ +} +#endif + +#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) +#define debug_mutex_free_waiter(waiter) do { } while (0) +#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) +#define debug_mutex_unlock(lock) do { } while (0) +#define debug_mutex_init(lock, name, key) do { } while (0) + +static inline void +debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) +{ +} diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c deleted file mode 100644 index 7e3443f..0000000 --- a/kernel/mutex-debug.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * kernel/mutex-debug.c - * - * Debugging code for mutexes - * - * Started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - * - * lock debugging, locking tree, deadlock detection started by: - * - * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey - * Released under the General Public License (GPL). - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mutex-debug.h" - -/* - * Must be called with lock->wait_lock held. - */ -void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) -{ - memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); - waiter->magic = waiter; - INIT_LIST_HEAD(&waiter->list); -} - -void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) -{ - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); - DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); - DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); - DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); -} - -void debug_mutex_free_waiter(struct mutex_waiter *waiter) -{ - DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list)); - memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); -} - -void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti) -{ - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); - - /* Mark the current thread as blocked on the lock: */ - ti->task->blocked_on = waiter; -} - -void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti) -{ - DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); - DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); - DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter); - ti->task->blocked_on = NULL; - - list_del_init(&waiter->list); - waiter->task = NULL; -} - -void debug_mutex_unlock(struct mutex *lock) -{ - if (unlikely(!debug_locks)) - return; - - DEBUG_LOCKS_WARN_ON(lock->magic != lock); - DEBUG_LOCKS_WARN_ON(lock->owner != current); - DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); -} - -void debug_mutex_init(struct mutex *lock, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key, 0); -#endif - lock->magic = lock; -} - -/*** - * mutex_destroy - mark a mutex unusable - * @lock: the mutex to be destroyed - * - * This function marks the mutex uninitialized, and any subsequent - * use of the mutex is forbidden. The mutex must not be locked when - * this function is called. - */ -void mutex_destroy(struct mutex *lock) -{ - DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); - lock->magic = NULL; -} - -EXPORT_SYMBOL_GPL(mutex_destroy); diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h deleted file mode 100644 index 0799fd3..0000000 --- a/kernel/mutex-debug.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Mutexes: blocking mutual exclusion locks - * - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - * - * This file contains mutex debugging related internal declarations, - * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case. - * More details are in kernel/mutex-debug.c. - */ - -/* - * This must be called with lock->wait_lock held. - */ -extern void debug_mutex_lock_common(struct mutex *lock, - struct mutex_waiter *waiter); -extern void debug_mutex_wake_waiter(struct mutex *lock, - struct mutex_waiter *waiter); -extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); -extern void debug_mutex_add_waiter(struct mutex *lock, - struct mutex_waiter *waiter, - struct thread_info *ti); -extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti); -extern void debug_mutex_unlock(struct mutex *lock); -extern void debug_mutex_init(struct mutex *lock, const char *name, - struct lock_class_key *key); - -static inline void mutex_set_owner(struct mutex *lock) -{ - lock->owner = current; -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ - lock->owner = NULL; -} - -#define spin_lock_mutex(lock, flags) \ - do { \ - struct mutex *l = container_of(lock, struct mutex, wait_lock); \ - \ - DEBUG_LOCKS_WARN_ON(in_interrupt()); \ - local_irq_save(flags); \ - arch_spin_lock(&(lock)->rlock.raw_lock);\ - DEBUG_LOCKS_WARN_ON(l->magic != l); \ - } while (0) - -#define spin_unlock_mutex(lock, flags) \ - do { \ - arch_spin_unlock(&(lock)->rlock.raw_lock); \ - local_irq_restore(flags); \ - preempt_check_resched(); \ - } while (0) diff --git a/kernel/mutex.c b/kernel/mutex.c deleted file mode 100644 index d24105b..0000000 --- a/kernel/mutex.c +++ /dev/null @@ -1,960 +0,0 @@ -/* - * kernel/mutex.c - * - * Mutexes: blocking mutual exclusion locks - * - * Started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - * - * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and - * David Howells for suggestions and improvements. - * - * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline - * from the -rt tree, where it was originally implemented for rtmutexes - * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale - * and Sven Dietrich. - * - * Also see Documentation/mutex-design.txt. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * In the DEBUG case we are using the "NULL fastpath" for mutexes, - * which forces all calls into the slowpath: - */ -#ifdef CONFIG_DEBUG_MUTEXES -# include "mutex-debug.h" -# include -#else -# include "mutex.h" -# include -#endif - -/* - * A negative mutex count indicates that waiters are sleeping waiting for the - * mutex. - */ -#define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0) - -void -__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) -{ - atomic_set(&lock->count, 1); - spin_lock_init(&lock->wait_lock); - INIT_LIST_HEAD(&lock->wait_list); - mutex_clear_owner(lock); -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->spin_mlock = NULL; -#endif - - debug_mutex_init(lock, name, key); -} - -EXPORT_SYMBOL(__mutex_init); - -#ifndef CONFIG_DEBUG_LOCK_ALLOC -/* - * We split the mutex lock/unlock logic into separate fastpath and - * slowpath functions, to reduce the register pressure on the fastpath. - * We also put the fastpath first in the kernel image, to make sure the - * branch is predicted by the CPU as default-untaken. - */ -static __used noinline void __sched -__mutex_lock_slowpath(atomic_t *lock_count); - -/** - * mutex_lock - acquire the mutex - * @lock: the mutex to be acquired - * - * Lock the mutex exclusively for this task. If the mutex is not - * available right now, it will sleep until it can get it. - * - * The mutex must later on be released by the same task that - * acquired it. Recursive locking is not allowed. The task - * may not exit without first unlocking the mutex. Also, kernel - * memory where the mutex resides mutex must not be freed with - * the mutex still locked. The mutex must first be initialized - * (or statically defined) before it can be locked. memset()-ing - * the mutex to 0 is not allowed. - * - * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging - * checks that will enforce the restrictions and will also do - * deadlock debugging. ) - * - * This function is similar to (but not equivalent to) down(). - */ -void __sched mutex_lock(struct mutex *lock) -{ - might_sleep(); - /* - * The locking fastpath is the 1->0 transition from - * 'unlocked' into 'locked' state. - */ - __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); - mutex_set_owner(lock); -} - -EXPORT_SYMBOL(mutex_lock); -#endif - -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER -/* - * In order to avoid a stampede of mutex spinners from acquiring the mutex - * more or less simultaneously, the spinners need to acquire a MCS lock - * first before spinning on the owner field. - * - * We don't inline mspin_lock() so that perf can correctly account for the - * time spent in this lock function. - */ -struct mspin_node { - struct mspin_node *next ; - int locked; /* 1 if lock acquired */ -}; -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) - -static noinline -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* Lock acquired */ - node->locked = 1; - return; - } - ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) - arch_mutex_cpu_relax(); -} - -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (cmpxchg(lock, node, NULL) == node) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); -} - -/* - * Mutex spinning code migrated from kernel/sched/core.c - */ - -static inline bool owner_running(struct mutex *lock, struct task_struct *owner) -{ - if (lock->owner != owner) - return false; - - /* - * Ensure we emit the owner->on_cpu, dereference _after_ checking - * lock->owner still matches owner, if that fails, owner might - * point to free()d memory, if it still matches, the rcu_read_lock() - * ensures the memory stays valid. - */ - barrier(); - - return owner->on_cpu; -} - -/* - * Look out! "owner" is an entirely speculative pointer - * access and not reliable. - */ -static noinline -int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) -{ - rcu_read_lock(); - while (owner_running(lock, owner)) { - if (need_resched()) - break; - - arch_mutex_cpu_relax(); - } - rcu_read_unlock(); - - /* - * We break out the loop above on need_resched() and when the - * owner changed, which is a sign for heavy contention. Return - * success only when lock->owner is NULL. - */ - return lock->owner == NULL; -} - -/* - * Initial check for entering the mutex spinning loop - */ -static inline int mutex_can_spin_on_owner(struct mutex *lock) -{ - struct task_struct *owner; - int retval = 1; - - rcu_read_lock(); - owner = ACCESS_ONCE(lock->owner); - if (owner) - retval = owner->on_cpu; - rcu_read_unlock(); - /* - * if lock->owner is not set, the mutex owner may have just acquired - * it and not set the owner yet or the mutex has been released. - */ - return retval; -} -#endif - -static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); - -/** - * mutex_unlock - release the mutex - * @lock: the mutex to be released - * - * Unlock a mutex that has been locked by this task previously. - * - * This function must not be used in interrupt context. Unlocking - * of a not locked mutex is not allowed. - * - * This function is similar to (but not equivalent to) up(). - */ -void __sched mutex_unlock(struct mutex *lock) -{ - /* - * The unlocking fastpath is the 0->1 transition from 'locked' - * into 'unlocked' state: - */ -#ifndef CONFIG_DEBUG_MUTEXES - /* - * When debugging is enabled we must not clear the owner before time, - * the slow path will always be taken, and that clears the owner field - * after verifying that it was indeed current. - */ - mutex_clear_owner(lock); -#endif - __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); -} - -EXPORT_SYMBOL(mutex_unlock); - -/** - * ww_mutex_unlock - release the w/w mutex - * @lock: the mutex to be released - * - * Unlock a mutex that has been locked by this task previously with any of the - * ww_mutex_lock* functions (with or without an acquire context). It is - * forbidden to release the locks after releasing the acquire context. - * - * This function must not be used in interrupt context. Unlocking - * of a unlocked mutex is not allowed. - */ -void __sched ww_mutex_unlock(struct ww_mutex *lock) -{ - /* - * The unlocking fastpath is the 0->1 transition from 'locked' - * into 'unlocked' state: - */ - if (lock->ctx) { -#ifdef CONFIG_DEBUG_MUTEXES - DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); -#endif - if (lock->ctx->acquired > 0) - lock->ctx->acquired--; - lock->ctx = NULL; - } - -#ifndef CONFIG_DEBUG_MUTEXES - /* - * When debugging is enabled we must not clear the owner before time, - * the slow path will always be taken, and that clears the owner field - * after verifying that it was indeed current. - */ - mutex_clear_owner(&lock->base); -#endif - __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); -} -EXPORT_SYMBOL(ww_mutex_unlock); - -static inline int __sched -__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) -{ - struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); - struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); - - if (!hold_ctx) - return 0; - - if (unlikely(ctx == hold_ctx)) - return -EALREADY; - - if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && - (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { -#ifdef CONFIG_DEBUG_MUTEXES - DEBUG_LOCKS_WARN_ON(ctx->contending_lock); - ctx->contending_lock = ww; -#endif - return -EDEADLK; - } - - return 0; -} - -static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, - struct ww_acquire_ctx *ww_ctx) -{ -#ifdef CONFIG_DEBUG_MUTEXES - /* - * If this WARN_ON triggers, you used ww_mutex_lock to acquire, - * but released with a normal mutex_unlock in this call. - * - * This should never happen, always use ww_mutex_unlock. - */ - DEBUG_LOCKS_WARN_ON(ww->ctx); - - /* - * Not quite done after calling ww_acquire_done() ? - */ - DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); - - if (ww_ctx->contending_lock) { - /* - * After -EDEADLK you tried to - * acquire a different ww_mutex? Bad! - */ - DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); - - /* - * You called ww_mutex_lock after receiving -EDEADLK, - * but 'forgot' to unlock everything else first? - */ - DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); - ww_ctx->contending_lock = NULL; - } - - /* - * Naughty, using a different class will lead to undefined behavior! - */ - DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); -#endif - ww_ctx->acquired++; -} - -/* - * after acquiring lock with fastpath or when we lost out in contested - * slowpath, set ctx and wake up any waiters so they can recheck. - * - * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, - * as the fastpath and opportunistic spinning are disabled in that case. - */ -static __always_inline void -ww_mutex_set_context_fastpath(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ - unsigned long flags; - struct mutex_waiter *cur; - - ww_mutex_lock_acquired(lock, ctx); - - lock->ctx = ctx; - - /* - * The lock->ctx update should be visible on all cores before - * the atomic read is done, otherwise contended waiters might be - * missed. The contended waiters will either see ww_ctx == NULL - * and keep spinning, or it will acquire wait_lock, add itself - * to waiter list and sleep. - */ - smp_mb(); /* ^^^ */ - - /* - * Check if lock is contended, if not there is nobody to wake up - */ - if (likely(atomic_read(&lock->base.count) == 0)) - return; - - /* - * Uh oh, we raced in fastpath, wake up everyone in this case, - * so they can see the new lock->ctx. - */ - spin_lock_mutex(&lock->base.wait_lock, flags); - list_for_each_entry(cur, &lock->base.wait_list, list) { - debug_mutex_wake_waiter(&lock->base, cur); - wake_up_process(cur->task); - } - spin_unlock_mutex(&lock->base.wait_lock, flags); -} - -/* - * Lock a mutex (possibly interruptible), slowpath: - */ -static __always_inline int __sched -__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - struct lockdep_map *nest_lock, unsigned long ip, - struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) -{ - struct task_struct *task = current; - struct mutex_waiter waiter; - unsigned long flags; - int ret; - - preempt_disable(); - mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); - -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER - /* - * Optimistic spinning. - * - * We try to spin for acquisition when we find that there are no - * pending waiters and the lock owner is currently running on a - * (different) CPU. - * - * The rationale is that if the lock owner is running, it is likely to - * release the lock soon. - * - * Since this needs the lock owner, and this mutex implementation - * doesn't track the owner atomically in the lock field, we need to - * track it non-atomically. - * - * We can't do this for DEBUG_MUTEXES because that relies on wait_lock - * to serialize everything. - * - * The mutex spinners are queued up using MCS lock so that only one - * spinner can compete for the mutex. However, if mutex spinning isn't - * going to happen, there is no point in going through the lock/unlock - * overhead. - */ - if (!mutex_can_spin_on_owner(lock)) - goto slowpath; - - for (;;) { - struct task_struct *owner; - struct mspin_node node; - - if (use_ww_ctx && ww_ctx->acquired > 0) { - struct ww_mutex *ww; - - ww = container_of(lock, struct ww_mutex, base); - /* - * If ww->ctx is set the contents are undefined, only - * by acquiring wait_lock there is a guarantee that - * they are not invalid when reading. - * - * As such, when deadlock detection needs to be - * performed the optimistic spinning cannot be done. - */ - if (ACCESS_ONCE(ww->ctx)) - goto slowpath; - } - - /* - * If there's an owner, wait for it to either - * release the lock or go to sleep. - */ - mspin_lock(MLOCK(lock), &node); - owner = ACCESS_ONCE(lock->owner); - if (owner && !mutex_spin_on_owner(lock, owner)) { - mspin_unlock(MLOCK(lock), &node); - goto slowpath; - } - - if ((atomic_read(&lock->count) == 1) && - (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { - lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx) { - struct ww_mutex *ww; - ww = container_of(lock, struct ww_mutex, base); - - ww_mutex_set_context_fastpath(ww, ww_ctx); - } - - mutex_set_owner(lock); - mspin_unlock(MLOCK(lock), &node); - preempt_enable(); - return 0; - } - mspin_unlock(MLOCK(lock), &node); - - /* - * When there's no owner, we might have preempted between the - * owner acquiring the lock and setting the owner field. If - * we're an RT task that will live-lock because we won't let - * the owner complete. - */ - if (!owner && (need_resched() || rt_task(task))) - goto slowpath; - - /* - * The cpu_relax() call is a compiler barrier which forces - * everything in this loop to be re-loaded. We don't need - * memory barriers as we'll eventually observe the right - * values at the cost of a few extra spins. - */ - arch_mutex_cpu_relax(); - } -slowpath: -#endif - spin_lock_mutex(&lock->wait_lock, flags); - - /* once more, can we acquire the lock? */ - if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1)) - goto skip_wait; - - debug_mutex_lock_common(lock, &waiter); - debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); - - /* add waiting tasks to the end of the waitqueue (FIFO): */ - list_add_tail(&waiter.list, &lock->wait_list); - waiter.task = task; - - lock_contended(&lock->dep_map, ip); - - for (;;) { - /* - * Lets try to take the lock again - this is needed even if - * we get here for the first time (shortly after failing to - * acquire the lock), to make sure that we get a wakeup once - * it's unlocked. Later on, if we sleep, this is the - * operation that gives us the lock. We xchg it to -1, so - * that when we release the lock, we properly wake up the - * other waiters: - */ - if (MUTEX_SHOW_NO_WAITER(lock) && - (atomic_xchg(&lock->count, -1) == 1)) - break; - - /* - * got a signal? (This code gets eliminated in the - * TASK_UNINTERRUPTIBLE case.) - */ - if (unlikely(signal_pending_state(state, task))) { - ret = -EINTR; - goto err; - } - - if (use_ww_ctx && ww_ctx->acquired > 0) { - ret = __mutex_lock_check_stamp(lock, ww_ctx); - if (ret) - goto err; - } - - __set_task_state(task, state); - - /* didn't get the lock, go to sleep: */ - spin_unlock_mutex(&lock->wait_lock, flags); - schedule_preempt_disabled(); - spin_lock_mutex(&lock->wait_lock, flags); - } - mutex_remove_waiter(lock, &waiter, current_thread_info()); - /* set it to 0 if there are no waiters left: */ - if (likely(list_empty(&lock->wait_list))) - atomic_set(&lock->count, 0); - debug_mutex_free_waiter(&waiter); - -skip_wait: - /* got the lock - cleanup and rejoice! */ - lock_acquired(&lock->dep_map, ip); - mutex_set_owner(lock); - - if (use_ww_ctx) { - struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); - struct mutex_waiter *cur; - - /* - * This branch gets optimized out for the common case, - * and is only important for ww_mutex_lock. - */ - ww_mutex_lock_acquired(ww, ww_ctx); - ww->ctx = ww_ctx; - - /* - * Give any possible sleeping processes the chance to wake up, - * so they can recheck if they have to back off. - */ - list_for_each_entry(cur, &lock->wait_list, list) { - debug_mutex_wake_waiter(lock, cur); - wake_up_process(cur->task); - } - } - - spin_unlock_mutex(&lock->wait_lock, flags); - preempt_enable(); - return 0; - -err: - mutex_remove_waiter(lock, &waiter, task_thread_info(task)); - spin_unlock_mutex(&lock->wait_lock, flags); - debug_mutex_free_waiter(&waiter); - mutex_release(&lock->dep_map, 1, ip); - preempt_enable(); - return ret; -} - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -void __sched -mutex_lock_nested(struct mutex *lock, unsigned int subclass) -{ - might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL, 0); -} - -EXPORT_SYMBOL_GPL(mutex_lock_nested); - -void __sched -_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) -{ - might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - 0, nest, _RET_IP_, NULL, 0); -} - -EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); - -int __sched -mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) -{ - might_sleep(); - return __mutex_lock_common(lock, TASK_KILLABLE, - subclass, NULL, _RET_IP_, NULL, 0); -} -EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); - -int __sched -mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) -{ - might_sleep(); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL, 0); -} - -EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); - -static inline int -ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ -#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH - unsigned tmp; - - if (ctx->deadlock_inject_countdown-- == 0) { - tmp = ctx->deadlock_inject_interval; - if (tmp > UINT_MAX/4) - tmp = UINT_MAX; - else - tmp = tmp*2 + tmp + tmp/2; - - ctx->deadlock_inject_interval = tmp; - ctx->deadlock_inject_countdown = tmp; - ctx->contending_lock = lock; - - ww_mutex_unlock(lock); - - return -EDEADLK; - } -#endif - - return 0; -} - -int __sched -__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - int ret; - - might_sleep(); - ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx, 1); - if (!ret && ctx->acquired > 1) - return ww_mutex_deadlock_injection(lock, ctx); - - return ret; -} -EXPORT_SYMBOL_GPL(__ww_mutex_lock); - -int __sched -__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - int ret; - - might_sleep(); - ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx, 1); - - if (!ret && ctx->acquired > 1) - return ww_mutex_deadlock_injection(lock, ctx); - - return ret; -} -EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); - -#endif - -/* - * Release the lock, slowpath: - */ -static inline void -__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - unsigned long flags; - - spin_lock_mutex(&lock->wait_lock, flags); - mutex_release(&lock->dep_map, nested, _RET_IP_); - debug_mutex_unlock(lock); - - /* - * some architectures leave the lock unlocked in the fastpath failure - * case, others need to leave it locked. In the later case we have to - * unlock it here - */ - if (__mutex_slowpath_needs_to_unlock()) - atomic_set(&lock->count, 1); - - if (!list_empty(&lock->wait_list)) { - /* get the first entry from the wait-list: */ - struct mutex_waiter *waiter = - list_entry(lock->wait_list.next, - struct mutex_waiter, list); - - debug_mutex_wake_waiter(lock, waiter); - - wake_up_process(waiter->task); - } - - spin_unlock_mutex(&lock->wait_lock, flags); -} - -/* - * Release the lock, slowpath: - */ -static __used noinline void -__mutex_unlock_slowpath(atomic_t *lock_count) -{ - __mutex_unlock_common_slowpath(lock_count, 1); -} - -#ifndef CONFIG_DEBUG_LOCK_ALLOC -/* - * Here come the less common (and hence less performance-critical) APIs: - * mutex_lock_interruptible() and mutex_trylock(). - */ -static noinline int __sched -__mutex_lock_killable_slowpath(struct mutex *lock); - -static noinline int __sched -__mutex_lock_interruptible_slowpath(struct mutex *lock); - -/** - * mutex_lock_interruptible - acquire the mutex, interruptible - * @lock: the mutex to be acquired - * - * Lock the mutex like mutex_lock(), and return 0 if the mutex has - * been acquired or sleep until the mutex becomes available. If a - * signal arrives while waiting for the lock then this function - * returns -EINTR. - * - * This function is similar to (but not equivalent to) down_interruptible(). - */ -int __sched mutex_lock_interruptible(struct mutex *lock) -{ - int ret; - - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->count); - if (likely(!ret)) { - mutex_set_owner(lock); - return 0; - } else - return __mutex_lock_interruptible_slowpath(lock); -} - -EXPORT_SYMBOL(mutex_lock_interruptible); - -int __sched mutex_lock_killable(struct mutex *lock) -{ - int ret; - - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->count); - if (likely(!ret)) { - mutex_set_owner(lock); - return 0; - } else - return __mutex_lock_killable_slowpath(lock); -} -EXPORT_SYMBOL(mutex_lock_killable); - -static __used noinline void __sched -__mutex_lock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL, 0); -} - -static noinline int __sched -__mutex_lock_killable_slowpath(struct mutex *lock) -{ - return __mutex_lock_common(lock, TASK_KILLABLE, 0, - NULL, _RET_IP_, NULL, 0); -} - -static noinline int __sched -__mutex_lock_interruptible_slowpath(struct mutex *lock) -{ - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL, 0); -} - -static noinline int __sched -__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx, 1); -} - -static noinline int __sched -__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ - return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx, 1); -} - -#endif - -/* - * Spinlock based trylock, we take the spinlock and check whether we - * can get the lock: - */ -static inline int __mutex_trylock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - unsigned long flags; - int prev; - - spin_lock_mutex(&lock->wait_lock, flags); - - prev = atomic_xchg(&lock->count, -1); - if (likely(prev == 1)) { - mutex_set_owner(lock); - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - } - - /* Set it back to 0 if there are no waiters: */ - if (likely(list_empty(&lock->wait_list))) - atomic_set(&lock->count, 0); - - spin_unlock_mutex(&lock->wait_lock, flags); - - return prev == 1; -} - -/** - * mutex_trylock - try to acquire the mutex, without waiting - * @lock: the mutex to be acquired - * - * Try to acquire the mutex atomically. Returns 1 if the mutex - * has been acquired successfully, and 0 on contention. - * - * NOTE: this function follows the spin_trylock() convention, so - * it is negated from the down_trylock() return values! Be careful - * about this when converting semaphore users to mutexes. - * - * This function must not be used in interrupt context. The - * mutex must be released by the same task that acquired it. - */ -int __sched mutex_trylock(struct mutex *lock) -{ - int ret; - - ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); - if (ret) - mutex_set_owner(lock); - - return ret; -} -EXPORT_SYMBOL(mutex_trylock); - -#ifndef CONFIG_DEBUG_LOCK_ALLOC -int __sched -__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - int ret; - - might_sleep(); - - ret = __mutex_fastpath_lock_retval(&lock->base.count); - - if (likely(!ret)) { - ww_mutex_set_context_fastpath(lock, ctx); - mutex_set_owner(&lock->base); - } else - ret = __ww_mutex_lock_slowpath(lock, ctx); - return ret; -} -EXPORT_SYMBOL(__ww_mutex_lock); - -int __sched -__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - int ret; - - might_sleep(); - - ret = __mutex_fastpath_lock_retval(&lock->base.count); - - if (likely(!ret)) { - ww_mutex_set_context_fastpath(lock, ctx); - mutex_set_owner(&lock->base); - } else - ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx); - return ret; -} -EXPORT_SYMBOL(__ww_mutex_lock_interruptible); - -#endif - -/** - * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 - * @cnt: the atomic which we are to dec - * @lock: the mutex to return holding if we dec to 0 - * - * return true and hold lock if we dec to 0, return false otherwise - */ -int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) -{ - /* dec if we can't possibly hit 0 */ - if (atomic_add_unless(cnt, -1, 1)) - return 0; - /* we might hit 0, so take the lock */ - mutex_lock(lock); - if (!atomic_dec_and_test(cnt)) { - /* when we actually did the dec, we didn't hit 0 */ - mutex_unlock(lock); - return 0; - } - /* we hit 0, and we hold the lock */ - return 1; -} -EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/kernel/mutex.h b/kernel/mutex.h deleted file mode 100644 index 4115fbf..0000000 --- a/kernel/mutex.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Mutexes: blocking mutual exclusion locks - * - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - * - * This file contains mutex debugging related internal prototypes, for the - * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: - */ - -#define spin_lock_mutex(lock, flags) \ - do { spin_lock(lock); (void)(flags); } while (0) -#define spin_unlock_mutex(lock, flags) \ - do { spin_unlock(lock); (void)(flags); } while (0) -#define mutex_remove_waiter(lock, waiter, ti) \ - __list_del((waiter)->list.prev, (waiter)->list.next) - -#ifdef CONFIG_SMP -static inline void mutex_set_owner(struct mutex *lock) -{ - lock->owner = current; -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ - lock->owner = NULL; -} -#else -static inline void mutex_set_owner(struct mutex *lock) -{ -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ -} -#endif - -#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) -#define debug_mutex_free_waiter(waiter) do { } while (0) -#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) -#define debug_mutex_unlock(lock) do { } while (0) -#define debug_mutex_init(lock, name, key) do { } while (0) - -static inline void -debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) -{ -} -- cgit v1.1 From 8eddac3f103736163f49255bcb109edadea167f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:14:17 +0100 Subject: locking: Move the lockdep code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wl7s3tta5isufzfguc23et06@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 6 - kernel/lockdep.c | 4257 ------------------------------------ kernel/lockdep_internals.h | 170 -- kernel/lockdep_proc.c | 683 ------ kernel/lockdep_states.h | 9 - kernel/locking/Makefile | 6 + kernel/locking/lockdep.c | 4257 ++++++++++++++++++++++++++++++++++++ kernel/locking/lockdep_internals.h | 170 ++ kernel/locking/lockdep_proc.c | 683 ++++++ kernel/locking/lockdep_states.h | 9 + 10 files changed, 5125 insertions(+), 5125 deletions(-) delete mode 100644 kernel/lockdep.c delete mode 100644 kernel/lockdep_internals.h delete mode 100644 kernel/lockdep_proc.c delete mode 100644 kernel/lockdep_states.h create mode 100644 kernel/locking/lockdep.c create mode 100644 kernel/locking/lockdep_internals.h create mode 100644 kernel/locking/lockdep_proc.c create mode 100644 kernel/locking/lockdep_states.h diff --git a/kernel/Makefile b/kernel/Makefile index 330b146..4fffd6ee 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -14,8 +14,6 @@ obj-y = fork.o exec_domain.o panic.o \ ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files -CFLAGS_REMOVE_lockdep.o = -pg -CFLAGS_REMOVE_lockdep_proc.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_irq_work.o = -pg endif @@ -33,10 +31,6 @@ obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ -obj-$(CONFIG_LOCKDEP) += lockdep.o -ifeq ($(CONFIG_PROC_FS),y) -obj-$(CONFIG_LOCKDEP) += lockdep_proc.o -endif obj-$(CONFIG_FUTEX) += futex.o ifeq ($(CONFIG_COMPAT),y) obj-$(CONFIG_FUTEX) += futex_compat.o diff --git a/kernel/lockdep.c b/kernel/lockdep.c deleted file mode 100644 index 4e8e14c..0000000 --- a/kernel/lockdep.c +++ /dev/null @@ -1,4257 +0,0 @@ -/* - * kernel/lockdep.c - * - * Runtime locking correctness validator - * - * Started by Ingo Molnar: - * - * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra - * - * this code maps all the lock dependencies as they occur in a live kernel - * and will warn about the following classes of locking bugs: - * - * - lock inversion scenarios - * - circular lock dependencies - * - hardirq/softirq safe/unsafe locking bugs - * - * Bugs are reported even if the current locking scenario does not cause - * any deadlock at this point. - * - * I.e. if anytime in the past two locks were taken in a different order, - * even if it happened for another task, even if those were different - * locks (but of the same class as this lock), this code will detect it. - * - * Thanks to Arjan van de Ven for coming up with the initial idea of - * mapping lock dependencies runtime. - */ -#define DISABLE_BRANCH_PROFILING -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "lockdep_internals.h" - -#define CREATE_TRACE_POINTS -#include - -#ifdef CONFIG_PROVE_LOCKING -int prove_locking = 1; -module_param(prove_locking, int, 0644); -#else -#define prove_locking 0 -#endif - -#ifdef CONFIG_LOCK_STAT -int lock_stat = 1; -module_param(lock_stat, int, 0644); -#else -#define lock_stat 0 -#endif - -/* - * lockdep_lock: protects the lockdep graph, the hashes and the - * class/list/hash allocators. - * - * This is one of the rare exceptions where it's justified - * to use a raw spinlock - we really dont want the spinlock - * code to recurse back into the lockdep code... - */ -static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; - -static int graph_lock(void) -{ - arch_spin_lock(&lockdep_lock); - /* - * Make sure that if another CPU detected a bug while - * walking the graph we dont change it (while the other - * CPU is busy printing out stuff with the graph lock - * dropped already) - */ - if (!debug_locks) { - arch_spin_unlock(&lockdep_lock); - return 0; - } - /* prevent any recursions within lockdep from causing deadlocks */ - current->lockdep_recursion++; - return 1; -} - -static inline int graph_unlock(void) -{ - if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) { - /* - * The lockdep graph lock isn't locked while we expect it to - * be, we're confused now, bye! - */ - return DEBUG_LOCKS_WARN_ON(1); - } - - current->lockdep_recursion--; - arch_spin_unlock(&lockdep_lock); - return 0; -} - -/* - * Turn lock debugging off and return with 0 if it was off already, - * and also release the graph lock: - */ -static inline int debug_locks_off_graph_unlock(void) -{ - int ret = debug_locks_off(); - - arch_spin_unlock(&lockdep_lock); - - return ret; -} - -static int lockdep_initialized; - -unsigned long nr_list_entries; -static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; - -/* - * All data structures here are protected by the global debug_lock. - * - * Mutex key structs only get allocated, once during bootup, and never - * get freed - this significantly simplifies the debugging code. - */ -unsigned long nr_lock_classes; -static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; - -static inline struct lock_class *hlock_class(struct held_lock *hlock) -{ - if (!hlock->class_idx) { - /* - * Someone passed in garbage, we give up. - */ - DEBUG_LOCKS_WARN_ON(1); - return NULL; - } - return lock_classes + hlock->class_idx - 1; -} - -#ifdef CONFIG_LOCK_STAT -static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], - cpu_lock_stats); - -static inline u64 lockstat_clock(void) -{ - return local_clock(); -} - -static int lock_point(unsigned long points[], unsigned long ip) -{ - int i; - - for (i = 0; i < LOCKSTAT_POINTS; i++) { - if (points[i] == 0) { - points[i] = ip; - break; - } - if (points[i] == ip) - break; - } - - return i; -} - -static void lock_time_inc(struct lock_time *lt, u64 time) -{ - if (time > lt->max) - lt->max = time; - - if (time < lt->min || !lt->nr) - lt->min = time; - - lt->total += time; - lt->nr++; -} - -static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) -{ - if (!src->nr) - return; - - if (src->max > dst->max) - dst->max = src->max; - - if (src->min < dst->min || !dst->nr) - dst->min = src->min; - - dst->total += src->total; - dst->nr += src->nr; -} - -struct lock_class_stats lock_stats(struct lock_class *class) -{ - struct lock_class_stats stats; - int cpu, i; - - memset(&stats, 0, sizeof(struct lock_class_stats)); - for_each_possible_cpu(cpu) { - struct lock_class_stats *pcs = - &per_cpu(cpu_lock_stats, cpu)[class - lock_classes]; - - for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) - stats.contention_point[i] += pcs->contention_point[i]; - - for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) - stats.contending_point[i] += pcs->contending_point[i]; - - lock_time_add(&pcs->read_waittime, &stats.read_waittime); - lock_time_add(&pcs->write_waittime, &stats.write_waittime); - - lock_time_add(&pcs->read_holdtime, &stats.read_holdtime); - lock_time_add(&pcs->write_holdtime, &stats.write_holdtime); - - for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) - stats.bounces[i] += pcs->bounces[i]; - } - - return stats; -} - -void clear_lock_stats(struct lock_class *class) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct lock_class_stats *cpu_stats = - &per_cpu(cpu_lock_stats, cpu)[class - lock_classes]; - - memset(cpu_stats, 0, sizeof(struct lock_class_stats)); - } - memset(class->contention_point, 0, sizeof(class->contention_point)); - memset(class->contending_point, 0, sizeof(class->contending_point)); -} - -static struct lock_class_stats *get_lock_stats(struct lock_class *class) -{ - return &get_cpu_var(cpu_lock_stats)[class - lock_classes]; -} - -static void put_lock_stats(struct lock_class_stats *stats) -{ - put_cpu_var(cpu_lock_stats); -} - -static void lock_release_holdtime(struct held_lock *hlock) -{ - struct lock_class_stats *stats; - u64 holdtime; - - if (!lock_stat) - return; - - holdtime = lockstat_clock() - hlock->holdtime_stamp; - - stats = get_lock_stats(hlock_class(hlock)); - if (hlock->read) - lock_time_inc(&stats->read_holdtime, holdtime); - else - lock_time_inc(&stats->write_holdtime, holdtime); - put_lock_stats(stats); -} -#else -static inline void lock_release_holdtime(struct held_lock *hlock) -{ -} -#endif - -/* - * We keep a global list of all lock classes. The list only grows, - * never shrinks. The list is only accessed with the lockdep - * spinlock lock held. - */ -LIST_HEAD(all_lock_classes); - -/* - * The lockdep classes are in a hash-table as well, for fast lookup: - */ -#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) -#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) -#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) -#define classhashentry(key) (classhash_table + __classhashfn((key))) - -static struct list_head classhash_table[CLASSHASH_SIZE]; - -/* - * We put the lock dependency chains into a hash-table as well, to cache - * their existence: - */ -#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) -#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) -#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) -#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) - -static struct list_head chainhash_table[CHAINHASH_SIZE]; - -/* - * The hash key of the lock dependency chains is a hash itself too: - * it's a hash of all locks taken up to that lock, including that lock. - * It's a 64-bit hash, because it's important for the keys to be - * unique. - */ -#define iterate_chain_key(key1, key2) \ - (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \ - ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ - (key2)) - -void lockdep_off(void) -{ - current->lockdep_recursion++; -} -EXPORT_SYMBOL(lockdep_off); - -void lockdep_on(void) -{ - current->lockdep_recursion--; -} -EXPORT_SYMBOL(lockdep_on); - -/* - * Debugging switches: - */ - -#define VERBOSE 0 -#define VERY_VERBOSE 0 - -#if VERBOSE -# define HARDIRQ_VERBOSE 1 -# define SOFTIRQ_VERBOSE 1 -# define RECLAIM_VERBOSE 1 -#else -# define HARDIRQ_VERBOSE 0 -# define SOFTIRQ_VERBOSE 0 -# define RECLAIM_VERBOSE 0 -#endif - -#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE -/* - * Quick filtering for interesting events: - */ -static int class_filter(struct lock_class *class) -{ -#if 0 - /* Example */ - if (class->name_version == 1 && - !strcmp(class->name, "lockname")) - return 1; - if (class->name_version == 1 && - !strcmp(class->name, "&struct->lockfield")) - return 1; -#endif - /* Filter everything else. 1 would be to allow everything else */ - return 0; -} -#endif - -static int verbose(struct lock_class *class) -{ -#if VERBOSE - return class_filter(class); -#endif - return 0; -} - -/* - * Stack-trace: tightly packed array of stack backtrace - * addresses. Protected by the graph_lock. - */ -unsigned long nr_stack_trace_entries; -static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; - -static void print_lockdep_off(const char *bug_msg) -{ - printk(KERN_DEBUG "%s\n", bug_msg); - printk(KERN_DEBUG "turning off the locking correctness validator.\n"); - printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n"); -} - -static int save_trace(struct stack_trace *trace) -{ - trace->nr_entries = 0; - trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; - trace->entries = stack_trace + nr_stack_trace_entries; - - trace->skip = 3; - - save_stack_trace(trace); - - /* - * Some daft arches put -1 at the end to indicate its a full trace. - * - * this is buggy anyway, since it takes a whole extra entry so a - * complete trace that maxes out the entries provided will be reported - * as incomplete, friggin useless - */ - if (trace->nr_entries != 0 && - trace->entries[trace->nr_entries-1] == ULONG_MAX) - trace->nr_entries--; - - trace->max_entries = trace->nr_entries; - - nr_stack_trace_entries += trace->nr_entries; - - if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { - if (!debug_locks_off_graph_unlock()) - return 0; - - print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); - dump_stack(); - - return 0; - } - - return 1; -} - -unsigned int nr_hardirq_chains; -unsigned int nr_softirq_chains; -unsigned int nr_process_chains; -unsigned int max_lockdep_depth; - -#ifdef CONFIG_DEBUG_LOCKDEP -/* - * We cannot printk in early bootup code. Not even early_printk() - * might work. So we mark any initialization errors and printk - * about it later on, in lockdep_info(). - */ -static int lockdep_init_error; -static const char *lock_init_error; -static unsigned long lockdep_init_trace_data[20]; -static struct stack_trace lockdep_init_trace = { - .max_entries = ARRAY_SIZE(lockdep_init_trace_data), - .entries = lockdep_init_trace_data, -}; - -/* - * Various lockdep statistics: - */ -DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); -#endif - -/* - * Locking printouts: - */ - -#define __USAGE(__STATE) \ - [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \ - [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \ - [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\ - [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R", - -static const char *usage_str[] = -{ -#define LOCKDEP_STATE(__STATE) __USAGE(__STATE) -#include "lockdep_states.h" -#undef LOCKDEP_STATE - [LOCK_USED] = "INITIAL USE", -}; - -const char * __get_key_name(struct lockdep_subclass_key *key, char *str) -{ - return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); -} - -static inline unsigned long lock_flag(enum lock_usage_bit bit) -{ - return 1UL << bit; -} - -static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) -{ - char c = '.'; - - if (class->usage_mask & lock_flag(bit + 2)) - c = '+'; - if (class->usage_mask & lock_flag(bit)) { - c = '-'; - if (class->usage_mask & lock_flag(bit + 2)) - c = '?'; - } - - return c; -} - -void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]) -{ - int i = 0; - -#define LOCKDEP_STATE(__STATE) \ - usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \ - usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ); -#include "lockdep_states.h" -#undef LOCKDEP_STATE - - usage[i] = '\0'; -} - -static void __print_lock_name(struct lock_class *class) -{ - char str[KSYM_NAME_LEN]; - const char *name; - - name = class->name; - if (!name) { - name = __get_key_name(class->key, str); - printk("%s", name); - } else { - printk("%s", name); - if (class->name_version > 1) - printk("#%d", class->name_version); - if (class->subclass) - printk("/%d", class->subclass); - } -} - -static void print_lock_name(struct lock_class *class) -{ - char usage[LOCK_USAGE_CHARS]; - - get_usage_chars(class, usage); - - printk(" ("); - __print_lock_name(class); - printk("){%s}", usage); -} - -static void print_lockdep_cache(struct lockdep_map *lock) -{ - const char *name; - char str[KSYM_NAME_LEN]; - - name = lock->name; - if (!name) - name = __get_key_name(lock->key->subkeys, str); - - printk("%s", name); -} - -static void print_lock(struct held_lock *hlock) -{ - print_lock_name(hlock_class(hlock)); - printk(", at: "); - print_ip_sym(hlock->acquire_ip); -} - -static void lockdep_print_held_locks(struct task_struct *curr) -{ - int i, depth = curr->lockdep_depth; - - if (!depth) { - printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr)); - return; - } - printk("%d lock%s held by %s/%d:\n", - depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr)); - - for (i = 0; i < depth; i++) { - printk(" #%d: ", i); - print_lock(curr->held_locks + i); - } -} - -static void print_kernel_ident(void) -{ - printk("%s %.*s %s\n", init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, - print_tainted()); -} - -static int very_verbose(struct lock_class *class) -{ -#if VERY_VERBOSE - return class_filter(class); -#endif - return 0; -} - -/* - * Is this the address of a static object: - */ -static int static_obj(void *obj) -{ - unsigned long start = (unsigned long) &_stext, - end = (unsigned long) &_end, - addr = (unsigned long) obj; - - /* - * static variable? - */ - if ((addr >= start) && (addr < end)) - return 1; - - if (arch_is_kernel_data(addr)) - return 1; - - /* - * in-kernel percpu var? - */ - if (is_kernel_percpu_address(addr)) - return 1; - - /* - * module static or percpu var? - */ - return is_module_address(addr) || is_module_percpu_address(addr); -} - -/* - * To make lock name printouts unique, we calculate a unique - * class->name_version generation counter: - */ -static int count_matching_names(struct lock_class *new_class) -{ - struct lock_class *class; - int count = 0; - - if (!new_class->name) - return 0; - - list_for_each_entry(class, &all_lock_classes, lock_entry) { - if (new_class->key - new_class->subclass == class->key) - return class->name_version; - if (class->name && !strcmp(class->name, new_class->name)) - count = max(count, class->name_version); - } - - return count + 1; -} - -/* - * Register a lock's class in the hash-table, if the class is not present - * yet. Otherwise we look it up. We cache the result in the lock object - * itself, so actual lookup of the hash should be once per lock object. - */ -static inline struct lock_class * -look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) -{ - struct lockdep_subclass_key *key; - struct list_head *hash_head; - struct lock_class *class; - -#ifdef CONFIG_DEBUG_LOCKDEP - /* - * If the architecture calls into lockdep before initializing - * the hashes then we'll warn about it later. (we cannot printk - * right now) - */ - if (unlikely(!lockdep_initialized)) { - lockdep_init(); - lockdep_init_error = 1; - lock_init_error = lock->name; - save_stack_trace(&lockdep_init_trace); - } -#endif - - if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { - debug_locks_off(); - printk(KERN_ERR - "BUG: looking up invalid subclass: %u\n", subclass); - printk(KERN_ERR - "turning off the locking correctness validator.\n"); - dump_stack(); - return NULL; - } - - /* - * Static locks do not have their class-keys yet - for them the key - * is the lock object itself: - */ - if (unlikely(!lock->key)) - lock->key = (void *)lock; - - /* - * NOTE: the class-key must be unique. For dynamic locks, a static - * lock_class_key variable is passed in through the mutex_init() - * (or spin_lock_init()) call - which acts as the key. For static - * locks we use the lock object itself as the key. - */ - BUILD_BUG_ON(sizeof(struct lock_class_key) > - sizeof(struct lockdep_map)); - - key = lock->key->subkeys + subclass; - - hash_head = classhashentry(key); - - /* - * We can walk the hash lockfree, because the hash only - * grows, and we are careful when adding entries to the end: - */ - list_for_each_entry(class, hash_head, hash_entry) { - if (class->key == key) { - /* - * Huh! same key, different name? Did someone trample - * on some memory? We're most confused. - */ - WARN_ON_ONCE(class->name != lock->name); - return class; - } - } - - return NULL; -} - -/* - * Register a lock's class in the hash-table, if the class is not present - * yet. Otherwise we look it up. We cache the result in the lock object - * itself, so actual lookup of the hash should be once per lock object. - */ -static inline struct lock_class * -register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) -{ - struct lockdep_subclass_key *key; - struct list_head *hash_head; - struct lock_class *class; - unsigned long flags; - - class = look_up_lock_class(lock, subclass); - if (likely(class)) - goto out_set_class_cache; - - /* - * Debug-check: all keys must be persistent! - */ - if (!static_obj(lock->key)) { - debug_locks_off(); - printk("INFO: trying to register non-static key.\n"); - printk("the code is fine but needs lockdep annotation.\n"); - printk("turning off the locking correctness validator.\n"); - dump_stack(); - - return NULL; - } - - key = lock->key->subkeys + subclass; - hash_head = classhashentry(key); - - raw_local_irq_save(flags); - if (!graph_lock()) { - raw_local_irq_restore(flags); - return NULL; - } - /* - * We have to do the hash-walk again, to avoid races - * with another CPU: - */ - list_for_each_entry(class, hash_head, hash_entry) - if (class->key == key) - goto out_unlock_set; - /* - * Allocate a new key from the static array, and add it to - * the hash: - */ - if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { - if (!debug_locks_off_graph_unlock()) { - raw_local_irq_restore(flags); - return NULL; - } - raw_local_irq_restore(flags); - - print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); - dump_stack(); - return NULL; - } - class = lock_classes + nr_lock_classes++; - debug_atomic_inc(nr_unused_locks); - class->key = key; - class->name = lock->name; - class->subclass = subclass; - INIT_LIST_HEAD(&class->lock_entry); - INIT_LIST_HEAD(&class->locks_before); - INIT_LIST_HEAD(&class->locks_after); - class->name_version = count_matching_names(class); - /* - * We use RCU's safe list-add method to make - * parallel walking of the hash-list safe: - */ - list_add_tail_rcu(&class->hash_entry, hash_head); - /* - * Add it to the global list of classes: - */ - list_add_tail_rcu(&class->lock_entry, &all_lock_classes); - - if (verbose(class)) { - graph_unlock(); - raw_local_irq_restore(flags); - - printk("\nnew class %p: %s", class->key, class->name); - if (class->name_version > 1) - printk("#%d", class->name_version); - printk("\n"); - dump_stack(); - - raw_local_irq_save(flags); - if (!graph_lock()) { - raw_local_irq_restore(flags); - return NULL; - } - } -out_unlock_set: - graph_unlock(); - raw_local_irq_restore(flags); - -out_set_class_cache: - if (!subclass || force) - lock->class_cache[0] = class; - else if (subclass < NR_LOCKDEP_CACHING_CLASSES) - lock->class_cache[subclass] = class; - - /* - * Hash collision, did we smoke some? We found a class with a matching - * hash but the subclass -- which is hashed in -- didn't match. - */ - if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) - return NULL; - - return class; -} - -#ifdef CONFIG_PROVE_LOCKING -/* - * Allocate a lockdep entry. (assumes the graph_lock held, returns - * with NULL on failure) - */ -static struct lock_list *alloc_list_entry(void) -{ - if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { - if (!debug_locks_off_graph_unlock()) - return NULL; - - print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!"); - dump_stack(); - return NULL; - } - return list_entries + nr_list_entries++; -} - -/* - * Add a new dependency to the head of the list: - */ -static int add_lock_to_list(struct lock_class *class, struct lock_class *this, - struct list_head *head, unsigned long ip, - int distance, struct stack_trace *trace) -{ - struct lock_list *entry; - /* - * Lock not present yet - get a new dependency struct and - * add it to the list: - */ - entry = alloc_list_entry(); - if (!entry) - return 0; - - entry->class = this; - entry->distance = distance; - entry->trace = *trace; - /* - * Since we never remove from the dependency list, the list can - * be walked lockless by other CPUs, it's only allocation - * that must be protected by the spinlock. But this also means - * we must make new entries visible only once writes to the - * entry become visible - hence the RCU op: - */ - list_add_tail_rcu(&entry->entry, head); - - return 1; -} - -/* - * For good efficiency of modular, we use power of 2 - */ -#define MAX_CIRCULAR_QUEUE_SIZE 4096UL -#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1) - -/* - * The circular_queue and helpers is used to implement the - * breadth-first search(BFS)algorithem, by which we can build - * the shortest path from the next lock to be acquired to the - * previous held lock if there is a circular between them. - */ -struct circular_queue { - unsigned long element[MAX_CIRCULAR_QUEUE_SIZE]; - unsigned int front, rear; -}; - -static struct circular_queue lock_cq; - -unsigned int max_bfs_queue_depth; - -static unsigned int lockdep_dependency_gen_id; - -static inline void __cq_init(struct circular_queue *cq) -{ - cq->front = cq->rear = 0; - lockdep_dependency_gen_id++; -} - -static inline int __cq_empty(struct circular_queue *cq) -{ - return (cq->front == cq->rear); -} - -static inline int __cq_full(struct circular_queue *cq) -{ - return ((cq->rear + 1) & CQ_MASK) == cq->front; -} - -static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) -{ - if (__cq_full(cq)) - return -1; - - cq->element[cq->rear] = elem; - cq->rear = (cq->rear + 1) & CQ_MASK; - return 0; -} - -static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) -{ - if (__cq_empty(cq)) - return -1; - - *elem = cq->element[cq->front]; - cq->front = (cq->front + 1) & CQ_MASK; - return 0; -} - -static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) -{ - return (cq->rear - cq->front) & CQ_MASK; -} - -static inline void mark_lock_accessed(struct lock_list *lock, - struct lock_list *parent) -{ - unsigned long nr; - - nr = lock - list_entries; - WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ - lock->parent = parent; - lock->class->dep_gen_id = lockdep_dependency_gen_id; -} - -static inline unsigned long lock_accessed(struct lock_list *lock) -{ - unsigned long nr; - - nr = lock - list_entries; - WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ - return lock->class->dep_gen_id == lockdep_dependency_gen_id; -} - -static inline struct lock_list *get_lock_parent(struct lock_list *child) -{ - return child->parent; -} - -static inline int get_lock_depth(struct lock_list *child) -{ - int depth = 0; - struct lock_list *parent; - - while ((parent = get_lock_parent(child))) { - child = parent; - depth++; - } - return depth; -} - -static int __bfs(struct lock_list *source_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry, - int forward) -{ - struct lock_list *entry; - struct list_head *head; - struct circular_queue *cq = &lock_cq; - int ret = 1; - - if (match(source_entry, data)) { - *target_entry = source_entry; - ret = 0; - goto exit; - } - - if (forward) - head = &source_entry->class->locks_after; - else - head = &source_entry->class->locks_before; - - if (list_empty(head)) - goto exit; - - __cq_init(cq); - __cq_enqueue(cq, (unsigned long)source_entry); - - while (!__cq_empty(cq)) { - struct lock_list *lock; - - __cq_dequeue(cq, (unsigned long *)&lock); - - if (!lock->class) { - ret = -2; - goto exit; - } - - if (forward) - head = &lock->class->locks_after; - else - head = &lock->class->locks_before; - - list_for_each_entry(entry, head, entry) { - if (!lock_accessed(entry)) { - unsigned int cq_depth; - mark_lock_accessed(entry, lock); - if (match(entry, data)) { - *target_entry = entry; - ret = 0; - goto exit; - } - - if (__cq_enqueue(cq, (unsigned long)entry)) { - ret = -1; - goto exit; - } - cq_depth = __cq_get_elem_count(cq); - if (max_bfs_queue_depth < cq_depth) - max_bfs_queue_depth = cq_depth; - } - } - } -exit: - return ret; -} - -static inline int __bfs_forwards(struct lock_list *src_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry) -{ - return __bfs(src_entry, data, match, target_entry, 1); - -} - -static inline int __bfs_backwards(struct lock_list *src_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry) -{ - return __bfs(src_entry, data, match, target_entry, 0); - -} - -/* - * Recursive, forwards-direction lock-dependency checking, used for - * both noncyclic checking and for hardirq-unsafe/softirq-unsafe - * checking. - */ - -/* - * Print a dependency chain entry (this is only done when a deadlock - * has been detected): - */ -static noinline int -print_circular_bug_entry(struct lock_list *target, int depth) -{ - if (debug_locks_silent) - return 0; - printk("\n-> #%u", depth); - print_lock_name(target->class); - printk(":\n"); - print_stack_trace(&target->trace, 6); - - return 0; -} - -static void -print_circular_lock_scenario(struct held_lock *src, - struct held_lock *tgt, - struct lock_list *prt) -{ - struct lock_class *source = hlock_class(src); - struct lock_class *target = hlock_class(tgt); - struct lock_class *parent = prt->class; - - /* - * A direct locking problem where unsafe_class lock is taken - * directly by safe_class lock, then all we need to show - * is the deadlock scenario, as it is obvious that the - * unsafe lock is taken under the safe lock. - * - * But if there is a chain instead, where the safe lock takes - * an intermediate lock (middle_class) where this lock is - * not the same as the safe lock, then the lock chain is - * used to describe the problem. Otherwise we would need - * to show a different CPU case for each link in the chain - * from the safe_class lock to the unsafe_class lock. - */ - if (parent != source) { - printk("Chain exists of:\n "); - __print_lock_name(source); - printk(" --> "); - __print_lock_name(parent); - printk(" --> "); - __print_lock_name(target); - printk("\n\n"); - } - - printk(" Possible unsafe locking scenario:\n\n"); - printk(" CPU0 CPU1\n"); - printk(" ---- ----\n"); - printk(" lock("); - __print_lock_name(target); - printk(");\n"); - printk(" lock("); - __print_lock_name(parent); - printk(");\n"); - printk(" lock("); - __print_lock_name(target); - printk(");\n"); - printk(" lock("); - __print_lock_name(source); - printk(");\n"); - printk("\n *** DEADLOCK ***\n\n"); -} - -/* - * When a circular dependency is detected, print the - * header first: - */ -static noinline int -print_circular_bug_header(struct lock_list *entry, unsigned int depth, - struct held_lock *check_src, - struct held_lock *check_tgt) -{ - struct task_struct *curr = current; - - if (debug_locks_silent) - return 0; - - printk("\n"); - printk("======================================================\n"); - printk("[ INFO: possible circular locking dependency detected ]\n"); - print_kernel_ident(); - printk("-------------------------------------------------------\n"); - printk("%s/%d is trying to acquire lock:\n", - curr->comm, task_pid_nr(curr)); - print_lock(check_src); - printk("\nbut task is already holding lock:\n"); - print_lock(check_tgt); - printk("\nwhich lock already depends on the new lock.\n\n"); - printk("\nthe existing dependency chain (in reverse order) is:\n"); - - print_circular_bug_entry(entry, depth); - - return 0; -} - -static inline int class_equal(struct lock_list *entry, void *data) -{ - return entry->class == data; -} - -static noinline int print_circular_bug(struct lock_list *this, - struct lock_list *target, - struct held_lock *check_src, - struct held_lock *check_tgt) -{ - struct task_struct *curr = current; - struct lock_list *parent; - struct lock_list *first_parent; - int depth; - - if (!debug_locks_off_graph_unlock() || debug_locks_silent) - return 0; - - if (!save_trace(&this->trace)) - return 0; - - depth = get_lock_depth(target); - - print_circular_bug_header(target, depth, check_src, check_tgt); - - parent = get_lock_parent(target); - first_parent = parent; - - while (parent) { - print_circular_bug_entry(parent, --depth); - parent = get_lock_parent(parent); - } - - printk("\nother info that might help us debug this:\n\n"); - print_circular_lock_scenario(check_src, check_tgt, - first_parent); - - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); - - return 0; -} - -static noinline int print_bfs_bug(int ret) -{ - if (!debug_locks_off_graph_unlock()) - return 0; - - /* - * Breadth-first-search failed, graph got corrupted? - */ - WARN(1, "lockdep bfs error:%d\n", ret); - - return 0; -} - -static int noop_count(struct lock_list *entry, void *data) -{ - (*(unsigned long *)data)++; - return 0; -} - -unsigned long __lockdep_count_forward_deps(struct lock_list *this) -{ - unsigned long count = 0; - struct lock_list *uninitialized_var(target_entry); - - __bfs_forwards(this, (void *)&count, noop_count, &target_entry); - - return count; -} -unsigned long lockdep_count_forward_deps(struct lock_class *class) -{ - unsigned long ret, flags; - struct lock_list this; - - this.parent = NULL; - this.class = class; - - local_irq_save(flags); - arch_spin_lock(&lockdep_lock); - ret = __lockdep_count_forward_deps(&this); - arch_spin_unlock(&lockdep_lock); - local_irq_restore(flags); - - return ret; -} - -unsigned long __lockdep_count_backward_deps(struct lock_list *this) -{ - unsigned long count = 0; - struct lock_list *uninitialized_var(target_entry); - - __bfs_backwards(this, (void *)&count, noop_count, &target_entry); - - return count; -} - -unsigned long lockdep_count_backward_deps(struct lock_class *class) -{ - unsigned long ret, flags; - struct lock_list this; - - this.parent = NULL; - this.class = class; - - local_irq_save(flags); - arch_spin_lock(&lockdep_lock); - ret = __lockdep_count_backward_deps(&this); - arch_spin_unlock(&lockdep_lock); - local_irq_restore(flags); - - return ret; -} - -/* - * Prove that the dependency graph starting at can not - * lead to . Print an error and return 0 if it does. - */ -static noinline int -check_noncircular(struct lock_list *root, struct lock_class *target, - struct lock_list **target_entry) -{ - int result; - - debug_atomic_inc(nr_cyclic_checks); - - result = __bfs_forwards(root, target, class_equal, target_entry); - - return result; -} - -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) -/* - * Forwards and backwards subgraph searching, for the purposes of - * proving that two subgraphs can be connected by a new dependency - * without creating any illegal irq-safe -> irq-unsafe lock dependency. - */ - -static inline int usage_match(struct lock_list *entry, void *bit) -{ - return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); -} - - - -/* - * Find a node in the forwards-direction dependency sub-graph starting - * at @root->class that matches @bit. - * - * Return 0 if such a node exists in the subgraph, and put that node - * into *@target_entry. - * - * Return 1 otherwise and keep *@target_entry unchanged. - * Return <0 on error. - */ -static int -find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, - struct lock_list **target_entry) -{ - int result; - - debug_atomic_inc(nr_find_usage_forwards_checks); - - result = __bfs_forwards(root, (void *)bit, usage_match, target_entry); - - return result; -} - -/* - * Find a node in the backwards-direction dependency sub-graph starting - * at @root->class that matches @bit. - * - * Return 0 if such a node exists in the subgraph, and put that node - * into *@target_entry. - * - * Return 1 otherwise and keep *@target_entry unchanged. - * Return <0 on error. - */ -static int -find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, - struct lock_list **target_entry) -{ - int result; - - debug_atomic_inc(nr_find_usage_backwards_checks); - - result = __bfs_backwards(root, (void *)bit, usage_match, target_entry); - - return result; -} - -static void print_lock_class_header(struct lock_class *class, int depth) -{ - int bit; - - printk("%*s->", depth, ""); - print_lock_name(class); - printk(" ops: %lu", class->ops); - printk(" {\n"); - - for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { - if (class->usage_mask & (1 << bit)) { - int len = depth; - - len += printk("%*s %s", depth, "", usage_str[bit]); - len += printk(" at:\n"); - print_stack_trace(class->usage_traces + bit, len); - } - } - printk("%*s }\n", depth, ""); - - printk("%*s ... key at: ",depth,""); - print_ip_sym((unsigned long)class->key); -} - -/* - * printk the shortest lock dependencies from @start to @end in reverse order: - */ -static void __used -print_shortest_lock_dependencies(struct lock_list *leaf, - struct lock_list *root) -{ - struct lock_list *entry = leaf; - int depth; - - /*compute depth from generated tree by BFS*/ - depth = get_lock_depth(leaf); - - do { - print_lock_class_header(entry->class, depth); - printk("%*s ... acquired at:\n", depth, ""); - print_stack_trace(&entry->trace, 2); - printk("\n"); - - if (depth == 0 && (entry != root)) { - printk("lockdep:%s bad path found in chain graph\n", __func__); - break; - } - - entry = get_lock_parent(entry); - depth--; - } while (entry && (depth >= 0)); - - return; -} - -static void -print_irq_lock_scenario(struct lock_list *safe_entry, - struct lock_list *unsafe_entry, - struct lock_class *prev_class, - struct lock_class *next_class) -{ - struct lock_class *safe_class = safe_entry->class; - struct lock_class *unsafe_class = unsafe_entry->class; - struct lock_class *middle_class = prev_class; - - if (middle_class == safe_class) - middle_class = next_class; - - /* - * A direct locking problem where unsafe_class lock is taken - * directly by safe_class lock, then all we need to show - * is the deadlock scenario, as it is obvious that the - * unsafe lock is taken under the safe lock. - * - * But if there is a chain instead, where the safe lock takes - * an intermediate lock (middle_class) where this lock is - * not the same as the safe lock, then the lock chain is - * used to describe the problem. Otherwise we would need - * to show a different CPU case for each link in the chain - * from the safe_class lock to the unsafe_class lock. - */ - if (middle_class != unsafe_class) { - printk("Chain exists of:\n "); - __print_lock_name(safe_class); - printk(" --> "); - __print_lock_name(middle_class); - printk(" --> "); - __print_lock_name(unsafe_class); - printk("\n\n"); - } - - printk(" Possible interrupt unsafe locking scenario:\n\n"); - printk(" CPU0 CPU1\n"); - printk(" ---- ----\n"); - printk(" lock("); - __print_lock_name(unsafe_class); - printk(");\n"); - printk(" local_irq_disable();\n"); - printk(" lock("); - __print_lock_name(safe_class); - printk(");\n"); - printk(" lock("); - __print_lock_name(middle_class); - printk(");\n"); - printk(" \n"); - printk(" lock("); - __print_lock_name(safe_class); - printk(");\n"); - printk("\n *** DEADLOCK ***\n\n"); -} - -static int -print_bad_irq_dependency(struct task_struct *curr, - struct lock_list *prev_root, - struct lock_list *next_root, - struct lock_list *backwards_entry, - struct lock_list *forwards_entry, - struct held_lock *prev, - struct held_lock *next, - enum lock_usage_bit bit1, - enum lock_usage_bit bit2, - const char *irqclass) -{ - if (!debug_locks_off_graph_unlock() || debug_locks_silent) - return 0; - - printk("\n"); - printk("======================================================\n"); - printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", - irqclass, irqclass); - print_kernel_ident(); - printk("------------------------------------------------------\n"); - printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", - curr->comm, task_pid_nr(curr), - curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, - curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, - curr->hardirqs_enabled, - curr->softirqs_enabled); - print_lock(next); - - printk("\nand this task is already holding:\n"); - print_lock(prev); - printk("which would create a new lock dependency:\n"); - print_lock_name(hlock_class(prev)); - printk(" ->"); - print_lock_name(hlock_class(next)); - printk("\n"); - - printk("\nbut this new dependency connects a %s-irq-safe lock:\n", - irqclass); - print_lock_name(backwards_entry->class); - printk("\n... which became %s-irq-safe at:\n", irqclass); - - print_stack_trace(backwards_entry->class->usage_traces + bit1, 1); - - printk("\nto a %s-irq-unsafe lock:\n", irqclass); - print_lock_name(forwards_entry->class); - printk("\n... which became %s-irq-unsafe at:\n", irqclass); - printk("..."); - - print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); - - printk("\nother info that might help us debug this:\n\n"); - print_irq_lock_scenario(backwards_entry, forwards_entry, - hlock_class(prev), hlock_class(next)); - - lockdep_print_held_locks(curr); - - printk("\nthe dependencies between %s-irq-safe lock", irqclass); - printk(" and the holding lock:\n"); - if (!save_trace(&prev_root->trace)) - return 0; - print_shortest_lock_dependencies(backwards_entry, prev_root); - - printk("\nthe dependencies between the lock to be acquired"); - printk(" and %s-irq-unsafe lock:\n", irqclass); - if (!save_trace(&next_root->trace)) - return 0; - print_shortest_lock_dependencies(forwards_entry, next_root); - - printk("\nstack backtrace:\n"); - dump_stack(); - - return 0; -} - -static int -check_usage(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, enum lock_usage_bit bit_backwards, - enum lock_usage_bit bit_forwards, const char *irqclass) -{ - int ret; - struct lock_list this, that; - struct lock_list *uninitialized_var(target_entry); - struct lock_list *uninitialized_var(target_entry1); - - this.parent = NULL; - - this.class = hlock_class(prev); - ret = find_usage_backwards(&this, bit_backwards, &target_entry); - if (ret < 0) - return print_bfs_bug(ret); - if (ret == 1) - return ret; - - that.parent = NULL; - that.class = hlock_class(next); - ret = find_usage_forwards(&that, bit_forwards, &target_entry1); - if (ret < 0) - return print_bfs_bug(ret); - if (ret == 1) - return ret; - - return print_bad_irq_dependency(curr, &this, &that, - target_entry, target_entry1, - prev, next, - bit_backwards, bit_forwards, irqclass); -} - -static const char *state_names[] = { -#define LOCKDEP_STATE(__STATE) \ - __stringify(__STATE), -#include "lockdep_states.h" -#undef LOCKDEP_STATE -}; - -static const char *state_rnames[] = { -#define LOCKDEP_STATE(__STATE) \ - __stringify(__STATE)"-READ", -#include "lockdep_states.h" -#undef LOCKDEP_STATE -}; - -static inline const char *state_name(enum lock_usage_bit bit) -{ - return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2]; -} - -static int exclusive_bit(int new_bit) -{ - /* - * USED_IN - * USED_IN_READ - * ENABLED - * ENABLED_READ - * - * bit 0 - write/read - * bit 1 - used_in/enabled - * bit 2+ state - */ - - int state = new_bit & ~3; - int dir = new_bit & 2; - - /* - * keep state, bit flip the direction and strip read. - */ - return state | (dir ^ 2); -} - -static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, enum lock_usage_bit bit) -{ - /* - * Prove that the new dependency does not connect a hardirq-safe - * lock with a hardirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at , and the - * forwards-subgraph starting at : - */ - if (!check_usage(curr, prev, next, bit, - exclusive_bit(bit), state_name(bit))) - return 0; - - bit++; /* _READ */ - - /* - * Prove that the new dependency does not connect a hardirq-safe-read - * lock with a hardirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at , and the - * forwards-subgraph starting at : - */ - if (!check_usage(curr, prev, next, bit, - exclusive_bit(bit), state_name(bit))) - return 0; - - return 1; -} - -static int -check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next) -{ -#define LOCKDEP_STATE(__STATE) \ - if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \ - return 0; -#include "lockdep_states.h" -#undef LOCKDEP_STATE - - return 1; -} - -static void inc_chains(void) -{ - if (current->hardirq_context) - nr_hardirq_chains++; - else { - if (current->softirq_context) - nr_softirq_chains++; - else - nr_process_chains++; - } -} - -#else - -static inline int -check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next) -{ - return 1; -} - -static inline void inc_chains(void) -{ - nr_process_chains++; -} - -#endif - -static void -print_deadlock_scenario(struct held_lock *nxt, - struct held_lock *prv) -{ - struct lock_class *next = hlock_class(nxt); - struct lock_class *prev = hlock_class(prv); - - printk(" Possible unsafe locking scenario:\n\n"); - printk(" CPU0\n"); - printk(" ----\n"); - printk(" lock("); - __print_lock_name(prev); - printk(");\n"); - printk(" lock("); - __print_lock_name(next); - printk(");\n"); - printk("\n *** DEADLOCK ***\n\n"); - printk(" May be due to missing lock nesting notation\n\n"); -} - -static int -print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next) -{ - if (!debug_locks_off_graph_unlock() || debug_locks_silent) - return 0; - - printk("\n"); - printk("=============================================\n"); - printk("[ INFO: possible recursive locking detected ]\n"); - print_kernel_ident(); - printk("---------------------------------------------\n"); - printk("%s/%d is trying to acquire lock:\n", - curr->comm, task_pid_nr(curr)); - print_lock(next); - printk("\nbut task is already holding lock:\n"); - print_lock(prev); - - printk("\nother info that might help us debug this:\n"); - print_deadlock_scenario(next, prev); - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); - - return 0; -} - -/* - * Check whether we are holding such a class already. - * - * (Note that this has to be done separately, because the graph cannot - * detect such classes of deadlocks.) - * - * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read - */ -static int -check_deadlock(struct task_struct *curr, struct held_lock *next, - struct lockdep_map *next_instance, int read) -{ - struct held_lock *prev; - struct held_lock *nest = NULL; - int i; - - for (i = 0; i < curr->lockdep_depth; i++) { - prev = curr->held_locks + i; - - if (prev->instance == next->nest_lock) - nest = prev; - - if (hlock_class(prev) != hlock_class(next)) - continue; - - /* - * Allow read-after-read recursion of the same - * lock class (i.e. read_lock(lock)+read_lock(lock)): - */ - if ((read == 2) && prev->read) - return 2; - - /* - * We're holding the nest_lock, which serializes this lock's - * nesting behaviour. - */ - if (nest) - return 2; - - return print_deadlock_bug(curr, prev, next); - } - return 1; -} - -/* - * There was a chain-cache miss, and we are about to add a new dependency - * to a previous lock. We recursively validate the following rules: - * - * - would the adding of the -> dependency create a - * circular dependency in the graph? [== circular deadlock] - * - * - does the new prev->next dependency connect any hardirq-safe lock - * (in the full backwards-subgraph starting at ) with any - * hardirq-unsafe lock (in the full forwards-subgraph starting at - * )? [== illegal lock inversion with hardirq contexts] - * - * - does the new prev->next dependency connect any softirq-safe lock - * (in the full backwards-subgraph starting at ) with any - * softirq-unsafe lock (in the full forwards-subgraph starting at - * )? [== illegal lock inversion with softirq contexts] - * - * any of these scenarios could lead to a deadlock. - * - * Then if all the validations pass, we add the forwards and backwards - * dependency. - */ -static int -check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance, int trylock_loop) -{ - struct lock_list *entry; - int ret; - struct lock_list this; - struct lock_list *uninitialized_var(target_entry); - /* - * Static variable, serialized by the graph_lock(). - * - * We use this static variable to save the stack trace in case - * we call into this function multiple times due to encountering - * trylocks in the held lock stack. - */ - static struct stack_trace trace; - - /* - * Prove that the new -> dependency would not - * create a circular dependency in the graph. (We do this by - * forward-recursing into the graph starting at , and - * checking whether we can reach .) - * - * We are using global variables to control the recursion, to - * keep the stackframe size of the recursive functions low: - */ - this.class = hlock_class(next); - this.parent = NULL; - ret = check_noncircular(&this, hlock_class(prev), &target_entry); - if (unlikely(!ret)) - return print_circular_bug(&this, target_entry, next, prev); - else if (unlikely(ret < 0)) - return print_bfs_bug(ret); - - if (!check_prev_add_irq(curr, prev, next)) - return 0; - - /* - * For recursive read-locks we do all the dependency checks, - * but we dont store read-triggered dependencies (only - * write-triggered dependencies). This ensures that only the - * write-side dependencies matter, and that if for example a - * write-lock never takes any other locks, then the reads are - * equivalent to a NOP. - */ - if (next->read == 2 || prev->read == 2) - return 1; - /* - * Is the -> dependency already present? - * - * (this may occur even though this is a new chain: consider - * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3 - * chains - the second one will be new, but L1 already has - * L2 added to its dependency list, due to the first chain.) - */ - list_for_each_entry(entry, &hlock_class(prev)->locks_after, entry) { - if (entry->class == hlock_class(next)) { - if (distance == 1) - entry->distance = 1; - return 2; - } - } - - if (!trylock_loop && !save_trace(&trace)) - return 0; - - /* - * Ok, all validations passed, add the new lock - * to the previous lock's dependency list: - */ - ret = add_lock_to_list(hlock_class(prev), hlock_class(next), - &hlock_class(prev)->locks_after, - next->acquire_ip, distance, &trace); - - if (!ret) - return 0; - - ret = add_lock_to_list(hlock_class(next), hlock_class(prev), - &hlock_class(next)->locks_before, - next->acquire_ip, distance, &trace); - if (!ret) - return 0; - - /* - * Debugging printouts: - */ - if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { - graph_unlock(); - printk("\n new dependency: "); - print_lock_name(hlock_class(prev)); - printk(" => "); - print_lock_name(hlock_class(next)); - printk("\n"); - dump_stack(); - return graph_lock(); - } - return 1; -} - -/* - * Add the dependency to all directly-previous locks that are 'relevant'. - * The ones that are relevant are (in increasing distance from curr): - * all consecutive trylock entries and the final non-trylock entry - or - * the end of this context's lock-chain - whichever comes first. - */ -static int -check_prevs_add(struct task_struct *curr, struct held_lock *next) -{ - int depth = curr->lockdep_depth; - int trylock_loop = 0; - struct held_lock *hlock; - - /* - * Debugging checks. - * - * Depth must not be zero for a non-head lock: - */ - if (!depth) - goto out_bug; - /* - * At least two relevant locks must exist for this - * to be a head: - */ - if (curr->held_locks[depth].irq_context != - curr->held_locks[depth-1].irq_context) - goto out_bug; - - for (;;) { - int distance = curr->lockdep_depth - depth + 1; - hlock = curr->held_locks + depth-1; - /* - * Only non-recursive-read entries get new dependencies - * added: - */ - if (hlock->read != 2) { - if (!check_prev_add(curr, hlock, next, - distance, trylock_loop)) - return 0; - /* - * Stop after the first non-trylock entry, - * as non-trylock entries have added their - * own direct dependencies already, so this - * lock is connected to them indirectly: - */ - if (!hlock->trylock) - break; - } - depth--; - /* - * End of lock-stack? - */ - if (!depth) - break; - /* - * Stop the search if we cross into another context: - */ - if (curr->held_locks[depth].irq_context != - curr->held_locks[depth-1].irq_context) - break; - trylock_loop = 1; - } - return 1; -out_bug: - if (!debug_locks_off_graph_unlock()) - return 0; - - /* - * Clearly we all shouldn't be here, but since we made it we - * can reliable say we messed up our state. See the above two - * gotos for reasons why we could possibly end up here. - */ - WARN_ON(1); - - return 0; -} - -unsigned long nr_lock_chains; -struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; -int nr_chain_hlocks; -static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS]; - -struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i) -{ - return lock_classes + chain_hlocks[chain->base + i]; -} - -/* - * Look up a dependency chain. If the key is not present yet then - * add it and return 1 - in this case the new dependency chain is - * validated. If the key is already hashed, return 0. - * (On return with 1 graph_lock is held.) - */ -static inline int lookup_chain_cache(struct task_struct *curr, - struct held_lock *hlock, - u64 chain_key) -{ - struct lock_class *class = hlock_class(hlock); - struct list_head *hash_head = chainhashentry(chain_key); - struct lock_chain *chain; - struct held_lock *hlock_curr; - int i, j; - - /* - * We might need to take the graph lock, ensure we've got IRQs - * disabled to make this an IRQ-safe lock.. for recursion reasons - * lockdep won't complain about its own locking errors. - */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) - return 0; - /* - * We can walk it lock-free, because entries only get added - * to the hash: - */ - list_for_each_entry(chain, hash_head, entry) { - if (chain->chain_key == chain_key) { -cache_hit: - debug_atomic_inc(chain_lookup_hits); - if (very_verbose(class)) - printk("\nhash chain already cached, key: " - "%016Lx tail class: [%p] %s\n", - (unsigned long long)chain_key, - class->key, class->name); - return 0; - } - } - if (very_verbose(class)) - printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", - (unsigned long long)chain_key, class->key, class->name); - /* - * Allocate a new chain entry from the static array, and add - * it to the hash: - */ - if (!graph_lock()) - return 0; - /* - * We have to walk the chain again locked - to avoid duplicates: - */ - list_for_each_entry(chain, hash_head, entry) { - if (chain->chain_key == chain_key) { - graph_unlock(); - goto cache_hit; - } - } - if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { - if (!debug_locks_off_graph_unlock()) - return 0; - - print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); - dump_stack(); - return 0; - } - chain = lock_chains + nr_lock_chains++; - chain->chain_key = chain_key; - chain->irq_context = hlock->irq_context; - /* Find the first held_lock of current chain */ - for (i = curr->lockdep_depth - 1; i >= 0; i--) { - hlock_curr = curr->held_locks + i; - if (hlock_curr->irq_context != hlock->irq_context) - break; - } - i++; - chain->depth = curr->lockdep_depth + 1 - i; - if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { - chain->base = nr_chain_hlocks; - nr_chain_hlocks += chain->depth; - for (j = 0; j < chain->depth - 1; j++, i++) { - int lock_id = curr->held_locks[i].class_idx - 1; - chain_hlocks[chain->base + j] = lock_id; - } - chain_hlocks[chain->base + j] = class - lock_classes; - } - list_add_tail_rcu(&chain->entry, hash_head); - debug_atomic_inc(chain_lookup_misses); - inc_chains(); - - return 1; -} - -static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, - struct held_lock *hlock, int chain_head, u64 chain_key) -{ - /* - * Trylock needs to maintain the stack of held locks, but it - * does not add new dependencies, because trylock can be done - * in any order. - * - * We look up the chain_key and do the O(N^2) check and update of - * the dependencies only if this is a new dependency chain. - * (If lookup_chain_cache() returns with 1 it acquires - * graph_lock for us) - */ - if (!hlock->trylock && (hlock->check == 2) && - lookup_chain_cache(curr, hlock, chain_key)) { - /* - * Check whether last held lock: - * - * - is irq-safe, if this lock is irq-unsafe - * - is softirq-safe, if this lock is hardirq-unsafe - * - * And check whether the new lock's dependency graph - * could lead back to the previous lock. - * - * any of these scenarios could lead to a deadlock. If - * All validations - */ - int ret = check_deadlock(curr, hlock, lock, hlock->read); - - if (!ret) - return 0; - /* - * Mark recursive read, as we jump over it when - * building dependencies (just like we jump over - * trylock entries): - */ - if (ret == 2) - hlock->read = 2; - /* - * Add dependency only if this lock is not the head - * of the chain, and if it's not a secondary read-lock: - */ - if (!chain_head && ret != 2) - if (!check_prevs_add(curr, hlock)) - return 0; - graph_unlock(); - } else - /* after lookup_chain_cache(): */ - if (unlikely(!debug_locks)) - return 0; - - return 1; -} -#else -static inline int validate_chain(struct task_struct *curr, - struct lockdep_map *lock, struct held_lock *hlock, - int chain_head, u64 chain_key) -{ - return 1; -} -#endif - -/* - * We are building curr_chain_key incrementally, so double-check - * it from scratch, to make sure that it's done correctly: - */ -static void check_chain_key(struct task_struct *curr) -{ -#ifdef CONFIG_DEBUG_LOCKDEP - struct held_lock *hlock, *prev_hlock = NULL; - unsigned int i, id; - u64 chain_key = 0; - - for (i = 0; i < curr->lockdep_depth; i++) { - hlock = curr->held_locks + i; - if (chain_key != hlock->prev_chain_key) { - debug_locks_off(); - /* - * We got mighty confused, our chain keys don't match - * with what we expect, someone trample on our task state? - */ - WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n", - curr->lockdep_depth, i, - (unsigned long long)chain_key, - (unsigned long long)hlock->prev_chain_key); - return; - } - id = hlock->class_idx - 1; - /* - * Whoops ran out of static storage again? - */ - if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) - return; - - if (prev_hlock && (prev_hlock->irq_context != - hlock->irq_context)) - chain_key = 0; - chain_key = iterate_chain_key(chain_key, id); - prev_hlock = hlock; - } - if (chain_key != curr->curr_chain_key) { - debug_locks_off(); - /* - * More smoking hash instead of calculating it, damn see these - * numbers float.. I bet that a pink elephant stepped on my memory. - */ - WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n", - curr->lockdep_depth, i, - (unsigned long long)chain_key, - (unsigned long long)curr->curr_chain_key); - } -#endif -} - -static void -print_usage_bug_scenario(struct held_lock *lock) -{ - struct lock_class *class = hlock_class(lock); - - printk(" Possible unsafe locking scenario:\n\n"); - printk(" CPU0\n"); - printk(" ----\n"); - printk(" lock("); - __print_lock_name(class); - printk(");\n"); - printk(" \n"); - printk(" lock("); - __print_lock_name(class); - printk(");\n"); - printk("\n *** DEADLOCK ***\n\n"); -} - -static int -print_usage_bug(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) -{ - if (!debug_locks_off_graph_unlock() || debug_locks_silent) - return 0; - - printk("\n"); - printk("=================================\n"); - printk("[ INFO: inconsistent lock state ]\n"); - print_kernel_ident(); - printk("---------------------------------\n"); - - printk("inconsistent {%s} -> {%s} usage.\n", - usage_str[prev_bit], usage_str[new_bit]); - - printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", - curr->comm, task_pid_nr(curr), - trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, - trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, - trace_hardirqs_enabled(curr), - trace_softirqs_enabled(curr)); - print_lock(this); - - printk("{%s} state was registered at:\n", usage_str[prev_bit]); - print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1); - - print_irqtrace_events(curr); - printk("\nother info that might help us debug this:\n"); - print_usage_bug_scenario(this); - - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); - - return 0; -} - -/* - * Print out an error if an invalid bit is set: - */ -static inline int -valid_state(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) -{ - if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) - return print_usage_bug(curr, this, bad_bit, new_bit); - return 1; -} - -static int mark_lock(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit); - -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) - -/* - * print irq inversion bug: - */ -static int -print_irq_inversion_bug(struct task_struct *curr, - struct lock_list *root, struct lock_list *other, - struct held_lock *this, int forwards, - const char *irqclass) -{ - struct lock_list *entry = other; - struct lock_list *middle = NULL; - int depth; - - if (!debug_locks_off_graph_unlock() || debug_locks_silent) - return 0; - - printk("\n"); - printk("=========================================================\n"); - printk("[ INFO: possible irq lock inversion dependency detected ]\n"); - print_kernel_ident(); - printk("---------------------------------------------------------\n"); - printk("%s/%d just changed the state of lock:\n", - curr->comm, task_pid_nr(curr)); - print_lock(this); - if (forwards) - printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass); - else - printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); - print_lock_name(other->class); - printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); - - printk("\nother info that might help us debug this:\n"); - - /* Find a middle lock (if one exists) */ - depth = get_lock_depth(other); - do { - if (depth == 0 && (entry != root)) { - printk("lockdep:%s bad path found in chain graph\n", __func__); - break; - } - middle = entry; - entry = get_lock_parent(entry); - depth--; - } while (entry && entry != root && (depth >= 0)); - if (forwards) - print_irq_lock_scenario(root, other, - middle ? middle->class : root->class, other->class); - else - print_irq_lock_scenario(other, root, - middle ? middle->class : other->class, root->class); - - lockdep_print_held_locks(curr); - - printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); - if (!save_trace(&root->trace)) - return 0; - print_shortest_lock_dependencies(other, root); - - printk("\nstack backtrace:\n"); - dump_stack(); - - return 0; -} - -/* - * Prove that in the forwards-direction subgraph starting at - * there is no lock matching : - */ -static int -check_usage_forwards(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit bit, const char *irqclass) -{ - int ret; - struct lock_list root; - struct lock_list *uninitialized_var(target_entry); - - root.parent = NULL; - root.class = hlock_class(this); - ret = find_usage_forwards(&root, bit, &target_entry); - if (ret < 0) - return print_bfs_bug(ret); - if (ret == 1) - return ret; - - return print_irq_inversion_bug(curr, &root, target_entry, - this, 1, irqclass); -} - -/* - * Prove that in the backwards-direction subgraph starting at - * there is no lock matching : - */ -static int -check_usage_backwards(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit bit, const char *irqclass) -{ - int ret; - struct lock_list root; - struct lock_list *uninitialized_var(target_entry); - - root.parent = NULL; - root.class = hlock_class(this); - ret = find_usage_backwards(&root, bit, &target_entry); - if (ret < 0) - return print_bfs_bug(ret); - if (ret == 1) - return ret; - - return print_irq_inversion_bug(curr, &root, target_entry, - this, 0, irqclass); -} - -void print_irqtrace_events(struct task_struct *curr) -{ - printk("irq event stamp: %u\n", curr->irq_events); - printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); - print_ip_sym(curr->hardirq_enable_ip); - printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event); - print_ip_sym(curr->hardirq_disable_ip); - printk("softirqs last enabled at (%u): ", curr->softirq_enable_event); - print_ip_sym(curr->softirq_enable_ip); - printk("softirqs last disabled at (%u): ", curr->softirq_disable_event); - print_ip_sym(curr->softirq_disable_ip); -} - -static int HARDIRQ_verbose(struct lock_class *class) -{ -#if HARDIRQ_VERBOSE - return class_filter(class); -#endif - return 0; -} - -static int SOFTIRQ_verbose(struct lock_class *class) -{ -#if SOFTIRQ_VERBOSE - return class_filter(class); -#endif - return 0; -} - -static int RECLAIM_FS_verbose(struct lock_class *class) -{ -#if RECLAIM_VERBOSE - return class_filter(class); -#endif - return 0; -} - -#define STRICT_READ_CHECKS 1 - -static int (*state_verbose_f[])(struct lock_class *class) = { -#define LOCKDEP_STATE(__STATE) \ - __STATE##_verbose, -#include "lockdep_states.h" -#undef LOCKDEP_STATE -}; - -static inline int state_verbose(enum lock_usage_bit bit, - struct lock_class *class) -{ - return state_verbose_f[bit >> 2](class); -} - -typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, - enum lock_usage_bit bit, const char *name); - -static int -mark_lock_irq(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit) -{ - int excl_bit = exclusive_bit(new_bit); - int read = new_bit & 1; - int dir = new_bit & 2; - - /* - * mark USED_IN has to look forwards -- to ensure no dependency - * has ENABLED state, which would allow recursion deadlocks. - * - * mark ENABLED has to look backwards -- to ensure no dependee - * has USED_IN state, which, again, would allow recursion deadlocks. - */ - check_usage_f usage = dir ? - check_usage_backwards : check_usage_forwards; - - /* - * Validate that this particular lock does not have conflicting - * usage states. - */ - if (!valid_state(curr, this, new_bit, excl_bit)) - return 0; - - /* - * Validate that the lock dependencies don't have conflicting usage - * states. - */ - if ((!read || !dir || STRICT_READ_CHECKS) && - !usage(curr, this, excl_bit, state_name(new_bit & ~1))) - return 0; - - /* - * Check for read in write conflicts - */ - if (!read) { - if (!valid_state(curr, this, new_bit, excl_bit + 1)) - return 0; - - if (STRICT_READ_CHECKS && - !usage(curr, this, excl_bit + 1, - state_name(new_bit + 1))) - return 0; - } - - if (state_verbose(new_bit, hlock_class(this))) - return 2; - - return 1; -} - -enum mark_type { -#define LOCKDEP_STATE(__STATE) __STATE, -#include "lockdep_states.h" -#undef LOCKDEP_STATE -}; - -/* - * Mark all held locks with a usage bit: - */ -static int -mark_held_locks(struct task_struct *curr, enum mark_type mark) -{ - enum lock_usage_bit usage_bit; - struct held_lock *hlock; - int i; - - for (i = 0; i < curr->lockdep_depth; i++) { - hlock = curr->held_locks + i; - - usage_bit = 2 + (mark << 2); /* ENABLED */ - if (hlock->read) - usage_bit += 1; /* READ */ - - BUG_ON(usage_bit >= LOCK_USAGE_STATES); - - if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) - continue; - - if (!mark_lock(curr, hlock, usage_bit)) - return 0; - } - - return 1; -} - -/* - * Hardirqs will be enabled: - */ -static void __trace_hardirqs_on_caller(unsigned long ip) -{ - struct task_struct *curr = current; - - /* we'll do an OFF -> ON transition: */ - curr->hardirqs_enabled = 1; - - /* - * We are going to turn hardirqs on, so set the - * usage bit for all held locks: - */ - if (!mark_held_locks(curr, HARDIRQ)) - return; - /* - * If we have softirqs enabled, then set the usage - * bit for all held locks. (disabled hardirqs prevented - * this bit from being set before) - */ - if (curr->softirqs_enabled) - if (!mark_held_locks(curr, SOFTIRQ)) - return; - - curr->hardirq_enable_ip = ip; - curr->hardirq_enable_event = ++curr->irq_events; - debug_atomic_inc(hardirqs_on_events); -} - -void trace_hardirqs_on_caller(unsigned long ip) -{ - time_hardirqs_on(CALLER_ADDR0, ip); - - if (unlikely(!debug_locks || current->lockdep_recursion)) - return; - - if (unlikely(current->hardirqs_enabled)) { - /* - * Neither irq nor preemption are disabled here - * so this is racy by nature but losing one hit - * in a stat is not a big deal. - */ - __debug_atomic_inc(redundant_hardirqs_on); - return; - } - - /* - * We're enabling irqs and according to our state above irqs weren't - * already enabled, yet we find the hardware thinks they are in fact - * enabled.. someone messed up their IRQ state tracing. - */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) - return; - - /* - * See the fine text that goes along with this variable definition. - */ - if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) - return; - - /* - * Can't allow enabling interrupts while in an interrupt handler, - * that's general bad form and such. Recursion, limited stack etc.. - */ - if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) - return; - - current->lockdep_recursion = 1; - __trace_hardirqs_on_caller(ip); - current->lockdep_recursion = 0; -} -EXPORT_SYMBOL(trace_hardirqs_on_caller); - -void trace_hardirqs_on(void) -{ - trace_hardirqs_on_caller(CALLER_ADDR0); -} -EXPORT_SYMBOL(trace_hardirqs_on); - -/* - * Hardirqs were disabled: - */ -void trace_hardirqs_off_caller(unsigned long ip) -{ - struct task_struct *curr = current; - - time_hardirqs_off(CALLER_ADDR0, ip); - - if (unlikely(!debug_locks || current->lockdep_recursion)) - return; - - /* - * So we're supposed to get called after you mask local IRQs, but for - * some reason the hardware doesn't quite think you did a proper job. - */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) - return; - - if (curr->hardirqs_enabled) { - /* - * We have done an ON -> OFF transition: - */ - curr->hardirqs_enabled = 0; - curr->hardirq_disable_ip = ip; - curr->hardirq_disable_event = ++curr->irq_events; - debug_atomic_inc(hardirqs_off_events); - } else - debug_atomic_inc(redundant_hardirqs_off); -} -EXPORT_SYMBOL(trace_hardirqs_off_caller); - -void trace_hardirqs_off(void) -{ - trace_hardirqs_off_caller(CALLER_ADDR0); -} -EXPORT_SYMBOL(trace_hardirqs_off); - -/* - * Softirqs will be enabled: - */ -void trace_softirqs_on(unsigned long ip) -{ - struct task_struct *curr = current; - - if (unlikely(!debug_locks || current->lockdep_recursion)) - return; - - /* - * We fancy IRQs being disabled here, see softirq.c, avoids - * funny state and nesting things. - */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) - return; - - if (curr->softirqs_enabled) { - debug_atomic_inc(redundant_softirqs_on); - return; - } - - current->lockdep_recursion = 1; - /* - * We'll do an OFF -> ON transition: - */ - curr->softirqs_enabled = 1; - curr->softirq_enable_ip = ip; - curr->softirq_enable_event = ++curr->irq_events; - debug_atomic_inc(softirqs_on_events); - /* - * We are going to turn softirqs on, so set the - * usage bit for all held locks, if hardirqs are - * enabled too: - */ - if (curr->hardirqs_enabled) - mark_held_locks(curr, SOFTIRQ); - current->lockdep_recursion = 0; -} - -/* - * Softirqs were disabled: - */ -void trace_softirqs_off(unsigned long ip) -{ - struct task_struct *curr = current; - - if (unlikely(!debug_locks || current->lockdep_recursion)) - return; - - /* - * We fancy IRQs being disabled here, see softirq.c - */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) - return; - - if (curr->softirqs_enabled) { - /* - * We have done an ON -> OFF transition: - */ - curr->softirqs_enabled = 0; - curr->softirq_disable_ip = ip; - curr->softirq_disable_event = ++curr->irq_events; - debug_atomic_inc(softirqs_off_events); - /* - * Whoops, we wanted softirqs off, so why aren't they? - */ - DEBUG_LOCKS_WARN_ON(!softirq_count()); - } else - debug_atomic_inc(redundant_softirqs_off); -} - -static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) -{ - struct task_struct *curr = current; - - if (unlikely(!debug_locks)) - return; - - /* no reclaim without waiting on it */ - if (!(gfp_mask & __GFP_WAIT)) - return; - - /* this guy won't enter reclaim */ - if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) - return; - - /* We're only interested __GFP_FS allocations for now */ - if (!(gfp_mask & __GFP_FS)) - return; - - /* - * Oi! Can't be having __GFP_FS allocations with IRQs disabled. - */ - if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) - return; - - mark_held_locks(curr, RECLAIM_FS); -} - -static void check_flags(unsigned long flags); - -void lockdep_trace_alloc(gfp_t gfp_mask) -{ - unsigned long flags; - - if (unlikely(current->lockdep_recursion)) - return; - - raw_local_irq_save(flags); - check_flags(flags); - current->lockdep_recursion = 1; - __lockdep_trace_alloc(gfp_mask, flags); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); -} - -static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) -{ - /* - * If non-trylock use in a hardirq or softirq context, then - * mark the lock as used in these contexts: - */ - if (!hlock->trylock) { - if (hlock->read) { - if (curr->hardirq_context) - if (!mark_lock(curr, hlock, - LOCK_USED_IN_HARDIRQ_READ)) - return 0; - if (curr->softirq_context) - if (!mark_lock(curr, hlock, - LOCK_USED_IN_SOFTIRQ_READ)) - return 0; - } else { - if (curr->hardirq_context) - if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) - return 0; - if (curr->softirq_context) - if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) - return 0; - } - } - if (!hlock->hardirqs_off) { - if (hlock->read) { - if (!mark_lock(curr, hlock, - LOCK_ENABLED_HARDIRQ_READ)) - return 0; - if (curr->softirqs_enabled) - if (!mark_lock(curr, hlock, - LOCK_ENABLED_SOFTIRQ_READ)) - return 0; - } else { - if (!mark_lock(curr, hlock, - LOCK_ENABLED_HARDIRQ)) - return 0; - if (curr->softirqs_enabled) - if (!mark_lock(curr, hlock, - LOCK_ENABLED_SOFTIRQ)) - return 0; - } - } - - /* - * We reuse the irq context infrastructure more broadly as a general - * context checking code. This tests GFP_FS recursion (a lock taken - * during reclaim for a GFP_FS allocation is held over a GFP_FS - * allocation). - */ - if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) { - if (hlock->read) { - if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ)) - return 0; - } else { - if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS)) - return 0; - } - } - - return 1; -} - -static int separate_irq_context(struct task_struct *curr, - struct held_lock *hlock) -{ - unsigned int depth = curr->lockdep_depth; - - /* - * Keep track of points where we cross into an interrupt context: - */ - hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + - curr->softirq_context; - if (depth) { - struct held_lock *prev_hlock; - - prev_hlock = curr->held_locks + depth-1; - /* - * If we cross into another context, reset the - * hash key (this also prevents the checking and the - * adding of the dependency to 'prev'): - */ - if (prev_hlock->irq_context != hlock->irq_context) - return 1; - } - return 0; -} - -#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ - -static inline -int mark_lock_irq(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit) -{ - WARN_ON(1); /* Impossible innit? when we don't have TRACE_IRQFLAG */ - return 1; -} - -static inline int mark_irqflags(struct task_struct *curr, - struct held_lock *hlock) -{ - return 1; -} - -static inline int separate_irq_context(struct task_struct *curr, - struct held_lock *hlock) -{ - return 0; -} - -void lockdep_trace_alloc(gfp_t gfp_mask) -{ -} - -#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ - -/* - * Mark a lock with a usage bit, and validate the state transition: - */ -static int mark_lock(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit) -{ - unsigned int new_mask = 1 << new_bit, ret = 1; - - /* - * If already set then do not dirty the cacheline, - * nor do any checks: - */ - if (likely(hlock_class(this)->usage_mask & new_mask)) - return 1; - - if (!graph_lock()) - return 0; - /* - * Make sure we didn't race: - */ - if (unlikely(hlock_class(this)->usage_mask & new_mask)) { - graph_unlock(); - return 1; - } - - hlock_class(this)->usage_mask |= new_mask; - - if (!save_trace(hlock_class(this)->usage_traces + new_bit)) - return 0; - - switch (new_bit) { -#define LOCKDEP_STATE(__STATE) \ - case LOCK_USED_IN_##__STATE: \ - case LOCK_USED_IN_##__STATE##_READ: \ - case LOCK_ENABLED_##__STATE: \ - case LOCK_ENABLED_##__STATE##_READ: -#include "lockdep_states.h" -#undef LOCKDEP_STATE - ret = mark_lock_irq(curr, this, new_bit); - if (!ret) - return 0; - break; - case LOCK_USED: - debug_atomic_dec(nr_unused_locks); - break; - default: - if (!debug_locks_off_graph_unlock()) - return 0; - WARN_ON(1); - return 0; - } - - graph_unlock(); - - /* - * We must printk outside of the graph_lock: - */ - if (ret == 2) { - printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); - print_lock(this); - print_irqtrace_events(curr); - dump_stack(); - } - - return ret; -} - -/* - * Initialize a lock instance's lock-class mapping info: - */ -void lockdep_init_map(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, int subclass) -{ - int i; - - kmemcheck_mark_initialized(lock, sizeof(*lock)); - - for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) - lock->class_cache[i] = NULL; - -#ifdef CONFIG_LOCK_STAT - lock->cpu = raw_smp_processor_id(); -#endif - - /* - * Can't be having no nameless bastards around this place! - */ - if (DEBUG_LOCKS_WARN_ON(!name)) { - lock->name = "NULL"; - return; - } - - lock->name = name; - - /* - * No key, no joy, we need to hash something. - */ - if (DEBUG_LOCKS_WARN_ON(!key)) - return; - /* - * Sanity check, the lock-class key must be persistent: - */ - if (!static_obj(key)) { - printk("BUG: key %p not in .data!\n", key); - /* - * What it says above ^^^^^, I suggest you read it. - */ - DEBUG_LOCKS_WARN_ON(1); - return; - } - lock->key = key; - - if (unlikely(!debug_locks)) - return; - - if (subclass) - register_lock_class(lock, subclass, 1); -} -EXPORT_SYMBOL_GPL(lockdep_init_map); - -struct lock_class_key __lockdep_no_validate__; -EXPORT_SYMBOL_GPL(__lockdep_no_validate__); - -static int -print_lock_nested_lock_not_held(struct task_struct *curr, - struct held_lock *hlock, - unsigned long ip) -{ - if (!debug_locks_off()) - return 0; - if (debug_locks_silent) - return 0; - - printk("\n"); - printk("==================================\n"); - printk("[ BUG: Nested lock was not taken ]\n"); - print_kernel_ident(); - printk("----------------------------------\n"); - - printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); - print_lock(hlock); - - printk("\nbut this task is not holding:\n"); - printk("%s\n", hlock->nest_lock->name); - - printk("\nstack backtrace:\n"); - dump_stack(); - - printk("\nother info that might help us debug this:\n"); - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); - - return 0; -} - -static int __lock_is_held(struct lockdep_map *lock); - -/* - * This gets called for every mutex_lock*()/spin_lock*() operation. - * We maintain the dependency maps and validate the locking attempt: - */ -static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, int hardirqs_off, - struct lockdep_map *nest_lock, unsigned long ip, - int references) -{ - struct task_struct *curr = current; - struct lock_class *class = NULL; - struct held_lock *hlock; - unsigned int depth, id; - int chain_head = 0; - int class_idx; - u64 chain_key; - - if (!prove_locking) - check = 1; - - if (unlikely(!debug_locks)) - return 0; - - /* - * Lockdep should run with IRQs disabled, otherwise we could - * get an interrupt which would want to take locks, which would - * end up in lockdep and have you got a head-ache already? - */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) - return 0; - - if (lock->key == &__lockdep_no_validate__) - check = 1; - - if (subclass < NR_LOCKDEP_CACHING_CLASSES) - class = lock->class_cache[subclass]; - /* - * Not cached? - */ - if (unlikely(!class)) { - class = register_lock_class(lock, subclass, 0); - if (!class) - return 0; - } - atomic_inc((atomic_t *)&class->ops); - if (very_verbose(class)) { - printk("\nacquire class [%p] %s", class->key, class->name); - if (class->name_version > 1) - printk("#%d", class->name_version); - printk("\n"); - dump_stack(); - } - - /* - * Add the lock to the list of currently held locks. - * (we dont increase the depth just yet, up until the - * dependency checks are done) - */ - depth = curr->lockdep_depth; - /* - * Ran out of static storage for our per-task lock stack again have we? - */ - if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) - return 0; - - class_idx = class - lock_classes + 1; - - if (depth) { - hlock = curr->held_locks + depth - 1; - if (hlock->class_idx == class_idx && nest_lock) { - if (hlock->references) - hlock->references++; - else - hlock->references = 2; - - return 1; - } - } - - hlock = curr->held_locks + depth; - /* - * Plain impossible, we just registered it and checked it weren't no - * NULL like.. I bet this mushroom I ate was good! - */ - if (DEBUG_LOCKS_WARN_ON(!class)) - return 0; - hlock->class_idx = class_idx; - hlock->acquire_ip = ip; - hlock->instance = lock; - hlock->nest_lock = nest_lock; - hlock->trylock = trylock; - hlock->read = read; - hlock->check = check; - hlock->hardirqs_off = !!hardirqs_off; - hlock->references = references; -#ifdef CONFIG_LOCK_STAT - hlock->waittime_stamp = 0; - hlock->holdtime_stamp = lockstat_clock(); -#endif - - if (check == 2 && !mark_irqflags(curr, hlock)) - return 0; - - /* mark it as used: */ - if (!mark_lock(curr, hlock, LOCK_USED)) - return 0; - - /* - * Calculate the chain hash: it's the combined hash of all the - * lock keys along the dependency chain. We save the hash value - * at every step so that we can get the current hash easily - * after unlock. The chain hash is then used to cache dependency - * results. - * - * The 'key ID' is what is the most compact key value to drive - * the hash, not class->key. - */ - id = class - lock_classes; - /* - * Whoops, we did it again.. ran straight out of our static allocation. - */ - if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) - return 0; - - chain_key = curr->curr_chain_key; - if (!depth) { - /* - * How can we have a chain hash when we ain't got no keys?! - */ - if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) - return 0; - chain_head = 1; - } - - hlock->prev_chain_key = chain_key; - if (separate_irq_context(curr, hlock)) { - chain_key = 0; - chain_head = 1; - } - chain_key = iterate_chain_key(chain_key, id); - - if (nest_lock && !__lock_is_held(nest_lock)) - return print_lock_nested_lock_not_held(curr, hlock, ip); - - if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) - return 0; - - curr->curr_chain_key = chain_key; - curr->lockdep_depth++; - check_chain_key(curr); -#ifdef CONFIG_DEBUG_LOCKDEP - if (unlikely(!debug_locks)) - return 0; -#endif - if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { - debug_locks_off(); - print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!"); - printk(KERN_DEBUG "depth: %i max: %lu!\n", - curr->lockdep_depth, MAX_LOCK_DEPTH); - - lockdep_print_held_locks(current); - debug_show_all_locks(); - dump_stack(); - - return 0; - } - - if (unlikely(curr->lockdep_depth > max_lockdep_depth)) - max_lockdep_depth = curr->lockdep_depth; - - return 1; -} - -static int -print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, - unsigned long ip) -{ - if (!debug_locks_off()) - return 0; - if (debug_locks_silent) - return 0; - - printk("\n"); - printk("=====================================\n"); - printk("[ BUG: bad unlock balance detected! ]\n"); - print_kernel_ident(); - printk("-------------------------------------\n"); - printk("%s/%d is trying to release lock (", - curr->comm, task_pid_nr(curr)); - print_lockdep_cache(lock); - printk(") at:\n"); - print_ip_sym(ip); - printk("but there are no more locks to release!\n"); - printk("\nother info that might help us debug this:\n"); - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); - - return 0; -} - -/* - * Common debugging checks for both nested and non-nested unlock: - */ -static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, - unsigned long ip) -{ - if (unlikely(!debug_locks)) - return 0; - /* - * Lockdep should run with IRQs disabled, recursion, head-ache, etc.. - */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) - return 0; - - if (curr->lockdep_depth <= 0) - return print_unlock_imbalance_bug(curr, lock, ip); - - return 1; -} - -static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) -{ - if (hlock->instance == lock) - return 1; - - if (hlock->references) { - struct lock_class *class = lock->class_cache[0]; - - if (!class) - class = look_up_lock_class(lock, 0); - - /* - * If look_up_lock_class() failed to find a class, we're trying - * to test if we hold a lock that has never yet been acquired. - * Clearly if the lock hasn't been acquired _ever_, we're not - * holding it either, so report failure. - */ - if (!class) - return 0; - - /* - * References, but not a lock we're actually ref-counting? - * State got messed up, follow the sites that change ->references - * and try to make sense of it. - */ - if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) - return 0; - - if (hlock->class_idx == class - lock_classes + 1) - return 1; - } - - return 0; -} - -static int -__lock_set_class(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, unsigned int subclass, - unsigned long ip) -{ - struct task_struct *curr = current; - struct held_lock *hlock, *prev_hlock; - struct lock_class *class; - unsigned int depth; - int i; - - depth = curr->lockdep_depth; - /* - * This function is about (re)setting the class of a held lock, - * yet we're not actually holding any locks. Naughty user! - */ - if (DEBUG_LOCKS_WARN_ON(!depth)) - return 0; - - prev_hlock = NULL; - for (i = depth-1; i >= 0; i--) { - hlock = curr->held_locks + i; - /* - * We must not cross into another context: - */ - if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) - break; - if (match_held_lock(hlock, lock)) - goto found_it; - prev_hlock = hlock; - } - return print_unlock_imbalance_bug(curr, lock, ip); - -found_it: - lockdep_init_map(lock, name, key, 0); - class = register_lock_class(lock, subclass, 0); - hlock->class_idx = class - lock_classes + 1; - - curr->lockdep_depth = i; - curr->curr_chain_key = hlock->prev_chain_key; - - for (; i < depth; i++) { - hlock = curr->held_locks + i; - if (!__lock_acquire(hlock->instance, - hlock_class(hlock)->subclass, hlock->trylock, - hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip, - hlock->references)) - return 0; - } - - /* - * I took it apart and put it back together again, except now I have - * these 'spare' parts.. where shall I put them. - */ - if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth)) - return 0; - return 1; -} - -/* - * Remove the lock to the list of currently held locks in a - * potentially non-nested (out of order) manner. This is a - * relatively rare operation, as all the unlock APIs default - * to nested mode (which uses lock_release()): - */ -static int -lock_release_non_nested(struct task_struct *curr, - struct lockdep_map *lock, unsigned long ip) -{ - struct held_lock *hlock, *prev_hlock; - unsigned int depth; - int i; - - /* - * Check whether the lock exists in the current stack - * of held locks: - */ - depth = curr->lockdep_depth; - /* - * So we're all set to release this lock.. wait what lock? We don't - * own any locks, you've been drinking again? - */ - if (DEBUG_LOCKS_WARN_ON(!depth)) - return 0; - - prev_hlock = NULL; - for (i = depth-1; i >= 0; i--) { - hlock = curr->held_locks + i; - /* - * We must not cross into another context: - */ - if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) - break; - if (match_held_lock(hlock, lock)) - goto found_it; - prev_hlock = hlock; - } - return print_unlock_imbalance_bug(curr, lock, ip); - -found_it: - if (hlock->instance == lock) - lock_release_holdtime(hlock); - - if (hlock->references) { - hlock->references--; - if (hlock->references) { - /* - * We had, and after removing one, still have - * references, the current lock stack is still - * valid. We're done! - */ - return 1; - } - } - - /* - * We have the right lock to unlock, 'hlock' points to it. - * Now we remove it from the stack, and add back the other - * entries (if any), recalculating the hash along the way: - */ - - curr->lockdep_depth = i; - curr->curr_chain_key = hlock->prev_chain_key; - - for (i++; i < depth; i++) { - hlock = curr->held_locks + i; - if (!__lock_acquire(hlock->instance, - hlock_class(hlock)->subclass, hlock->trylock, - hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip, - hlock->references)) - return 0; - } - - /* - * We had N bottles of beer on the wall, we drank one, but now - * there's not N-1 bottles of beer left on the wall... - */ - if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) - return 0; - return 1; -} - -/* - * Remove the lock to the list of currently held locks - this gets - * called on mutex_unlock()/spin_unlock*() (or on a failed - * mutex_lock_interruptible()). This is done for unlocks that nest - * perfectly. (i.e. the current top of the lock-stack is unlocked) - */ -static int lock_release_nested(struct task_struct *curr, - struct lockdep_map *lock, unsigned long ip) -{ - struct held_lock *hlock; - unsigned int depth; - - /* - * Pop off the top of the lock stack: - */ - depth = curr->lockdep_depth - 1; - hlock = curr->held_locks + depth; - - /* - * Is the unlock non-nested: - */ - if (hlock->instance != lock || hlock->references) - return lock_release_non_nested(curr, lock, ip); - curr->lockdep_depth--; - - /* - * No more locks, but somehow we've got hash left over, who left it? - */ - if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0))) - return 0; - - curr->curr_chain_key = hlock->prev_chain_key; - - lock_release_holdtime(hlock); - -#ifdef CONFIG_DEBUG_LOCKDEP - hlock->prev_chain_key = 0; - hlock->class_idx = 0; - hlock->acquire_ip = 0; - hlock->irq_context = 0; -#endif - return 1; -} - -/* - * Remove the lock to the list of currently held locks - this gets - * called on mutex_unlock()/spin_unlock*() (or on a failed - * mutex_lock_interruptible()). This is done for unlocks that nest - * perfectly. (i.e. the current top of the lock-stack is unlocked) - */ -static void -__lock_release(struct lockdep_map *lock, int nested, unsigned long ip) -{ - struct task_struct *curr = current; - - if (!check_unlock(curr, lock, ip)) - return; - - if (nested) { - if (!lock_release_nested(curr, lock, ip)) - return; - } else { - if (!lock_release_non_nested(curr, lock, ip)) - return; - } - - check_chain_key(curr); -} - -static int __lock_is_held(struct lockdep_map *lock) -{ - struct task_struct *curr = current; - int i; - - for (i = 0; i < curr->lockdep_depth; i++) { - struct held_lock *hlock = curr->held_locks + i; - - if (match_held_lock(hlock, lock)) - return 1; - } - - return 0; -} - -/* - * Check whether we follow the irq-flags state precisely: - */ -static void check_flags(unsigned long flags) -{ -#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) && \ - defined(CONFIG_TRACE_IRQFLAGS) - if (!debug_locks) - return; - - if (irqs_disabled_flags(flags)) { - if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) { - printk("possible reason: unannotated irqs-off.\n"); - } - } else { - if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) { - printk("possible reason: unannotated irqs-on.\n"); - } - } - - /* - * We dont accurately track softirq state in e.g. - * hardirq contexts (such as on 4KSTACKS), so only - * check if not in hardirq contexts: - */ - if (!hardirq_count()) { - if (softirq_count()) { - /* like the above, but with softirqs */ - DEBUG_LOCKS_WARN_ON(current->softirqs_enabled); - } else { - /* lick the above, does it taste good? */ - DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); - } - } - - if (!debug_locks) - print_irqtrace_events(current); -#endif -} - -void lock_set_class(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, unsigned int subclass, - unsigned long ip) -{ - unsigned long flags; - - if (unlikely(current->lockdep_recursion)) - return; - - raw_local_irq_save(flags); - current->lockdep_recursion = 1; - check_flags(flags); - if (__lock_set_class(lock, name, key, subclass, ip)) - check_chain_key(current); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(lock_set_class); - -/* - * We are not always called with irqs disabled - do that here, - * and also avoid lockdep recursion: - */ -void lock_acquire(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *nest_lock, unsigned long ip) -{ - unsigned long flags; - - if (unlikely(current->lockdep_recursion)) - return; - - raw_local_irq_save(flags); - check_flags(flags); - - current->lockdep_recursion = 1; - trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); - __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip, 0); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(lock_acquire); - -void lock_release(struct lockdep_map *lock, int nested, - unsigned long ip) -{ - unsigned long flags; - - if (unlikely(current->lockdep_recursion)) - return; - - raw_local_irq_save(flags); - check_flags(flags); - current->lockdep_recursion = 1; - trace_lock_release(lock, ip); - __lock_release(lock, nested, ip); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(lock_release); - -int lock_is_held(struct lockdep_map *lock) -{ - unsigned long flags; - int ret = 0; - - if (unlikely(current->lockdep_recursion)) - return 1; /* avoid false negative lockdep_assert_held() */ - - raw_local_irq_save(flags); - check_flags(flags); - - current->lockdep_recursion = 1; - ret = __lock_is_held(lock); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); - - return ret; -} -EXPORT_SYMBOL_GPL(lock_is_held); - -void lockdep_set_current_reclaim_state(gfp_t gfp_mask) -{ - current->lockdep_reclaim_gfp = gfp_mask; -} - -void lockdep_clear_current_reclaim_state(void) -{ - current->lockdep_reclaim_gfp = 0; -} - -#ifdef CONFIG_LOCK_STAT -static int -print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, - unsigned long ip) -{ - if (!debug_locks_off()) - return 0; - if (debug_locks_silent) - return 0; - - printk("\n"); - printk("=================================\n"); - printk("[ BUG: bad contention detected! ]\n"); - print_kernel_ident(); - printk("---------------------------------\n"); - printk("%s/%d is trying to contend lock (", - curr->comm, task_pid_nr(curr)); - print_lockdep_cache(lock); - printk(") at:\n"); - print_ip_sym(ip); - printk("but there are no locks held!\n"); - printk("\nother info that might help us debug this:\n"); - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); - - return 0; -} - -static void -__lock_contended(struct lockdep_map *lock, unsigned long ip) -{ - struct task_struct *curr = current; - struct held_lock *hlock, *prev_hlock; - struct lock_class_stats *stats; - unsigned int depth; - int i, contention_point, contending_point; - - depth = curr->lockdep_depth; - /* - * Whee, we contended on this lock, except it seems we're not - * actually trying to acquire anything much at all.. - */ - if (DEBUG_LOCKS_WARN_ON(!depth)) - return; - - prev_hlock = NULL; - for (i = depth-1; i >= 0; i--) { - hlock = curr->held_locks + i; - /* - * We must not cross into another context: - */ - if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) - break; - if (match_held_lock(hlock, lock)) - goto found_it; - prev_hlock = hlock; - } - print_lock_contention_bug(curr, lock, ip); - return; - -found_it: - if (hlock->instance != lock) - return; - - hlock->waittime_stamp = lockstat_clock(); - - contention_point = lock_point(hlock_class(hlock)->contention_point, ip); - contending_point = lock_point(hlock_class(hlock)->contending_point, - lock->ip); - - stats = get_lock_stats(hlock_class(hlock)); - if (contention_point < LOCKSTAT_POINTS) - stats->contention_point[contention_point]++; - if (contending_point < LOCKSTAT_POINTS) - stats->contending_point[contending_point]++; - if (lock->cpu != smp_processor_id()) - stats->bounces[bounce_contended + !!hlock->read]++; - put_lock_stats(stats); -} - -static void -__lock_acquired(struct lockdep_map *lock, unsigned long ip) -{ - struct task_struct *curr = current; - struct held_lock *hlock, *prev_hlock; - struct lock_class_stats *stats; - unsigned int depth; - u64 now, waittime = 0; - int i, cpu; - - depth = curr->lockdep_depth; - /* - * Yay, we acquired ownership of this lock we didn't try to - * acquire, how the heck did that happen? - */ - if (DEBUG_LOCKS_WARN_ON(!depth)) - return; - - prev_hlock = NULL; - for (i = depth-1; i >= 0; i--) { - hlock = curr->held_locks + i; - /* - * We must not cross into another context: - */ - if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) - break; - if (match_held_lock(hlock, lock)) - goto found_it; - prev_hlock = hlock; - } - print_lock_contention_bug(curr, lock, _RET_IP_); - return; - -found_it: - if (hlock->instance != lock) - return; - - cpu = smp_processor_id(); - if (hlock->waittime_stamp) { - now = lockstat_clock(); - waittime = now - hlock->waittime_stamp; - hlock->holdtime_stamp = now; - } - - trace_lock_acquired(lock, ip); - - stats = get_lock_stats(hlock_class(hlock)); - if (waittime) { - if (hlock->read) - lock_time_inc(&stats->read_waittime, waittime); - else - lock_time_inc(&stats->write_waittime, waittime); - } - if (lock->cpu != cpu) - stats->bounces[bounce_acquired + !!hlock->read]++; - put_lock_stats(stats); - - lock->cpu = cpu; - lock->ip = ip; -} - -void lock_contended(struct lockdep_map *lock, unsigned long ip) -{ - unsigned long flags; - - if (unlikely(!lock_stat)) - return; - - if (unlikely(current->lockdep_recursion)) - return; - - raw_local_irq_save(flags); - check_flags(flags); - current->lockdep_recursion = 1; - trace_lock_contended(lock, ip); - __lock_contended(lock, ip); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(lock_contended); - -void lock_acquired(struct lockdep_map *lock, unsigned long ip) -{ - unsigned long flags; - - if (unlikely(!lock_stat)) - return; - - if (unlikely(current->lockdep_recursion)) - return; - - raw_local_irq_save(flags); - check_flags(flags); - current->lockdep_recursion = 1; - __lock_acquired(lock, ip); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(lock_acquired); -#endif - -/* - * Used by the testsuite, sanitize the validator state - * after a simulated failure: - */ - -void lockdep_reset(void) -{ - unsigned long flags; - int i; - - raw_local_irq_save(flags); - current->curr_chain_key = 0; - current->lockdep_depth = 0; - current->lockdep_recursion = 0; - memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock)); - nr_hardirq_chains = 0; - nr_softirq_chains = 0; - nr_process_chains = 0; - debug_locks = 1; - for (i = 0; i < CHAINHASH_SIZE; i++) - INIT_LIST_HEAD(chainhash_table + i); - raw_local_irq_restore(flags); -} - -static void zap_class(struct lock_class *class) -{ - int i; - - /* - * Remove all dependencies this lock is - * involved in: - */ - for (i = 0; i < nr_list_entries; i++) { - if (list_entries[i].class == class) - list_del_rcu(&list_entries[i].entry); - } - /* - * Unhash the class and remove it from the all_lock_classes list: - */ - list_del_rcu(&class->hash_entry); - list_del_rcu(&class->lock_entry); - - class->key = NULL; -} - -static inline int within(const void *addr, void *start, unsigned long size) -{ - return addr >= start && addr < start + size; -} - -void lockdep_free_key_range(void *start, unsigned long size) -{ - struct lock_class *class, *next; - struct list_head *head; - unsigned long flags; - int i; - int locked; - - raw_local_irq_save(flags); - locked = graph_lock(); - - /* - * Unhash all classes that were created by this module: - */ - for (i = 0; i < CLASSHASH_SIZE; i++) { - head = classhash_table + i; - if (list_empty(head)) - continue; - list_for_each_entry_safe(class, next, head, hash_entry) { - if (within(class->key, start, size)) - zap_class(class); - else if (within(class->name, start, size)) - zap_class(class); - } - } - - if (locked) - graph_unlock(); - raw_local_irq_restore(flags); -} - -void lockdep_reset_lock(struct lockdep_map *lock) -{ - struct lock_class *class, *next; - struct list_head *head; - unsigned long flags; - int i, j; - int locked; - - raw_local_irq_save(flags); - - /* - * Remove all classes this lock might have: - */ - for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { - /* - * If the class exists we look it up and zap it: - */ - class = look_up_lock_class(lock, j); - if (class) - zap_class(class); - } - /* - * Debug check: in the end all mapped classes should - * be gone. - */ - locked = graph_lock(); - for (i = 0; i < CLASSHASH_SIZE; i++) { - head = classhash_table + i; - if (list_empty(head)) - continue; - list_for_each_entry_safe(class, next, head, hash_entry) { - int match = 0; - - for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) - match |= class == lock->class_cache[j]; - - if (unlikely(match)) { - if (debug_locks_off_graph_unlock()) { - /* - * We all just reset everything, how did it match? - */ - WARN_ON(1); - } - goto out_restore; - } - } - } - if (locked) - graph_unlock(); - -out_restore: - raw_local_irq_restore(flags); -} - -void lockdep_init(void) -{ - int i; - - /* - * Some architectures have their own start_kernel() - * code which calls lockdep_init(), while we also - * call lockdep_init() from the start_kernel() itself, - * and we want to initialize the hashes only once: - */ - if (lockdep_initialized) - return; - - for (i = 0; i < CLASSHASH_SIZE; i++) - INIT_LIST_HEAD(classhash_table + i); - - for (i = 0; i < CHAINHASH_SIZE; i++) - INIT_LIST_HEAD(chainhash_table + i); - - lockdep_initialized = 1; -} - -void __init lockdep_info(void) -{ - printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); - - printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); - printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); - printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); - printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); - printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); - printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); - printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); - - printk(" memory used by lock dependency info: %lu kB\n", - (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS + - sizeof(struct list_head) * CLASSHASH_SIZE + - sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES + - sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + - sizeof(struct list_head) * CHAINHASH_SIZE -#ifdef CONFIG_PROVE_LOCKING - + sizeof(struct circular_queue) -#endif - ) / 1024 - ); - - printk(" per task-struct memory footprint: %lu bytes\n", - sizeof(struct held_lock) * MAX_LOCK_DEPTH); - -#ifdef CONFIG_DEBUG_LOCKDEP - if (lockdep_init_error) { - printk("WARNING: lockdep init error! lock-%s was acquired" - "before lockdep_init\n", lock_init_error); - printk("Call stack leading to lockdep invocation was:\n"); - print_stack_trace(&lockdep_init_trace, 0); - } -#endif -} - -static void -print_freed_lock_bug(struct task_struct *curr, const void *mem_from, - const void *mem_to, struct held_lock *hlock) -{ - if (!debug_locks_off()) - return; - if (debug_locks_silent) - return; - - printk("\n"); - printk("=========================\n"); - printk("[ BUG: held lock freed! ]\n"); - print_kernel_ident(); - printk("-------------------------\n"); - printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", - curr->comm, task_pid_nr(curr), mem_from, mem_to-1); - print_lock(hlock); - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); -} - -static inline int not_in_range(const void* mem_from, unsigned long mem_len, - const void* lock_from, unsigned long lock_len) -{ - return lock_from + lock_len <= mem_from || - mem_from + mem_len <= lock_from; -} - -/* - * Called when kernel memory is freed (or unmapped), or if a lock - * is destroyed or reinitialized - this code checks whether there is - * any held lock in the memory range of to : - */ -void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) -{ - struct task_struct *curr = current; - struct held_lock *hlock; - unsigned long flags; - int i; - - if (unlikely(!debug_locks)) - return; - - local_irq_save(flags); - for (i = 0; i < curr->lockdep_depth; i++) { - hlock = curr->held_locks + i; - - if (not_in_range(mem_from, mem_len, hlock->instance, - sizeof(*hlock->instance))) - continue; - - print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); - break; - } - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); - -static void print_held_locks_bug(void) -{ - if (!debug_locks_off()) - return; - if (debug_locks_silent) - return; - - printk("\n"); - printk("=====================================\n"); - printk("[ BUG: %s/%d still has locks held! ]\n", - current->comm, task_pid_nr(current)); - print_kernel_ident(); - printk("-------------------------------------\n"); - lockdep_print_held_locks(current); - printk("\nstack backtrace:\n"); - dump_stack(); -} - -void debug_check_no_locks_held(void) -{ - if (unlikely(current->lockdep_depth > 0)) - print_held_locks_bug(); -} -EXPORT_SYMBOL_GPL(debug_check_no_locks_held); - -void debug_show_all_locks(void) -{ - struct task_struct *g, *p; - int count = 10; - int unlock = 1; - - if (unlikely(!debug_locks)) { - printk("INFO: lockdep is turned off.\n"); - return; - } - printk("\nShowing all locks held in the system:\n"); - - /* - * Here we try to get the tasklist_lock as hard as possible, - * if not successful after 2 seconds we ignore it (but keep - * trying). This is to enable a debug printout even if a - * tasklist_lock-holding task deadlocks or crashes. - */ -retry: - if (!read_trylock(&tasklist_lock)) { - if (count == 10) - printk("hm, tasklist_lock locked, retrying... "); - if (count) { - count--; - printk(" #%d", 10-count); - mdelay(200); - goto retry; - } - printk(" ignoring it.\n"); - unlock = 0; - } else { - if (count != 10) - printk(KERN_CONT " locked it.\n"); - } - - do_each_thread(g, p) { - /* - * It's not reliable to print a task's held locks - * if it's not sleeping (or if it's not the current - * task): - */ - if (p->state == TASK_RUNNING && p != current) - continue; - if (p->lockdep_depth) - lockdep_print_held_locks(p); - if (!unlock) - if (read_trylock(&tasklist_lock)) - unlock = 1; - } while_each_thread(g, p); - - printk("\n"); - printk("=============================================\n\n"); - - if (unlock) - read_unlock(&tasklist_lock); -} -EXPORT_SYMBOL_GPL(debug_show_all_locks); - -/* - * Careful: only use this function if you are sure that - * the task cannot run in parallel! - */ -void debug_show_held_locks(struct task_struct *task) -{ - if (unlikely(!debug_locks)) { - printk("INFO: lockdep is turned off.\n"); - return; - } - lockdep_print_held_locks(task); -} -EXPORT_SYMBOL_GPL(debug_show_held_locks); - -void lockdep_sys_exit(void) -{ - struct task_struct *curr = current; - - if (unlikely(curr->lockdep_depth)) { - if (!debug_locks_off()) - return; - printk("\n"); - printk("================================================\n"); - printk("[ BUG: lock held when returning to user space! ]\n"); - print_kernel_ident(); - printk("------------------------------------------------\n"); - printk("%s/%d is leaving the kernel with locks still held!\n", - curr->comm, curr->pid); - lockdep_print_held_locks(curr); - } -} - -void lockdep_rcu_suspicious(const char *file, const int line, const char *s) -{ - struct task_struct *curr = current; - -#ifndef CONFIG_PROVE_RCU_REPEATEDLY - if (!debug_locks_off()) - return; -#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ - /* Note: the following can be executed concurrently, so be careful. */ - printk("\n"); - printk("===============================\n"); - printk("[ INFO: suspicious RCU usage. ]\n"); - print_kernel_ident(); - printk("-------------------------------\n"); - printk("%s:%d %s!\n", file, line, s); - printk("\nother info that might help us debug this:\n\n"); - printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", - !rcu_lockdep_current_cpu_online() - ? "RCU used illegally from offline CPU!\n" - : !rcu_is_watching() - ? "RCU used illegally from idle CPU!\n" - : "", - rcu_scheduler_active, debug_locks); - - /* - * If a CPU is in the RCU-free window in idle (ie: in the section - * between rcu_idle_enter() and rcu_idle_exit(), then RCU - * considers that CPU to be in an "extended quiescent state", - * which means that RCU will be completely ignoring that CPU. - * Therefore, rcu_read_lock() and friends have absolutely no - * effect on a CPU running in that state. In other words, even if - * such an RCU-idle CPU has called rcu_read_lock(), RCU might well - * delete data structures out from under it. RCU really has no - * choice here: we need to keep an RCU-free window in idle where - * the CPU may possibly enter into low power mode. This way we can - * notice an extended quiescent state to other CPUs that started a grace - * period. Otherwise we would delay any grace period as long as we run - * in the idle task. - * - * So complain bitterly if someone does call rcu_read_lock(), - * rcu_read_lock_bh() and so on from extended quiescent states. - */ - if (!rcu_is_watching()) - printk("RCU used illegally from extended quiescent state!\n"); - - lockdep_print_held_locks(curr); - printk("\nstack backtrace:\n"); - dump_stack(); -} -EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h deleted file mode 100644 index 4f560cf..0000000 --- a/kernel/lockdep_internals.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * kernel/lockdep_internals.h - * - * Runtime locking correctness validator - * - * lockdep subsystem internal functions and variables. - */ - -/* - * Lock-class usage-state bits: - */ -enum lock_usage_bit { -#define LOCKDEP_STATE(__STATE) \ - LOCK_USED_IN_##__STATE, \ - LOCK_USED_IN_##__STATE##_READ, \ - LOCK_ENABLED_##__STATE, \ - LOCK_ENABLED_##__STATE##_READ, -#include "lockdep_states.h" -#undef LOCKDEP_STATE - LOCK_USED, - LOCK_USAGE_STATES -}; - -/* - * Usage-state bitmasks: - */ -#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE), - -enum { -#define LOCKDEP_STATE(__STATE) \ - __LOCKF(USED_IN_##__STATE) \ - __LOCKF(USED_IN_##__STATE##_READ) \ - __LOCKF(ENABLED_##__STATE) \ - __LOCKF(ENABLED_##__STATE##_READ) -#include "lockdep_states.h" -#undef LOCKDEP_STATE - __LOCKF(USED) -}; - -#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ) -#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) - -#define LOCKF_ENABLED_IRQ_READ \ - (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ) -#define LOCKF_USED_IN_IRQ_READ \ - (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) - -/* - * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies - * we track. - * - * We use the per-lock dependency maps in two ways: we grow it by adding - * every to-be-taken lock to all currently held lock's own dependency - * table (if it's not there yet), and we check it for lock order - * conflicts and deadlocks. - */ -#define MAX_LOCKDEP_ENTRIES 16384UL - -#define MAX_LOCKDEP_CHAINS_BITS 15 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) - -/* - * Stack-trace: tightly packed array of stack backtrace - * addresses. Protected by the hash_lock. - */ -#define MAX_STACK_TRACE_ENTRIES 262144UL - -extern struct list_head all_lock_classes; -extern struct lock_chain lock_chains[]; - -#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2) - -extern void get_usage_chars(struct lock_class *class, - char usage[LOCK_USAGE_CHARS]); - -extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); - -struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i); - -extern unsigned long nr_lock_classes; -extern unsigned long nr_list_entries; -extern unsigned long nr_lock_chains; -extern int nr_chain_hlocks; -extern unsigned long nr_stack_trace_entries; - -extern unsigned int nr_hardirq_chains; -extern unsigned int nr_softirq_chains; -extern unsigned int nr_process_chains; -extern unsigned int max_lockdep_depth; -extern unsigned int max_recursion_depth; - -extern unsigned int max_bfs_queue_depth; - -#ifdef CONFIG_PROVE_LOCKING -extern unsigned long lockdep_count_forward_deps(struct lock_class *); -extern unsigned long lockdep_count_backward_deps(struct lock_class *); -#else -static inline unsigned long -lockdep_count_forward_deps(struct lock_class *class) -{ - return 0; -} -static inline unsigned long -lockdep_count_backward_deps(struct lock_class *class) -{ - return 0; -} -#endif - -#ifdef CONFIG_DEBUG_LOCKDEP - -#include -/* - * Various lockdep statistics. - * We want them per cpu as they are often accessed in fast path - * and we want to avoid too much cache bouncing. - */ -struct lockdep_stats { - int chain_lookup_hits; - int chain_lookup_misses; - int hardirqs_on_events; - int hardirqs_off_events; - int redundant_hardirqs_on; - int redundant_hardirqs_off; - int softirqs_on_events; - int softirqs_off_events; - int redundant_softirqs_on; - int redundant_softirqs_off; - int nr_unused_locks; - int nr_cyclic_checks; - int nr_cyclic_check_recursions; - int nr_find_usage_forwards_checks; - int nr_find_usage_forwards_recursions; - int nr_find_usage_backwards_checks; - int nr_find_usage_backwards_recursions; -}; - -DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); - -#define __debug_atomic_inc(ptr) \ - this_cpu_inc(lockdep_stats.ptr); - -#define debug_atomic_inc(ptr) { \ - WARN_ON_ONCE(!irqs_disabled()); \ - __this_cpu_inc(lockdep_stats.ptr); \ -} - -#define debug_atomic_dec(ptr) { \ - WARN_ON_ONCE(!irqs_disabled()); \ - __this_cpu_dec(lockdep_stats.ptr); \ -} - -#define debug_atomic_read(ptr) ({ \ - struct lockdep_stats *__cpu_lockdep_stats; \ - unsigned long long __total = 0; \ - int __cpu; \ - for_each_possible_cpu(__cpu) { \ - __cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \ - __total += __cpu_lockdep_stats->ptr; \ - } \ - __total; \ -}) -#else -# define __debug_atomic_inc(ptr) do { } while (0) -# define debug_atomic_inc(ptr) do { } while (0) -# define debug_atomic_dec(ptr) do { } while (0) -# define debug_atomic_read(ptr) 0 -#endif diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c deleted file mode 100644 index 0922065..0000000 --- a/kernel/lockdep_proc.c +++ /dev/null @@ -1,683 +0,0 @@ -/* - * kernel/lockdep_proc.c - * - * Runtime locking correctness validator - * - * Started by Ingo Molnar: - * - * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra - * - * Code for /proc/lockdep and /proc/lockdep_stats: - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "lockdep_internals.h" - -static void *l_next(struct seq_file *m, void *v, loff_t *pos) -{ - return seq_list_next(v, &all_lock_classes, pos); -} - -static void *l_start(struct seq_file *m, loff_t *pos) -{ - return seq_list_start_head(&all_lock_classes, *pos); -} - -static void l_stop(struct seq_file *m, void *v) -{ -} - -static void print_name(struct seq_file *m, struct lock_class *class) -{ - char str[KSYM_NAME_LEN]; - const char *name = class->name; - - if (!name) { - name = __get_key_name(class->key, str); - seq_printf(m, "%s", name); - } else{ - seq_printf(m, "%s", name); - if (class->name_version > 1) - seq_printf(m, "#%d", class->name_version); - if (class->subclass) - seq_printf(m, "/%d", class->subclass); - } -} - -static int l_show(struct seq_file *m, void *v) -{ - struct lock_class *class = list_entry(v, struct lock_class, lock_entry); - struct lock_list *entry; - char usage[LOCK_USAGE_CHARS]; - - if (v == &all_lock_classes) { - seq_printf(m, "all lock classes:\n"); - return 0; - } - - seq_printf(m, "%p", class->key); -#ifdef CONFIG_DEBUG_LOCKDEP - seq_printf(m, " OPS:%8ld", class->ops); -#endif -#ifdef CONFIG_PROVE_LOCKING - seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class)); - seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class)); -#endif - - get_usage_chars(class, usage); - seq_printf(m, " %s", usage); - - seq_printf(m, ": "); - print_name(m, class); - seq_puts(m, "\n"); - - list_for_each_entry(entry, &class->locks_after, entry) { - if (entry->distance == 1) { - seq_printf(m, " -> [%p] ", entry->class->key); - print_name(m, entry->class); - seq_puts(m, "\n"); - } - } - seq_puts(m, "\n"); - - return 0; -} - -static const struct seq_operations lockdep_ops = { - .start = l_start, - .next = l_next, - .stop = l_stop, - .show = l_show, -}; - -static int lockdep_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &lockdep_ops); -} - -static const struct file_operations proc_lockdep_operations = { - .open = lockdep_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#ifdef CONFIG_PROVE_LOCKING -static void *lc_start(struct seq_file *m, loff_t *pos) -{ - if (*pos == 0) - return SEQ_START_TOKEN; - - if (*pos - 1 < nr_lock_chains) - return lock_chains + (*pos - 1); - - return NULL; -} - -static void *lc_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return lc_start(m, pos); -} - -static void lc_stop(struct seq_file *m, void *v) -{ -} - -static int lc_show(struct seq_file *m, void *v) -{ - struct lock_chain *chain = v; - struct lock_class *class; - int i; - - if (v == SEQ_START_TOKEN) { - seq_printf(m, "all lock chains:\n"); - return 0; - } - - seq_printf(m, "irq_context: %d\n", chain->irq_context); - - for (i = 0; i < chain->depth; i++) { - class = lock_chain_get_class(chain, i); - if (!class->key) - continue; - - seq_printf(m, "[%p] ", class->key); - print_name(m, class); - seq_puts(m, "\n"); - } - seq_puts(m, "\n"); - - return 0; -} - -static const struct seq_operations lockdep_chains_ops = { - .start = lc_start, - .next = lc_next, - .stop = lc_stop, - .show = lc_show, -}; - -static int lockdep_chains_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &lockdep_chains_ops); -} - -static const struct file_operations proc_lockdep_chains_operations = { - .open = lockdep_chains_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif /* CONFIG_PROVE_LOCKING */ - -static void lockdep_stats_debug_show(struct seq_file *m) -{ -#ifdef CONFIG_DEBUG_LOCKDEP - unsigned long long hi1 = debug_atomic_read(hardirqs_on_events), - hi2 = debug_atomic_read(hardirqs_off_events), - hr1 = debug_atomic_read(redundant_hardirqs_on), - hr2 = debug_atomic_read(redundant_hardirqs_off), - si1 = debug_atomic_read(softirqs_on_events), - si2 = debug_atomic_read(softirqs_off_events), - sr1 = debug_atomic_read(redundant_softirqs_on), - sr2 = debug_atomic_read(redundant_softirqs_off); - - seq_printf(m, " chain lookup misses: %11llu\n", - debug_atomic_read(chain_lookup_misses)); - seq_printf(m, " chain lookup hits: %11llu\n", - debug_atomic_read(chain_lookup_hits)); - seq_printf(m, " cyclic checks: %11llu\n", - debug_atomic_read(nr_cyclic_checks)); - seq_printf(m, " find-mask forwards checks: %11llu\n", - debug_atomic_read(nr_find_usage_forwards_checks)); - seq_printf(m, " find-mask backwards checks: %11llu\n", - debug_atomic_read(nr_find_usage_backwards_checks)); - - seq_printf(m, " hardirq on events: %11llu\n", hi1); - seq_printf(m, " hardirq off events: %11llu\n", hi2); - seq_printf(m, " redundant hardirq ons: %11llu\n", hr1); - seq_printf(m, " redundant hardirq offs: %11llu\n", hr2); - seq_printf(m, " softirq on events: %11llu\n", si1); - seq_printf(m, " softirq off events: %11llu\n", si2); - seq_printf(m, " redundant softirq ons: %11llu\n", sr1); - seq_printf(m, " redundant softirq offs: %11llu\n", sr2); -#endif -} - -static int lockdep_stats_show(struct seq_file *m, void *v) -{ - struct lock_class *class; - unsigned long nr_unused = 0, nr_uncategorized = 0, - nr_irq_safe = 0, nr_irq_unsafe = 0, - nr_softirq_safe = 0, nr_softirq_unsafe = 0, - nr_hardirq_safe = 0, nr_hardirq_unsafe = 0, - nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, - nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, - nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, - sum_forward_deps = 0; - - list_for_each_entry(class, &all_lock_classes, lock_entry) { - - if (class->usage_mask == 0) - nr_unused++; - if (class->usage_mask == LOCKF_USED) - nr_uncategorized++; - if (class->usage_mask & LOCKF_USED_IN_IRQ) - nr_irq_safe++; - if (class->usage_mask & LOCKF_ENABLED_IRQ) - nr_irq_unsafe++; - if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) - nr_softirq_safe++; - if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ) - nr_softirq_unsafe++; - if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) - nr_hardirq_safe++; - if (class->usage_mask & LOCKF_ENABLED_HARDIRQ) - nr_hardirq_unsafe++; - if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) - nr_irq_read_safe++; - if (class->usage_mask & LOCKF_ENABLED_IRQ_READ) - nr_irq_read_unsafe++; - if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) - nr_softirq_read_safe++; - if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ) - nr_softirq_read_unsafe++; - if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) - nr_hardirq_read_safe++; - if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ) - nr_hardirq_read_unsafe++; - -#ifdef CONFIG_PROVE_LOCKING - sum_forward_deps += lockdep_count_forward_deps(class); -#endif - } -#ifdef CONFIG_DEBUG_LOCKDEP - DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused); -#endif - seq_printf(m, " lock-classes: %11lu [max: %lu]\n", - nr_lock_classes, MAX_LOCKDEP_KEYS); - seq_printf(m, " direct dependencies: %11lu [max: %lu]\n", - nr_list_entries, MAX_LOCKDEP_ENTRIES); - seq_printf(m, " indirect dependencies: %11lu\n", - sum_forward_deps); - - /* - * Total number of dependencies: - * - * All irq-safe locks may nest inside irq-unsafe locks, - * plus all the other known dependencies: - */ - seq_printf(m, " all direct dependencies: %11lu\n", - nr_irq_unsafe * nr_irq_safe + - nr_hardirq_unsafe * nr_hardirq_safe + - nr_list_entries); - -#ifdef CONFIG_PROVE_LOCKING - seq_printf(m, " dependency chains: %11lu [max: %lu]\n", - nr_lock_chains, MAX_LOCKDEP_CHAINS); - seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n", - nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS); -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS - seq_printf(m, " in-hardirq chains: %11u\n", - nr_hardirq_chains); - seq_printf(m, " in-softirq chains: %11u\n", - nr_softirq_chains); -#endif - seq_printf(m, " in-process chains: %11u\n", - nr_process_chains); - seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n", - nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); - seq_printf(m, " combined max dependencies: %11u\n", - (nr_hardirq_chains + 1) * - (nr_softirq_chains + 1) * - (nr_process_chains + 1) - ); - seq_printf(m, " hardirq-safe locks: %11lu\n", - nr_hardirq_safe); - seq_printf(m, " hardirq-unsafe locks: %11lu\n", - nr_hardirq_unsafe); - seq_printf(m, " softirq-safe locks: %11lu\n", - nr_softirq_safe); - seq_printf(m, " softirq-unsafe locks: %11lu\n", - nr_softirq_unsafe); - seq_printf(m, " irq-safe locks: %11lu\n", - nr_irq_safe); - seq_printf(m, " irq-unsafe locks: %11lu\n", - nr_irq_unsafe); - - seq_printf(m, " hardirq-read-safe locks: %11lu\n", - nr_hardirq_read_safe); - seq_printf(m, " hardirq-read-unsafe locks: %11lu\n", - nr_hardirq_read_unsafe); - seq_printf(m, " softirq-read-safe locks: %11lu\n", - nr_softirq_read_safe); - seq_printf(m, " softirq-read-unsafe locks: %11lu\n", - nr_softirq_read_unsafe); - seq_printf(m, " irq-read-safe locks: %11lu\n", - nr_irq_read_safe); - seq_printf(m, " irq-read-unsafe locks: %11lu\n", - nr_irq_read_unsafe); - - seq_printf(m, " uncategorized locks: %11lu\n", - nr_uncategorized); - seq_printf(m, " unused locks: %11lu\n", - nr_unused); - seq_printf(m, " max locking depth: %11u\n", - max_lockdep_depth); -#ifdef CONFIG_PROVE_LOCKING - seq_printf(m, " max bfs queue depth: %11u\n", - max_bfs_queue_depth); -#endif - lockdep_stats_debug_show(m); - seq_printf(m, " debug_locks: %11u\n", - debug_locks); - - return 0; -} - -static int lockdep_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, lockdep_stats_show, NULL); -} - -static const struct file_operations proc_lockdep_stats_operations = { - .open = lockdep_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -#ifdef CONFIG_LOCK_STAT - -struct lock_stat_data { - struct lock_class *class; - struct lock_class_stats stats; -}; - -struct lock_stat_seq { - struct lock_stat_data *iter_end; - struct lock_stat_data stats[MAX_LOCKDEP_KEYS]; -}; - -/* - * sort on absolute number of contentions - */ -static int lock_stat_cmp(const void *l, const void *r) -{ - const struct lock_stat_data *dl = l, *dr = r; - unsigned long nl, nr; - - nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr; - nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr; - - return nr - nl; -} - -static void seq_line(struct seq_file *m, char c, int offset, int length) -{ - int i; - - for (i = 0; i < offset; i++) - seq_puts(m, " "); - for (i = 0; i < length; i++) - seq_printf(m, "%c", c); - seq_puts(m, "\n"); -} - -static void snprint_time(char *buf, size_t bufsiz, s64 nr) -{ - s64 div; - s32 rem; - - nr += 5; /* for display rounding */ - div = div_s64_rem(nr, 1000, &rem); - snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10); -} - -static void seq_time(struct seq_file *m, s64 time) -{ - char num[15]; - - snprint_time(num, sizeof(num), time); - seq_printf(m, " %14s", num); -} - -static void seq_lock_time(struct seq_file *m, struct lock_time *lt) -{ - seq_printf(m, "%14lu", lt->nr); - seq_time(m, lt->min); - seq_time(m, lt->max); - seq_time(m, lt->total); - seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); -} - -static void seq_stats(struct seq_file *m, struct lock_stat_data *data) -{ - char name[39]; - struct lock_class *class; - struct lock_class_stats *stats; - int i, namelen; - - class = data->class; - stats = &data->stats; - - namelen = 38; - if (class->name_version > 1) - namelen -= 2; /* XXX truncates versions > 9 */ - if (class->subclass) - namelen -= 2; - - if (!class->name) { - char str[KSYM_NAME_LEN]; - const char *key_name; - - key_name = __get_key_name(class->key, str); - snprintf(name, namelen, "%s", key_name); - } else { - snprintf(name, namelen, "%s", class->name); - } - namelen = strlen(name); - if (class->name_version > 1) { - snprintf(name+namelen, 3, "#%d", class->name_version); - namelen += 2; - } - if (class->subclass) { - snprintf(name+namelen, 3, "/%d", class->subclass); - namelen += 2; - } - - if (stats->write_holdtime.nr) { - if (stats->read_holdtime.nr) - seq_printf(m, "%38s-W:", name); - else - seq_printf(m, "%40s:", name); - - seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]); - seq_lock_time(m, &stats->write_waittime); - seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]); - seq_lock_time(m, &stats->write_holdtime); - seq_puts(m, "\n"); - } - - if (stats->read_holdtime.nr) { - seq_printf(m, "%38s-R:", name); - seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]); - seq_lock_time(m, &stats->read_waittime); - seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]); - seq_lock_time(m, &stats->read_holdtime); - seq_puts(m, "\n"); - } - - if (stats->read_waittime.nr + stats->write_waittime.nr == 0) - return; - - if (stats->read_holdtime.nr) - namelen += 2; - - for (i = 0; i < LOCKSTAT_POINTS; i++) { - char ip[32]; - - if (class->contention_point[i] == 0) - break; - - if (!i) - seq_line(m, '-', 40-namelen, namelen); - - snprintf(ip, sizeof(ip), "[<%p>]", - (void *)class->contention_point[i]); - seq_printf(m, "%40s %14lu %29s %pS\n", - name, stats->contention_point[i], - ip, (void *)class->contention_point[i]); - } - for (i = 0; i < LOCKSTAT_POINTS; i++) { - char ip[32]; - - if (class->contending_point[i] == 0) - break; - - if (!i) - seq_line(m, '-', 40-namelen, namelen); - - snprintf(ip, sizeof(ip), "[<%p>]", - (void *)class->contending_point[i]); - seq_printf(m, "%40s %14lu %29s %pS\n", - name, stats->contending_point[i], - ip, (void *)class->contending_point[i]); - } - if (i) { - seq_puts(m, "\n"); - seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1)); - seq_puts(m, "\n"); - } -} - -static void seq_header(struct seq_file *m) -{ - seq_puts(m, "lock_stat version 0.4\n"); - - if (unlikely(!debug_locks)) - seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n"); - - seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); - seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s " - "%14s %14s\n", - "class name", - "con-bounces", - "contentions", - "waittime-min", - "waittime-max", - "waittime-total", - "waittime-avg", - "acq-bounces", - "acquisitions", - "holdtime-min", - "holdtime-max", - "holdtime-total", - "holdtime-avg"); - seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); - seq_printf(m, "\n"); -} - -static void *ls_start(struct seq_file *m, loff_t *pos) -{ - struct lock_stat_seq *data = m->private; - struct lock_stat_data *iter; - - if (*pos == 0) - return SEQ_START_TOKEN; - - iter = data->stats + (*pos - 1); - if (iter >= data->iter_end) - iter = NULL; - - return iter; -} - -static void *ls_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return ls_start(m, pos); -} - -static void ls_stop(struct seq_file *m, void *v) -{ -} - -static int ls_show(struct seq_file *m, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_header(m); - else - seq_stats(m, v); - - return 0; -} - -static const struct seq_operations lockstat_ops = { - .start = ls_start, - .next = ls_next, - .stop = ls_stop, - .show = ls_show, -}; - -static int lock_stat_open(struct inode *inode, struct file *file) -{ - int res; - struct lock_class *class; - struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq)); - - if (!data) - return -ENOMEM; - - res = seq_open(file, &lockstat_ops); - if (!res) { - struct lock_stat_data *iter = data->stats; - struct seq_file *m = file->private_data; - - list_for_each_entry(class, &all_lock_classes, lock_entry) { - iter->class = class; - iter->stats = lock_stats(class); - iter++; - } - data->iter_end = iter; - - sort(data->stats, data->iter_end - data->stats, - sizeof(struct lock_stat_data), - lock_stat_cmp, NULL); - - m->private = data; - } else - vfree(data); - - return res; -} - -static ssize_t lock_stat_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct lock_class *class; - char c; - - if (count) { - if (get_user(c, buf)) - return -EFAULT; - - if (c != '0') - return count; - - list_for_each_entry(class, &all_lock_classes, lock_entry) - clear_lock_stats(class); - } - return count; -} - -static int lock_stat_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - - vfree(seq->private); - return seq_release(inode, file); -} - -static const struct file_operations proc_lock_stat_operations = { - .open = lock_stat_open, - .write = lock_stat_write, - .read = seq_read, - .llseek = seq_lseek, - .release = lock_stat_release, -}; -#endif /* CONFIG_LOCK_STAT */ - -static int __init lockdep_proc_init(void) -{ - proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations); -#ifdef CONFIG_PROVE_LOCKING - proc_create("lockdep_chains", S_IRUSR, NULL, - &proc_lockdep_chains_operations); -#endif - proc_create("lockdep_stats", S_IRUSR, NULL, - &proc_lockdep_stats_operations); - -#ifdef CONFIG_LOCK_STAT - proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL, - &proc_lock_stat_operations); -#endif - - return 0; -} - -__initcall(lockdep_proc_init); - diff --git a/kernel/lockdep_states.h b/kernel/lockdep_states.h deleted file mode 100644 index 995b0cc..0000000 --- a/kernel/lockdep_states.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Lockdep states, - * - * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever - * you add one, or come up with a nice dynamic solution. - */ -LOCKDEP_STATE(HARDIRQ) -LOCKDEP_STATE(SOFTIRQ) -LOCKDEP_STATE(RECLAIM_FS) diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index fe8bd58..c103599 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -2,8 +2,14 @@ obj-y += mutex.o ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_lockdep.o = -pg +CFLAGS_REMOVE_lockdep_proc.o = -pg CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg endif obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o +obj-$(CONFIG_LOCKDEP) += lockdep.o +ifeq ($(CONFIG_PROC_FS),y) +obj-$(CONFIG_LOCKDEP) += lockdep_proc.o +endif diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c new file mode 100644 index 0000000..4e8e14c --- /dev/null +++ b/kernel/locking/lockdep.c @@ -0,0 +1,4257 @@ +/* + * kernel/lockdep.c + * + * Runtime locking correctness validator + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * this code maps all the lock dependencies as they occur in a live kernel + * and will warn about the following classes of locking bugs: + * + * - lock inversion scenarios + * - circular lock dependencies + * - hardirq/softirq safe/unsafe locking bugs + * + * Bugs are reported even if the current locking scenario does not cause + * any deadlock at this point. + * + * I.e. if anytime in the past two locks were taken in a different order, + * even if it happened for another task, even if those were different + * locks (but of the same class as this lock), this code will detect it. + * + * Thanks to Arjan van de Ven for coming up with the initial idea of + * mapping lock dependencies runtime. + */ +#define DISABLE_BRANCH_PROFILING +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "lockdep_internals.h" + +#define CREATE_TRACE_POINTS +#include + +#ifdef CONFIG_PROVE_LOCKING +int prove_locking = 1; +module_param(prove_locking, int, 0644); +#else +#define prove_locking 0 +#endif + +#ifdef CONFIG_LOCK_STAT +int lock_stat = 1; +module_param(lock_stat, int, 0644); +#else +#define lock_stat 0 +#endif + +/* + * lockdep_lock: protects the lockdep graph, the hashes and the + * class/list/hash allocators. + * + * This is one of the rare exceptions where it's justified + * to use a raw spinlock - we really dont want the spinlock + * code to recurse back into the lockdep code... + */ +static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; + +static int graph_lock(void) +{ + arch_spin_lock(&lockdep_lock); + /* + * Make sure that if another CPU detected a bug while + * walking the graph we dont change it (while the other + * CPU is busy printing out stuff with the graph lock + * dropped already) + */ + if (!debug_locks) { + arch_spin_unlock(&lockdep_lock); + return 0; + } + /* prevent any recursions within lockdep from causing deadlocks */ + current->lockdep_recursion++; + return 1; +} + +static inline int graph_unlock(void) +{ + if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) { + /* + * The lockdep graph lock isn't locked while we expect it to + * be, we're confused now, bye! + */ + return DEBUG_LOCKS_WARN_ON(1); + } + + current->lockdep_recursion--; + arch_spin_unlock(&lockdep_lock); + return 0; +} + +/* + * Turn lock debugging off and return with 0 if it was off already, + * and also release the graph lock: + */ +static inline int debug_locks_off_graph_unlock(void) +{ + int ret = debug_locks_off(); + + arch_spin_unlock(&lockdep_lock); + + return ret; +} + +static int lockdep_initialized; + +unsigned long nr_list_entries; +static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; + +/* + * All data structures here are protected by the global debug_lock. + * + * Mutex key structs only get allocated, once during bootup, and never + * get freed - this significantly simplifies the debugging code. + */ +unsigned long nr_lock_classes; +static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; + +static inline struct lock_class *hlock_class(struct held_lock *hlock) +{ + if (!hlock->class_idx) { + /* + * Someone passed in garbage, we give up. + */ + DEBUG_LOCKS_WARN_ON(1); + return NULL; + } + return lock_classes + hlock->class_idx - 1; +} + +#ifdef CONFIG_LOCK_STAT +static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], + cpu_lock_stats); + +static inline u64 lockstat_clock(void) +{ + return local_clock(); +} + +static int lock_point(unsigned long points[], unsigned long ip) +{ + int i; + + for (i = 0; i < LOCKSTAT_POINTS; i++) { + if (points[i] == 0) { + points[i] = ip; + break; + } + if (points[i] == ip) + break; + } + + return i; +} + +static void lock_time_inc(struct lock_time *lt, u64 time) +{ + if (time > lt->max) + lt->max = time; + + if (time < lt->min || !lt->nr) + lt->min = time; + + lt->total += time; + lt->nr++; +} + +static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) +{ + if (!src->nr) + return; + + if (src->max > dst->max) + dst->max = src->max; + + if (src->min < dst->min || !dst->nr) + dst->min = src->min; + + dst->total += src->total; + dst->nr += src->nr; +} + +struct lock_class_stats lock_stats(struct lock_class *class) +{ + struct lock_class_stats stats; + int cpu, i; + + memset(&stats, 0, sizeof(struct lock_class_stats)); + for_each_possible_cpu(cpu) { + struct lock_class_stats *pcs = + &per_cpu(cpu_lock_stats, cpu)[class - lock_classes]; + + for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) + stats.contention_point[i] += pcs->contention_point[i]; + + for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) + stats.contending_point[i] += pcs->contending_point[i]; + + lock_time_add(&pcs->read_waittime, &stats.read_waittime); + lock_time_add(&pcs->write_waittime, &stats.write_waittime); + + lock_time_add(&pcs->read_holdtime, &stats.read_holdtime); + lock_time_add(&pcs->write_holdtime, &stats.write_holdtime); + + for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) + stats.bounces[i] += pcs->bounces[i]; + } + + return stats; +} + +void clear_lock_stats(struct lock_class *class) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct lock_class_stats *cpu_stats = + &per_cpu(cpu_lock_stats, cpu)[class - lock_classes]; + + memset(cpu_stats, 0, sizeof(struct lock_class_stats)); + } + memset(class->contention_point, 0, sizeof(class->contention_point)); + memset(class->contending_point, 0, sizeof(class->contending_point)); +} + +static struct lock_class_stats *get_lock_stats(struct lock_class *class) +{ + return &get_cpu_var(cpu_lock_stats)[class - lock_classes]; +} + +static void put_lock_stats(struct lock_class_stats *stats) +{ + put_cpu_var(cpu_lock_stats); +} + +static void lock_release_holdtime(struct held_lock *hlock) +{ + struct lock_class_stats *stats; + u64 holdtime; + + if (!lock_stat) + return; + + holdtime = lockstat_clock() - hlock->holdtime_stamp; + + stats = get_lock_stats(hlock_class(hlock)); + if (hlock->read) + lock_time_inc(&stats->read_holdtime, holdtime); + else + lock_time_inc(&stats->write_holdtime, holdtime); + put_lock_stats(stats); +} +#else +static inline void lock_release_holdtime(struct held_lock *hlock) +{ +} +#endif + +/* + * We keep a global list of all lock classes. The list only grows, + * never shrinks. The list is only accessed with the lockdep + * spinlock lock held. + */ +LIST_HEAD(all_lock_classes); + +/* + * The lockdep classes are in a hash-table as well, for fast lookup: + */ +#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) +#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) +#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) +#define classhashentry(key) (classhash_table + __classhashfn((key))) + +static struct list_head classhash_table[CLASSHASH_SIZE]; + +/* + * We put the lock dependency chains into a hash-table as well, to cache + * their existence: + */ +#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) +#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) +#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) +#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) + +static struct list_head chainhash_table[CHAINHASH_SIZE]; + +/* + * The hash key of the lock dependency chains is a hash itself too: + * it's a hash of all locks taken up to that lock, including that lock. + * It's a 64-bit hash, because it's important for the keys to be + * unique. + */ +#define iterate_chain_key(key1, key2) \ + (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \ + ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ + (key2)) + +void lockdep_off(void) +{ + current->lockdep_recursion++; +} +EXPORT_SYMBOL(lockdep_off); + +void lockdep_on(void) +{ + current->lockdep_recursion--; +} +EXPORT_SYMBOL(lockdep_on); + +/* + * Debugging switches: + */ + +#define VERBOSE 0 +#define VERY_VERBOSE 0 + +#if VERBOSE +# define HARDIRQ_VERBOSE 1 +# define SOFTIRQ_VERBOSE 1 +# define RECLAIM_VERBOSE 1 +#else +# define HARDIRQ_VERBOSE 0 +# define SOFTIRQ_VERBOSE 0 +# define RECLAIM_VERBOSE 0 +#endif + +#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE +/* + * Quick filtering for interesting events: + */ +static int class_filter(struct lock_class *class) +{ +#if 0 + /* Example */ + if (class->name_version == 1 && + !strcmp(class->name, "lockname")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "&struct->lockfield")) + return 1; +#endif + /* Filter everything else. 1 would be to allow everything else */ + return 0; +} +#endif + +static int verbose(struct lock_class *class) +{ +#if VERBOSE + return class_filter(class); +#endif + return 0; +} + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. Protected by the graph_lock. + */ +unsigned long nr_stack_trace_entries; +static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; + +static void print_lockdep_off(const char *bug_msg) +{ + printk(KERN_DEBUG "%s\n", bug_msg); + printk(KERN_DEBUG "turning off the locking correctness validator.\n"); + printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n"); +} + +static int save_trace(struct stack_trace *trace) +{ + trace->nr_entries = 0; + trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; + trace->entries = stack_trace + nr_stack_trace_entries; + + trace->skip = 3; + + save_stack_trace(trace); + + /* + * Some daft arches put -1 at the end to indicate its a full trace. + * + * this is buggy anyway, since it takes a whole extra entry so a + * complete trace that maxes out the entries provided will be reported + * as incomplete, friggin useless + */ + if (trace->nr_entries != 0 && + trace->entries[trace->nr_entries-1] == ULONG_MAX) + trace->nr_entries--; + + trace->max_entries = trace->nr_entries; + + nr_stack_trace_entries += trace->nr_entries; + + if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { + if (!debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); + dump_stack(); + + return 0; + } + + return 1; +} + +unsigned int nr_hardirq_chains; +unsigned int nr_softirq_chains; +unsigned int nr_process_chains; +unsigned int max_lockdep_depth; + +#ifdef CONFIG_DEBUG_LOCKDEP +/* + * We cannot printk in early bootup code. Not even early_printk() + * might work. So we mark any initialization errors and printk + * about it later on, in lockdep_info(). + */ +static int lockdep_init_error; +static const char *lock_init_error; +static unsigned long lockdep_init_trace_data[20]; +static struct stack_trace lockdep_init_trace = { + .max_entries = ARRAY_SIZE(lockdep_init_trace_data), + .entries = lockdep_init_trace_data, +}; + +/* + * Various lockdep statistics: + */ +DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); +#endif + +/* + * Locking printouts: + */ + +#define __USAGE(__STATE) \ + [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \ + [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \ + [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\ + [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R", + +static const char *usage_str[] = +{ +#define LOCKDEP_STATE(__STATE) __USAGE(__STATE) +#include "lockdep_states.h" +#undef LOCKDEP_STATE + [LOCK_USED] = "INITIAL USE", +}; + +const char * __get_key_name(struct lockdep_subclass_key *key, char *str) +{ + return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); +} + +static inline unsigned long lock_flag(enum lock_usage_bit bit) +{ + return 1UL << bit; +} + +static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) +{ + char c = '.'; + + if (class->usage_mask & lock_flag(bit + 2)) + c = '+'; + if (class->usage_mask & lock_flag(bit)) { + c = '-'; + if (class->usage_mask & lock_flag(bit + 2)) + c = '?'; + } + + return c; +} + +void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]) +{ + int i = 0; + +#define LOCKDEP_STATE(__STATE) \ + usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \ + usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ); +#include "lockdep_states.h" +#undef LOCKDEP_STATE + + usage[i] = '\0'; +} + +static void __print_lock_name(struct lock_class *class) +{ + char str[KSYM_NAME_LEN]; + const char *name; + + name = class->name; + if (!name) { + name = __get_key_name(class->key, str); + printk("%s", name); + } else { + printk("%s", name); + if (class->name_version > 1) + printk("#%d", class->name_version); + if (class->subclass) + printk("/%d", class->subclass); + } +} + +static void print_lock_name(struct lock_class *class) +{ + char usage[LOCK_USAGE_CHARS]; + + get_usage_chars(class, usage); + + printk(" ("); + __print_lock_name(class); + printk("){%s}", usage); +} + +static void print_lockdep_cache(struct lockdep_map *lock) +{ + const char *name; + char str[KSYM_NAME_LEN]; + + name = lock->name; + if (!name) + name = __get_key_name(lock->key->subkeys, str); + + printk("%s", name); +} + +static void print_lock(struct held_lock *hlock) +{ + print_lock_name(hlock_class(hlock)); + printk(", at: "); + print_ip_sym(hlock->acquire_ip); +} + +static void lockdep_print_held_locks(struct task_struct *curr) +{ + int i, depth = curr->lockdep_depth; + + if (!depth) { + printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr)); + return; + } + printk("%d lock%s held by %s/%d:\n", + depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr)); + + for (i = 0; i < depth; i++) { + printk(" #%d: ", i); + print_lock(curr->held_locks + i); + } +} + +static void print_kernel_ident(void) +{ + printk("%s %.*s %s\n", init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version, + print_tainted()); +} + +static int very_verbose(struct lock_class *class) +{ +#if VERY_VERBOSE + return class_filter(class); +#endif + return 0; +} + +/* + * Is this the address of a static object: + */ +static int static_obj(void *obj) +{ + unsigned long start = (unsigned long) &_stext, + end = (unsigned long) &_end, + addr = (unsigned long) obj; + + /* + * static variable? + */ + if ((addr >= start) && (addr < end)) + return 1; + + if (arch_is_kernel_data(addr)) + return 1; + + /* + * in-kernel percpu var? + */ + if (is_kernel_percpu_address(addr)) + return 1; + + /* + * module static or percpu var? + */ + return is_module_address(addr) || is_module_percpu_address(addr); +} + +/* + * To make lock name printouts unique, we calculate a unique + * class->name_version generation counter: + */ +static int count_matching_names(struct lock_class *new_class) +{ + struct lock_class *class; + int count = 0; + + if (!new_class->name) + return 0; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + if (new_class->key - new_class->subclass == class->key) + return class->name_version; + if (class->name && !strcmp(class->name, new_class->name)) + count = max(count, class->name_version); + } + + return count + 1; +} + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * If the architecture calls into lockdep before initializing + * the hashes then we'll warn about it later. (we cannot printk + * right now) + */ + if (unlikely(!lockdep_initialized)) { + lockdep_init(); + lockdep_init_error = 1; + lock_init_error = lock->name; + save_stack_trace(&lockdep_init_trace); + } +#endif + + if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { + debug_locks_off(); + printk(KERN_ERR + "BUG: looking up invalid subclass: %u\n", subclass); + printk(KERN_ERR + "turning off the locking correctness validator.\n"); + dump_stack(); + return NULL; + } + + /* + * Static locks do not have their class-keys yet - for them the key + * is the lock object itself: + */ + if (unlikely(!lock->key)) + lock->key = (void *)lock; + + /* + * NOTE: the class-key must be unique. For dynamic locks, a static + * lock_class_key variable is passed in through the mutex_init() + * (or spin_lock_init()) call - which acts as the key. For static + * locks we use the lock object itself as the key. + */ + BUILD_BUG_ON(sizeof(struct lock_class_key) > + sizeof(struct lockdep_map)); + + key = lock->key->subkeys + subclass; + + hash_head = classhashentry(key); + + /* + * We can walk the hash lockfree, because the hash only + * grows, and we are careful when adding entries to the end: + */ + list_for_each_entry(class, hash_head, hash_entry) { + if (class->key == key) { + /* + * Huh! same key, different name? Did someone trample + * on some memory? We're most confused. + */ + WARN_ON_ONCE(class->name != lock->name); + return class; + } + } + + return NULL; +} + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + unsigned long flags; + + class = look_up_lock_class(lock, subclass); + if (likely(class)) + goto out_set_class_cache; + + /* + * Debug-check: all keys must be persistent! + */ + if (!static_obj(lock->key)) { + debug_locks_off(); + printk("INFO: trying to register non-static key.\n"); + printk("the code is fine but needs lockdep annotation.\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + + return NULL; + } + + key = lock->key->subkeys + subclass; + hash_head = classhashentry(key); + + raw_local_irq_save(flags); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } + /* + * We have to do the hash-walk again, to avoid races + * with another CPU: + */ + list_for_each_entry(class, hash_head, hash_entry) + if (class->key == key) + goto out_unlock_set; + /* + * Allocate a new key from the static array, and add it to + * the hash: + */ + if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { + if (!debug_locks_off_graph_unlock()) { + raw_local_irq_restore(flags); + return NULL; + } + raw_local_irq_restore(flags); + + print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); + dump_stack(); + return NULL; + } + class = lock_classes + nr_lock_classes++; + debug_atomic_inc(nr_unused_locks); + class->key = key; + class->name = lock->name; + class->subclass = subclass; + INIT_LIST_HEAD(&class->lock_entry); + INIT_LIST_HEAD(&class->locks_before); + INIT_LIST_HEAD(&class->locks_after); + class->name_version = count_matching_names(class); + /* + * We use RCU's safe list-add method to make + * parallel walking of the hash-list safe: + */ + list_add_tail_rcu(&class->hash_entry, hash_head); + /* + * Add it to the global list of classes: + */ + list_add_tail_rcu(&class->lock_entry, &all_lock_classes); + + if (verbose(class)) { + graph_unlock(); + raw_local_irq_restore(flags); + + printk("\nnew class %p: %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", class->name_version); + printk("\n"); + dump_stack(); + + raw_local_irq_save(flags); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } + } +out_unlock_set: + graph_unlock(); + raw_local_irq_restore(flags); + +out_set_class_cache: + if (!subclass || force) + lock->class_cache[0] = class; + else if (subclass < NR_LOCKDEP_CACHING_CLASSES) + lock->class_cache[subclass] = class; + + /* + * Hash collision, did we smoke some? We found a class with a matching + * hash but the subclass -- which is hashed in -- didn't match. + */ + if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) + return NULL; + + return class; +} + +#ifdef CONFIG_PROVE_LOCKING +/* + * Allocate a lockdep entry. (assumes the graph_lock held, returns + * with NULL on failure) + */ +static struct lock_list *alloc_list_entry(void) +{ + if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { + if (!debug_locks_off_graph_unlock()) + return NULL; + + print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!"); + dump_stack(); + return NULL; + } + return list_entries + nr_list_entries++; +} + +/* + * Add a new dependency to the head of the list: + */ +static int add_lock_to_list(struct lock_class *class, struct lock_class *this, + struct list_head *head, unsigned long ip, + int distance, struct stack_trace *trace) +{ + struct lock_list *entry; + /* + * Lock not present yet - get a new dependency struct and + * add it to the list: + */ + entry = alloc_list_entry(); + if (!entry) + return 0; + + entry->class = this; + entry->distance = distance; + entry->trace = *trace; + /* + * Since we never remove from the dependency list, the list can + * be walked lockless by other CPUs, it's only allocation + * that must be protected by the spinlock. But this also means + * we must make new entries visible only once writes to the + * entry become visible - hence the RCU op: + */ + list_add_tail_rcu(&entry->entry, head); + + return 1; +} + +/* + * For good efficiency of modular, we use power of 2 + */ +#define MAX_CIRCULAR_QUEUE_SIZE 4096UL +#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1) + +/* + * The circular_queue and helpers is used to implement the + * breadth-first search(BFS)algorithem, by which we can build + * the shortest path from the next lock to be acquired to the + * previous held lock if there is a circular between them. + */ +struct circular_queue { + unsigned long element[MAX_CIRCULAR_QUEUE_SIZE]; + unsigned int front, rear; +}; + +static struct circular_queue lock_cq; + +unsigned int max_bfs_queue_depth; + +static unsigned int lockdep_dependency_gen_id; + +static inline void __cq_init(struct circular_queue *cq) +{ + cq->front = cq->rear = 0; + lockdep_dependency_gen_id++; +} + +static inline int __cq_empty(struct circular_queue *cq) +{ + return (cq->front == cq->rear); +} + +static inline int __cq_full(struct circular_queue *cq) +{ + return ((cq->rear + 1) & CQ_MASK) == cq->front; +} + +static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) +{ + if (__cq_full(cq)) + return -1; + + cq->element[cq->rear] = elem; + cq->rear = (cq->rear + 1) & CQ_MASK; + return 0; +} + +static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) +{ + if (__cq_empty(cq)) + return -1; + + *elem = cq->element[cq->front]; + cq->front = (cq->front + 1) & CQ_MASK; + return 0; +} + +static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) +{ + return (cq->rear - cq->front) & CQ_MASK; +} + +static inline void mark_lock_accessed(struct lock_list *lock, + struct lock_list *parent) +{ + unsigned long nr; + + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ + lock->parent = parent; + lock->class->dep_gen_id = lockdep_dependency_gen_id; +} + +static inline unsigned long lock_accessed(struct lock_list *lock) +{ + unsigned long nr; + + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ + return lock->class->dep_gen_id == lockdep_dependency_gen_id; +} + +static inline struct lock_list *get_lock_parent(struct lock_list *child) +{ + return child->parent; +} + +static inline int get_lock_depth(struct lock_list *child) +{ + int depth = 0; + struct lock_list *parent; + + while ((parent = get_lock_parent(child))) { + child = parent; + depth++; + } + return depth; +} + +static int __bfs(struct lock_list *source_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry, + int forward) +{ + struct lock_list *entry; + struct list_head *head; + struct circular_queue *cq = &lock_cq; + int ret = 1; + + if (match(source_entry, data)) { + *target_entry = source_entry; + ret = 0; + goto exit; + } + + if (forward) + head = &source_entry->class->locks_after; + else + head = &source_entry->class->locks_before; + + if (list_empty(head)) + goto exit; + + __cq_init(cq); + __cq_enqueue(cq, (unsigned long)source_entry); + + while (!__cq_empty(cq)) { + struct lock_list *lock; + + __cq_dequeue(cq, (unsigned long *)&lock); + + if (!lock->class) { + ret = -2; + goto exit; + } + + if (forward) + head = &lock->class->locks_after; + else + head = &lock->class->locks_before; + + list_for_each_entry(entry, head, entry) { + if (!lock_accessed(entry)) { + unsigned int cq_depth; + mark_lock_accessed(entry, lock); + if (match(entry, data)) { + *target_entry = entry; + ret = 0; + goto exit; + } + + if (__cq_enqueue(cq, (unsigned long)entry)) { + ret = -1; + goto exit; + } + cq_depth = __cq_get_elem_count(cq); + if (max_bfs_queue_depth < cq_depth) + max_bfs_queue_depth = cq_depth; + } + } + } +exit: + return ret; +} + +static inline int __bfs_forwards(struct lock_list *src_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry) +{ + return __bfs(src_entry, data, match, target_entry, 1); + +} + +static inline int __bfs_backwards(struct lock_list *src_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry) +{ + return __bfs(src_entry, data, match, target_entry, 0); + +} + +/* + * Recursive, forwards-direction lock-dependency checking, used for + * both noncyclic checking and for hardirq-unsafe/softirq-unsafe + * checking. + */ + +/* + * Print a dependency chain entry (this is only done when a deadlock + * has been detected): + */ +static noinline int +print_circular_bug_entry(struct lock_list *target, int depth) +{ + if (debug_locks_silent) + return 0; + printk("\n-> #%u", depth); + print_lock_name(target->class); + printk(":\n"); + print_stack_trace(&target->trace, 6); + + return 0; +} + +static void +print_circular_lock_scenario(struct held_lock *src, + struct held_lock *tgt, + struct lock_list *prt) +{ + struct lock_class *source = hlock_class(src); + struct lock_class *target = hlock_class(tgt); + struct lock_class *parent = prt->class; + + /* + * A direct locking problem where unsafe_class lock is taken + * directly by safe_class lock, then all we need to show + * is the deadlock scenario, as it is obvious that the + * unsafe lock is taken under the safe lock. + * + * But if there is a chain instead, where the safe lock takes + * an intermediate lock (middle_class) where this lock is + * not the same as the safe lock, then the lock chain is + * used to describe the problem. Otherwise we would need + * to show a different CPU case for each link in the chain + * from the safe_class lock to the unsafe_class lock. + */ + if (parent != source) { + printk("Chain exists of:\n "); + __print_lock_name(source); + printk(" --> "); + __print_lock_name(parent); + printk(" --> "); + __print_lock_name(target); + printk("\n\n"); + } + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(target); + printk(");\n"); + printk(" lock("); + __print_lock_name(parent); + printk(");\n"); + printk(" lock("); + __print_lock_name(target); + printk(");\n"); + printk(" lock("); + __print_lock_name(source); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + +/* + * When a circular dependency is detected, print the + * header first: + */ +static noinline int +print_circular_bug_header(struct lock_list *entry, unsigned int depth, + struct held_lock *check_src, + struct held_lock *check_tgt) +{ + struct task_struct *curr = current; + + if (debug_locks_silent) + return 0; + + printk("\n"); + printk("======================================================\n"); + printk("[ INFO: possible circular locking dependency detected ]\n"); + print_kernel_ident(); + printk("-------------------------------------------------------\n"); + printk("%s/%d is trying to acquire lock:\n", + curr->comm, task_pid_nr(curr)); + print_lock(check_src); + printk("\nbut task is already holding lock:\n"); + print_lock(check_tgt); + printk("\nwhich lock already depends on the new lock.\n\n"); + printk("\nthe existing dependency chain (in reverse order) is:\n"); + + print_circular_bug_entry(entry, depth); + + return 0; +} + +static inline int class_equal(struct lock_list *entry, void *data) +{ + return entry->class == data; +} + +static noinline int print_circular_bug(struct lock_list *this, + struct lock_list *target, + struct held_lock *check_src, + struct held_lock *check_tgt) +{ + struct task_struct *curr = current; + struct lock_list *parent; + struct lock_list *first_parent; + int depth; + + if (!debug_locks_off_graph_unlock() || debug_locks_silent) + return 0; + + if (!save_trace(&this->trace)) + return 0; + + depth = get_lock_depth(target); + + print_circular_bug_header(target, depth, check_src, check_tgt); + + parent = get_lock_parent(target); + first_parent = parent; + + while (parent) { + print_circular_bug_entry(parent, --depth); + parent = get_lock_parent(parent); + } + + printk("\nother info that might help us debug this:\n\n"); + print_circular_lock_scenario(check_src, check_tgt, + first_parent); + + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static noinline int print_bfs_bug(int ret) +{ + if (!debug_locks_off_graph_unlock()) + return 0; + + /* + * Breadth-first-search failed, graph got corrupted? + */ + WARN(1, "lockdep bfs error:%d\n", ret); + + return 0; +} + +static int noop_count(struct lock_list *entry, void *data) +{ + (*(unsigned long *)data)++; + return 0; +} + +unsigned long __lockdep_count_forward_deps(struct lock_list *this) +{ + unsigned long count = 0; + struct lock_list *uninitialized_var(target_entry); + + __bfs_forwards(this, (void *)&count, noop_count, &target_entry); + + return count; +} +unsigned long lockdep_count_forward_deps(struct lock_class *class) +{ + unsigned long ret, flags; + struct lock_list this; + + this.parent = NULL; + this.class = class; + + local_irq_save(flags); + arch_spin_lock(&lockdep_lock); + ret = __lockdep_count_forward_deps(&this); + arch_spin_unlock(&lockdep_lock); + local_irq_restore(flags); + + return ret; +} + +unsigned long __lockdep_count_backward_deps(struct lock_list *this) +{ + unsigned long count = 0; + struct lock_list *uninitialized_var(target_entry); + + __bfs_backwards(this, (void *)&count, noop_count, &target_entry); + + return count; +} + +unsigned long lockdep_count_backward_deps(struct lock_class *class) +{ + unsigned long ret, flags; + struct lock_list this; + + this.parent = NULL; + this.class = class; + + local_irq_save(flags); + arch_spin_lock(&lockdep_lock); + ret = __lockdep_count_backward_deps(&this); + arch_spin_unlock(&lockdep_lock); + local_irq_restore(flags); + + return ret; +} + +/* + * Prove that the dependency graph starting at can not + * lead to . Print an error and return 0 if it does. + */ +static noinline int +check_noncircular(struct lock_list *root, struct lock_class *target, + struct lock_list **target_entry) +{ + int result; + + debug_atomic_inc(nr_cyclic_checks); + + result = __bfs_forwards(root, target, class_equal, target_entry); + + return result; +} + +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) +/* + * Forwards and backwards subgraph searching, for the purposes of + * proving that two subgraphs can be connected by a new dependency + * without creating any illegal irq-safe -> irq-unsafe lock dependency. + */ + +static inline int usage_match(struct lock_list *entry, void *bit) +{ + return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); +} + + + +/* + * Find a node in the forwards-direction dependency sub-graph starting + * at @root->class that matches @bit. + * + * Return 0 if such a node exists in the subgraph, and put that node + * into *@target_entry. + * + * Return 1 otherwise and keep *@target_entry unchanged. + * Return <0 on error. + */ +static int +find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, + struct lock_list **target_entry) +{ + int result; + + debug_atomic_inc(nr_find_usage_forwards_checks); + + result = __bfs_forwards(root, (void *)bit, usage_match, target_entry); + + return result; +} + +/* + * Find a node in the backwards-direction dependency sub-graph starting + * at @root->class that matches @bit. + * + * Return 0 if such a node exists in the subgraph, and put that node + * into *@target_entry. + * + * Return 1 otherwise and keep *@target_entry unchanged. + * Return <0 on error. + */ +static int +find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, + struct lock_list **target_entry) +{ + int result; + + debug_atomic_inc(nr_find_usage_backwards_checks); + + result = __bfs_backwards(root, (void *)bit, usage_match, target_entry); + + return result; +} + +static void print_lock_class_header(struct lock_class *class, int depth) +{ + int bit; + + printk("%*s->", depth, ""); + print_lock_name(class); + printk(" ops: %lu", class->ops); + printk(" {\n"); + + for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { + if (class->usage_mask & (1 << bit)) { + int len = depth; + + len += printk("%*s %s", depth, "", usage_str[bit]); + len += printk(" at:\n"); + print_stack_trace(class->usage_traces + bit, len); + } + } + printk("%*s }\n", depth, ""); + + printk("%*s ... key at: ",depth,""); + print_ip_sym((unsigned long)class->key); +} + +/* + * printk the shortest lock dependencies from @start to @end in reverse order: + */ +static void __used +print_shortest_lock_dependencies(struct lock_list *leaf, + struct lock_list *root) +{ + struct lock_list *entry = leaf; + int depth; + + /*compute depth from generated tree by BFS*/ + depth = get_lock_depth(leaf); + + do { + print_lock_class_header(entry->class, depth); + printk("%*s ... acquired at:\n", depth, ""); + print_stack_trace(&entry->trace, 2); + printk("\n"); + + if (depth == 0 && (entry != root)) { + printk("lockdep:%s bad path found in chain graph\n", __func__); + break; + } + + entry = get_lock_parent(entry); + depth--; + } while (entry && (depth >= 0)); + + return; +} + +static void +print_irq_lock_scenario(struct lock_list *safe_entry, + struct lock_list *unsafe_entry, + struct lock_class *prev_class, + struct lock_class *next_class) +{ + struct lock_class *safe_class = safe_entry->class; + struct lock_class *unsafe_class = unsafe_entry->class; + struct lock_class *middle_class = prev_class; + + if (middle_class == safe_class) + middle_class = next_class; + + /* + * A direct locking problem where unsafe_class lock is taken + * directly by safe_class lock, then all we need to show + * is the deadlock scenario, as it is obvious that the + * unsafe lock is taken under the safe lock. + * + * But if there is a chain instead, where the safe lock takes + * an intermediate lock (middle_class) where this lock is + * not the same as the safe lock, then the lock chain is + * used to describe the problem. Otherwise we would need + * to show a different CPU case for each link in the chain + * from the safe_class lock to the unsafe_class lock. + */ + if (middle_class != unsafe_class) { + printk("Chain exists of:\n "); + __print_lock_name(safe_class); + printk(" --> "); + __print_lock_name(middle_class); + printk(" --> "); + __print_lock_name(unsafe_class); + printk("\n\n"); + } + + printk(" Possible interrupt unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(unsafe_class); + printk(");\n"); + printk(" local_irq_disable();\n"); + printk(" lock("); + __print_lock_name(safe_class); + printk(");\n"); + printk(" lock("); + __print_lock_name(middle_class); + printk(");\n"); + printk(" \n"); + printk(" lock("); + __print_lock_name(safe_class); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + +static int +print_bad_irq_dependency(struct task_struct *curr, + struct lock_list *prev_root, + struct lock_list *next_root, + struct lock_list *backwards_entry, + struct lock_list *forwards_entry, + struct held_lock *prev, + struct held_lock *next, + enum lock_usage_bit bit1, + enum lock_usage_bit bit2, + const char *irqclass) +{ + if (!debug_locks_off_graph_unlock() || debug_locks_silent) + return 0; + + printk("\n"); + printk("======================================================\n"); + printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", + irqclass, irqclass); + print_kernel_ident(); + printk("------------------------------------------------------\n"); + printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", + curr->comm, task_pid_nr(curr), + curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, + curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, + curr->hardirqs_enabled, + curr->softirqs_enabled); + print_lock(next); + + printk("\nand this task is already holding:\n"); + print_lock(prev); + printk("which would create a new lock dependency:\n"); + print_lock_name(hlock_class(prev)); + printk(" ->"); + print_lock_name(hlock_class(next)); + printk("\n"); + + printk("\nbut this new dependency connects a %s-irq-safe lock:\n", + irqclass); + print_lock_name(backwards_entry->class); + printk("\n... which became %s-irq-safe at:\n", irqclass); + + print_stack_trace(backwards_entry->class->usage_traces + bit1, 1); + + printk("\nto a %s-irq-unsafe lock:\n", irqclass); + print_lock_name(forwards_entry->class); + printk("\n... which became %s-irq-unsafe at:\n", irqclass); + printk("..."); + + print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); + + printk("\nother info that might help us debug this:\n\n"); + print_irq_lock_scenario(backwards_entry, forwards_entry, + hlock_class(prev), hlock_class(next)); + + lockdep_print_held_locks(curr); + + printk("\nthe dependencies between %s-irq-safe lock", irqclass); + printk(" and the holding lock:\n"); + if (!save_trace(&prev_root->trace)) + return 0; + print_shortest_lock_dependencies(backwards_entry, prev_root); + + printk("\nthe dependencies between the lock to be acquired"); + printk(" and %s-irq-unsafe lock:\n", irqclass); + if (!save_trace(&next_root->trace)) + return 0; + print_shortest_lock_dependencies(forwards_entry, next_root); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static int +check_usage(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next, enum lock_usage_bit bit_backwards, + enum lock_usage_bit bit_forwards, const char *irqclass) +{ + int ret; + struct lock_list this, that; + struct lock_list *uninitialized_var(target_entry); + struct lock_list *uninitialized_var(target_entry1); + + this.parent = NULL; + + this.class = hlock_class(prev); + ret = find_usage_backwards(&this, bit_backwards, &target_entry); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; + + that.parent = NULL; + that.class = hlock_class(next); + ret = find_usage_forwards(&that, bit_forwards, &target_entry1); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; + + return print_bad_irq_dependency(curr, &this, &that, + target_entry, target_entry1, + prev, next, + bit_backwards, bit_forwards, irqclass); +} + +static const char *state_names[] = { +#define LOCKDEP_STATE(__STATE) \ + __stringify(__STATE), +#include "lockdep_states.h" +#undef LOCKDEP_STATE +}; + +static const char *state_rnames[] = { +#define LOCKDEP_STATE(__STATE) \ + __stringify(__STATE)"-READ", +#include "lockdep_states.h" +#undef LOCKDEP_STATE +}; + +static inline const char *state_name(enum lock_usage_bit bit) +{ + return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2]; +} + +static int exclusive_bit(int new_bit) +{ + /* + * USED_IN + * USED_IN_READ + * ENABLED + * ENABLED_READ + * + * bit 0 - write/read + * bit 1 - used_in/enabled + * bit 2+ state + */ + + int state = new_bit & ~3; + int dir = new_bit & 2; + + /* + * keep state, bit flip the direction and strip read. + */ + return state | (dir ^ 2); +} + +static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next, enum lock_usage_bit bit) +{ + /* + * Prove that the new dependency does not connect a hardirq-safe + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ + if (!check_usage(curr, prev, next, bit, + exclusive_bit(bit), state_name(bit))) + return 0; + + bit++; /* _READ */ + + /* + * Prove that the new dependency does not connect a hardirq-safe-read + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ + if (!check_usage(curr, prev, next, bit, + exclusive_bit(bit), state_name(bit))) + return 0; + + return 1; +} + +static int +check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ +#define LOCKDEP_STATE(__STATE) \ + if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \ + return 0; +#include "lockdep_states.h" +#undef LOCKDEP_STATE + + return 1; +} + +static void inc_chains(void) +{ + if (current->hardirq_context) + nr_hardirq_chains++; + else { + if (current->softirq_context) + nr_softirq_chains++; + else + nr_process_chains++; + } +} + +#else + +static inline int +check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + return 1; +} + +static inline void inc_chains(void) +{ + nr_process_chains++; +} + +#endif + +static void +print_deadlock_scenario(struct held_lock *nxt, + struct held_lock *prv) +{ + struct lock_class *next = hlock_class(nxt); + struct lock_class *prev = hlock_class(prv); + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0\n"); + printk(" ----\n"); + printk(" lock("); + __print_lock_name(prev); + printk(");\n"); + printk(" lock("); + __print_lock_name(next); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); + printk(" May be due to missing lock nesting notation\n\n"); +} + +static int +print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + if (!debug_locks_off_graph_unlock() || debug_locks_silent) + return 0; + + printk("\n"); + printk("=============================================\n"); + printk("[ INFO: possible recursive locking detected ]\n"); + print_kernel_ident(); + printk("---------------------------------------------\n"); + printk("%s/%d is trying to acquire lock:\n", + curr->comm, task_pid_nr(curr)); + print_lock(next); + printk("\nbut task is already holding lock:\n"); + print_lock(prev); + + printk("\nother info that might help us debug this:\n"); + print_deadlock_scenario(next, prev); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Check whether we are holding such a class already. + * + * (Note that this has to be done separately, because the graph cannot + * detect such classes of deadlocks.) + * + * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read + */ +static int +check_deadlock(struct task_struct *curr, struct held_lock *next, + struct lockdep_map *next_instance, int read) +{ + struct held_lock *prev; + struct held_lock *nest = NULL; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + prev = curr->held_locks + i; + + if (prev->instance == next->nest_lock) + nest = prev; + + if (hlock_class(prev) != hlock_class(next)) + continue; + + /* + * Allow read-after-read recursion of the same + * lock class (i.e. read_lock(lock)+read_lock(lock)): + */ + if ((read == 2) && prev->read) + return 2; + + /* + * We're holding the nest_lock, which serializes this lock's + * nesting behaviour. + */ + if (nest) + return 2; + + return print_deadlock_bug(curr, prev, next); + } + return 1; +} + +/* + * There was a chain-cache miss, and we are about to add a new dependency + * to a previous lock. We recursively validate the following rules: + * + * - would the adding of the -> dependency create a + * circular dependency in the graph? [== circular deadlock] + * + * - does the new prev->next dependency connect any hardirq-safe lock + * (in the full backwards-subgraph starting at ) with any + * hardirq-unsafe lock (in the full forwards-subgraph starting at + * )? [== illegal lock inversion with hardirq contexts] + * + * - does the new prev->next dependency connect any softirq-safe lock + * (in the full backwards-subgraph starting at ) with any + * softirq-unsafe lock (in the full forwards-subgraph starting at + * )? [== illegal lock inversion with softirq contexts] + * + * any of these scenarios could lead to a deadlock. + * + * Then if all the validations pass, we add the forwards and backwards + * dependency. + */ +static int +check_prev_add(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next, int distance, int trylock_loop) +{ + struct lock_list *entry; + int ret; + struct lock_list this; + struct lock_list *uninitialized_var(target_entry); + /* + * Static variable, serialized by the graph_lock(). + * + * We use this static variable to save the stack trace in case + * we call into this function multiple times due to encountering + * trylocks in the held lock stack. + */ + static struct stack_trace trace; + + /* + * Prove that the new -> dependency would not + * create a circular dependency in the graph. (We do this by + * forward-recursing into the graph starting at , and + * checking whether we can reach .) + * + * We are using global variables to control the recursion, to + * keep the stackframe size of the recursive functions low: + */ + this.class = hlock_class(next); + this.parent = NULL; + ret = check_noncircular(&this, hlock_class(prev), &target_entry); + if (unlikely(!ret)) + return print_circular_bug(&this, target_entry, next, prev); + else if (unlikely(ret < 0)) + return print_bfs_bug(ret); + + if (!check_prev_add_irq(curr, prev, next)) + return 0; + + /* + * For recursive read-locks we do all the dependency checks, + * but we dont store read-triggered dependencies (only + * write-triggered dependencies). This ensures that only the + * write-side dependencies matter, and that if for example a + * write-lock never takes any other locks, then the reads are + * equivalent to a NOP. + */ + if (next->read == 2 || prev->read == 2) + return 1; + /* + * Is the -> dependency already present? + * + * (this may occur even though this is a new chain: consider + * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3 + * chains - the second one will be new, but L1 already has + * L2 added to its dependency list, due to the first chain.) + */ + list_for_each_entry(entry, &hlock_class(prev)->locks_after, entry) { + if (entry->class == hlock_class(next)) { + if (distance == 1) + entry->distance = 1; + return 2; + } + } + + if (!trylock_loop && !save_trace(&trace)) + return 0; + + /* + * Ok, all validations passed, add the new lock + * to the previous lock's dependency list: + */ + ret = add_lock_to_list(hlock_class(prev), hlock_class(next), + &hlock_class(prev)->locks_after, + next->acquire_ip, distance, &trace); + + if (!ret) + return 0; + + ret = add_lock_to_list(hlock_class(next), hlock_class(prev), + &hlock_class(next)->locks_before, + next->acquire_ip, distance, &trace); + if (!ret) + return 0; + + /* + * Debugging printouts: + */ + if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { + graph_unlock(); + printk("\n new dependency: "); + print_lock_name(hlock_class(prev)); + printk(" => "); + print_lock_name(hlock_class(next)); + printk("\n"); + dump_stack(); + return graph_lock(); + } + return 1; +} + +/* + * Add the dependency to all directly-previous locks that are 'relevant'. + * The ones that are relevant are (in increasing distance from curr): + * all consecutive trylock entries and the final non-trylock entry - or + * the end of this context's lock-chain - whichever comes first. + */ +static int +check_prevs_add(struct task_struct *curr, struct held_lock *next) +{ + int depth = curr->lockdep_depth; + int trylock_loop = 0; + struct held_lock *hlock; + + /* + * Debugging checks. + * + * Depth must not be zero for a non-head lock: + */ + if (!depth) + goto out_bug; + /* + * At least two relevant locks must exist for this + * to be a head: + */ + if (curr->held_locks[depth].irq_context != + curr->held_locks[depth-1].irq_context) + goto out_bug; + + for (;;) { + int distance = curr->lockdep_depth - depth + 1; + hlock = curr->held_locks + depth-1; + /* + * Only non-recursive-read entries get new dependencies + * added: + */ + if (hlock->read != 2) { + if (!check_prev_add(curr, hlock, next, + distance, trylock_loop)) + return 0; + /* + * Stop after the first non-trylock entry, + * as non-trylock entries have added their + * own direct dependencies already, so this + * lock is connected to them indirectly: + */ + if (!hlock->trylock) + break; + } + depth--; + /* + * End of lock-stack? + */ + if (!depth) + break; + /* + * Stop the search if we cross into another context: + */ + if (curr->held_locks[depth].irq_context != + curr->held_locks[depth-1].irq_context) + break; + trylock_loop = 1; + } + return 1; +out_bug: + if (!debug_locks_off_graph_unlock()) + return 0; + + /* + * Clearly we all shouldn't be here, but since we made it we + * can reliable say we messed up our state. See the above two + * gotos for reasons why we could possibly end up here. + */ + WARN_ON(1); + + return 0; +} + +unsigned long nr_lock_chains; +struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; +int nr_chain_hlocks; +static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS]; + +struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i) +{ + return lock_classes + chain_hlocks[chain->base + i]; +} + +/* + * Look up a dependency chain. If the key is not present yet then + * add it and return 1 - in this case the new dependency chain is + * validated. If the key is already hashed, return 0. + * (On return with 1 graph_lock is held.) + */ +static inline int lookup_chain_cache(struct task_struct *curr, + struct held_lock *hlock, + u64 chain_key) +{ + struct lock_class *class = hlock_class(hlock); + struct list_head *hash_head = chainhashentry(chain_key); + struct lock_chain *chain; + struct held_lock *hlock_curr; + int i, j; + + /* + * We might need to take the graph lock, ensure we've got IRQs + * disabled to make this an IRQ-safe lock.. for recursion reasons + * lockdep won't complain about its own locking errors. + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; + /* + * We can walk it lock-free, because entries only get added + * to the hash: + */ + list_for_each_entry(chain, hash_head, entry) { + if (chain->chain_key == chain_key) { +cache_hit: + debug_atomic_inc(chain_lookup_hits); + if (very_verbose(class)) + printk("\nhash chain already cached, key: " + "%016Lx tail class: [%p] %s\n", + (unsigned long long)chain_key, + class->key, class->name); + return 0; + } + } + if (very_verbose(class)) + printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", + (unsigned long long)chain_key, class->key, class->name); + /* + * Allocate a new chain entry from the static array, and add + * it to the hash: + */ + if (!graph_lock()) + return 0; + /* + * We have to walk the chain again locked - to avoid duplicates: + */ + list_for_each_entry(chain, hash_head, entry) { + if (chain->chain_key == chain_key) { + graph_unlock(); + goto cache_hit; + } + } + if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { + if (!debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); + dump_stack(); + return 0; + } + chain = lock_chains + nr_lock_chains++; + chain->chain_key = chain_key; + chain->irq_context = hlock->irq_context; + /* Find the first held_lock of current chain */ + for (i = curr->lockdep_depth - 1; i >= 0; i--) { + hlock_curr = curr->held_locks + i; + if (hlock_curr->irq_context != hlock->irq_context) + break; + } + i++; + chain->depth = curr->lockdep_depth + 1 - i; + if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { + chain->base = nr_chain_hlocks; + nr_chain_hlocks += chain->depth; + for (j = 0; j < chain->depth - 1; j++, i++) { + int lock_id = curr->held_locks[i].class_idx - 1; + chain_hlocks[chain->base + j] = lock_id; + } + chain_hlocks[chain->base + j] = class - lock_classes; + } + list_add_tail_rcu(&chain->entry, hash_head); + debug_atomic_inc(chain_lookup_misses); + inc_chains(); + + return 1; +} + +static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, + struct held_lock *hlock, int chain_head, u64 chain_key) +{ + /* + * Trylock needs to maintain the stack of held locks, but it + * does not add new dependencies, because trylock can be done + * in any order. + * + * We look up the chain_key and do the O(N^2) check and update of + * the dependencies only if this is a new dependency chain. + * (If lookup_chain_cache() returns with 1 it acquires + * graph_lock for us) + */ + if (!hlock->trylock && (hlock->check == 2) && + lookup_chain_cache(curr, hlock, chain_key)) { + /* + * Check whether last held lock: + * + * - is irq-safe, if this lock is irq-unsafe + * - is softirq-safe, if this lock is hardirq-unsafe + * + * And check whether the new lock's dependency graph + * could lead back to the previous lock. + * + * any of these scenarios could lead to a deadlock. If + * All validations + */ + int ret = check_deadlock(curr, hlock, lock, hlock->read); + + if (!ret) + return 0; + /* + * Mark recursive read, as we jump over it when + * building dependencies (just like we jump over + * trylock entries): + */ + if (ret == 2) + hlock->read = 2; + /* + * Add dependency only if this lock is not the head + * of the chain, and if it's not a secondary read-lock: + */ + if (!chain_head && ret != 2) + if (!check_prevs_add(curr, hlock)) + return 0; + graph_unlock(); + } else + /* after lookup_chain_cache(): */ + if (unlikely(!debug_locks)) + return 0; + + return 1; +} +#else +static inline int validate_chain(struct task_struct *curr, + struct lockdep_map *lock, struct held_lock *hlock, + int chain_head, u64 chain_key) +{ + return 1; +} +#endif + +/* + * We are building curr_chain_key incrementally, so double-check + * it from scratch, to make sure that it's done correctly: + */ +static void check_chain_key(struct task_struct *curr) +{ +#ifdef CONFIG_DEBUG_LOCKDEP + struct held_lock *hlock, *prev_hlock = NULL; + unsigned int i, id; + u64 chain_key = 0; + + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + if (chain_key != hlock->prev_chain_key) { + debug_locks_off(); + /* + * We got mighty confused, our chain keys don't match + * with what we expect, someone trample on our task state? + */ + WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n", + curr->lockdep_depth, i, + (unsigned long long)chain_key, + (unsigned long long)hlock->prev_chain_key); + return; + } + id = hlock->class_idx - 1; + /* + * Whoops ran out of static storage again? + */ + if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) + return; + + if (prev_hlock && (prev_hlock->irq_context != + hlock->irq_context)) + chain_key = 0; + chain_key = iterate_chain_key(chain_key, id); + prev_hlock = hlock; + } + if (chain_key != curr->curr_chain_key) { + debug_locks_off(); + /* + * More smoking hash instead of calculating it, damn see these + * numbers float.. I bet that a pink elephant stepped on my memory. + */ + WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n", + curr->lockdep_depth, i, + (unsigned long long)chain_key, + (unsigned long long)curr->curr_chain_key); + } +#endif +} + +static void +print_usage_bug_scenario(struct held_lock *lock) +{ + struct lock_class *class = hlock_class(lock); + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0\n"); + printk(" ----\n"); + printk(" lock("); + __print_lock_name(class); + printk(");\n"); + printk(" \n"); + printk(" lock("); + __print_lock_name(class); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + +static int +print_usage_bug(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) +{ + if (!debug_locks_off_graph_unlock() || debug_locks_silent) + return 0; + + printk("\n"); + printk("=================================\n"); + printk("[ INFO: inconsistent lock state ]\n"); + print_kernel_ident(); + printk("---------------------------------\n"); + + printk("inconsistent {%s} -> {%s} usage.\n", + usage_str[prev_bit], usage_str[new_bit]); + + printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", + curr->comm, task_pid_nr(curr), + trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, + trace_hardirqs_enabled(curr), + trace_softirqs_enabled(curr)); + print_lock(this); + + printk("{%s} state was registered at:\n", usage_str[prev_bit]); + print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1); + + print_irqtrace_events(curr); + printk("\nother info that might help us debug this:\n"); + print_usage_bug_scenario(this); + + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Print out an error if an invalid bit is set: + */ +static inline int +valid_state(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) +{ + if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) + return print_usage_bug(curr, this, bad_bit, new_bit); + return 1; +} + +static int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit); + +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) + +/* + * print irq inversion bug: + */ +static int +print_irq_inversion_bug(struct task_struct *curr, + struct lock_list *root, struct lock_list *other, + struct held_lock *this, int forwards, + const char *irqclass) +{ + struct lock_list *entry = other; + struct lock_list *middle = NULL; + int depth; + + if (!debug_locks_off_graph_unlock() || debug_locks_silent) + return 0; + + printk("\n"); + printk("=========================================================\n"); + printk("[ INFO: possible irq lock inversion dependency detected ]\n"); + print_kernel_ident(); + printk("---------------------------------------------------------\n"); + printk("%s/%d just changed the state of lock:\n", + curr->comm, task_pid_nr(curr)); + print_lock(this); + if (forwards) + printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass); + else + printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); + print_lock_name(other->class); + printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); + + printk("\nother info that might help us debug this:\n"); + + /* Find a middle lock (if one exists) */ + depth = get_lock_depth(other); + do { + if (depth == 0 && (entry != root)) { + printk("lockdep:%s bad path found in chain graph\n", __func__); + break; + } + middle = entry; + entry = get_lock_parent(entry); + depth--; + } while (entry && entry != root && (depth >= 0)); + if (forwards) + print_irq_lock_scenario(root, other, + middle ? middle->class : root->class, other->class); + else + print_irq_lock_scenario(other, root, + middle ? middle->class : other->class, root->class); + + lockdep_print_held_locks(curr); + + printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); + if (!save_trace(&root->trace)) + return 0; + print_shortest_lock_dependencies(other, root); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Prove that in the forwards-direction subgraph starting at + * there is no lock matching : + */ +static int +check_usage_forwards(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit bit, const char *irqclass) +{ + int ret; + struct lock_list root; + struct lock_list *uninitialized_var(target_entry); + + root.parent = NULL; + root.class = hlock_class(this); + ret = find_usage_forwards(&root, bit, &target_entry); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; + + return print_irq_inversion_bug(curr, &root, target_entry, + this, 1, irqclass); +} + +/* + * Prove that in the backwards-direction subgraph starting at + * there is no lock matching : + */ +static int +check_usage_backwards(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit bit, const char *irqclass) +{ + int ret; + struct lock_list root; + struct lock_list *uninitialized_var(target_entry); + + root.parent = NULL; + root.class = hlock_class(this); + ret = find_usage_backwards(&root, bit, &target_entry); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; + + return print_irq_inversion_bug(curr, &root, target_entry, + this, 0, irqclass); +} + +void print_irqtrace_events(struct task_struct *curr) +{ + printk("irq event stamp: %u\n", curr->irq_events); + printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); + print_ip_sym(curr->hardirq_enable_ip); + printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event); + print_ip_sym(curr->hardirq_disable_ip); + printk("softirqs last enabled at (%u): ", curr->softirq_enable_event); + print_ip_sym(curr->softirq_enable_ip); + printk("softirqs last disabled at (%u): ", curr->softirq_disable_event); + print_ip_sym(curr->softirq_disable_ip); +} + +static int HARDIRQ_verbose(struct lock_class *class) +{ +#if HARDIRQ_VERBOSE + return class_filter(class); +#endif + return 0; +} + +static int SOFTIRQ_verbose(struct lock_class *class) +{ +#if SOFTIRQ_VERBOSE + return class_filter(class); +#endif + return 0; +} + +static int RECLAIM_FS_verbose(struct lock_class *class) +{ +#if RECLAIM_VERBOSE + return class_filter(class); +#endif + return 0; +} + +#define STRICT_READ_CHECKS 1 + +static int (*state_verbose_f[])(struct lock_class *class) = { +#define LOCKDEP_STATE(__STATE) \ + __STATE##_verbose, +#include "lockdep_states.h" +#undef LOCKDEP_STATE +}; + +static inline int state_verbose(enum lock_usage_bit bit, + struct lock_class *class) +{ + return state_verbose_f[bit >> 2](class); +} + +typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, + enum lock_usage_bit bit, const char *name); + +static int +mark_lock_irq(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) +{ + int excl_bit = exclusive_bit(new_bit); + int read = new_bit & 1; + int dir = new_bit & 2; + + /* + * mark USED_IN has to look forwards -- to ensure no dependency + * has ENABLED state, which would allow recursion deadlocks. + * + * mark ENABLED has to look backwards -- to ensure no dependee + * has USED_IN state, which, again, would allow recursion deadlocks. + */ + check_usage_f usage = dir ? + check_usage_backwards : check_usage_forwards; + + /* + * Validate that this particular lock does not have conflicting + * usage states. + */ + if (!valid_state(curr, this, new_bit, excl_bit)) + return 0; + + /* + * Validate that the lock dependencies don't have conflicting usage + * states. + */ + if ((!read || !dir || STRICT_READ_CHECKS) && + !usage(curr, this, excl_bit, state_name(new_bit & ~1))) + return 0; + + /* + * Check for read in write conflicts + */ + if (!read) { + if (!valid_state(curr, this, new_bit, excl_bit + 1)) + return 0; + + if (STRICT_READ_CHECKS && + !usage(curr, this, excl_bit + 1, + state_name(new_bit + 1))) + return 0; + } + + if (state_verbose(new_bit, hlock_class(this))) + return 2; + + return 1; +} + +enum mark_type { +#define LOCKDEP_STATE(__STATE) __STATE, +#include "lockdep_states.h" +#undef LOCKDEP_STATE +}; + +/* + * Mark all held locks with a usage bit: + */ +static int +mark_held_locks(struct task_struct *curr, enum mark_type mark) +{ + enum lock_usage_bit usage_bit; + struct held_lock *hlock; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + + usage_bit = 2 + (mark << 2); /* ENABLED */ + if (hlock->read) + usage_bit += 1; /* READ */ + + BUG_ON(usage_bit >= LOCK_USAGE_STATES); + + if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) + continue; + + if (!mark_lock(curr, hlock, usage_bit)) + return 0; + } + + return 1; +} + +/* + * Hardirqs will be enabled: + */ +static void __trace_hardirqs_on_caller(unsigned long ip) +{ + struct task_struct *curr = current; + + /* we'll do an OFF -> ON transition: */ + curr->hardirqs_enabled = 1; + + /* + * We are going to turn hardirqs on, so set the + * usage bit for all held locks: + */ + if (!mark_held_locks(curr, HARDIRQ)) + return; + /* + * If we have softirqs enabled, then set the usage + * bit for all held locks. (disabled hardirqs prevented + * this bit from being set before) + */ + if (curr->softirqs_enabled) + if (!mark_held_locks(curr, SOFTIRQ)) + return; + + curr->hardirq_enable_ip = ip; + curr->hardirq_enable_event = ++curr->irq_events; + debug_atomic_inc(hardirqs_on_events); +} + +void trace_hardirqs_on_caller(unsigned long ip) +{ + time_hardirqs_on(CALLER_ADDR0, ip); + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + if (unlikely(current->hardirqs_enabled)) { + /* + * Neither irq nor preemption are disabled here + * so this is racy by nature but losing one hit + * in a stat is not a big deal. + */ + __debug_atomic_inc(redundant_hardirqs_on); + return; + } + + /* + * We're enabling irqs and according to our state above irqs weren't + * already enabled, yet we find the hardware thinks they are in fact + * enabled.. someone messed up their IRQ state tracing. + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + /* + * See the fine text that goes along with this variable definition. + */ + if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) + return; + + /* + * Can't allow enabling interrupts while in an interrupt handler, + * that's general bad form and such. Recursion, limited stack etc.. + */ + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; + + current->lockdep_recursion = 1; + __trace_hardirqs_on_caller(ip); + current->lockdep_recursion = 0; +} +EXPORT_SYMBOL(trace_hardirqs_on_caller); + +void trace_hardirqs_on(void) +{ + trace_hardirqs_on_caller(CALLER_ADDR0); +} +EXPORT_SYMBOL(trace_hardirqs_on); + +/* + * Hardirqs were disabled: + */ +void trace_hardirqs_off_caller(unsigned long ip) +{ + struct task_struct *curr = current; + + time_hardirqs_off(CALLER_ADDR0, ip); + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + /* + * So we're supposed to get called after you mask local IRQs, but for + * some reason the hardware doesn't quite think you did a proper job. + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->hardirqs_enabled) { + /* + * We have done an ON -> OFF transition: + */ + curr->hardirqs_enabled = 0; + curr->hardirq_disable_ip = ip; + curr->hardirq_disable_event = ++curr->irq_events; + debug_atomic_inc(hardirqs_off_events); + } else + debug_atomic_inc(redundant_hardirqs_off); +} +EXPORT_SYMBOL(trace_hardirqs_off_caller); + +void trace_hardirqs_off(void) +{ + trace_hardirqs_off_caller(CALLER_ADDR0); +} +EXPORT_SYMBOL(trace_hardirqs_off); + +/* + * Softirqs will be enabled: + */ +void trace_softirqs_on(unsigned long ip) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + /* + * We fancy IRQs being disabled here, see softirq.c, avoids + * funny state and nesting things. + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->softirqs_enabled) { + debug_atomic_inc(redundant_softirqs_on); + return; + } + + current->lockdep_recursion = 1; + /* + * We'll do an OFF -> ON transition: + */ + curr->softirqs_enabled = 1; + curr->softirq_enable_ip = ip; + curr->softirq_enable_event = ++curr->irq_events; + debug_atomic_inc(softirqs_on_events); + /* + * We are going to turn softirqs on, so set the + * usage bit for all held locks, if hardirqs are + * enabled too: + */ + if (curr->hardirqs_enabled) + mark_held_locks(curr, SOFTIRQ); + current->lockdep_recursion = 0; +} + +/* + * Softirqs were disabled: + */ +void trace_softirqs_off(unsigned long ip) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + /* + * We fancy IRQs being disabled here, see softirq.c + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->softirqs_enabled) { + /* + * We have done an ON -> OFF transition: + */ + curr->softirqs_enabled = 0; + curr->softirq_disable_ip = ip; + curr->softirq_disable_event = ++curr->irq_events; + debug_atomic_inc(softirqs_off_events); + /* + * Whoops, we wanted softirqs off, so why aren't they? + */ + DEBUG_LOCKS_WARN_ON(!softirq_count()); + } else + debug_atomic_inc(redundant_softirqs_off); +} + +static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks)) + return; + + /* no reclaim without waiting on it */ + if (!(gfp_mask & __GFP_WAIT)) + return; + + /* this guy won't enter reclaim */ + if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) + return; + + /* We're only interested __GFP_FS allocations for now */ + if (!(gfp_mask & __GFP_FS)) + return; + + /* + * Oi! Can't be having __GFP_FS allocations with IRQs disabled. + */ + if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) + return; + + mark_held_locks(curr, RECLAIM_FS); +} + +static void check_flags(unsigned long flags); + +void lockdep_trace_alloc(gfp_t gfp_mask) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + __lockdep_trace_alloc(gfp_mask, flags); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} + +static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) +{ + /* + * If non-trylock use in a hardirq or softirq context, then + * mark the lock as used in these contexts: + */ + if (!hlock->trylock) { + if (hlock->read) { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_HARDIRQ_READ)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_SOFTIRQ_READ)) + return 0; + } else { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) + return 0; + } + } + if (!hlock->hardirqs_off) { + if (hlock->read) { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQ_READ)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQ_READ)) + return 0; + } else { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQ)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQ)) + return 0; + } + } + + /* + * We reuse the irq context infrastructure more broadly as a general + * context checking code. This tests GFP_FS recursion (a lock taken + * during reclaim for a GFP_FS allocation is held over a GFP_FS + * allocation). + */ + if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) { + if (hlock->read) { + if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ)) + return 0; + } else { + if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS)) + return 0; + } + } + + return 1; +} + +static int separate_irq_context(struct task_struct *curr, + struct held_lock *hlock) +{ + unsigned int depth = curr->lockdep_depth; + + /* + * Keep track of points where we cross into an interrupt context: + */ + hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + + curr->softirq_context; + if (depth) { + struct held_lock *prev_hlock; + + prev_hlock = curr->held_locks + depth-1; + /* + * If we cross into another context, reset the + * hash key (this also prevents the checking and the + * adding of the dependency to 'prev'): + */ + if (prev_hlock->irq_context != hlock->irq_context) + return 1; + } + return 0; +} + +#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ + +static inline +int mark_lock_irq(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) +{ + WARN_ON(1); /* Impossible innit? when we don't have TRACE_IRQFLAG */ + return 1; +} + +static inline int mark_irqflags(struct task_struct *curr, + struct held_lock *hlock) +{ + return 1; +} + +static inline int separate_irq_context(struct task_struct *curr, + struct held_lock *hlock) +{ + return 0; +} + +void lockdep_trace_alloc(gfp_t gfp_mask) +{ +} + +#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ + +/* + * Mark a lock with a usage bit, and validate the state transition: + */ +static int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) +{ + unsigned int new_mask = 1 << new_bit, ret = 1; + + /* + * If already set then do not dirty the cacheline, + * nor do any checks: + */ + if (likely(hlock_class(this)->usage_mask & new_mask)) + return 1; + + if (!graph_lock()) + return 0; + /* + * Make sure we didn't race: + */ + if (unlikely(hlock_class(this)->usage_mask & new_mask)) { + graph_unlock(); + return 1; + } + + hlock_class(this)->usage_mask |= new_mask; + + if (!save_trace(hlock_class(this)->usage_traces + new_bit)) + return 0; + + switch (new_bit) { +#define LOCKDEP_STATE(__STATE) \ + case LOCK_USED_IN_##__STATE: \ + case LOCK_USED_IN_##__STATE##_READ: \ + case LOCK_ENABLED_##__STATE: \ + case LOCK_ENABLED_##__STATE##_READ: +#include "lockdep_states.h" +#undef LOCKDEP_STATE + ret = mark_lock_irq(curr, this, new_bit); + if (!ret) + return 0; + break; + case LOCK_USED: + debug_atomic_dec(nr_unused_locks); + break; + default: + if (!debug_locks_off_graph_unlock()) + return 0; + WARN_ON(1); + return 0; + } + + graph_unlock(); + + /* + * We must printk outside of the graph_lock: + */ + if (ret == 2) { + printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); + print_lock(this); + print_irqtrace_events(curr); + dump_stack(); + } + + return ret; +} + +/* + * Initialize a lock instance's lock-class mapping info: + */ +void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass) +{ + int i; + + kmemcheck_mark_initialized(lock, sizeof(*lock)); + + for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) + lock->class_cache[i] = NULL; + +#ifdef CONFIG_LOCK_STAT + lock->cpu = raw_smp_processor_id(); +#endif + + /* + * Can't be having no nameless bastards around this place! + */ + if (DEBUG_LOCKS_WARN_ON(!name)) { + lock->name = "NULL"; + return; + } + + lock->name = name; + + /* + * No key, no joy, we need to hash something. + */ + if (DEBUG_LOCKS_WARN_ON(!key)) + return; + /* + * Sanity check, the lock-class key must be persistent: + */ + if (!static_obj(key)) { + printk("BUG: key %p not in .data!\n", key); + /* + * What it says above ^^^^^, I suggest you read it. + */ + DEBUG_LOCKS_WARN_ON(1); + return; + } + lock->key = key; + + if (unlikely(!debug_locks)) + return; + + if (subclass) + register_lock_class(lock, subclass, 1); +} +EXPORT_SYMBOL_GPL(lockdep_init_map); + +struct lock_class_key __lockdep_no_validate__; +EXPORT_SYMBOL_GPL(__lockdep_no_validate__); + +static int +print_lock_nested_lock_not_held(struct task_struct *curr, + struct held_lock *hlock, + unsigned long ip) +{ + if (!debug_locks_off()) + return 0; + if (debug_locks_silent) + return 0; + + printk("\n"); + printk("==================================\n"); + printk("[ BUG: Nested lock was not taken ]\n"); + print_kernel_ident(); + printk("----------------------------------\n"); + + printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); + print_lock(hlock); + + printk("\nbut this task is not holding:\n"); + printk("%s\n", hlock->nest_lock->name); + + printk("\nstack backtrace:\n"); + dump_stack(); + + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static int __lock_is_held(struct lockdep_map *lock); + +/* + * This gets called for every mutex_lock*()/spin_lock*() operation. + * We maintain the dependency maps and validate the locking attempt: + */ +static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, int hardirqs_off, + struct lockdep_map *nest_lock, unsigned long ip, + int references) +{ + struct task_struct *curr = current; + struct lock_class *class = NULL; + struct held_lock *hlock; + unsigned int depth, id; + int chain_head = 0; + int class_idx; + u64 chain_key; + + if (!prove_locking) + check = 1; + + if (unlikely(!debug_locks)) + return 0; + + /* + * Lockdep should run with IRQs disabled, otherwise we could + * get an interrupt which would want to take locks, which would + * end up in lockdep and have you got a head-ache already? + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; + + if (lock->key == &__lockdep_no_validate__) + check = 1; + + if (subclass < NR_LOCKDEP_CACHING_CLASSES) + class = lock->class_cache[subclass]; + /* + * Not cached? + */ + if (unlikely(!class)) { + class = register_lock_class(lock, subclass, 0); + if (!class) + return 0; + } + atomic_inc((atomic_t *)&class->ops); + if (very_verbose(class)) { + printk("\nacquire class [%p] %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", class->name_version); + printk("\n"); + dump_stack(); + } + + /* + * Add the lock to the list of currently held locks. + * (we dont increase the depth just yet, up until the + * dependency checks are done) + */ + depth = curr->lockdep_depth; + /* + * Ran out of static storage for our per-task lock stack again have we? + */ + if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) + return 0; + + class_idx = class - lock_classes + 1; + + if (depth) { + hlock = curr->held_locks + depth - 1; + if (hlock->class_idx == class_idx && nest_lock) { + if (hlock->references) + hlock->references++; + else + hlock->references = 2; + + return 1; + } + } + + hlock = curr->held_locks + depth; + /* + * Plain impossible, we just registered it and checked it weren't no + * NULL like.. I bet this mushroom I ate was good! + */ + if (DEBUG_LOCKS_WARN_ON(!class)) + return 0; + hlock->class_idx = class_idx; + hlock->acquire_ip = ip; + hlock->instance = lock; + hlock->nest_lock = nest_lock; + hlock->trylock = trylock; + hlock->read = read; + hlock->check = check; + hlock->hardirqs_off = !!hardirqs_off; + hlock->references = references; +#ifdef CONFIG_LOCK_STAT + hlock->waittime_stamp = 0; + hlock->holdtime_stamp = lockstat_clock(); +#endif + + if (check == 2 && !mark_irqflags(curr, hlock)) + return 0; + + /* mark it as used: */ + if (!mark_lock(curr, hlock, LOCK_USED)) + return 0; + + /* + * Calculate the chain hash: it's the combined hash of all the + * lock keys along the dependency chain. We save the hash value + * at every step so that we can get the current hash easily + * after unlock. The chain hash is then used to cache dependency + * results. + * + * The 'key ID' is what is the most compact key value to drive + * the hash, not class->key. + */ + id = class - lock_classes; + /* + * Whoops, we did it again.. ran straight out of our static allocation. + */ + if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) + return 0; + + chain_key = curr->curr_chain_key; + if (!depth) { + /* + * How can we have a chain hash when we ain't got no keys?! + */ + if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) + return 0; + chain_head = 1; + } + + hlock->prev_chain_key = chain_key; + if (separate_irq_context(curr, hlock)) { + chain_key = 0; + chain_head = 1; + } + chain_key = iterate_chain_key(chain_key, id); + + if (nest_lock && !__lock_is_held(nest_lock)) + return print_lock_nested_lock_not_held(curr, hlock, ip); + + if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) + return 0; + + curr->curr_chain_key = chain_key; + curr->lockdep_depth++; + check_chain_key(curr); +#ifdef CONFIG_DEBUG_LOCKDEP + if (unlikely(!debug_locks)) + return 0; +#endif + if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { + debug_locks_off(); + print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!"); + printk(KERN_DEBUG "depth: %i max: %lu!\n", + curr->lockdep_depth, MAX_LOCK_DEPTH); + + lockdep_print_held_locks(current); + debug_show_all_locks(); + dump_stack(); + + return 0; + } + + if (unlikely(curr->lockdep_depth > max_lockdep_depth)) + max_lockdep_depth = curr->lockdep_depth; + + return 1; +} + +static int +print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (!debug_locks_off()) + return 0; + if (debug_locks_silent) + return 0; + + printk("\n"); + printk("=====================================\n"); + printk("[ BUG: bad unlock balance detected! ]\n"); + print_kernel_ident(); + printk("-------------------------------------\n"); + printk("%s/%d is trying to release lock (", + curr->comm, task_pid_nr(curr)); + print_lockdep_cache(lock); + printk(") at:\n"); + print_ip_sym(ip); + printk("but there are no more locks to release!\n"); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Common debugging checks for both nested and non-nested unlock: + */ +static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (unlikely(!debug_locks)) + return 0; + /* + * Lockdep should run with IRQs disabled, recursion, head-ache, etc.. + */ + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; + + if (curr->lockdep_depth <= 0) + return print_unlock_imbalance_bug(curr, lock, ip); + + return 1; +} + +static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) +{ + if (hlock->instance == lock) + return 1; + + if (hlock->references) { + struct lock_class *class = lock->class_cache[0]; + + if (!class) + class = look_up_lock_class(lock, 0); + + /* + * If look_up_lock_class() failed to find a class, we're trying + * to test if we hold a lock that has never yet been acquired. + * Clearly if the lock hasn't been acquired _ever_, we're not + * holding it either, so report failure. + */ + if (!class) + return 0; + + /* + * References, but not a lock we're actually ref-counting? + * State got messed up, follow the sites that change ->references + * and try to make sense of it. + */ + if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) + return 0; + + if (hlock->class_idx == class - lock_classes + 1) + return 1; + } + + return 0; +} + +static int +__lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip) +{ + struct task_struct *curr = current; + struct held_lock *hlock, *prev_hlock; + struct lock_class *class; + unsigned int depth; + int i; + + depth = curr->lockdep_depth; + /* + * This function is about (re)setting the class of a held lock, + * yet we're not actually holding any locks. Naughty user! + */ + if (DEBUG_LOCKS_WARN_ON(!depth)) + return 0; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (match_held_lock(hlock, lock)) + goto found_it; + prev_hlock = hlock; + } + return print_unlock_imbalance_bug(curr, lock, ip); + +found_it: + lockdep_init_map(lock, name, key, 0); + class = register_lock_class(lock, subclass, 0); + hlock->class_idx = class - lock_classes + 1; + + curr->lockdep_depth = i; + curr->curr_chain_key = hlock->prev_chain_key; + + for (; i < depth; i++) { + hlock = curr->held_locks + i; + if (!__lock_acquire(hlock->instance, + hlock_class(hlock)->subclass, hlock->trylock, + hlock->read, hlock->check, hlock->hardirqs_off, + hlock->nest_lock, hlock->acquire_ip, + hlock->references)) + return 0; + } + + /* + * I took it apart and put it back together again, except now I have + * these 'spare' parts.. where shall I put them. + */ + if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth)) + return 0; + return 1; +} + +/* + * Remove the lock to the list of currently held locks in a + * potentially non-nested (out of order) manner. This is a + * relatively rare operation, as all the unlock APIs default + * to nested mode (which uses lock_release()): + */ +static int +lock_release_non_nested(struct task_struct *curr, + struct lockdep_map *lock, unsigned long ip) +{ + struct held_lock *hlock, *prev_hlock; + unsigned int depth; + int i; + + /* + * Check whether the lock exists in the current stack + * of held locks: + */ + depth = curr->lockdep_depth; + /* + * So we're all set to release this lock.. wait what lock? We don't + * own any locks, you've been drinking again? + */ + if (DEBUG_LOCKS_WARN_ON(!depth)) + return 0; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (match_held_lock(hlock, lock)) + goto found_it; + prev_hlock = hlock; + } + return print_unlock_imbalance_bug(curr, lock, ip); + +found_it: + if (hlock->instance == lock) + lock_release_holdtime(hlock); + + if (hlock->references) { + hlock->references--; + if (hlock->references) { + /* + * We had, and after removing one, still have + * references, the current lock stack is still + * valid. We're done! + */ + return 1; + } + } + + /* + * We have the right lock to unlock, 'hlock' points to it. + * Now we remove it from the stack, and add back the other + * entries (if any), recalculating the hash along the way: + */ + + curr->lockdep_depth = i; + curr->curr_chain_key = hlock->prev_chain_key; + + for (i++; i < depth; i++) { + hlock = curr->held_locks + i; + if (!__lock_acquire(hlock->instance, + hlock_class(hlock)->subclass, hlock->trylock, + hlock->read, hlock->check, hlock->hardirqs_off, + hlock->nest_lock, hlock->acquire_ip, + hlock->references)) + return 0; + } + + /* + * We had N bottles of beer on the wall, we drank one, but now + * there's not N-1 bottles of beer left on the wall... + */ + if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) + return 0; + return 1; +} + +/* + * Remove the lock to the list of currently held locks - this gets + * called on mutex_unlock()/spin_unlock*() (or on a failed + * mutex_lock_interruptible()). This is done for unlocks that nest + * perfectly. (i.e. the current top of the lock-stack is unlocked) + */ +static int lock_release_nested(struct task_struct *curr, + struct lockdep_map *lock, unsigned long ip) +{ + struct held_lock *hlock; + unsigned int depth; + + /* + * Pop off the top of the lock stack: + */ + depth = curr->lockdep_depth - 1; + hlock = curr->held_locks + depth; + + /* + * Is the unlock non-nested: + */ + if (hlock->instance != lock || hlock->references) + return lock_release_non_nested(curr, lock, ip); + curr->lockdep_depth--; + + /* + * No more locks, but somehow we've got hash left over, who left it? + */ + if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0))) + return 0; + + curr->curr_chain_key = hlock->prev_chain_key; + + lock_release_holdtime(hlock); + +#ifdef CONFIG_DEBUG_LOCKDEP + hlock->prev_chain_key = 0; + hlock->class_idx = 0; + hlock->acquire_ip = 0; + hlock->irq_context = 0; +#endif + return 1; +} + +/* + * Remove the lock to the list of currently held locks - this gets + * called on mutex_unlock()/spin_unlock*() (or on a failed + * mutex_lock_interruptible()). This is done for unlocks that nest + * perfectly. (i.e. the current top of the lock-stack is unlocked) + */ +static void +__lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +{ + struct task_struct *curr = current; + + if (!check_unlock(curr, lock, ip)) + return; + + if (nested) { + if (!lock_release_nested(curr, lock, ip)) + return; + } else { + if (!lock_release_non_nested(curr, lock, ip)) + return; + } + + check_chain_key(curr); +} + +static int __lock_is_held(struct lockdep_map *lock) +{ + struct task_struct *curr = current; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + struct held_lock *hlock = curr->held_locks + i; + + if (match_held_lock(hlock, lock)) + return 1; + } + + return 0; +} + +/* + * Check whether we follow the irq-flags state precisely: + */ +static void check_flags(unsigned long flags) +{ +#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) && \ + defined(CONFIG_TRACE_IRQFLAGS) + if (!debug_locks) + return; + + if (irqs_disabled_flags(flags)) { + if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) { + printk("possible reason: unannotated irqs-off.\n"); + } + } else { + if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) { + printk("possible reason: unannotated irqs-on.\n"); + } + } + + /* + * We dont accurately track softirq state in e.g. + * hardirq contexts (such as on 4KSTACKS), so only + * check if not in hardirq contexts: + */ + if (!hardirq_count()) { + if (softirq_count()) { + /* like the above, but with softirqs */ + DEBUG_LOCKS_WARN_ON(current->softirqs_enabled); + } else { + /* lick the above, does it taste good? */ + DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); + } + } + + if (!debug_locks) + print_irqtrace_events(current); +#endif +} + +void lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + current->lockdep_recursion = 1; + check_flags(flags); + if (__lock_set_class(lock, name, key, subclass, ip)) + check_chain_key(current); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_set_class); + +/* + * We are not always called with irqs disabled - do that here, + * and also avoid lockdep recursion: + */ +void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *nest_lock, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + + current->lockdep_recursion = 1; + trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); + __lock_acquire(lock, subclass, trylock, read, check, + irqs_disabled_flags(flags), nest_lock, ip, 0); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_acquire); + +void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + trace_lock_release(lock, ip); + __lock_release(lock, nested, ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_release); + +int lock_is_held(struct lockdep_map *lock) +{ + unsigned long flags; + int ret = 0; + + if (unlikely(current->lockdep_recursion)) + return 1; /* avoid false negative lockdep_assert_held() */ + + raw_local_irq_save(flags); + check_flags(flags); + + current->lockdep_recursion = 1; + ret = __lock_is_held(lock); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(lock_is_held); + +void lockdep_set_current_reclaim_state(gfp_t gfp_mask) +{ + current->lockdep_reclaim_gfp = gfp_mask; +} + +void lockdep_clear_current_reclaim_state(void) +{ + current->lockdep_reclaim_gfp = 0; +} + +#ifdef CONFIG_LOCK_STAT +static int +print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (!debug_locks_off()) + return 0; + if (debug_locks_silent) + return 0; + + printk("\n"); + printk("=================================\n"); + printk("[ BUG: bad contention detected! ]\n"); + print_kernel_ident(); + printk("---------------------------------\n"); + printk("%s/%d is trying to contend lock (", + curr->comm, task_pid_nr(curr)); + print_lockdep_cache(lock); + printk(") at:\n"); + print_ip_sym(ip); + printk("but there are no locks held!\n"); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static void +__lock_contended(struct lockdep_map *lock, unsigned long ip) +{ + struct task_struct *curr = current; + struct held_lock *hlock, *prev_hlock; + struct lock_class_stats *stats; + unsigned int depth; + int i, contention_point, contending_point; + + depth = curr->lockdep_depth; + /* + * Whee, we contended on this lock, except it seems we're not + * actually trying to acquire anything much at all.. + */ + if (DEBUG_LOCKS_WARN_ON(!depth)) + return; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (match_held_lock(hlock, lock)) + goto found_it; + prev_hlock = hlock; + } + print_lock_contention_bug(curr, lock, ip); + return; + +found_it: + if (hlock->instance != lock) + return; + + hlock->waittime_stamp = lockstat_clock(); + + contention_point = lock_point(hlock_class(hlock)->contention_point, ip); + contending_point = lock_point(hlock_class(hlock)->contending_point, + lock->ip); + + stats = get_lock_stats(hlock_class(hlock)); + if (contention_point < LOCKSTAT_POINTS) + stats->contention_point[contention_point]++; + if (contending_point < LOCKSTAT_POINTS) + stats->contending_point[contending_point]++; + if (lock->cpu != smp_processor_id()) + stats->bounces[bounce_contended + !!hlock->read]++; + put_lock_stats(stats); +} + +static void +__lock_acquired(struct lockdep_map *lock, unsigned long ip) +{ + struct task_struct *curr = current; + struct held_lock *hlock, *prev_hlock; + struct lock_class_stats *stats; + unsigned int depth; + u64 now, waittime = 0; + int i, cpu; + + depth = curr->lockdep_depth; + /* + * Yay, we acquired ownership of this lock we didn't try to + * acquire, how the heck did that happen? + */ + if (DEBUG_LOCKS_WARN_ON(!depth)) + return; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (match_held_lock(hlock, lock)) + goto found_it; + prev_hlock = hlock; + } + print_lock_contention_bug(curr, lock, _RET_IP_); + return; + +found_it: + if (hlock->instance != lock) + return; + + cpu = smp_processor_id(); + if (hlock->waittime_stamp) { + now = lockstat_clock(); + waittime = now - hlock->waittime_stamp; + hlock->holdtime_stamp = now; + } + + trace_lock_acquired(lock, ip); + + stats = get_lock_stats(hlock_class(hlock)); + if (waittime) { + if (hlock->read) + lock_time_inc(&stats->read_waittime, waittime); + else + lock_time_inc(&stats->write_waittime, waittime); + } + if (lock->cpu != cpu) + stats->bounces[bounce_acquired + !!hlock->read]++; + put_lock_stats(stats); + + lock->cpu = cpu; + lock->ip = ip; +} + +void lock_contended(struct lockdep_map *lock, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(!lock_stat)) + return; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + trace_lock_contended(lock, ip); + __lock_contended(lock, ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_contended); + +void lock_acquired(struct lockdep_map *lock, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(!lock_stat)) + return; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + __lock_acquired(lock, ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_acquired); +#endif + +/* + * Used by the testsuite, sanitize the validator state + * after a simulated failure: + */ + +void lockdep_reset(void) +{ + unsigned long flags; + int i; + + raw_local_irq_save(flags); + current->curr_chain_key = 0; + current->lockdep_depth = 0; + current->lockdep_recursion = 0; + memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock)); + nr_hardirq_chains = 0; + nr_softirq_chains = 0; + nr_process_chains = 0; + debug_locks = 1; + for (i = 0; i < CHAINHASH_SIZE; i++) + INIT_LIST_HEAD(chainhash_table + i); + raw_local_irq_restore(flags); +} + +static void zap_class(struct lock_class *class) +{ + int i; + + /* + * Remove all dependencies this lock is + * involved in: + */ + for (i = 0; i < nr_list_entries; i++) { + if (list_entries[i].class == class) + list_del_rcu(&list_entries[i].entry); + } + /* + * Unhash the class and remove it from the all_lock_classes list: + */ + list_del_rcu(&class->hash_entry); + list_del_rcu(&class->lock_entry); + + class->key = NULL; +} + +static inline int within(const void *addr, void *start, unsigned long size) +{ + return addr >= start && addr < start + size; +} + +void lockdep_free_key_range(void *start, unsigned long size) +{ + struct lock_class *class, *next; + struct list_head *head; + unsigned long flags; + int i; + int locked; + + raw_local_irq_save(flags); + locked = graph_lock(); + + /* + * Unhash all classes that were created by this module: + */ + for (i = 0; i < CLASSHASH_SIZE; i++) { + head = classhash_table + i; + if (list_empty(head)) + continue; + list_for_each_entry_safe(class, next, head, hash_entry) { + if (within(class->key, start, size)) + zap_class(class); + else if (within(class->name, start, size)) + zap_class(class); + } + } + + if (locked) + graph_unlock(); + raw_local_irq_restore(flags); +} + +void lockdep_reset_lock(struct lockdep_map *lock) +{ + struct lock_class *class, *next; + struct list_head *head; + unsigned long flags; + int i, j; + int locked; + + raw_local_irq_save(flags); + + /* + * Remove all classes this lock might have: + */ + for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { + /* + * If the class exists we look it up and zap it: + */ + class = look_up_lock_class(lock, j); + if (class) + zap_class(class); + } + /* + * Debug check: in the end all mapped classes should + * be gone. + */ + locked = graph_lock(); + for (i = 0; i < CLASSHASH_SIZE; i++) { + head = classhash_table + i; + if (list_empty(head)) + continue; + list_for_each_entry_safe(class, next, head, hash_entry) { + int match = 0; + + for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) + match |= class == lock->class_cache[j]; + + if (unlikely(match)) { + if (debug_locks_off_graph_unlock()) { + /* + * We all just reset everything, how did it match? + */ + WARN_ON(1); + } + goto out_restore; + } + } + } + if (locked) + graph_unlock(); + +out_restore: + raw_local_irq_restore(flags); +} + +void lockdep_init(void) +{ + int i; + + /* + * Some architectures have their own start_kernel() + * code which calls lockdep_init(), while we also + * call lockdep_init() from the start_kernel() itself, + * and we want to initialize the hashes only once: + */ + if (lockdep_initialized) + return; + + for (i = 0; i < CLASSHASH_SIZE; i++) + INIT_LIST_HEAD(classhash_table + i); + + for (i = 0; i < CHAINHASH_SIZE; i++) + INIT_LIST_HEAD(chainhash_table + i); + + lockdep_initialized = 1; +} + +void __init lockdep_info(void) +{ + printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); + + printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); + printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); + printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); + printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); + printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); + printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); + printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); + + printk(" memory used by lock dependency info: %lu kB\n", + (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS + + sizeof(struct list_head) * CLASSHASH_SIZE + + sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES + + sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + + sizeof(struct list_head) * CHAINHASH_SIZE +#ifdef CONFIG_PROVE_LOCKING + + sizeof(struct circular_queue) +#endif + ) / 1024 + ); + + printk(" per task-struct memory footprint: %lu bytes\n", + sizeof(struct held_lock) * MAX_LOCK_DEPTH); + +#ifdef CONFIG_DEBUG_LOCKDEP + if (lockdep_init_error) { + printk("WARNING: lockdep init error! lock-%s was acquired" + "before lockdep_init\n", lock_init_error); + printk("Call stack leading to lockdep invocation was:\n"); + print_stack_trace(&lockdep_init_trace, 0); + } +#endif +} + +static void +print_freed_lock_bug(struct task_struct *curr, const void *mem_from, + const void *mem_to, struct held_lock *hlock) +{ + if (!debug_locks_off()) + return; + if (debug_locks_silent) + return; + + printk("\n"); + printk("=========================\n"); + printk("[ BUG: held lock freed! ]\n"); + print_kernel_ident(); + printk("-------------------------\n"); + printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", + curr->comm, task_pid_nr(curr), mem_from, mem_to-1); + print_lock(hlock); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); +} + +static inline int not_in_range(const void* mem_from, unsigned long mem_len, + const void* lock_from, unsigned long lock_len) +{ + return lock_from + lock_len <= mem_from || + mem_from + mem_len <= lock_from; +} + +/* + * Called when kernel memory is freed (or unmapped), or if a lock + * is destroyed or reinitialized - this code checks whether there is + * any held lock in the memory range of to : + */ +void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) +{ + struct task_struct *curr = current; + struct held_lock *hlock; + unsigned long flags; + int i; + + if (unlikely(!debug_locks)) + return; + + local_irq_save(flags); + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + + if (not_in_range(mem_from, mem_len, hlock->instance, + sizeof(*hlock->instance))) + continue; + + print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); + break; + } + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); + +static void print_held_locks_bug(void) +{ + if (!debug_locks_off()) + return; + if (debug_locks_silent) + return; + + printk("\n"); + printk("=====================================\n"); + printk("[ BUG: %s/%d still has locks held! ]\n", + current->comm, task_pid_nr(current)); + print_kernel_ident(); + printk("-------------------------------------\n"); + lockdep_print_held_locks(current); + printk("\nstack backtrace:\n"); + dump_stack(); +} + +void debug_check_no_locks_held(void) +{ + if (unlikely(current->lockdep_depth > 0)) + print_held_locks_bug(); +} +EXPORT_SYMBOL_GPL(debug_check_no_locks_held); + +void debug_show_all_locks(void) +{ + struct task_struct *g, *p; + int count = 10; + int unlock = 1; + + if (unlikely(!debug_locks)) { + printk("INFO: lockdep is turned off.\n"); + return; + } + printk("\nShowing all locks held in the system:\n"); + + /* + * Here we try to get the tasklist_lock as hard as possible, + * if not successful after 2 seconds we ignore it (but keep + * trying). This is to enable a debug printout even if a + * tasklist_lock-holding task deadlocks or crashes. + */ +retry: + if (!read_trylock(&tasklist_lock)) { + if (count == 10) + printk("hm, tasklist_lock locked, retrying... "); + if (count) { + count--; + printk(" #%d", 10-count); + mdelay(200); + goto retry; + } + printk(" ignoring it.\n"); + unlock = 0; + } else { + if (count != 10) + printk(KERN_CONT " locked it.\n"); + } + + do_each_thread(g, p) { + /* + * It's not reliable to print a task's held locks + * if it's not sleeping (or if it's not the current + * task): + */ + if (p->state == TASK_RUNNING && p != current) + continue; + if (p->lockdep_depth) + lockdep_print_held_locks(p); + if (!unlock) + if (read_trylock(&tasklist_lock)) + unlock = 1; + } while_each_thread(g, p); + + printk("\n"); + printk("=============================================\n\n"); + + if (unlock) + read_unlock(&tasklist_lock); +} +EXPORT_SYMBOL_GPL(debug_show_all_locks); + +/* + * Careful: only use this function if you are sure that + * the task cannot run in parallel! + */ +void debug_show_held_locks(struct task_struct *task) +{ + if (unlikely(!debug_locks)) { + printk("INFO: lockdep is turned off.\n"); + return; + } + lockdep_print_held_locks(task); +} +EXPORT_SYMBOL_GPL(debug_show_held_locks); + +void lockdep_sys_exit(void) +{ + struct task_struct *curr = current; + + if (unlikely(curr->lockdep_depth)) { + if (!debug_locks_off()) + return; + printk("\n"); + printk("================================================\n"); + printk("[ BUG: lock held when returning to user space! ]\n"); + print_kernel_ident(); + printk("------------------------------------------------\n"); + printk("%s/%d is leaving the kernel with locks still held!\n", + curr->comm, curr->pid); + lockdep_print_held_locks(curr); + } +} + +void lockdep_rcu_suspicious(const char *file, const int line, const char *s) +{ + struct task_struct *curr = current; + +#ifndef CONFIG_PROVE_RCU_REPEATEDLY + if (!debug_locks_off()) + return; +#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ + /* Note: the following can be executed concurrently, so be careful. */ + printk("\n"); + printk("===============================\n"); + printk("[ INFO: suspicious RCU usage. ]\n"); + print_kernel_ident(); + printk("-------------------------------\n"); + printk("%s:%d %s!\n", file, line, s); + printk("\nother info that might help us debug this:\n\n"); + printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", + !rcu_lockdep_current_cpu_online() + ? "RCU used illegally from offline CPU!\n" + : !rcu_is_watching() + ? "RCU used illegally from idle CPU!\n" + : "", + rcu_scheduler_active, debug_locks); + + /* + * If a CPU is in the RCU-free window in idle (ie: in the section + * between rcu_idle_enter() and rcu_idle_exit(), then RCU + * considers that CPU to be in an "extended quiescent state", + * which means that RCU will be completely ignoring that CPU. + * Therefore, rcu_read_lock() and friends have absolutely no + * effect on a CPU running in that state. In other words, even if + * such an RCU-idle CPU has called rcu_read_lock(), RCU might well + * delete data structures out from under it. RCU really has no + * choice here: we need to keep an RCU-free window in idle where + * the CPU may possibly enter into low power mode. This way we can + * notice an extended quiescent state to other CPUs that started a grace + * period. Otherwise we would delay any grace period as long as we run + * in the idle task. + * + * So complain bitterly if someone does call rcu_read_lock(), + * rcu_read_lock_bh() and so on from extended quiescent states. + */ + if (!rcu_is_watching()) + printk("RCU used illegally from extended quiescent state!\n"); + + lockdep_print_held_locks(curr); + printk("\nstack backtrace:\n"); + dump_stack(); +} +EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h new file mode 100644 index 0000000..4f560cf --- /dev/null +++ b/kernel/locking/lockdep_internals.h @@ -0,0 +1,170 @@ +/* + * kernel/lockdep_internals.h + * + * Runtime locking correctness validator + * + * lockdep subsystem internal functions and variables. + */ + +/* + * Lock-class usage-state bits: + */ +enum lock_usage_bit { +#define LOCKDEP_STATE(__STATE) \ + LOCK_USED_IN_##__STATE, \ + LOCK_USED_IN_##__STATE##_READ, \ + LOCK_ENABLED_##__STATE, \ + LOCK_ENABLED_##__STATE##_READ, +#include "lockdep_states.h" +#undef LOCKDEP_STATE + LOCK_USED, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE), + +enum { +#define LOCKDEP_STATE(__STATE) \ + __LOCKF(USED_IN_##__STATE) \ + __LOCKF(USED_IN_##__STATE##_READ) \ + __LOCKF(ENABLED_##__STATE) \ + __LOCKF(ENABLED_##__STATE##_READ) +#include "lockdep_states.h" +#undef LOCKDEP_STATE + __LOCKF(USED) +}; + +#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_ENABLED_IRQ_READ \ + (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +/* + * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies + * we track. + * + * We use the per-lock dependency maps in two ways: we grow it by adding + * every to-be-taken lock to all currently held lock's own dependency + * table (if it's not there yet), and we check it for lock order + * conflicts and deadlocks. + */ +#define MAX_LOCKDEP_ENTRIES 16384UL + +#define MAX_LOCKDEP_CHAINS_BITS 15 +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. Protected by the hash_lock. + */ +#define MAX_STACK_TRACE_ENTRIES 262144UL + +extern struct list_head all_lock_classes; +extern struct lock_chain lock_chains[]; + +#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2) + +extern void get_usage_chars(struct lock_class *class, + char usage[LOCK_USAGE_CHARS]); + +extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); + +struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i); + +extern unsigned long nr_lock_classes; +extern unsigned long nr_list_entries; +extern unsigned long nr_lock_chains; +extern int nr_chain_hlocks; +extern unsigned long nr_stack_trace_entries; + +extern unsigned int nr_hardirq_chains; +extern unsigned int nr_softirq_chains; +extern unsigned int nr_process_chains; +extern unsigned int max_lockdep_depth; +extern unsigned int max_recursion_depth; + +extern unsigned int max_bfs_queue_depth; + +#ifdef CONFIG_PROVE_LOCKING +extern unsigned long lockdep_count_forward_deps(struct lock_class *); +extern unsigned long lockdep_count_backward_deps(struct lock_class *); +#else +static inline unsigned long +lockdep_count_forward_deps(struct lock_class *class) +{ + return 0; +} +static inline unsigned long +lockdep_count_backward_deps(struct lock_class *class) +{ + return 0; +} +#endif + +#ifdef CONFIG_DEBUG_LOCKDEP + +#include +/* + * Various lockdep statistics. + * We want them per cpu as they are often accessed in fast path + * and we want to avoid too much cache bouncing. + */ +struct lockdep_stats { + int chain_lookup_hits; + int chain_lookup_misses; + int hardirqs_on_events; + int hardirqs_off_events; + int redundant_hardirqs_on; + int redundant_hardirqs_off; + int softirqs_on_events; + int softirqs_off_events; + int redundant_softirqs_on; + int redundant_softirqs_off; + int nr_unused_locks; + int nr_cyclic_checks; + int nr_cyclic_check_recursions; + int nr_find_usage_forwards_checks; + int nr_find_usage_forwards_recursions; + int nr_find_usage_backwards_checks; + int nr_find_usage_backwards_recursions; +}; + +DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); + +#define __debug_atomic_inc(ptr) \ + this_cpu_inc(lockdep_stats.ptr); + +#define debug_atomic_inc(ptr) { \ + WARN_ON_ONCE(!irqs_disabled()); \ + __this_cpu_inc(lockdep_stats.ptr); \ +} + +#define debug_atomic_dec(ptr) { \ + WARN_ON_ONCE(!irqs_disabled()); \ + __this_cpu_dec(lockdep_stats.ptr); \ +} + +#define debug_atomic_read(ptr) ({ \ + struct lockdep_stats *__cpu_lockdep_stats; \ + unsigned long long __total = 0; \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + __cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \ + __total += __cpu_lockdep_stats->ptr; \ + } \ + __total; \ +}) +#else +# define __debug_atomic_inc(ptr) do { } while (0) +# define debug_atomic_inc(ptr) do { } while (0) +# define debug_atomic_dec(ptr) do { } while (0) +# define debug_atomic_read(ptr) 0 +#endif diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c new file mode 100644 index 0000000..0922065 --- /dev/null +++ b/kernel/locking/lockdep_proc.c @@ -0,0 +1,683 @@ +/* + * kernel/lockdep_proc.c + * + * Runtime locking correctness validator + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * Code for /proc/lockdep and /proc/lockdep_stats: + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lockdep_internals.h" + +static void *l_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &all_lock_classes, pos); +} + +static void *l_start(struct seq_file *m, loff_t *pos) +{ + return seq_list_start_head(&all_lock_classes, *pos); +} + +static void l_stop(struct seq_file *m, void *v) +{ +} + +static void print_name(struct seq_file *m, struct lock_class *class) +{ + char str[KSYM_NAME_LEN]; + const char *name = class->name; + + if (!name) { + name = __get_key_name(class->key, str); + seq_printf(m, "%s", name); + } else{ + seq_printf(m, "%s", name); + if (class->name_version > 1) + seq_printf(m, "#%d", class->name_version); + if (class->subclass) + seq_printf(m, "/%d", class->subclass); + } +} + +static int l_show(struct seq_file *m, void *v) +{ + struct lock_class *class = list_entry(v, struct lock_class, lock_entry); + struct lock_list *entry; + char usage[LOCK_USAGE_CHARS]; + + if (v == &all_lock_classes) { + seq_printf(m, "all lock classes:\n"); + return 0; + } + + seq_printf(m, "%p", class->key); +#ifdef CONFIG_DEBUG_LOCKDEP + seq_printf(m, " OPS:%8ld", class->ops); +#endif +#ifdef CONFIG_PROVE_LOCKING + seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class)); + seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class)); +#endif + + get_usage_chars(class, usage); + seq_printf(m, " %s", usage); + + seq_printf(m, ": "); + print_name(m, class); + seq_puts(m, "\n"); + + list_for_each_entry(entry, &class->locks_after, entry) { + if (entry->distance == 1) { + seq_printf(m, " -> [%p] ", entry->class->key); + print_name(m, entry->class); + seq_puts(m, "\n"); + } + } + seq_puts(m, "\n"); + + return 0; +} + +static const struct seq_operations lockdep_ops = { + .start = l_start, + .next = l_next, + .stop = l_stop, + .show = l_show, +}; + +static int lockdep_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &lockdep_ops); +} + +static const struct file_operations proc_lockdep_operations = { + .open = lockdep_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#ifdef CONFIG_PROVE_LOCKING +static void *lc_start(struct seq_file *m, loff_t *pos) +{ + if (*pos == 0) + return SEQ_START_TOKEN; + + if (*pos - 1 < nr_lock_chains) + return lock_chains + (*pos - 1); + + return NULL; +} + +static void *lc_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return lc_start(m, pos); +} + +static void lc_stop(struct seq_file *m, void *v) +{ +} + +static int lc_show(struct seq_file *m, void *v) +{ + struct lock_chain *chain = v; + struct lock_class *class; + int i; + + if (v == SEQ_START_TOKEN) { + seq_printf(m, "all lock chains:\n"); + return 0; + } + + seq_printf(m, "irq_context: %d\n", chain->irq_context); + + for (i = 0; i < chain->depth; i++) { + class = lock_chain_get_class(chain, i); + if (!class->key) + continue; + + seq_printf(m, "[%p] ", class->key); + print_name(m, class); + seq_puts(m, "\n"); + } + seq_puts(m, "\n"); + + return 0; +} + +static const struct seq_operations lockdep_chains_ops = { + .start = lc_start, + .next = lc_next, + .stop = lc_stop, + .show = lc_show, +}; + +static int lockdep_chains_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &lockdep_chains_ops); +} + +static const struct file_operations proc_lockdep_chains_operations = { + .open = lockdep_chains_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* CONFIG_PROVE_LOCKING */ + +static void lockdep_stats_debug_show(struct seq_file *m) +{ +#ifdef CONFIG_DEBUG_LOCKDEP + unsigned long long hi1 = debug_atomic_read(hardirqs_on_events), + hi2 = debug_atomic_read(hardirqs_off_events), + hr1 = debug_atomic_read(redundant_hardirqs_on), + hr2 = debug_atomic_read(redundant_hardirqs_off), + si1 = debug_atomic_read(softirqs_on_events), + si2 = debug_atomic_read(softirqs_off_events), + sr1 = debug_atomic_read(redundant_softirqs_on), + sr2 = debug_atomic_read(redundant_softirqs_off); + + seq_printf(m, " chain lookup misses: %11llu\n", + debug_atomic_read(chain_lookup_misses)); + seq_printf(m, " chain lookup hits: %11llu\n", + debug_atomic_read(chain_lookup_hits)); + seq_printf(m, " cyclic checks: %11llu\n", + debug_atomic_read(nr_cyclic_checks)); + seq_printf(m, " find-mask forwards checks: %11llu\n", + debug_atomic_read(nr_find_usage_forwards_checks)); + seq_printf(m, " find-mask backwards checks: %11llu\n", + debug_atomic_read(nr_find_usage_backwards_checks)); + + seq_printf(m, " hardirq on events: %11llu\n", hi1); + seq_printf(m, " hardirq off events: %11llu\n", hi2); + seq_printf(m, " redundant hardirq ons: %11llu\n", hr1); + seq_printf(m, " redundant hardirq offs: %11llu\n", hr2); + seq_printf(m, " softirq on events: %11llu\n", si1); + seq_printf(m, " softirq off events: %11llu\n", si2); + seq_printf(m, " redundant softirq ons: %11llu\n", sr1); + seq_printf(m, " redundant softirq offs: %11llu\n", sr2); +#endif +} + +static int lockdep_stats_show(struct seq_file *m, void *v) +{ + struct lock_class *class; + unsigned long nr_unused = 0, nr_uncategorized = 0, + nr_irq_safe = 0, nr_irq_unsafe = 0, + nr_softirq_safe = 0, nr_softirq_unsafe = 0, + nr_hardirq_safe = 0, nr_hardirq_unsafe = 0, + nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, + nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, + nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, + sum_forward_deps = 0; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + + if (class->usage_mask == 0) + nr_unused++; + if (class->usage_mask == LOCKF_USED) + nr_uncategorized++; + if (class->usage_mask & LOCKF_USED_IN_IRQ) + nr_irq_safe++; + if (class->usage_mask & LOCKF_ENABLED_IRQ) + nr_irq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) + nr_softirq_safe++; + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ) + nr_softirq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) + nr_hardirq_safe++; + if (class->usage_mask & LOCKF_ENABLED_HARDIRQ) + nr_hardirq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) + nr_irq_read_safe++; + if (class->usage_mask & LOCKF_ENABLED_IRQ_READ) + nr_irq_read_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) + nr_softirq_read_safe++; + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ) + nr_softirq_read_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) + nr_hardirq_read_safe++; + if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ) + nr_hardirq_read_unsafe++; + +#ifdef CONFIG_PROVE_LOCKING + sum_forward_deps += lockdep_count_forward_deps(class); +#endif + } +#ifdef CONFIG_DEBUG_LOCKDEP + DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused); +#endif + seq_printf(m, " lock-classes: %11lu [max: %lu]\n", + nr_lock_classes, MAX_LOCKDEP_KEYS); + seq_printf(m, " direct dependencies: %11lu [max: %lu]\n", + nr_list_entries, MAX_LOCKDEP_ENTRIES); + seq_printf(m, " indirect dependencies: %11lu\n", + sum_forward_deps); + + /* + * Total number of dependencies: + * + * All irq-safe locks may nest inside irq-unsafe locks, + * plus all the other known dependencies: + */ + seq_printf(m, " all direct dependencies: %11lu\n", + nr_irq_unsafe * nr_irq_safe + + nr_hardirq_unsafe * nr_hardirq_safe + + nr_list_entries); + +#ifdef CONFIG_PROVE_LOCKING + seq_printf(m, " dependency chains: %11lu [max: %lu]\n", + nr_lock_chains, MAX_LOCKDEP_CHAINS); + seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n", + nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS); +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS + seq_printf(m, " in-hardirq chains: %11u\n", + nr_hardirq_chains); + seq_printf(m, " in-softirq chains: %11u\n", + nr_softirq_chains); +#endif + seq_printf(m, " in-process chains: %11u\n", + nr_process_chains); + seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n", + nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); + seq_printf(m, " combined max dependencies: %11u\n", + (nr_hardirq_chains + 1) * + (nr_softirq_chains + 1) * + (nr_process_chains + 1) + ); + seq_printf(m, " hardirq-safe locks: %11lu\n", + nr_hardirq_safe); + seq_printf(m, " hardirq-unsafe locks: %11lu\n", + nr_hardirq_unsafe); + seq_printf(m, " softirq-safe locks: %11lu\n", + nr_softirq_safe); + seq_printf(m, " softirq-unsafe locks: %11lu\n", + nr_softirq_unsafe); + seq_printf(m, " irq-safe locks: %11lu\n", + nr_irq_safe); + seq_printf(m, " irq-unsafe locks: %11lu\n", + nr_irq_unsafe); + + seq_printf(m, " hardirq-read-safe locks: %11lu\n", + nr_hardirq_read_safe); + seq_printf(m, " hardirq-read-unsafe locks: %11lu\n", + nr_hardirq_read_unsafe); + seq_printf(m, " softirq-read-safe locks: %11lu\n", + nr_softirq_read_safe); + seq_printf(m, " softirq-read-unsafe locks: %11lu\n", + nr_softirq_read_unsafe); + seq_printf(m, " irq-read-safe locks: %11lu\n", + nr_irq_read_safe); + seq_printf(m, " irq-read-unsafe locks: %11lu\n", + nr_irq_read_unsafe); + + seq_printf(m, " uncategorized locks: %11lu\n", + nr_uncategorized); + seq_printf(m, " unused locks: %11lu\n", + nr_unused); + seq_printf(m, " max locking depth: %11u\n", + max_lockdep_depth); +#ifdef CONFIG_PROVE_LOCKING + seq_printf(m, " max bfs queue depth: %11u\n", + max_bfs_queue_depth); +#endif + lockdep_stats_debug_show(m); + seq_printf(m, " debug_locks: %11u\n", + debug_locks); + + return 0; +} + +static int lockdep_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, lockdep_stats_show, NULL); +} + +static const struct file_operations proc_lockdep_stats_operations = { + .open = lockdep_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#ifdef CONFIG_LOCK_STAT + +struct lock_stat_data { + struct lock_class *class; + struct lock_class_stats stats; +}; + +struct lock_stat_seq { + struct lock_stat_data *iter_end; + struct lock_stat_data stats[MAX_LOCKDEP_KEYS]; +}; + +/* + * sort on absolute number of contentions + */ +static int lock_stat_cmp(const void *l, const void *r) +{ + const struct lock_stat_data *dl = l, *dr = r; + unsigned long nl, nr; + + nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr; + nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr; + + return nr - nl; +} + +static void seq_line(struct seq_file *m, char c, int offset, int length) +{ + int i; + + for (i = 0; i < offset; i++) + seq_puts(m, " "); + for (i = 0; i < length; i++) + seq_printf(m, "%c", c); + seq_puts(m, "\n"); +} + +static void snprint_time(char *buf, size_t bufsiz, s64 nr) +{ + s64 div; + s32 rem; + + nr += 5; /* for display rounding */ + div = div_s64_rem(nr, 1000, &rem); + snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10); +} + +static void seq_time(struct seq_file *m, s64 time) +{ + char num[15]; + + snprint_time(num, sizeof(num), time); + seq_printf(m, " %14s", num); +} + +static void seq_lock_time(struct seq_file *m, struct lock_time *lt) +{ + seq_printf(m, "%14lu", lt->nr); + seq_time(m, lt->min); + seq_time(m, lt->max); + seq_time(m, lt->total); + seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); +} + +static void seq_stats(struct seq_file *m, struct lock_stat_data *data) +{ + char name[39]; + struct lock_class *class; + struct lock_class_stats *stats; + int i, namelen; + + class = data->class; + stats = &data->stats; + + namelen = 38; + if (class->name_version > 1) + namelen -= 2; /* XXX truncates versions > 9 */ + if (class->subclass) + namelen -= 2; + + if (!class->name) { + char str[KSYM_NAME_LEN]; + const char *key_name; + + key_name = __get_key_name(class->key, str); + snprintf(name, namelen, "%s", key_name); + } else { + snprintf(name, namelen, "%s", class->name); + } + namelen = strlen(name); + if (class->name_version > 1) { + snprintf(name+namelen, 3, "#%d", class->name_version); + namelen += 2; + } + if (class->subclass) { + snprintf(name+namelen, 3, "/%d", class->subclass); + namelen += 2; + } + + if (stats->write_holdtime.nr) { + if (stats->read_holdtime.nr) + seq_printf(m, "%38s-W:", name); + else + seq_printf(m, "%40s:", name); + + seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]); + seq_lock_time(m, &stats->write_waittime); + seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]); + seq_lock_time(m, &stats->write_holdtime); + seq_puts(m, "\n"); + } + + if (stats->read_holdtime.nr) { + seq_printf(m, "%38s-R:", name); + seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]); + seq_lock_time(m, &stats->read_waittime); + seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]); + seq_lock_time(m, &stats->read_holdtime); + seq_puts(m, "\n"); + } + + if (stats->read_waittime.nr + stats->write_waittime.nr == 0) + return; + + if (stats->read_holdtime.nr) + namelen += 2; + + for (i = 0; i < LOCKSTAT_POINTS; i++) { + char ip[32]; + + if (class->contention_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contention_point[i]); + seq_printf(m, "%40s %14lu %29s %pS\n", + name, stats->contention_point[i], + ip, (void *)class->contention_point[i]); + } + for (i = 0; i < LOCKSTAT_POINTS; i++) { + char ip[32]; + + if (class->contending_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contending_point[i]); + seq_printf(m, "%40s %14lu %29s %pS\n", + name, stats->contending_point[i], + ip, (void *)class->contending_point[i]); + } + if (i) { + seq_puts(m, "\n"); + seq_line(m, '.', 0, 40 + 1 + 12 * (14 + 1)); + seq_puts(m, "\n"); + } +} + +static void seq_header(struct seq_file *m) +{ + seq_puts(m, "lock_stat version 0.4\n"); + + if (unlikely(!debug_locks)) + seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n"); + + seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); + seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s " + "%14s %14s\n", + "class name", + "con-bounces", + "contentions", + "waittime-min", + "waittime-max", + "waittime-total", + "waittime-avg", + "acq-bounces", + "acquisitions", + "holdtime-min", + "holdtime-max", + "holdtime-total", + "holdtime-avg"); + seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1)); + seq_printf(m, "\n"); +} + +static void *ls_start(struct seq_file *m, loff_t *pos) +{ + struct lock_stat_seq *data = m->private; + struct lock_stat_data *iter; + + if (*pos == 0) + return SEQ_START_TOKEN; + + iter = data->stats + (*pos - 1); + if (iter >= data->iter_end) + iter = NULL; + + return iter; +} + +static void *ls_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return ls_start(m, pos); +} + +static void ls_stop(struct seq_file *m, void *v) +{ +} + +static int ls_show(struct seq_file *m, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_header(m); + else + seq_stats(m, v); + + return 0; +} + +static const struct seq_operations lockstat_ops = { + .start = ls_start, + .next = ls_next, + .stop = ls_stop, + .show = ls_show, +}; + +static int lock_stat_open(struct inode *inode, struct file *file) +{ + int res; + struct lock_class *class; + struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq)); + + if (!data) + return -ENOMEM; + + res = seq_open(file, &lockstat_ops); + if (!res) { + struct lock_stat_data *iter = data->stats; + struct seq_file *m = file->private_data; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + iter->class = class; + iter->stats = lock_stats(class); + iter++; + } + data->iter_end = iter; + + sort(data->stats, data->iter_end - data->stats, + sizeof(struct lock_stat_data), + lock_stat_cmp, NULL); + + m->private = data; + } else + vfree(data); + + return res; +} + +static ssize_t lock_stat_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct lock_class *class; + char c; + + if (count) { + if (get_user(c, buf)) + return -EFAULT; + + if (c != '0') + return count; + + list_for_each_entry(class, &all_lock_classes, lock_entry) + clear_lock_stats(class); + } + return count; +} + +static int lock_stat_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + + vfree(seq->private); + return seq_release(inode, file); +} + +static const struct file_operations proc_lock_stat_operations = { + .open = lock_stat_open, + .write = lock_stat_write, + .read = seq_read, + .llseek = seq_lseek, + .release = lock_stat_release, +}; +#endif /* CONFIG_LOCK_STAT */ + +static int __init lockdep_proc_init(void) +{ + proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations); +#ifdef CONFIG_PROVE_LOCKING + proc_create("lockdep_chains", S_IRUSR, NULL, + &proc_lockdep_chains_operations); +#endif + proc_create("lockdep_stats", S_IRUSR, NULL, + &proc_lockdep_stats_operations); + +#ifdef CONFIG_LOCK_STAT + proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL, + &proc_lock_stat_operations); +#endif + + return 0; +} + +__initcall(lockdep_proc_init); + diff --git a/kernel/locking/lockdep_states.h b/kernel/locking/lockdep_states.h new file mode 100644 index 0000000..995b0cc --- /dev/null +++ b/kernel/locking/lockdep_states.h @@ -0,0 +1,9 @@ +/* + * Lockdep states, + * + * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever + * you add one, or come up with a nice dynamic solution. + */ +LOCKDEP_STATE(HARDIRQ) +LOCKDEP_STATE(SOFTIRQ) +LOCKDEP_STATE(RECLAIM_FS) -- cgit v1.1 From 60fc28746a7b61775ae28950ddf7a4ac15955639 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:15:36 +0100 Subject: locking: Move the spinlock code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-b81ol0z3mon45m51o131yc9j@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 3 - kernel/locking/Makefile | 4 + kernel/locking/spinlock.c | 399 ++++++++++++++++++++++++++++++++++++++++ kernel/locking/spinlock_debug.c | 302 ++++++++++++++++++++++++++++++ kernel/spinlock.c | 399 ---------------------------------------- lib/Makefile | 1 - lib/spinlock_debug.c | 302 ------------------------------ 7 files changed, 705 insertions(+), 705 deletions(-) create mode 100644 kernel/locking/spinlock.c create mode 100644 kernel/locking/spinlock_debug.c delete mode 100644 kernel/spinlock.c delete mode 100644 lib/spinlock_debug.c diff --git a/kernel/Makefile b/kernel/Makefile index 4fffd6ee..4bce165 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -43,9 +43,6 @@ obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) obj-y += up.o endif -obj-$(CONFIG_SMP) += spinlock.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index c103599..674d215 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -13,3 +13,7 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif +obj-$(CONFIG_SMP) += spinlock.o +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c new file mode 100644 index 0000000..4b082b5 --- /dev/null +++ b/kernel/locking/spinlock.c @@ -0,0 +1,399 @@ +/* + * Copyright (2004) Linus Torvalds + * + * Author: Zwane Mwaikambo + * + * Copyright (2004, 2005) Ingo Molnar + * + * This file contains the spinlock/rwlock implementations for the + * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) + * + * Note that some architectures have special knowledge about the + * stack frames of these functions in their profile_pc. If you + * change anything significant here that could change the stack + * frame contact the architecture maintainers. + */ + +#include +#include +#include +#include +#include +#include + +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) +/* + * The __lock_function inlines are taken from + * include/linux/spinlock_api_smp.h + */ +#else +#define raw_read_can_lock(l) read_can_lock(l) +#define raw_write_can_lock(l) write_can_lock(l) + +/* + * Some architectures can relax in favour of the CPU owning the lock. + */ +#ifndef arch_read_relax +# define arch_read_relax(l) cpu_relax() +#endif +#ifndef arch_write_relax +# define arch_write_relax(l) cpu_relax() +#endif +#ifndef arch_spin_relax +# define arch_spin_relax(l) cpu_relax() +#endif + +/* + * We build the __lock_function inlines here. They are too large for + * inlining all over the place, but here is only one user per function + * which embedds them into the calling _lock_function below. + * + * This could be a long-held lock. We both prepare to spin for a long + * time (making _this_ CPU preemptable if possible), and we also signal + * towards that other CPU that it should break the lock ASAP. + */ +#define BUILD_LOCK_OPS(op, locktype) \ +void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ +{ \ + for (;;) { \ + preempt_disable(); \ + if (likely(do_raw_##op##_trylock(lock))) \ + break; \ + preempt_enable(); \ + \ + if (!(lock)->break_lock) \ + (lock)->break_lock = 1; \ + while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\ + arch_##op##_relax(&lock->raw_lock); \ + } \ + (lock)->break_lock = 0; \ +} \ + \ +unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ +{ \ + unsigned long flags; \ + \ + for (;;) { \ + preempt_disable(); \ + local_irq_save(flags); \ + if (likely(do_raw_##op##_trylock(lock))) \ + break; \ + local_irq_restore(flags); \ + preempt_enable(); \ + \ + if (!(lock)->break_lock) \ + (lock)->break_lock = 1; \ + while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\ + arch_##op##_relax(&lock->raw_lock); \ + } \ + (lock)->break_lock = 0; \ + return flags; \ +} \ + \ +void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ +{ \ + _raw_##op##_lock_irqsave(lock); \ +} \ + \ +void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ +{ \ + unsigned long flags; \ + \ + /* */ \ + /* Careful: we must exclude softirqs too, hence the */ \ + /* irq-disabling. We use the generic preemption-aware */ \ + /* function: */ \ + /**/ \ + flags = _raw_##op##_lock_irqsave(lock); \ + local_bh_disable(); \ + local_irq_restore(flags); \ +} \ + +/* + * Build preemption-friendly versions of the following + * lock-spinning functions: + * + * __[spin|read|write]_lock() + * __[spin|read|write]_lock_irq() + * __[spin|read|write]_lock_irqsave() + * __[spin|read|write]_lock_bh() + */ +BUILD_LOCK_OPS(spin, raw_spinlock); +BUILD_LOCK_OPS(read, rwlock); +BUILD_LOCK_OPS(write, rwlock); + +#endif + +#ifndef CONFIG_INLINE_SPIN_TRYLOCK +int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock) +{ + return __raw_spin_trylock(lock); +} +EXPORT_SYMBOL(_raw_spin_trylock); +#endif + +#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH +int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock) +{ + return __raw_spin_trylock_bh(lock); +} +EXPORT_SYMBOL(_raw_spin_trylock_bh); +#endif + +#ifndef CONFIG_INLINE_SPIN_LOCK +void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) +{ + __raw_spin_lock(lock); +} +EXPORT_SYMBOL(_raw_spin_lock); +#endif + +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE +unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock) +{ + return __raw_spin_lock_irqsave(lock); +} +EXPORT_SYMBOL(_raw_spin_lock_irqsave); +#endif + +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ +void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock) +{ + __raw_spin_lock_irq(lock); +} +EXPORT_SYMBOL(_raw_spin_lock_irq); +#endif + +#ifndef CONFIG_INLINE_SPIN_LOCK_BH +void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock) +{ + __raw_spin_lock_bh(lock); +} +EXPORT_SYMBOL(_raw_spin_lock_bh); +#endif + +#ifdef CONFIG_UNINLINE_SPIN_UNLOCK +void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) +{ + __raw_spin_unlock(lock); +} +EXPORT_SYMBOL(_raw_spin_unlock); +#endif + +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE +void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags) +{ + __raw_spin_unlock_irqrestore(lock, flags); +} +EXPORT_SYMBOL(_raw_spin_unlock_irqrestore); +#endif + +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ +void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock) +{ + __raw_spin_unlock_irq(lock); +} +EXPORT_SYMBOL(_raw_spin_unlock_irq); +#endif + +#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH +void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) +{ + __raw_spin_unlock_bh(lock); +} +EXPORT_SYMBOL(_raw_spin_unlock_bh); +#endif + +#ifndef CONFIG_INLINE_READ_TRYLOCK +int __lockfunc _raw_read_trylock(rwlock_t *lock) +{ + return __raw_read_trylock(lock); +} +EXPORT_SYMBOL(_raw_read_trylock); +#endif + +#ifndef CONFIG_INLINE_READ_LOCK +void __lockfunc _raw_read_lock(rwlock_t *lock) +{ + __raw_read_lock(lock); +} +EXPORT_SYMBOL(_raw_read_lock); +#endif + +#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE +unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock) +{ + return __raw_read_lock_irqsave(lock); +} +EXPORT_SYMBOL(_raw_read_lock_irqsave); +#endif + +#ifndef CONFIG_INLINE_READ_LOCK_IRQ +void __lockfunc _raw_read_lock_irq(rwlock_t *lock) +{ + __raw_read_lock_irq(lock); +} +EXPORT_SYMBOL(_raw_read_lock_irq); +#endif + +#ifndef CONFIG_INLINE_READ_LOCK_BH +void __lockfunc _raw_read_lock_bh(rwlock_t *lock) +{ + __raw_read_lock_bh(lock); +} +EXPORT_SYMBOL(_raw_read_lock_bh); +#endif + +#ifndef CONFIG_INLINE_READ_UNLOCK +void __lockfunc _raw_read_unlock(rwlock_t *lock) +{ + __raw_read_unlock(lock); +} +EXPORT_SYMBOL(_raw_read_unlock); +#endif + +#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE +void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + __raw_read_unlock_irqrestore(lock, flags); +} +EXPORT_SYMBOL(_raw_read_unlock_irqrestore); +#endif + +#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ +void __lockfunc _raw_read_unlock_irq(rwlock_t *lock) +{ + __raw_read_unlock_irq(lock); +} +EXPORT_SYMBOL(_raw_read_unlock_irq); +#endif + +#ifndef CONFIG_INLINE_READ_UNLOCK_BH +void __lockfunc _raw_read_unlock_bh(rwlock_t *lock) +{ + __raw_read_unlock_bh(lock); +} +EXPORT_SYMBOL(_raw_read_unlock_bh); +#endif + +#ifndef CONFIG_INLINE_WRITE_TRYLOCK +int __lockfunc _raw_write_trylock(rwlock_t *lock) +{ + return __raw_write_trylock(lock); +} +EXPORT_SYMBOL(_raw_write_trylock); +#endif + +#ifndef CONFIG_INLINE_WRITE_LOCK +void __lockfunc _raw_write_lock(rwlock_t *lock) +{ + __raw_write_lock(lock); +} +EXPORT_SYMBOL(_raw_write_lock); +#endif + +#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE +unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock) +{ + return __raw_write_lock_irqsave(lock); +} +EXPORT_SYMBOL(_raw_write_lock_irqsave); +#endif + +#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ +void __lockfunc _raw_write_lock_irq(rwlock_t *lock) +{ + __raw_write_lock_irq(lock); +} +EXPORT_SYMBOL(_raw_write_lock_irq); +#endif + +#ifndef CONFIG_INLINE_WRITE_LOCK_BH +void __lockfunc _raw_write_lock_bh(rwlock_t *lock) +{ + __raw_write_lock_bh(lock); +} +EXPORT_SYMBOL(_raw_write_lock_bh); +#endif + +#ifndef CONFIG_INLINE_WRITE_UNLOCK +void __lockfunc _raw_write_unlock(rwlock_t *lock) +{ + __raw_write_unlock(lock); +} +EXPORT_SYMBOL(_raw_write_unlock); +#endif + +#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE +void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + __raw_write_unlock_irqrestore(lock, flags); +} +EXPORT_SYMBOL(_raw_write_unlock_irqrestore); +#endif + +#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ +void __lockfunc _raw_write_unlock_irq(rwlock_t *lock) +{ + __raw_write_unlock_irq(lock); +} +EXPORT_SYMBOL(_raw_write_unlock_irq); +#endif + +#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH +void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) +{ + __raw_write_unlock_bh(lock); +} +EXPORT_SYMBOL(_raw_write_unlock_bh); +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) +{ + preempt_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); +} +EXPORT_SYMBOL(_raw_spin_lock_nested); + +unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, + int subclass) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock, + do_raw_spin_lock_flags, &flags); + return flags; +} +EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested); + +void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, + struct lockdep_map *nest_lock) +{ + preempt_disable(); + spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); + LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); +} +EXPORT_SYMBOL(_raw_spin_lock_nest_lock); + +#endif + +notrace int in_lock_functions(unsigned long addr) +{ + /* Linker adds these: start and end of __lockfunc functions */ + extern char __lock_text_start[], __lock_text_end[]; + + return addr >= (unsigned long)__lock_text_start + && addr < (unsigned long)__lock_text_end; +} +EXPORT_SYMBOL(in_lock_functions); diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c new file mode 100644 index 0000000..0374a59 --- /dev/null +++ b/kernel/locking/spinlock_debug.c @@ -0,0 +1,302 @@ +/* + * Copyright 2005, Red Hat, Inc., Ingo Molnar + * Released under the General Public License (GPL). + * + * This file contains the spinlock/rwlock implementations for + * DEBUG_SPINLOCK. + */ + +#include +#include +#include +#include +#include +#include + +void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif + lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; + lock->magic = SPINLOCK_MAGIC; + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +EXPORT_SYMBOL(__raw_spin_lock_init); + +void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif + lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED; + lock->magic = RWLOCK_MAGIC; + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +EXPORT_SYMBOL(__rwlock_init); + +static void spin_dump(raw_spinlock_t *lock, const char *msg) +{ + struct task_struct *owner = NULL; + + if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) + owner = lock->owner; + printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", + msg, raw_smp_processor_id(), + current->comm, task_pid_nr(current)); + printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, " + ".owner_cpu: %d\n", + lock, lock->magic, + owner ? owner->comm : "", + owner ? task_pid_nr(owner) : -1, + lock->owner_cpu); + dump_stack(); +} + +static void spin_bug(raw_spinlock_t *lock, const char *msg) +{ + if (!debug_locks_off()) + return; + + spin_dump(lock, msg); +} + +#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) + +static inline void +debug_spin_lock_before(raw_spinlock_t *lock) +{ + SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(lock->owner == current, lock, "recursion"); + SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), + lock, "cpu recursion"); +} + +static inline void debug_spin_lock_after(raw_spinlock_t *lock) +{ + lock->owner_cpu = raw_smp_processor_id(); + lock->owner = current; +} + +static inline void debug_spin_unlock(raw_spinlock_t *lock) +{ + SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked"); + SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); + SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), + lock, "wrong CPU"); + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +static void __spin_lock_debug(raw_spinlock_t *lock) +{ + u64 i; + u64 loops = loops_per_jiffy * HZ; + + for (i = 0; i < loops; i++) { + if (arch_spin_trylock(&lock->raw_lock)) + return; + __delay(1); + } + /* lockup suspected: */ + spin_dump(lock, "lockup suspected"); +#ifdef CONFIG_SMP + trigger_all_cpu_backtrace(); +#endif + + /* + * The trylock above was causing a livelock. Give the lower level arch + * specific lock code a chance to acquire the lock. We have already + * printed a warning/backtrace at this point. The non-debug arch + * specific code might actually succeed in acquiring the lock. If it is + * not successful, the end-result is the same - there is no forward + * progress. + */ + arch_spin_lock(&lock->raw_lock); +} + +void do_raw_spin_lock(raw_spinlock_t *lock) +{ + debug_spin_lock_before(lock); + if (unlikely(!arch_spin_trylock(&lock->raw_lock))) + __spin_lock_debug(lock); + debug_spin_lock_after(lock); +} + +int do_raw_spin_trylock(raw_spinlock_t *lock) +{ + int ret = arch_spin_trylock(&lock->raw_lock); + + if (ret) + debug_spin_lock_after(lock); +#ifndef CONFIG_SMP + /* + * Must not happen on UP: + */ + SPIN_BUG_ON(!ret, lock, "trylock failure on UP"); +#endif + return ret; +} + +void do_raw_spin_unlock(raw_spinlock_t *lock) +{ + debug_spin_unlock(lock); + arch_spin_unlock(&lock->raw_lock); +} + +static void rwlock_bug(rwlock_t *lock, const char *msg) +{ + if (!debug_locks_off()) + return; + + printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", + msg, raw_smp_processor_id(), current->comm, + task_pid_nr(current), lock); + dump_stack(); +} + +#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) + +#if 0 /* __write_lock_debug() can lock up - maybe this can too? */ +static void __read_lock_debug(rwlock_t *lock) +{ + u64 i; + u64 loops = loops_per_jiffy * HZ; + int print_once = 1; + + for (;;) { + for (i = 0; i < loops; i++) { + if (arch_read_trylock(&lock->raw_lock)) + return; + __delay(1); + } + /* lockup suspected: */ + if (print_once) { + print_once = 0; + printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, " + "%s/%d, %p\n", + raw_smp_processor_id(), current->comm, + current->pid, lock); + dump_stack(); + } + } +} +#endif + +void do_raw_read_lock(rwlock_t *lock) +{ + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + arch_read_lock(&lock->raw_lock); +} + +int do_raw_read_trylock(rwlock_t *lock) +{ + int ret = arch_read_trylock(&lock->raw_lock); + +#ifndef CONFIG_SMP + /* + * Must not happen on UP: + */ + RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); +#endif + return ret; +} + +void do_raw_read_unlock(rwlock_t *lock) +{ + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + arch_read_unlock(&lock->raw_lock); +} + +static inline void debug_write_lock_before(rwlock_t *lock) +{ + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + RWLOCK_BUG_ON(lock->owner == current, lock, "recursion"); + RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), + lock, "cpu recursion"); +} + +static inline void debug_write_lock_after(rwlock_t *lock) +{ + lock->owner_cpu = raw_smp_processor_id(); + lock->owner = current; +} + +static inline void debug_write_unlock(rwlock_t *lock) +{ + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); + RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), + lock, "wrong CPU"); + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +#if 0 /* This can cause lockups */ +static void __write_lock_debug(rwlock_t *lock) +{ + u64 i; + u64 loops = loops_per_jiffy * HZ; + int print_once = 1; + + for (;;) { + for (i = 0; i < loops; i++) { + if (arch_write_trylock(&lock->raw_lock)) + return; + __delay(1); + } + /* lockup suspected: */ + if (print_once) { + print_once = 0; + printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, " + "%s/%d, %p\n", + raw_smp_processor_id(), current->comm, + current->pid, lock); + dump_stack(); + } + } +} +#endif + +void do_raw_write_lock(rwlock_t *lock) +{ + debug_write_lock_before(lock); + arch_write_lock(&lock->raw_lock); + debug_write_lock_after(lock); +} + +int do_raw_write_trylock(rwlock_t *lock) +{ + int ret = arch_write_trylock(&lock->raw_lock); + + if (ret) + debug_write_lock_after(lock); +#ifndef CONFIG_SMP + /* + * Must not happen on UP: + */ + RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); +#endif + return ret; +} + +void do_raw_write_unlock(rwlock_t *lock) +{ + debug_write_unlock(lock); + arch_write_unlock(&lock->raw_lock); +} diff --git a/kernel/spinlock.c b/kernel/spinlock.c deleted file mode 100644 index 4b082b5..0000000 --- a/kernel/spinlock.c +++ /dev/null @@ -1,399 +0,0 @@ -/* - * Copyright (2004) Linus Torvalds - * - * Author: Zwane Mwaikambo - * - * Copyright (2004, 2005) Ingo Molnar - * - * This file contains the spinlock/rwlock implementations for the - * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) - * - * Note that some architectures have special knowledge about the - * stack frames of these functions in their profile_pc. If you - * change anything significant here that could change the stack - * frame contact the architecture maintainers. - */ - -#include -#include -#include -#include -#include -#include - -/* - * If lockdep is enabled then we use the non-preemption spin-ops - * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are - * not re-enabled during lock-acquire (which the preempt-spin-ops do): - */ -#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) -/* - * The __lock_function inlines are taken from - * include/linux/spinlock_api_smp.h - */ -#else -#define raw_read_can_lock(l) read_can_lock(l) -#define raw_write_can_lock(l) write_can_lock(l) - -/* - * Some architectures can relax in favour of the CPU owning the lock. - */ -#ifndef arch_read_relax -# define arch_read_relax(l) cpu_relax() -#endif -#ifndef arch_write_relax -# define arch_write_relax(l) cpu_relax() -#endif -#ifndef arch_spin_relax -# define arch_spin_relax(l) cpu_relax() -#endif - -/* - * We build the __lock_function inlines here. They are too large for - * inlining all over the place, but here is only one user per function - * which embedds them into the calling _lock_function below. - * - * This could be a long-held lock. We both prepare to spin for a long - * time (making _this_ CPU preemptable if possible), and we also signal - * towards that other CPU that it should break the lock ASAP. - */ -#define BUILD_LOCK_OPS(op, locktype) \ -void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ -{ \ - for (;;) { \ - preempt_disable(); \ - if (likely(do_raw_##op##_trylock(lock))) \ - break; \ - preempt_enable(); \ - \ - if (!(lock)->break_lock) \ - (lock)->break_lock = 1; \ - while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\ - arch_##op##_relax(&lock->raw_lock); \ - } \ - (lock)->break_lock = 0; \ -} \ - \ -unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ -{ \ - unsigned long flags; \ - \ - for (;;) { \ - preempt_disable(); \ - local_irq_save(flags); \ - if (likely(do_raw_##op##_trylock(lock))) \ - break; \ - local_irq_restore(flags); \ - preempt_enable(); \ - \ - if (!(lock)->break_lock) \ - (lock)->break_lock = 1; \ - while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\ - arch_##op##_relax(&lock->raw_lock); \ - } \ - (lock)->break_lock = 0; \ - return flags; \ -} \ - \ -void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ -{ \ - _raw_##op##_lock_irqsave(lock); \ -} \ - \ -void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ -{ \ - unsigned long flags; \ - \ - /* */ \ - /* Careful: we must exclude softirqs too, hence the */ \ - /* irq-disabling. We use the generic preemption-aware */ \ - /* function: */ \ - /**/ \ - flags = _raw_##op##_lock_irqsave(lock); \ - local_bh_disable(); \ - local_irq_restore(flags); \ -} \ - -/* - * Build preemption-friendly versions of the following - * lock-spinning functions: - * - * __[spin|read|write]_lock() - * __[spin|read|write]_lock_irq() - * __[spin|read|write]_lock_irqsave() - * __[spin|read|write]_lock_bh() - */ -BUILD_LOCK_OPS(spin, raw_spinlock); -BUILD_LOCK_OPS(read, rwlock); -BUILD_LOCK_OPS(write, rwlock); - -#endif - -#ifndef CONFIG_INLINE_SPIN_TRYLOCK -int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock) -{ - return __raw_spin_trylock(lock); -} -EXPORT_SYMBOL(_raw_spin_trylock); -#endif - -#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH -int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock) -{ - return __raw_spin_trylock_bh(lock); -} -EXPORT_SYMBOL(_raw_spin_trylock_bh); -#endif - -#ifndef CONFIG_INLINE_SPIN_LOCK -void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) -{ - __raw_spin_lock(lock); -} -EXPORT_SYMBOL(_raw_spin_lock); -#endif - -#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE -unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock) -{ - return __raw_spin_lock_irqsave(lock); -} -EXPORT_SYMBOL(_raw_spin_lock_irqsave); -#endif - -#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ -void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock) -{ - __raw_spin_lock_irq(lock); -} -EXPORT_SYMBOL(_raw_spin_lock_irq); -#endif - -#ifndef CONFIG_INLINE_SPIN_LOCK_BH -void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock) -{ - __raw_spin_lock_bh(lock); -} -EXPORT_SYMBOL(_raw_spin_lock_bh); -#endif - -#ifdef CONFIG_UNINLINE_SPIN_UNLOCK -void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) -{ - __raw_spin_unlock(lock); -} -EXPORT_SYMBOL(_raw_spin_unlock); -#endif - -#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE -void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags) -{ - __raw_spin_unlock_irqrestore(lock, flags); -} -EXPORT_SYMBOL(_raw_spin_unlock_irqrestore); -#endif - -#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ -void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock) -{ - __raw_spin_unlock_irq(lock); -} -EXPORT_SYMBOL(_raw_spin_unlock_irq); -#endif - -#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH -void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) -{ - __raw_spin_unlock_bh(lock); -} -EXPORT_SYMBOL(_raw_spin_unlock_bh); -#endif - -#ifndef CONFIG_INLINE_READ_TRYLOCK -int __lockfunc _raw_read_trylock(rwlock_t *lock) -{ - return __raw_read_trylock(lock); -} -EXPORT_SYMBOL(_raw_read_trylock); -#endif - -#ifndef CONFIG_INLINE_READ_LOCK -void __lockfunc _raw_read_lock(rwlock_t *lock) -{ - __raw_read_lock(lock); -} -EXPORT_SYMBOL(_raw_read_lock); -#endif - -#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE -unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock) -{ - return __raw_read_lock_irqsave(lock); -} -EXPORT_SYMBOL(_raw_read_lock_irqsave); -#endif - -#ifndef CONFIG_INLINE_READ_LOCK_IRQ -void __lockfunc _raw_read_lock_irq(rwlock_t *lock) -{ - __raw_read_lock_irq(lock); -} -EXPORT_SYMBOL(_raw_read_lock_irq); -#endif - -#ifndef CONFIG_INLINE_READ_LOCK_BH -void __lockfunc _raw_read_lock_bh(rwlock_t *lock) -{ - __raw_read_lock_bh(lock); -} -EXPORT_SYMBOL(_raw_read_lock_bh); -#endif - -#ifndef CONFIG_INLINE_READ_UNLOCK -void __lockfunc _raw_read_unlock(rwlock_t *lock) -{ - __raw_read_unlock(lock); -} -EXPORT_SYMBOL(_raw_read_unlock); -#endif - -#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE -void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) -{ - __raw_read_unlock_irqrestore(lock, flags); -} -EXPORT_SYMBOL(_raw_read_unlock_irqrestore); -#endif - -#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ -void __lockfunc _raw_read_unlock_irq(rwlock_t *lock) -{ - __raw_read_unlock_irq(lock); -} -EXPORT_SYMBOL(_raw_read_unlock_irq); -#endif - -#ifndef CONFIG_INLINE_READ_UNLOCK_BH -void __lockfunc _raw_read_unlock_bh(rwlock_t *lock) -{ - __raw_read_unlock_bh(lock); -} -EXPORT_SYMBOL(_raw_read_unlock_bh); -#endif - -#ifndef CONFIG_INLINE_WRITE_TRYLOCK -int __lockfunc _raw_write_trylock(rwlock_t *lock) -{ - return __raw_write_trylock(lock); -} -EXPORT_SYMBOL(_raw_write_trylock); -#endif - -#ifndef CONFIG_INLINE_WRITE_LOCK -void __lockfunc _raw_write_lock(rwlock_t *lock) -{ - __raw_write_lock(lock); -} -EXPORT_SYMBOL(_raw_write_lock); -#endif - -#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE -unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock) -{ - return __raw_write_lock_irqsave(lock); -} -EXPORT_SYMBOL(_raw_write_lock_irqsave); -#endif - -#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ -void __lockfunc _raw_write_lock_irq(rwlock_t *lock) -{ - __raw_write_lock_irq(lock); -} -EXPORT_SYMBOL(_raw_write_lock_irq); -#endif - -#ifndef CONFIG_INLINE_WRITE_LOCK_BH -void __lockfunc _raw_write_lock_bh(rwlock_t *lock) -{ - __raw_write_lock_bh(lock); -} -EXPORT_SYMBOL(_raw_write_lock_bh); -#endif - -#ifndef CONFIG_INLINE_WRITE_UNLOCK -void __lockfunc _raw_write_unlock(rwlock_t *lock) -{ - __raw_write_unlock(lock); -} -EXPORT_SYMBOL(_raw_write_unlock); -#endif - -#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE -void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) -{ - __raw_write_unlock_irqrestore(lock, flags); -} -EXPORT_SYMBOL(_raw_write_unlock_irqrestore); -#endif - -#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ -void __lockfunc _raw_write_unlock_irq(rwlock_t *lock) -{ - __raw_write_unlock_irq(lock); -} -EXPORT_SYMBOL(_raw_write_unlock_irq); -#endif - -#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH -void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) -{ - __raw_write_unlock_bh(lock); -} -EXPORT_SYMBOL(_raw_write_unlock_bh); -#endif - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - -void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) -{ - preempt_disable(); - spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -} -EXPORT_SYMBOL(_raw_spin_lock_nested); - -unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, - int subclass) -{ - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock, - do_raw_spin_lock_flags, &flags); - return flags; -} -EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested); - -void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, - struct lockdep_map *nest_lock) -{ - preempt_disable(); - spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -} -EXPORT_SYMBOL(_raw_spin_lock_nest_lock); - -#endif - -notrace int in_lock_functions(unsigned long addr) -{ - /* Linker adds these: start and end of __lockfunc functions */ - extern char __lock_text_start[], __lock_text_end[]; - - return addr >= (unsigned long)__lock_text_start - && addr < (unsigned long)__lock_text_end; -} -EXPORT_SYMBOL(in_lock_functions); diff --git a/lib/Makefile b/lib/Makefile index f3bb2cb..bee27e1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c deleted file mode 100644 index 0374a59..0000000 --- a/lib/spinlock_debug.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright 2005, Red Hat, Inc., Ingo Molnar - * Released under the General Public License (GPL). - * - * This file contains the spinlock/rwlock implementations for - * DEBUG_SPINLOCK. - */ - -#include -#include -#include -#include -#include -#include - -void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key, 0); -#endif - lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; - lock->magic = SPINLOCK_MAGIC; - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; -} - -EXPORT_SYMBOL(__raw_spin_lock_init); - -void __rwlock_init(rwlock_t *lock, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key, 0); -#endif - lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED; - lock->magic = RWLOCK_MAGIC; - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; -} - -EXPORT_SYMBOL(__rwlock_init); - -static void spin_dump(raw_spinlock_t *lock, const char *msg) -{ - struct task_struct *owner = NULL; - - if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) - owner = lock->owner; - printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", - msg, raw_smp_processor_id(), - current->comm, task_pid_nr(current)); - printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, " - ".owner_cpu: %d\n", - lock, lock->magic, - owner ? owner->comm : "", - owner ? task_pid_nr(owner) : -1, - lock->owner_cpu); - dump_stack(); -} - -static void spin_bug(raw_spinlock_t *lock, const char *msg) -{ - if (!debug_locks_off()) - return; - - spin_dump(lock, msg); -} - -#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) - -static inline void -debug_spin_lock_before(raw_spinlock_t *lock) -{ - SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(lock->owner == current, lock, "recursion"); - SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), - lock, "cpu recursion"); -} - -static inline void debug_spin_lock_after(raw_spinlock_t *lock) -{ - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; -} - -static inline void debug_spin_unlock(raw_spinlock_t *lock) -{ - SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked"); - SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); - SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), - lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; -} - -static void __spin_lock_debug(raw_spinlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - - for (i = 0; i < loops; i++) { - if (arch_spin_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - spin_dump(lock, "lockup suspected"); -#ifdef CONFIG_SMP - trigger_all_cpu_backtrace(); -#endif - - /* - * The trylock above was causing a livelock. Give the lower level arch - * specific lock code a chance to acquire the lock. We have already - * printed a warning/backtrace at this point. The non-debug arch - * specific code might actually succeed in acquiring the lock. If it is - * not successful, the end-result is the same - there is no forward - * progress. - */ - arch_spin_lock(&lock->raw_lock); -} - -void do_raw_spin_lock(raw_spinlock_t *lock) -{ - debug_spin_lock_before(lock); - if (unlikely(!arch_spin_trylock(&lock->raw_lock))) - __spin_lock_debug(lock); - debug_spin_lock_after(lock); -} - -int do_raw_spin_trylock(raw_spinlock_t *lock) -{ - int ret = arch_spin_trylock(&lock->raw_lock); - - if (ret) - debug_spin_lock_after(lock); -#ifndef CONFIG_SMP - /* - * Must not happen on UP: - */ - SPIN_BUG_ON(!ret, lock, "trylock failure on UP"); -#endif - return ret; -} - -void do_raw_spin_unlock(raw_spinlock_t *lock) -{ - debug_spin_unlock(lock); - arch_spin_unlock(&lock->raw_lock); -} - -static void rwlock_bug(rwlock_t *lock, const char *msg) -{ - if (!debug_locks_off()) - return; - - printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", - msg, raw_smp_processor_id(), current->comm, - task_pid_nr(current), lock); - dump_stack(); -} - -#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) - -#if 0 /* __write_lock_debug() can lock up - maybe this can too? */ -static void __read_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_read_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - -void do_raw_read_lock(rwlock_t *lock) -{ - RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - arch_read_lock(&lock->raw_lock); -} - -int do_raw_read_trylock(rwlock_t *lock) -{ - int ret = arch_read_trylock(&lock->raw_lock); - -#ifndef CONFIG_SMP - /* - * Must not happen on UP: - */ - RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); -#endif - return ret; -} - -void do_raw_read_unlock(rwlock_t *lock) -{ - RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - arch_read_unlock(&lock->raw_lock); -} - -static inline void debug_write_lock_before(rwlock_t *lock) -{ - RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - RWLOCK_BUG_ON(lock->owner == current, lock, "recursion"); - RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), - lock, "cpu recursion"); -} - -static inline void debug_write_lock_after(rwlock_t *lock) -{ - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; -} - -static inline void debug_write_unlock(rwlock_t *lock) -{ - RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); - RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), - lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; -} - -#if 0 /* This can cause lockups */ -static void __write_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_write_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - -void do_raw_write_lock(rwlock_t *lock) -{ - debug_write_lock_before(lock); - arch_write_lock(&lock->raw_lock); - debug_write_lock_after(lock); -} - -int do_raw_write_trylock(rwlock_t *lock) -{ - int ret = arch_write_trylock(&lock->raw_lock); - - if (ret) - debug_write_lock_after(lock); -#ifndef CONFIG_SMP - /* - * Must not happen on UP: - */ - RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); -#endif - return ret; -} - -void do_raw_write_unlock(rwlock_t *lock) -{ - debug_write_unlock(lock); - arch_write_unlock(&lock->raw_lock); -} -- cgit v1.1 From e25a64c4017e3a3cda17454b040737e410a12991 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:16:43 +0100 Subject: locking: Move the semaphore core to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-vmw5sf6vzmua1z6nx1cg69h2@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 2 +- kernel/locking/Makefile | 2 +- kernel/locking/semaphore.c | 263 +++++++++++++++++++++++++++++++++++++++++++++ kernel/semaphore.c | 263 --------------------------------------------- 4 files changed, 265 insertions(+), 265 deletions(-) create mode 100644 kernel/locking/semaphore.c delete mode 100644 kernel/semaphore.c diff --git a/kernel/Makefile b/kernel/Makefile index 4bce165..45e5ae2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,7 +8,7 @@ obj-y = fork.o exec_domain.o panic.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ extable.o params.o posix-timers.o \ kthread.o sys_ni.o posix-cpu-timers.o \ - hrtimer.o rwsem.o nsproxy.o semaphore.o \ + hrtimer.o rwsem.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 674d215..5978fdd 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o +obj-y += mutex.o semaphore.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c new file mode 100644 index 0000000..6815171 --- /dev/null +++ b/kernel/locking/semaphore.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2008 Intel Corporation + * Author: Matthew Wilcox + * + * Distributed under the terms of the GNU GPL, version 2 + * + * This file implements counting semaphores. + * A counting semaphore may be acquired 'n' times before sleeping. + * See mutex.c for single-acquisition sleeping locks which enforce + * rules which allow code to be debugged more easily. + */ + +/* + * Some notes on the implementation: + * + * The spinlock controls access to the other members of the semaphore. + * down_trylock() and up() can be called from interrupt context, so we + * have to disable interrupts when taking the lock. It turns out various + * parts of the kernel expect to be able to use down() on a semaphore in + * interrupt context when they know it will succeed, so we have to use + * irqsave variants for down(), down_interruptible() and down_killable() + * too. + * + * The ->count variable represents how many more tasks can acquire this + * semaphore. If it's zero, there may be tasks waiting on the wait_list. + */ + +#include +#include +#include +#include +#include +#include +#include + +static noinline void __down(struct semaphore *sem); +static noinline int __down_interruptible(struct semaphore *sem); +static noinline int __down_killable(struct semaphore *sem); +static noinline int __down_timeout(struct semaphore *sem, long jiffies); +static noinline void __up(struct semaphore *sem); + +/** + * down - acquire the semaphore + * @sem: the semaphore to be acquired + * + * Acquires the semaphore. If no more tasks are allowed to acquire the + * semaphore, calling this function will put the task to sleep until the + * semaphore is released. + * + * Use of this function is deprecated, please use down_interruptible() or + * down_killable() instead. + */ +void down(struct semaphore *sem) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + __down(sem); + raw_spin_unlock_irqrestore(&sem->lock, flags); +} +EXPORT_SYMBOL(down); + +/** + * down_interruptible - acquire the semaphore unless interrupted + * @sem: the semaphore to be acquired + * + * Attempts to acquire the semaphore. If no more tasks are allowed to + * acquire the semaphore, calling this function will put the task to sleep. + * If the sleep is interrupted by a signal, this function will return -EINTR. + * If the semaphore is successfully acquired, this function returns 0. + */ +int down_interruptible(struct semaphore *sem) +{ + unsigned long flags; + int result = 0; + + raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_interruptible(sem); + raw_spin_unlock_irqrestore(&sem->lock, flags); + + return result; +} +EXPORT_SYMBOL(down_interruptible); + +/** + * down_killable - acquire the semaphore unless killed + * @sem: the semaphore to be acquired + * + * Attempts to acquire the semaphore. If no more tasks are allowed to + * acquire the semaphore, calling this function will put the task to sleep. + * If the sleep is interrupted by a fatal signal, this function will return + * -EINTR. If the semaphore is successfully acquired, this function returns + * 0. + */ +int down_killable(struct semaphore *sem) +{ + unsigned long flags; + int result = 0; + + raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_killable(sem); + raw_spin_unlock_irqrestore(&sem->lock, flags); + + return result; +} +EXPORT_SYMBOL(down_killable); + +/** + * down_trylock - try to acquire the semaphore, without waiting + * @sem: the semaphore to be acquired + * + * Try to acquire the semaphore atomically. Returns 0 if the semaphore has + * been acquired successfully or 1 if it it cannot be acquired. + * + * NOTE: This return value is inverted from both spin_trylock and + * mutex_trylock! Be careful about this when converting code. + * + * Unlike mutex_trylock, this function can be used from interrupt context, + * and the semaphore can be released by any task or interrupt. + */ +int down_trylock(struct semaphore *sem) +{ + unsigned long flags; + int count; + + raw_spin_lock_irqsave(&sem->lock, flags); + count = sem->count - 1; + if (likely(count >= 0)) + sem->count = count; + raw_spin_unlock_irqrestore(&sem->lock, flags); + + return (count < 0); +} +EXPORT_SYMBOL(down_trylock); + +/** + * down_timeout - acquire the semaphore within a specified time + * @sem: the semaphore to be acquired + * @jiffies: how long to wait before failing + * + * Attempts to acquire the semaphore. If no more tasks are allowed to + * acquire the semaphore, calling this function will put the task to sleep. + * If the semaphore is not released within the specified number of jiffies, + * this function returns -ETIME. It returns 0 if the semaphore was acquired. + */ +int down_timeout(struct semaphore *sem, long jiffies) +{ + unsigned long flags; + int result = 0; + + raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_timeout(sem, jiffies); + raw_spin_unlock_irqrestore(&sem->lock, flags); + + return result; +} +EXPORT_SYMBOL(down_timeout); + +/** + * up - release the semaphore + * @sem: the semaphore to release + * + * Release the semaphore. Unlike mutexes, up() may be called from any + * context and even by tasks which have never called down(). + */ +void up(struct semaphore *sem) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(list_empty(&sem->wait_list))) + sem->count++; + else + __up(sem); + raw_spin_unlock_irqrestore(&sem->lock, flags); +} +EXPORT_SYMBOL(up); + +/* Functions for the contended case */ + +struct semaphore_waiter { + struct list_head list; + struct task_struct *task; + bool up; +}; + +/* + * Because this function is inlined, the 'state' parameter will be + * constant, and thus optimised away by the compiler. Likewise the + * 'timeout' parameter for the cases without timeouts. + */ +static inline int __sched __down_common(struct semaphore *sem, long state, + long timeout) +{ + struct task_struct *task = current; + struct semaphore_waiter waiter; + + list_add_tail(&waiter.list, &sem->wait_list); + waiter.task = task; + waiter.up = false; + + for (;;) { + if (signal_pending_state(state, task)) + goto interrupted; + if (unlikely(timeout <= 0)) + goto timed_out; + __set_task_state(task, state); + raw_spin_unlock_irq(&sem->lock); + timeout = schedule_timeout(timeout); + raw_spin_lock_irq(&sem->lock); + if (waiter.up) + return 0; + } + + timed_out: + list_del(&waiter.list); + return -ETIME; + + interrupted: + list_del(&waiter.list); + return -EINTR; +} + +static noinline void __sched __down(struct semaphore *sem) +{ + __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); +} + +static noinline int __sched __down_interruptible(struct semaphore *sem) +{ + return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); +} + +static noinline int __sched __down_killable(struct semaphore *sem) +{ + return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT); +} + +static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) +{ + return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); +} + +static noinline void __sched __up(struct semaphore *sem) +{ + struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, + struct semaphore_waiter, list); + list_del(&waiter->list); + waiter->up = true; + wake_up_process(waiter->task); +} diff --git a/kernel/semaphore.c b/kernel/semaphore.c deleted file mode 100644 index 6815171..0000000 --- a/kernel/semaphore.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2008 Intel Corporation - * Author: Matthew Wilcox - * - * Distributed under the terms of the GNU GPL, version 2 - * - * This file implements counting semaphores. - * A counting semaphore may be acquired 'n' times before sleeping. - * See mutex.c for single-acquisition sleeping locks which enforce - * rules which allow code to be debugged more easily. - */ - -/* - * Some notes on the implementation: - * - * The spinlock controls access to the other members of the semaphore. - * down_trylock() and up() can be called from interrupt context, so we - * have to disable interrupts when taking the lock. It turns out various - * parts of the kernel expect to be able to use down() on a semaphore in - * interrupt context when they know it will succeed, so we have to use - * irqsave variants for down(), down_interruptible() and down_killable() - * too. - * - * The ->count variable represents how many more tasks can acquire this - * semaphore. If it's zero, there may be tasks waiting on the wait_list. - */ - -#include -#include -#include -#include -#include -#include -#include - -static noinline void __down(struct semaphore *sem); -static noinline int __down_interruptible(struct semaphore *sem); -static noinline int __down_killable(struct semaphore *sem); -static noinline int __down_timeout(struct semaphore *sem, long jiffies); -static noinline void __up(struct semaphore *sem); - -/** - * down - acquire the semaphore - * @sem: the semaphore to be acquired - * - * Acquires the semaphore. If no more tasks are allowed to acquire the - * semaphore, calling this function will put the task to sleep until the - * semaphore is released. - * - * Use of this function is deprecated, please use down_interruptible() or - * down_killable() instead. - */ -void down(struct semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->lock, flags); - if (likely(sem->count > 0)) - sem->count--; - else - __down(sem); - raw_spin_unlock_irqrestore(&sem->lock, flags); -} -EXPORT_SYMBOL(down); - -/** - * down_interruptible - acquire the semaphore unless interrupted - * @sem: the semaphore to be acquired - * - * Attempts to acquire the semaphore. If no more tasks are allowed to - * acquire the semaphore, calling this function will put the task to sleep. - * If the sleep is interrupted by a signal, this function will return -EINTR. - * If the semaphore is successfully acquired, this function returns 0. - */ -int down_interruptible(struct semaphore *sem) -{ - unsigned long flags; - int result = 0; - - raw_spin_lock_irqsave(&sem->lock, flags); - if (likely(sem->count > 0)) - sem->count--; - else - result = __down_interruptible(sem); - raw_spin_unlock_irqrestore(&sem->lock, flags); - - return result; -} -EXPORT_SYMBOL(down_interruptible); - -/** - * down_killable - acquire the semaphore unless killed - * @sem: the semaphore to be acquired - * - * Attempts to acquire the semaphore. If no more tasks are allowed to - * acquire the semaphore, calling this function will put the task to sleep. - * If the sleep is interrupted by a fatal signal, this function will return - * -EINTR. If the semaphore is successfully acquired, this function returns - * 0. - */ -int down_killable(struct semaphore *sem) -{ - unsigned long flags; - int result = 0; - - raw_spin_lock_irqsave(&sem->lock, flags); - if (likely(sem->count > 0)) - sem->count--; - else - result = __down_killable(sem); - raw_spin_unlock_irqrestore(&sem->lock, flags); - - return result; -} -EXPORT_SYMBOL(down_killable); - -/** - * down_trylock - try to acquire the semaphore, without waiting - * @sem: the semaphore to be acquired - * - * Try to acquire the semaphore atomically. Returns 0 if the semaphore has - * been acquired successfully or 1 if it it cannot be acquired. - * - * NOTE: This return value is inverted from both spin_trylock and - * mutex_trylock! Be careful about this when converting code. - * - * Unlike mutex_trylock, this function can be used from interrupt context, - * and the semaphore can be released by any task or interrupt. - */ -int down_trylock(struct semaphore *sem) -{ - unsigned long flags; - int count; - - raw_spin_lock_irqsave(&sem->lock, flags); - count = sem->count - 1; - if (likely(count >= 0)) - sem->count = count; - raw_spin_unlock_irqrestore(&sem->lock, flags); - - return (count < 0); -} -EXPORT_SYMBOL(down_trylock); - -/** - * down_timeout - acquire the semaphore within a specified time - * @sem: the semaphore to be acquired - * @jiffies: how long to wait before failing - * - * Attempts to acquire the semaphore. If no more tasks are allowed to - * acquire the semaphore, calling this function will put the task to sleep. - * If the semaphore is not released within the specified number of jiffies, - * this function returns -ETIME. It returns 0 if the semaphore was acquired. - */ -int down_timeout(struct semaphore *sem, long jiffies) -{ - unsigned long flags; - int result = 0; - - raw_spin_lock_irqsave(&sem->lock, flags); - if (likely(sem->count > 0)) - sem->count--; - else - result = __down_timeout(sem, jiffies); - raw_spin_unlock_irqrestore(&sem->lock, flags); - - return result; -} -EXPORT_SYMBOL(down_timeout); - -/** - * up - release the semaphore - * @sem: the semaphore to release - * - * Release the semaphore. Unlike mutexes, up() may be called from any - * context and even by tasks which have never called down(). - */ -void up(struct semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->lock, flags); - if (likely(list_empty(&sem->wait_list))) - sem->count++; - else - __up(sem); - raw_spin_unlock_irqrestore(&sem->lock, flags); -} -EXPORT_SYMBOL(up); - -/* Functions for the contended case */ - -struct semaphore_waiter { - struct list_head list; - struct task_struct *task; - bool up; -}; - -/* - * Because this function is inlined, the 'state' parameter will be - * constant, and thus optimised away by the compiler. Likewise the - * 'timeout' parameter for the cases without timeouts. - */ -static inline int __sched __down_common(struct semaphore *sem, long state, - long timeout) -{ - struct task_struct *task = current; - struct semaphore_waiter waiter; - - list_add_tail(&waiter.list, &sem->wait_list); - waiter.task = task; - waiter.up = false; - - for (;;) { - if (signal_pending_state(state, task)) - goto interrupted; - if (unlikely(timeout <= 0)) - goto timed_out; - __set_task_state(task, state); - raw_spin_unlock_irq(&sem->lock); - timeout = schedule_timeout(timeout); - raw_spin_lock_irq(&sem->lock); - if (waiter.up) - return 0; - } - - timed_out: - list_del(&waiter.list); - return -ETIME; - - interrupted: - list_del(&waiter.list); - return -EINTR; -} - -static noinline void __sched __down(struct semaphore *sem) -{ - __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); -} - -static noinline int __sched __down_interruptible(struct semaphore *sem) -{ - return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); -} - -static noinline int __sched __down_killable(struct semaphore *sem) -{ - return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT); -} - -static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) -{ - return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); -} - -static noinline void __sched __up(struct semaphore *sem) -{ - struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, - struct semaphore_waiter, list); - list_del(&waiter->list); - waiter->up = true; - wake_up_process(waiter->task); -} -- cgit v1.1 From 1696a8bee390929fed05c6297164816ae2ced280 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:18:19 +0100 Subject: locking: Move the rtmutex code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-p9ijt8div0hwldexwfm4nlhj@git.kernel.org [ Fixed build failure in kernel/rcu/tree_plugin.h. ] Signed-off-by: Ingo Molnar --- kernel/Makefile | 3 - kernel/futex.c | 2 +- kernel/locking/Makefile | 3 + kernel/locking/rtmutex-debug.c | 187 +++++++ kernel/locking/rtmutex-debug.h | 33 ++ kernel/locking/rtmutex-tester.c | 420 ++++++++++++++++ kernel/locking/rtmutex.c | 1060 +++++++++++++++++++++++++++++++++++++++ kernel/locking/rtmutex.h | 26 + kernel/locking/rtmutex_common.h | 126 +++++ kernel/rcu/tree_plugin.h | 2 +- kernel/rtmutex-debug.c | 187 ------- kernel/rtmutex-debug.h | 33 -- kernel/rtmutex-tester.c | 420 ---------------- kernel/rtmutex.c | 1060 --------------------------------------- kernel/rtmutex.h | 26 - kernel/rtmutex_common.h | 126 ----- 16 files changed, 1857 insertions(+), 1857 deletions(-) create mode 100644 kernel/locking/rtmutex-debug.c create mode 100644 kernel/locking/rtmutex-debug.h create mode 100644 kernel/locking/rtmutex-tester.c create mode 100644 kernel/locking/rtmutex.c create mode 100644 kernel/locking/rtmutex.h create mode 100644 kernel/locking/rtmutex_common.h delete mode 100644 kernel/rtmutex-debug.c delete mode 100644 kernel/rtmutex-debug.h delete mode 100644 kernel/rtmutex-tester.c delete mode 100644 kernel/rtmutex.c delete mode 100644 kernel/rtmutex.h delete mode 100644 kernel/rtmutex_common.h diff --git a/kernel/Makefile b/kernel/Makefile index 45e5ae2..9c2ad18 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -35,9 +35,6 @@ obj-$(CONFIG_FUTEX) += futex.o ifeq ($(CONFIG_COMPAT),y) obj-$(CONFIG_FUTEX) += futex_compat.o endif -obj-$(CONFIG_RT_MUTEXES) += rtmutex.o -obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o -obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) diff --git a/kernel/futex.c b/kernel/futex.c index c3a1a55..80ba086 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -66,7 +66,7 @@ #include -#include "rtmutex_common.h" +#include "locking/rtmutex_common.h" int __read_mostly futex_cmpxchg_enabled; diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 5978fdd..59f66de 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -15,5 +15,8 @@ obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif obj-$(CONFIG_SMP) += spinlock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_RT_MUTEXES) += rtmutex.o +obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o +obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c new file mode 100644 index 0000000..13b243a --- /dev/null +++ b/kernel/locking/rtmutex-debug.c @@ -0,0 +1,187 @@ +/* + * RT-Mutexes: blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner: + * + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2006 Timesys Corp., Thomas Gleixner + * + * This code is based on the rt.c implementation in the preempt-rt tree. + * Portions of said code are + * + * Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey + * Copyright (C) 2006 Esben Nielsen + * Copyright (C) 2006 Kihon Technologies Inc., + * Steven Rostedt + * + * See rt.c in preempt-rt for proper credits and further information + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rtmutex_common.h" + +static void printk_task(struct task_struct *p) +{ + if (p) + printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio); + else + printk(""); +} + +static void printk_lock(struct rt_mutex *lock, int print_owner) +{ + if (lock->name) + printk(" [%p] {%s}\n", + lock, lock->name); + else + printk(" [%p] {%s:%d}\n", + lock, lock->file, lock->line); + + if (print_owner && rt_mutex_owner(lock)) { + printk(".. ->owner: %p\n", lock->owner); + printk(".. held by: "); + printk_task(rt_mutex_owner(lock)); + printk("\n"); + } +} + +void rt_mutex_debug_task_free(struct task_struct *task) +{ + DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters)); + DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); +} + +/* + * We fill out the fields in the waiter to store the information about + * the deadlock. We print when we return. act_waiter can be NULL in + * case of a remove waiter operation. + */ +void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, + struct rt_mutex *lock) +{ + struct task_struct *task; + + if (!debug_locks || detect || !act_waiter) + return; + + task = rt_mutex_owner(act_waiter->lock); + if (task && task != current) { + act_waiter->deadlock_task_pid = get_pid(task_pid(task)); + act_waiter->deadlock_lock = lock; + } +} + +void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) +{ + struct task_struct *task; + + if (!waiter->deadlock_lock || !debug_locks) + return; + + rcu_read_lock(); + task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID); + if (!task) { + rcu_read_unlock(); + return; + } + + if (!debug_locks_off()) { + rcu_read_unlock(); + return; + } + + printk("\n============================================\n"); + printk( "[ BUG: circular locking deadlock detected! ]\n"); + printk("%s\n", print_tainted()); + printk( "--------------------------------------------\n"); + printk("%s/%d is deadlocking current task %s/%d\n\n", + task->comm, task_pid_nr(task), + current->comm, task_pid_nr(current)); + + printk("\n1) %s/%d is trying to acquire this lock:\n", + current->comm, task_pid_nr(current)); + printk_lock(waiter->lock, 1); + + printk("\n2) %s/%d is blocked on this lock:\n", + task->comm, task_pid_nr(task)); + printk_lock(waiter->deadlock_lock, 1); + + debug_show_held_locks(current); + debug_show_held_locks(task); + + printk("\n%s/%d's [blocked] stackdump:\n\n", + task->comm, task_pid_nr(task)); + show_stack(task, NULL); + printk("\n%s/%d's [current] stackdump:\n\n", + current->comm, task_pid_nr(current)); + dump_stack(); + debug_show_all_locks(); + rcu_read_unlock(); + + printk("[ turning off deadlock detection." + "Please report this trace. ]\n\n"); +} + +void debug_rt_mutex_lock(struct rt_mutex *lock) +{ +} + +void debug_rt_mutex_unlock(struct rt_mutex *lock) +{ + DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); +} + +void +debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner) +{ +} + +void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) +{ + DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); +} + +void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) +{ + memset(waiter, 0x11, sizeof(*waiter)); + plist_node_init(&waiter->list_entry, MAX_PRIO); + plist_node_init(&waiter->pi_list_entry, MAX_PRIO); + waiter->deadlock_task_pid = NULL; +} + +void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) +{ + put_pid(waiter->deadlock_task_pid); + DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry)); + DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); + memset(waiter, 0x22, sizeof(*waiter)); +} + +void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) +{ + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lock->name = name; +} + +void +rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) +{ +} + +void rt_mutex_deadlock_account_unlock(struct task_struct *task) +{ +} + diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h new file mode 100644 index 0000000..14193d5 --- /dev/null +++ b/kernel/locking/rtmutex-debug.h @@ -0,0 +1,33 @@ +/* + * RT-Mutexes: blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner: + * + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner + * + * This file contains macros used solely by rtmutex.c. Debug version. + */ + +extern void +rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); +extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); +extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); +extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); +extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); +extern void debug_rt_mutex_lock(struct rt_mutex *lock); +extern void debug_rt_mutex_unlock(struct rt_mutex *lock); +extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, + struct task_struct *powner); +extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); +extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, + struct rt_mutex *lock); +extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); +# define debug_rt_mutex_reset_waiter(w) \ + do { (w)->deadlock_lock = NULL; } while (0) + +static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, + int detect) +{ + return (waiter != NULL); +} diff --git a/kernel/locking/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c new file mode 100644 index 0000000..1d96dd0 --- /dev/null +++ b/kernel/locking/rtmutex-tester.c @@ -0,0 +1,420 @@ +/* + * RT-Mutex-tester: scriptable tester for rt mutexes + * + * started by Thomas Gleixner: + * + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rtmutex.h" + +#define MAX_RT_TEST_THREADS 8 +#define MAX_RT_TEST_MUTEXES 8 + +static spinlock_t rttest_lock; +static atomic_t rttest_event; + +struct test_thread_data { + int opcode; + int opdata; + int mutexes[MAX_RT_TEST_MUTEXES]; + int event; + struct device dev; +}; + +static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; +static struct task_struct *threads[MAX_RT_TEST_THREADS]; +static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; + +enum test_opcodes { + RTTEST_NOP = 0, + RTTEST_SCHEDOT, /* 1 Sched other, data = nice */ + RTTEST_SCHEDRT, /* 2 Sched fifo, data = prio */ + RTTEST_LOCK, /* 3 Lock uninterruptible, data = lockindex */ + RTTEST_LOCKNOWAIT, /* 4 Lock uninterruptible no wait in wakeup, data = lockindex */ + RTTEST_LOCKINT, /* 5 Lock interruptible, data = lockindex */ + RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ + RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ + RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ + /* 9, 10 - reserved for BKL commemoration */ + RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */ + RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ + RTTEST_RESET = 99, /* 99 Reset all pending operations */ +}; + +static int handle_op(struct test_thread_data *td, int lockwakeup) +{ + int i, id, ret = -EINVAL; + + switch(td->opcode) { + + case RTTEST_NOP: + return 0; + + case RTTEST_LOCKCONT: + td->mutexes[td->opdata] = 1; + td->event = atomic_add_return(1, &rttest_event); + return 0; + + case RTTEST_RESET: + for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) { + if (td->mutexes[i] == 4) { + rt_mutex_unlock(&mutexes[i]); + td->mutexes[i] = 0; + } + } + return 0; + + case RTTEST_RESETEVENT: + atomic_set(&rttest_event, 0); + return 0; + + default: + if (lockwakeup) + return ret; + } + + switch(td->opcode) { + + case RTTEST_LOCK: + case RTTEST_LOCKNOWAIT: + id = td->opdata; + if (id < 0 || id >= MAX_RT_TEST_MUTEXES) + return ret; + + td->mutexes[id] = 1; + td->event = atomic_add_return(1, &rttest_event); + rt_mutex_lock(&mutexes[id]); + td->event = atomic_add_return(1, &rttest_event); + td->mutexes[id] = 4; + return 0; + + case RTTEST_LOCKINT: + case RTTEST_LOCKINTNOWAIT: + id = td->opdata; + if (id < 0 || id >= MAX_RT_TEST_MUTEXES) + return ret; + + td->mutexes[id] = 1; + td->event = atomic_add_return(1, &rttest_event); + ret = rt_mutex_lock_interruptible(&mutexes[id], 0); + td->event = atomic_add_return(1, &rttest_event); + td->mutexes[id] = ret ? 0 : 4; + return ret ? -EINTR : 0; + + case RTTEST_UNLOCK: + id = td->opdata; + if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4) + return ret; + + td->event = atomic_add_return(1, &rttest_event); + rt_mutex_unlock(&mutexes[id]); + td->event = atomic_add_return(1, &rttest_event); + td->mutexes[id] = 0; + return 0; + + default: + break; + } + return ret; +} + +/* + * Schedule replacement for rtsem_down(). Only called for threads with + * PF_MUTEX_TESTER set. + * + * This allows us to have finegrained control over the event flow. + * + */ +void schedule_rt_mutex_test(struct rt_mutex *mutex) +{ + int tid, op, dat; + struct test_thread_data *td; + + /* We have to lookup the task */ + for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) { + if (threads[tid] == current) + break; + } + + BUG_ON(tid == MAX_RT_TEST_THREADS); + + td = &thread_data[tid]; + + op = td->opcode; + dat = td->opdata; + + switch (op) { + case RTTEST_LOCK: + case RTTEST_LOCKINT: + case RTTEST_LOCKNOWAIT: + case RTTEST_LOCKINTNOWAIT: + if (mutex != &mutexes[dat]) + break; + + if (td->mutexes[dat] != 1) + break; + + td->mutexes[dat] = 2; + td->event = atomic_add_return(1, &rttest_event); + break; + + default: + break; + } + + schedule(); + + + switch (op) { + case RTTEST_LOCK: + case RTTEST_LOCKINT: + if (mutex != &mutexes[dat]) + return; + + if (td->mutexes[dat] != 2) + return; + + td->mutexes[dat] = 3; + td->event = atomic_add_return(1, &rttest_event); + break; + + case RTTEST_LOCKNOWAIT: + case RTTEST_LOCKINTNOWAIT: + if (mutex != &mutexes[dat]) + return; + + if (td->mutexes[dat] != 2) + return; + + td->mutexes[dat] = 1; + td->event = atomic_add_return(1, &rttest_event); + return; + + default: + return; + } + + td->opcode = 0; + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + if (td->opcode > 0) { + int ret; + + set_current_state(TASK_RUNNING); + ret = handle_op(td, 1); + set_current_state(TASK_INTERRUPTIBLE); + if (td->opcode == RTTEST_LOCKCONT) + break; + td->opcode = ret; + } + + /* Wait for the next command to be executed */ + schedule(); + } + + /* Restore previous command and data */ + td->opcode = op; + td->opdata = dat; +} + +static int test_func(void *data) +{ + struct test_thread_data *td = data; + int ret; + + current->flags |= PF_MUTEX_TESTER; + set_freezable(); + allow_signal(SIGHUP); + + for(;;) { + + set_current_state(TASK_INTERRUPTIBLE); + + if (td->opcode > 0) { + set_current_state(TASK_RUNNING); + ret = handle_op(td, 0); + set_current_state(TASK_INTERRUPTIBLE); + td->opcode = ret; + } + + /* Wait for the next command to be executed */ + schedule(); + try_to_freeze(); + + if (signal_pending(current)) + flush_signals(current); + + if(kthread_should_stop()) + break; + } + return 0; +} + +/** + * sysfs_test_command - interface for test commands + * @dev: thread reference + * @buf: command for actual step + * @count: length of buffer + * + * command syntax: + * + * opcode:data + */ +static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sched_param schedpar; + struct test_thread_data *td; + char cmdbuf[32]; + int op, dat, tid, ret; + + td = container_of(dev, struct test_thread_data, dev); + tid = td->dev.id; + + /* strings from sysfs write are not 0 terminated! */ + if (count >= sizeof(cmdbuf)) + return -EINVAL; + + /* strip of \n: */ + if (buf[count-1] == '\n') + count--; + if (count < 1) + return -EINVAL; + + memcpy(cmdbuf, buf, count); + cmdbuf[count] = 0; + + if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2) + return -EINVAL; + + switch (op) { + case RTTEST_SCHEDOT: + schedpar.sched_priority = 0; + ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar); + if (ret) + return ret; + set_user_nice(current, 0); + break; + + case RTTEST_SCHEDRT: + schedpar.sched_priority = dat; + ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar); + if (ret) + return ret; + break; + + case RTTEST_SIGNAL: + send_sig(SIGHUP, threads[tid], 0); + break; + + default: + if (td->opcode > 0) + return -EBUSY; + td->opdata = dat; + td->opcode = op; + wake_up_process(threads[tid]); + } + + return count; +} + +/** + * sysfs_test_status - sysfs interface for rt tester + * @dev: thread to query + * @buf: char buffer to be filled with thread status info + */ +static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct test_thread_data *td; + struct task_struct *tsk; + char *curr = buf; + int i; + + td = container_of(dev, struct test_thread_data, dev); + tsk = threads[td->dev.id]; + + spin_lock(&rttest_lock); + + curr += sprintf(curr, + "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:", + td->opcode, td->event, tsk->state, + (MAX_RT_PRIO - 1) - tsk->prio, + (MAX_RT_PRIO - 1) - tsk->normal_prio, + tsk->pi_blocked_on); + + for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--) + curr += sprintf(curr, "%d", td->mutexes[i]); + + spin_unlock(&rttest_lock); + + curr += sprintf(curr, ", T: %p, R: %p\n", tsk, + mutexes[td->dev.id].owner); + + return curr - buf; +} + +static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL); +static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command); + +static struct bus_type rttest_subsys = { + .name = "rttest", + .dev_name = "rttest", +}; + +static int init_test_thread(int id) +{ + thread_data[id].dev.bus = &rttest_subsys; + thread_data[id].dev.id = id; + + threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id); + if (IS_ERR(threads[id])) + return PTR_ERR(threads[id]); + + return device_register(&thread_data[id].dev); +} + +static int init_rttest(void) +{ + int ret, i; + + spin_lock_init(&rttest_lock); + + for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) + rt_mutex_init(&mutexes[i]); + + ret = subsys_system_register(&rttest_subsys, NULL); + if (ret) + return ret; + + for (i = 0; i < MAX_RT_TEST_THREADS; i++) { + ret = init_test_thread(i); + if (ret) + break; + ret = device_create_file(&thread_data[i].dev, &dev_attr_status); + if (ret) + break; + ret = device_create_file(&thread_data[i].dev, &dev_attr_command); + if (ret) + break; + } + + printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" ); + + return ret; +} + +device_initcall(init_rttest); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c new file mode 100644 index 0000000..0dd6aec --- /dev/null +++ b/kernel/locking/rtmutex.c @@ -0,0 +1,1060 @@ +/* + * RT-Mutexes: simple blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner. + * + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner + * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt + * Copyright (C) 2006 Esben Nielsen + * + * See Documentation/rt-mutex-design.txt for details. + */ +#include +#include +#include +#include +#include + +#include "rtmutex_common.h" + +/* + * lock->owner state tracking: + * + * lock->owner holds the task_struct pointer of the owner. Bit 0 + * is used to keep track of the "lock has waiters" state. + * + * owner bit0 + * NULL 0 lock is free (fast acquire possible) + * NULL 1 lock is free and has waiters and the top waiter + * is going to take the lock* + * taskpointer 0 lock is held (fast release possible) + * taskpointer 1 lock is held and has waiters** + * + * The fast atomic compare exchange based acquire and release is only + * possible when bit 0 of lock->owner is 0. + * + * (*) It also can be a transitional state when grabbing the lock + * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, + * we need to set the bit0 before looking at the lock, and the owner may be + * NULL in this small time, hence this can be a transitional state. + * + * (**) There is a small time when bit 0 is set but there are no + * waiters. This can happen when grabbing the lock in the slow path. + * To prevent a cmpxchg of the owner releasing the lock, we need to + * set this bit before looking at the lock. + */ + +static void +rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) +{ + unsigned long val = (unsigned long)owner; + + if (rt_mutex_has_waiters(lock)) + val |= RT_MUTEX_HAS_WAITERS; + + lock->owner = (struct task_struct *)val; +} + +static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) +{ + lock->owner = (struct task_struct *) + ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); +} + +static void fixup_rt_mutex_waiters(struct rt_mutex *lock) +{ + if (!rt_mutex_has_waiters(lock)) + clear_rt_mutex_waiters(lock); +} + +/* + * We can speed up the acquire/release, if the architecture + * supports cmpxchg and if there's no debugging state to be set up + */ +#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) +# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) +static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +{ + unsigned long owner, *p = (unsigned long *) &lock->owner; + + do { + owner = *p; + } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); +} +#else +# define rt_mutex_cmpxchg(l,c,n) (0) +static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +{ + lock->owner = (struct task_struct *) + ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); +} +#endif + +/* + * Calculate task priority from the waiter list priority + * + * Return task->normal_prio when the waiter list is empty or when + * the waiter is not allowed to do priority boosting + */ +int rt_mutex_getprio(struct task_struct *task) +{ + if (likely(!task_has_pi_waiters(task))) + return task->normal_prio; + + return min(task_top_pi_waiter(task)->pi_list_entry.prio, + task->normal_prio); +} + +/* + * Adjust the priority of a task, after its pi_waiters got modified. + * + * This can be both boosting and unboosting. task->pi_lock must be held. + */ +static void __rt_mutex_adjust_prio(struct task_struct *task) +{ + int prio = rt_mutex_getprio(task); + + if (task->prio != prio) + rt_mutex_setprio(task, prio); +} + +/* + * Adjust task priority (undo boosting). Called from the exit path of + * rt_mutex_slowunlock() and rt_mutex_slowlock(). + * + * (Note: We do this outside of the protection of lock->wait_lock to + * allow the lock to be taken while or before we readjust the priority + * of task. We do not use the spin_xx_mutex() variants here as we are + * outside of the debug path.) + */ +static void rt_mutex_adjust_prio(struct task_struct *task) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + __rt_mutex_adjust_prio(task); + raw_spin_unlock_irqrestore(&task->pi_lock, flags); +} + +/* + * Max number of times we'll walk the boosting chain: + */ +int max_lock_depth = 1024; + +/* + * Adjust the priority chain. Also used for deadlock detection. + * Decreases task's usage by one - may thus free the task. + * + * @task: the task owning the mutex (owner) for which a chain walk is probably + * needed + * @deadlock_detect: do we have to carry out deadlock detection? + * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck + * things for a task that has just got its priority adjusted, and + * is waiting on a mutex) + * @orig_waiter: rt_mutex_waiter struct for the task that has just donated + * its priority to the mutex owner (can be NULL in the case + * depicted above or if the top waiter is gone away and we are + * actually deboosting the owner) + * @top_task: the current top waiter + * + * Returns 0 or -EDEADLK. + */ +static int rt_mutex_adjust_prio_chain(struct task_struct *task, + int deadlock_detect, + struct rt_mutex *orig_lock, + struct rt_mutex_waiter *orig_waiter, + struct task_struct *top_task) +{ + struct rt_mutex *lock; + struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; + int detect_deadlock, ret = 0, depth = 0; + unsigned long flags; + + detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, + deadlock_detect); + + /* + * The (de)boosting is a step by step approach with a lot of + * pitfalls. We want this to be preemptible and we want hold a + * maximum of two locks per step. So we have to check + * carefully whether things change under us. + */ + again: + if (++depth > max_lock_depth) { + static int prev_max; + + /* + * Print this only once. If the admin changes the limit, + * print a new message when reaching the limit again. + */ + if (prev_max != max_lock_depth) { + prev_max = max_lock_depth; + printk(KERN_WARNING "Maximum lock depth %d reached " + "task: %s (%d)\n", max_lock_depth, + top_task->comm, task_pid_nr(top_task)); + } + put_task_struct(task); + + return deadlock_detect ? -EDEADLK : 0; + } + retry: + /* + * Task can not go away as we did a get_task() before ! + */ + raw_spin_lock_irqsave(&task->pi_lock, flags); + + waiter = task->pi_blocked_on; + /* + * Check whether the end of the boosting chain has been + * reached or the state of the chain has changed while we + * dropped the locks. + */ + if (!waiter) + goto out_unlock_pi; + + /* + * Check the orig_waiter state. After we dropped the locks, + * the previous owner of the lock might have released the lock. + */ + if (orig_waiter && !rt_mutex_owner(orig_lock)) + goto out_unlock_pi; + + /* + * Drop out, when the task has no waiters. Note, + * top_waiter can be NULL, when we are in the deboosting + * mode! + */ + if (top_waiter && (!task_has_pi_waiters(task) || + top_waiter != task_top_pi_waiter(task))) + goto out_unlock_pi; + + /* + * When deadlock detection is off then we check, if further + * priority adjustment is necessary. + */ + if (!detect_deadlock && waiter->list_entry.prio == task->prio) + goto out_unlock_pi; + + lock = waiter->lock; + if (!raw_spin_trylock(&lock->wait_lock)) { + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + cpu_relax(); + goto retry; + } + + /* Deadlock detection */ + if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { + debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); + raw_spin_unlock(&lock->wait_lock); + ret = deadlock_detect ? -EDEADLK : 0; + goto out_unlock_pi; + } + + top_waiter = rt_mutex_top_waiter(lock); + + /* Requeue the waiter */ + plist_del(&waiter->list_entry, &lock->wait_list); + waiter->list_entry.prio = task->prio; + plist_add(&waiter->list_entry, &lock->wait_list); + + /* Release the task */ + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + if (!rt_mutex_owner(lock)) { + /* + * If the requeue above changed the top waiter, then we need + * to wake the new top waiter up to try to get the lock. + */ + + if (top_waiter != rt_mutex_top_waiter(lock)) + wake_up_process(rt_mutex_top_waiter(lock)->task); + raw_spin_unlock(&lock->wait_lock); + goto out_put_task; + } + put_task_struct(task); + + /* Grab the next task */ + task = rt_mutex_owner(lock); + get_task_struct(task); + raw_spin_lock_irqsave(&task->pi_lock, flags); + + if (waiter == rt_mutex_top_waiter(lock)) { + /* Boost the owner */ + plist_del(&top_waiter->pi_list_entry, &task->pi_waiters); + waiter->pi_list_entry.prio = waiter->list_entry.prio; + plist_add(&waiter->pi_list_entry, &task->pi_waiters); + __rt_mutex_adjust_prio(task); + + } else if (top_waiter == waiter) { + /* Deboost the owner */ + plist_del(&waiter->pi_list_entry, &task->pi_waiters); + waiter = rt_mutex_top_waiter(lock); + waiter->pi_list_entry.prio = waiter->list_entry.prio; + plist_add(&waiter->pi_list_entry, &task->pi_waiters); + __rt_mutex_adjust_prio(task); + } + + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + top_waiter = rt_mutex_top_waiter(lock); + raw_spin_unlock(&lock->wait_lock); + + if (!detect_deadlock && waiter != top_waiter) + goto out_put_task; + + goto again; + + out_unlock_pi: + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + out_put_task: + put_task_struct(task); + + return ret; +} + +/* + * Try to take an rt-mutex + * + * Must be called with lock->wait_lock held. + * + * @lock: the lock to be acquired. + * @task: the task which wants to acquire the lock + * @waiter: the waiter that is queued to the lock's wait list. (could be NULL) + */ +static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + struct rt_mutex_waiter *waiter) +{ + /* + * We have to be careful here if the atomic speedups are + * enabled, such that, when + * - no other waiter is on the lock + * - the lock has been released since we did the cmpxchg + * the lock can be released or taken while we are doing the + * checks and marking the lock with RT_MUTEX_HAS_WAITERS. + * + * The atomic acquire/release aware variant of + * mark_rt_mutex_waiters uses a cmpxchg loop. After setting + * the WAITERS bit, the atomic release / acquire can not + * happen anymore and lock->wait_lock protects us from the + * non-atomic case. + * + * Note, that this might set lock->owner = + * RT_MUTEX_HAS_WAITERS in the case the lock is not contended + * any more. This is fixed up when we take the ownership. + * This is the transitional state explained at the top of this file. + */ + mark_rt_mutex_waiters(lock); + + if (rt_mutex_owner(lock)) + return 0; + + /* + * It will get the lock because of one of these conditions: + * 1) there is no waiter + * 2) higher priority than waiters + * 3) it is top waiter + */ + if (rt_mutex_has_waiters(lock)) { + if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) { + if (!waiter || waiter != rt_mutex_top_waiter(lock)) + return 0; + } + } + + if (waiter || rt_mutex_has_waiters(lock)) { + unsigned long flags; + struct rt_mutex_waiter *top; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + + /* remove the queued waiter. */ + if (waiter) { + plist_del(&waiter->list_entry, &lock->wait_list); + task->pi_blocked_on = NULL; + } + + /* + * We have to enqueue the top waiter(if it exists) into + * task->pi_waiters list. + */ + if (rt_mutex_has_waiters(lock)) { + top = rt_mutex_top_waiter(lock); + top->pi_list_entry.prio = top->list_entry.prio; + plist_add(&top->pi_list_entry, &task->pi_waiters); + } + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + } + + /* We got the lock. */ + debug_rt_mutex_lock(lock); + + rt_mutex_set_owner(lock, task); + + rt_mutex_deadlock_account_lock(lock, task); + + return 1; +} + +/* + * Task blocks on lock. + * + * Prepare waiter and propagate pi chain + * + * This must be called with lock->wait_lock held. + */ +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, + int detect_deadlock) +{ + struct task_struct *owner = rt_mutex_owner(lock); + struct rt_mutex_waiter *top_waiter = waiter; + unsigned long flags; + int chain_walk = 0, res; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + __rt_mutex_adjust_prio(task); + waiter->task = task; + waiter->lock = lock; + plist_node_init(&waiter->list_entry, task->prio); + plist_node_init(&waiter->pi_list_entry, task->prio); + + /* Get the top priority waiter on the lock */ + if (rt_mutex_has_waiters(lock)) + top_waiter = rt_mutex_top_waiter(lock); + plist_add(&waiter->list_entry, &lock->wait_list); + + task->pi_blocked_on = waiter; + + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + if (!owner) + return 0; + + if (waiter == rt_mutex_top_waiter(lock)) { + raw_spin_lock_irqsave(&owner->pi_lock, flags); + plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); + plist_add(&waiter->pi_list_entry, &owner->pi_waiters); + + __rt_mutex_adjust_prio(owner); + if (owner->pi_blocked_on) + chain_walk = 1; + raw_spin_unlock_irqrestore(&owner->pi_lock, flags); + } + else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) + chain_walk = 1; + + if (!chain_walk) + return 0; + + /* + * The owner can't disappear while holding a lock, + * so the owner struct is protected by wait_lock. + * Gets dropped in rt_mutex_adjust_prio_chain()! + */ + get_task_struct(owner); + + raw_spin_unlock(&lock->wait_lock); + + res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, + task); + + raw_spin_lock(&lock->wait_lock); + + return res; +} + +/* + * Wake up the next waiter on the lock. + * + * Remove the top waiter from the current tasks waiter list and wake it up. + * + * Called with lock->wait_lock held. + */ +static void wakeup_next_waiter(struct rt_mutex *lock) +{ + struct rt_mutex_waiter *waiter; + unsigned long flags; + + raw_spin_lock_irqsave(¤t->pi_lock, flags); + + waiter = rt_mutex_top_waiter(lock); + + /* + * Remove it from current->pi_waiters. We do not adjust a + * possible priority boost right now. We execute wakeup in the + * boosted mode and go back to normal after releasing + * lock->wait_lock. + */ + plist_del(&waiter->pi_list_entry, ¤t->pi_waiters); + + rt_mutex_set_owner(lock, NULL); + + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); + + wake_up_process(waiter->task); +} + +/* + * Remove a waiter from a lock and give up + * + * Must be called with lock->wait_lock held and + * have just failed to try_to_take_rt_mutex(). + */ +static void remove_waiter(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter) +{ + int first = (waiter == rt_mutex_top_waiter(lock)); + struct task_struct *owner = rt_mutex_owner(lock); + unsigned long flags; + int chain_walk = 0; + + raw_spin_lock_irqsave(¤t->pi_lock, flags); + plist_del(&waiter->list_entry, &lock->wait_list); + current->pi_blocked_on = NULL; + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); + + if (!owner) + return; + + if (first) { + + raw_spin_lock_irqsave(&owner->pi_lock, flags); + + plist_del(&waiter->pi_list_entry, &owner->pi_waiters); + + if (rt_mutex_has_waiters(lock)) { + struct rt_mutex_waiter *next; + + next = rt_mutex_top_waiter(lock); + plist_add(&next->pi_list_entry, &owner->pi_waiters); + } + __rt_mutex_adjust_prio(owner); + + if (owner->pi_blocked_on) + chain_walk = 1; + + raw_spin_unlock_irqrestore(&owner->pi_lock, flags); + } + + WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); + + if (!chain_walk) + return; + + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(owner); + + raw_spin_unlock(&lock->wait_lock); + + rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); + + raw_spin_lock(&lock->wait_lock); +} + +/* + * Recheck the pi chain, in case we got a priority setting + * + * Called from sched_setscheduler + */ +void rt_mutex_adjust_pi(struct task_struct *task) +{ + struct rt_mutex_waiter *waiter; + unsigned long flags; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + + waiter = task->pi_blocked_on; + if (!waiter || waiter->list_entry.prio == task->prio) { + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + return; + } + + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(task); + rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); +} + +/** + * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop + * @lock: the rt_mutex to take + * @state: the state the task should block in (TASK_INTERRUPTIBLE + * or TASK_UNINTERRUPTIBLE) + * @timeout: the pre-initialized and started timer, or NULL for none + * @waiter: the pre-initialized rt_mutex_waiter + * + * lock->wait_lock must be held by the caller. + */ +static int __sched +__rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + struct rt_mutex_waiter *waiter) +{ + int ret = 0; + + for (;;) { + /* Try to acquire the lock: */ + if (try_to_take_rt_mutex(lock, current, waiter)) + break; + + /* + * TASK_INTERRUPTIBLE checks for signals and + * timeout. Ignored otherwise. + */ + if (unlikely(state == TASK_INTERRUPTIBLE)) { + /* Signal pending? */ + if (signal_pending(current)) + ret = -EINTR; + if (timeout && !timeout->task) + ret = -ETIMEDOUT; + if (ret) + break; + } + + raw_spin_unlock(&lock->wait_lock); + + debug_rt_mutex_print_deadlock(waiter); + + schedule_rt_mutex(lock); + + raw_spin_lock(&lock->wait_lock); + set_current_state(state); + } + + return ret; +} + +/* + * Slow path lock function: + */ +static int __sched +rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + int detect_deadlock) +{ + struct rt_mutex_waiter waiter; + int ret = 0; + + debug_rt_mutex_init_waiter(&waiter); + + raw_spin_lock(&lock->wait_lock); + + /* Try to acquire the lock again: */ + if (try_to_take_rt_mutex(lock, current, NULL)) { + raw_spin_unlock(&lock->wait_lock); + return 0; + } + + set_current_state(state); + + /* Setup the timer, when timeout != NULL */ + if (unlikely(timeout)) { + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + if (!hrtimer_active(&timeout->timer)) + timeout->task = NULL; + } + + ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock); + + if (likely(!ret)) + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); + + set_current_state(TASK_RUNNING); + + if (unlikely(ret)) + remove_waiter(lock, &waiter); + + /* + * try_to_take_rt_mutex() sets the waiter bit + * unconditionally. We might have to fix that up. + */ + fixup_rt_mutex_waiters(lock); + + raw_spin_unlock(&lock->wait_lock); + + /* Remove pending timer: */ + if (unlikely(timeout)) + hrtimer_cancel(&timeout->timer); + + debug_rt_mutex_free_waiter(&waiter); + + return ret; +} + +/* + * Slow path try-lock function: + */ +static inline int +rt_mutex_slowtrylock(struct rt_mutex *lock) +{ + int ret = 0; + + raw_spin_lock(&lock->wait_lock); + + if (likely(rt_mutex_owner(lock) != current)) { + + ret = try_to_take_rt_mutex(lock, current, NULL); + /* + * try_to_take_rt_mutex() sets the lock waiters + * bit unconditionally. Clean this up. + */ + fixup_rt_mutex_waiters(lock); + } + + raw_spin_unlock(&lock->wait_lock); + + return ret; +} + +/* + * Slow path to release a rt-mutex: + */ +static void __sched +rt_mutex_slowunlock(struct rt_mutex *lock) +{ + raw_spin_lock(&lock->wait_lock); + + debug_rt_mutex_unlock(lock); + + rt_mutex_deadlock_account_unlock(current); + + if (!rt_mutex_has_waiters(lock)) { + lock->owner = NULL; + raw_spin_unlock(&lock->wait_lock); + return; + } + + wakeup_next_waiter(lock); + + raw_spin_unlock(&lock->wait_lock); + + /* Undo pi boosting if necessary: */ + rt_mutex_adjust_prio(current); +} + +/* + * debug aware fast / slowpath lock,trylock,unlock + * + * The atomic acquire/release ops are compiled away, when either the + * architecture does not support cmpxchg or when debugging is enabled. + */ +static inline int +rt_mutex_fastlock(struct rt_mutex *lock, int state, + int detect_deadlock, + int (*slowfn)(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + int detect_deadlock)) +{ + if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { + rt_mutex_deadlock_account_lock(lock, current); + return 0; + } else + return slowfn(lock, state, NULL, detect_deadlock); +} + +static inline int +rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, int detect_deadlock, + int (*slowfn)(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + int detect_deadlock)) +{ + if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { + rt_mutex_deadlock_account_lock(lock, current); + return 0; + } else + return slowfn(lock, state, timeout, detect_deadlock); +} + +static inline int +rt_mutex_fasttrylock(struct rt_mutex *lock, + int (*slowfn)(struct rt_mutex *lock)) +{ + if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { + rt_mutex_deadlock_account_lock(lock, current); + return 1; + } + return slowfn(lock); +} + +static inline void +rt_mutex_fastunlock(struct rt_mutex *lock, + void (*slowfn)(struct rt_mutex *lock)) +{ + if (likely(rt_mutex_cmpxchg(lock, current, NULL))) + rt_mutex_deadlock_account_unlock(current); + else + slowfn(lock); +} + +/** + * rt_mutex_lock - lock a rt_mutex + * + * @lock: the rt_mutex to be locked + */ +void __sched rt_mutex_lock(struct rt_mutex *lock) +{ + might_sleep(); + + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock); + +/** + * rt_mutex_lock_interruptible - lock a rt_mutex interruptible + * + * @lock: the rt_mutex to be locked + * @detect_deadlock: deadlock detection on/off + * + * Returns: + * 0 on success + * -EINTR when interrupted by a signal + * -EDEADLK when the lock would deadlock (when deadlock detection is on) + */ +int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, + int detect_deadlock) +{ + might_sleep(); + + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, + detect_deadlock, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); + +/** + * rt_mutex_timed_lock - lock a rt_mutex interruptible + * the timeout structure is provided + * by the caller + * + * @lock: the rt_mutex to be locked + * @timeout: timeout structure or NULL (no timeout) + * @detect_deadlock: deadlock detection on/off + * + * Returns: + * 0 on success + * -EINTR when interrupted by a signal + * -ETIMEDOUT when the timeout expired + * -EDEADLK when the lock would deadlock (when deadlock detection is on) + */ +int +rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, + int detect_deadlock) +{ + might_sleep(); + + return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, + detect_deadlock, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); + +/** + * rt_mutex_trylock - try to lock a rt_mutex + * + * @lock: the rt_mutex to be locked + * + * Returns 1 on success and 0 on contention + */ +int __sched rt_mutex_trylock(struct rt_mutex *lock) +{ + return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); +} +EXPORT_SYMBOL_GPL(rt_mutex_trylock); + +/** + * rt_mutex_unlock - unlock a rt_mutex + * + * @lock: the rt_mutex to be unlocked + */ +void __sched rt_mutex_unlock(struct rt_mutex *lock) +{ + rt_mutex_fastunlock(lock, rt_mutex_slowunlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_unlock); + +/** + * rt_mutex_destroy - mark a mutex unusable + * @lock: the mutex to be destroyed + * + * This function marks the mutex uninitialized, and any subsequent + * use of the mutex is forbidden. The mutex must not be locked when + * this function is called. + */ +void rt_mutex_destroy(struct rt_mutex *lock) +{ + WARN_ON(rt_mutex_is_locked(lock)); +#ifdef CONFIG_DEBUG_RT_MUTEXES + lock->magic = NULL; +#endif +} + +EXPORT_SYMBOL_GPL(rt_mutex_destroy); + +/** + * __rt_mutex_init - initialize the rt lock + * + * @lock: the rt lock to be initialized + * + * Initialize the rt lock to unlocked state. + * + * Initializing of a locked rt lock is not allowed + */ +void __rt_mutex_init(struct rt_mutex *lock, const char *name) +{ + lock->owner = NULL; + raw_spin_lock_init(&lock->wait_lock); + plist_head_init(&lock->wait_list); + + debug_rt_mutex_init(lock, name); +} +EXPORT_SYMBOL_GPL(__rt_mutex_init); + +/** + * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a + * proxy owner + * + * @lock: the rt_mutex to be locked + * @proxy_owner:the task to set as owner + * + * No locking. Caller has to do serializing itself + * Special API call for PI-futex support + */ +void rt_mutex_init_proxy_locked(struct rt_mutex *lock, + struct task_struct *proxy_owner) +{ + __rt_mutex_init(lock, NULL); + debug_rt_mutex_proxy_lock(lock, proxy_owner); + rt_mutex_set_owner(lock, proxy_owner); + rt_mutex_deadlock_account_lock(lock, proxy_owner); +} + +/** + * rt_mutex_proxy_unlock - release a lock on behalf of owner + * + * @lock: the rt_mutex to be locked + * + * No locking. Caller has to do serializing itself + * Special API call for PI-futex support + */ +void rt_mutex_proxy_unlock(struct rt_mutex *lock, + struct task_struct *proxy_owner) +{ + debug_rt_mutex_proxy_unlock(lock); + rt_mutex_set_owner(lock, NULL); + rt_mutex_deadlock_account_unlock(proxy_owner); +} + +/** + * rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * @detect_deadlock: perform deadlock detection (1) or not (0) + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for FUTEX_REQUEUE_PI support. + */ +int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, int detect_deadlock) +{ + int ret; + + raw_spin_lock(&lock->wait_lock); + + if (try_to_take_rt_mutex(lock, task, NULL)) { + raw_spin_unlock(&lock->wait_lock); + return 1; + } + + ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); + + if (ret && !rt_mutex_owner(lock)) { + /* + * Reset the return value. We might have + * returned with -EDEADLK and the owner + * released the lock while we were walking the + * pi chain. Let the waiter sort it out. + */ + ret = 0; + } + + if (unlikely(ret)) + remove_waiter(lock, waiter); + + raw_spin_unlock(&lock->wait_lock); + + debug_rt_mutex_print_deadlock(waiter); + + return ret; +} + +/** + * rt_mutex_next_owner - return the next owner of the lock + * + * @lock: the rt lock query + * + * Returns the next owner of the lock or NULL + * + * Caller has to serialize against other accessors to the lock + * itself. + * + * Special API call for PI-futex support + */ +struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) +{ + if (!rt_mutex_has_waiters(lock)) + return NULL; + + return rt_mutex_top_waiter(lock)->task; +} + +/** + * rt_mutex_finish_proxy_lock() - Complete lock acquisition + * @lock: the rt_mutex we were woken on + * @to: the timeout, null if none. hrtimer should already have + * been started. + * @waiter: the pre-initialized rt_mutex_waiter + * @detect_deadlock: perform deadlock detection (1) or not (0) + * + * Complete the lock acquisition started our behalf by another thread. + * + * Returns: + * 0 - success + * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * + * Special API call for PI-futex requeue support + */ +int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter, + int detect_deadlock) +{ + int ret; + + raw_spin_lock(&lock->wait_lock); + + set_current_state(TASK_INTERRUPTIBLE); + + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); + + set_current_state(TASK_RUNNING); + + if (unlikely(ret)) + remove_waiter(lock, waiter); + + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); + + raw_spin_unlock(&lock->wait_lock); + + return ret; +} diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h new file mode 100644 index 0000000..a1a1dd0 --- /dev/null +++ b/kernel/locking/rtmutex.h @@ -0,0 +1,26 @@ +/* + * RT-Mutexes: blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner: + * + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner + * + * This file contains macros used solely by rtmutex.c. + * Non-debug version. + */ + +#define rt_mutex_deadlock_check(l) (0) +#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) +#define rt_mutex_deadlock_account_unlock(l) do { } while (0) +#define debug_rt_mutex_init_waiter(w) do { } while (0) +#define debug_rt_mutex_free_waiter(w) do { } while (0) +#define debug_rt_mutex_lock(l) do { } while (0) +#define debug_rt_mutex_proxy_lock(l,p) do { } while (0) +#define debug_rt_mutex_proxy_unlock(l) do { } while (0) +#define debug_rt_mutex_unlock(l) do { } while (0) +#define debug_rt_mutex_init(m, n) do { } while (0) +#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0) +#define debug_rt_mutex_print_deadlock(w) do { } while (0) +#define debug_rt_mutex_detect_deadlock(w,d) (d) +#define debug_rt_mutex_reset_waiter(w) do { } while (0) diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h new file mode 100644 index 0000000..53a66c8 --- /dev/null +++ b/kernel/locking/rtmutex_common.h @@ -0,0 +1,126 @@ +/* + * RT Mutexes: blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner: + * + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner + * + * This file contains the private data structure and API definitions. + */ + +#ifndef __KERNEL_RTMUTEX_COMMON_H +#define __KERNEL_RTMUTEX_COMMON_H + +#include + +/* + * The rtmutex in kernel tester is independent of rtmutex debugging. We + * call schedule_rt_mutex_test() instead of schedule() for the tasks which + * belong to the tester. That way we can delay the wakeup path of those + * threads to provoke lock stealing and testing of complex boosting scenarios. + */ +#ifdef CONFIG_RT_MUTEX_TESTER + +extern void schedule_rt_mutex_test(struct rt_mutex *lock); + +#define schedule_rt_mutex(_lock) \ + do { \ + if (!(current->flags & PF_MUTEX_TESTER)) \ + schedule(); \ + else \ + schedule_rt_mutex_test(_lock); \ + } while (0) + +#else +# define schedule_rt_mutex(_lock) schedule() +#endif + +/* + * This is the control structure for tasks blocked on a rt_mutex, + * which is allocated on the kernel stack on of the blocked task. + * + * @list_entry: pi node to enqueue into the mutex waiters list + * @pi_list_entry: pi node to enqueue into the mutex owner waiters list + * @task: task reference to the blocked task + */ +struct rt_mutex_waiter { + struct plist_node list_entry; + struct plist_node pi_list_entry; + struct task_struct *task; + struct rt_mutex *lock; +#ifdef CONFIG_DEBUG_RT_MUTEXES + unsigned long ip; + struct pid *deadlock_task_pid; + struct rt_mutex *deadlock_lock; +#endif +}; + +/* + * Various helpers to access the waiters-plist: + */ +static inline int rt_mutex_has_waiters(struct rt_mutex *lock) +{ + return !plist_head_empty(&lock->wait_list); +} + +static inline struct rt_mutex_waiter * +rt_mutex_top_waiter(struct rt_mutex *lock) +{ + struct rt_mutex_waiter *w; + + w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter, + list_entry); + BUG_ON(w->lock != lock); + + return w; +} + +static inline int task_has_pi_waiters(struct task_struct *p) +{ + return !plist_head_empty(&p->pi_waiters); +} + +static inline struct rt_mutex_waiter * +task_top_pi_waiter(struct task_struct *p) +{ + return plist_first_entry(&p->pi_waiters, struct rt_mutex_waiter, + pi_list_entry); +} + +/* + * lock->owner state tracking: + */ +#define RT_MUTEX_HAS_WAITERS 1UL +#define RT_MUTEX_OWNER_MASKALL 1UL + +static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) +{ + return (struct task_struct *) + ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); +} + +/* + * PI-futex support (proxy locking functions, etc.): + */ +extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); +extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, + struct task_struct *proxy_owner); +extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, + struct task_struct *proxy_owner); +extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, + int detect_deadlock); +extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter, + int detect_deadlock); + +#ifdef CONFIG_DEBUG_RT_MUTEXES +# include "rtmutex-debug.h" +#else +# include "rtmutex.h" +#endif + +#endif diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3822ac0..6abb03d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1133,7 +1133,7 @@ void exit_rcu(void) #ifdef CONFIG_RCU_BOOST -#include "../rtmutex_common.h" +#include "../locking/rtmutex_common.h" #ifdef CONFIG_RCU_TRACE diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c deleted file mode 100644 index 13b243a..0000000 --- a/kernel/rtmutex-debug.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * RT-Mutexes: blocking mutual exclusion locks with PI support - * - * started by Ingo Molnar and Thomas Gleixner: - * - * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2006 Timesys Corp., Thomas Gleixner - * - * This code is based on the rt.c implementation in the preempt-rt tree. - * Portions of said code are - * - * Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey - * Copyright (C) 2006 Esben Nielsen - * Copyright (C) 2006 Kihon Technologies Inc., - * Steven Rostedt - * - * See rt.c in preempt-rt for proper credits and further information - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rtmutex_common.h" - -static void printk_task(struct task_struct *p) -{ - if (p) - printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio); - else - printk(""); -} - -static void printk_lock(struct rt_mutex *lock, int print_owner) -{ - if (lock->name) - printk(" [%p] {%s}\n", - lock, lock->name); - else - printk(" [%p] {%s:%d}\n", - lock, lock->file, lock->line); - - if (print_owner && rt_mutex_owner(lock)) { - printk(".. ->owner: %p\n", lock->owner); - printk(".. held by: "); - printk_task(rt_mutex_owner(lock)); - printk("\n"); - } -} - -void rt_mutex_debug_task_free(struct task_struct *task) -{ - DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters)); - DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); -} - -/* - * We fill out the fields in the waiter to store the information about - * the deadlock. We print when we return. act_waiter can be NULL in - * case of a remove waiter operation. - */ -void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, - struct rt_mutex *lock) -{ - struct task_struct *task; - - if (!debug_locks || detect || !act_waiter) - return; - - task = rt_mutex_owner(act_waiter->lock); - if (task && task != current) { - act_waiter->deadlock_task_pid = get_pid(task_pid(task)); - act_waiter->deadlock_lock = lock; - } -} - -void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) -{ - struct task_struct *task; - - if (!waiter->deadlock_lock || !debug_locks) - return; - - rcu_read_lock(); - task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID); - if (!task) { - rcu_read_unlock(); - return; - } - - if (!debug_locks_off()) { - rcu_read_unlock(); - return; - } - - printk("\n============================================\n"); - printk( "[ BUG: circular locking deadlock detected! ]\n"); - printk("%s\n", print_tainted()); - printk( "--------------------------------------------\n"); - printk("%s/%d is deadlocking current task %s/%d\n\n", - task->comm, task_pid_nr(task), - current->comm, task_pid_nr(current)); - - printk("\n1) %s/%d is trying to acquire this lock:\n", - current->comm, task_pid_nr(current)); - printk_lock(waiter->lock, 1); - - printk("\n2) %s/%d is blocked on this lock:\n", - task->comm, task_pid_nr(task)); - printk_lock(waiter->deadlock_lock, 1); - - debug_show_held_locks(current); - debug_show_held_locks(task); - - printk("\n%s/%d's [blocked] stackdump:\n\n", - task->comm, task_pid_nr(task)); - show_stack(task, NULL); - printk("\n%s/%d's [current] stackdump:\n\n", - current->comm, task_pid_nr(current)); - dump_stack(); - debug_show_all_locks(); - rcu_read_unlock(); - - printk("[ turning off deadlock detection." - "Please report this trace. ]\n\n"); -} - -void debug_rt_mutex_lock(struct rt_mutex *lock) -{ -} - -void debug_rt_mutex_unlock(struct rt_mutex *lock) -{ - DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); -} - -void -debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner) -{ -} - -void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) -{ - DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); -} - -void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) -{ - memset(waiter, 0x11, sizeof(*waiter)); - plist_node_init(&waiter->list_entry, MAX_PRIO); - plist_node_init(&waiter->pi_list_entry, MAX_PRIO); - waiter->deadlock_task_pid = NULL; -} - -void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) -{ - put_pid(waiter->deadlock_task_pid); - DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry)); - DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); - memset(waiter, 0x22, sizeof(*waiter)); -} - -void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) -{ - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lock->name = name; -} - -void -rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) -{ -} - -void rt_mutex_deadlock_account_unlock(struct task_struct *task) -{ -} - diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h deleted file mode 100644 index 14193d5..0000000 --- a/kernel/rtmutex-debug.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * RT-Mutexes: blocking mutual exclusion locks with PI support - * - * started by Ingo Molnar and Thomas Gleixner: - * - * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2006, Timesys Corp., Thomas Gleixner - * - * This file contains macros used solely by rtmutex.c. Debug version. - */ - -extern void -rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); -extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); -extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); -extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); -extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); -extern void debug_rt_mutex_lock(struct rt_mutex *lock); -extern void debug_rt_mutex_unlock(struct rt_mutex *lock); -extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, - struct task_struct *powner); -extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); -extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, - struct rt_mutex *lock); -extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); -# define debug_rt_mutex_reset_waiter(w) \ - do { (w)->deadlock_lock = NULL; } while (0) - -static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, - int detect) -{ - return (waiter != NULL); -} diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c deleted file mode 100644 index 1d96dd0..0000000 --- a/kernel/rtmutex-tester.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * RT-Mutex-tester: scriptable tester for rt mutexes - * - * started by Thomas Gleixner: - * - * Copyright (C) 2006, Timesys Corp., Thomas Gleixner - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rtmutex.h" - -#define MAX_RT_TEST_THREADS 8 -#define MAX_RT_TEST_MUTEXES 8 - -static spinlock_t rttest_lock; -static atomic_t rttest_event; - -struct test_thread_data { - int opcode; - int opdata; - int mutexes[MAX_RT_TEST_MUTEXES]; - int event; - struct device dev; -}; - -static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; -static struct task_struct *threads[MAX_RT_TEST_THREADS]; -static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; - -enum test_opcodes { - RTTEST_NOP = 0, - RTTEST_SCHEDOT, /* 1 Sched other, data = nice */ - RTTEST_SCHEDRT, /* 2 Sched fifo, data = prio */ - RTTEST_LOCK, /* 3 Lock uninterruptible, data = lockindex */ - RTTEST_LOCKNOWAIT, /* 4 Lock uninterruptible no wait in wakeup, data = lockindex */ - RTTEST_LOCKINT, /* 5 Lock interruptible, data = lockindex */ - RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ - RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ - RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ - /* 9, 10 - reserved for BKL commemoration */ - RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */ - RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ - RTTEST_RESET = 99, /* 99 Reset all pending operations */ -}; - -static int handle_op(struct test_thread_data *td, int lockwakeup) -{ - int i, id, ret = -EINVAL; - - switch(td->opcode) { - - case RTTEST_NOP: - return 0; - - case RTTEST_LOCKCONT: - td->mutexes[td->opdata] = 1; - td->event = atomic_add_return(1, &rttest_event); - return 0; - - case RTTEST_RESET: - for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) { - if (td->mutexes[i] == 4) { - rt_mutex_unlock(&mutexes[i]); - td->mutexes[i] = 0; - } - } - return 0; - - case RTTEST_RESETEVENT: - atomic_set(&rttest_event, 0); - return 0; - - default: - if (lockwakeup) - return ret; - } - - switch(td->opcode) { - - case RTTEST_LOCK: - case RTTEST_LOCKNOWAIT: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES) - return ret; - - td->mutexes[id] = 1; - td->event = atomic_add_return(1, &rttest_event); - rt_mutex_lock(&mutexes[id]); - td->event = atomic_add_return(1, &rttest_event); - td->mutexes[id] = 4; - return 0; - - case RTTEST_LOCKINT: - case RTTEST_LOCKINTNOWAIT: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES) - return ret; - - td->mutexes[id] = 1; - td->event = atomic_add_return(1, &rttest_event); - ret = rt_mutex_lock_interruptible(&mutexes[id], 0); - td->event = atomic_add_return(1, &rttest_event); - td->mutexes[id] = ret ? 0 : 4; - return ret ? -EINTR : 0; - - case RTTEST_UNLOCK: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4) - return ret; - - td->event = atomic_add_return(1, &rttest_event); - rt_mutex_unlock(&mutexes[id]); - td->event = atomic_add_return(1, &rttest_event); - td->mutexes[id] = 0; - return 0; - - default: - break; - } - return ret; -} - -/* - * Schedule replacement for rtsem_down(). Only called for threads with - * PF_MUTEX_TESTER set. - * - * This allows us to have finegrained control over the event flow. - * - */ -void schedule_rt_mutex_test(struct rt_mutex *mutex) -{ - int tid, op, dat; - struct test_thread_data *td; - - /* We have to lookup the task */ - for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) { - if (threads[tid] == current) - break; - } - - BUG_ON(tid == MAX_RT_TEST_THREADS); - - td = &thread_data[tid]; - - op = td->opcode; - dat = td->opdata; - - switch (op) { - case RTTEST_LOCK: - case RTTEST_LOCKINT: - case RTTEST_LOCKNOWAIT: - case RTTEST_LOCKINTNOWAIT: - if (mutex != &mutexes[dat]) - break; - - if (td->mutexes[dat] != 1) - break; - - td->mutexes[dat] = 2; - td->event = atomic_add_return(1, &rttest_event); - break; - - default: - break; - } - - schedule(); - - - switch (op) { - case RTTEST_LOCK: - case RTTEST_LOCKINT: - if (mutex != &mutexes[dat]) - return; - - if (td->mutexes[dat] != 2) - return; - - td->mutexes[dat] = 3; - td->event = atomic_add_return(1, &rttest_event); - break; - - case RTTEST_LOCKNOWAIT: - case RTTEST_LOCKINTNOWAIT: - if (mutex != &mutexes[dat]) - return; - - if (td->mutexes[dat] != 2) - return; - - td->mutexes[dat] = 1; - td->event = atomic_add_return(1, &rttest_event); - return; - - default: - return; - } - - td->opcode = 0; - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - - if (td->opcode > 0) { - int ret; - - set_current_state(TASK_RUNNING); - ret = handle_op(td, 1); - set_current_state(TASK_INTERRUPTIBLE); - if (td->opcode == RTTEST_LOCKCONT) - break; - td->opcode = ret; - } - - /* Wait for the next command to be executed */ - schedule(); - } - - /* Restore previous command and data */ - td->opcode = op; - td->opdata = dat; -} - -static int test_func(void *data) -{ - struct test_thread_data *td = data; - int ret; - - current->flags |= PF_MUTEX_TESTER; - set_freezable(); - allow_signal(SIGHUP); - - for(;;) { - - set_current_state(TASK_INTERRUPTIBLE); - - if (td->opcode > 0) { - set_current_state(TASK_RUNNING); - ret = handle_op(td, 0); - set_current_state(TASK_INTERRUPTIBLE); - td->opcode = ret; - } - - /* Wait for the next command to be executed */ - schedule(); - try_to_freeze(); - - if (signal_pending(current)) - flush_signals(current); - - if(kthread_should_stop()) - break; - } - return 0; -} - -/** - * sysfs_test_command - interface for test commands - * @dev: thread reference - * @buf: command for actual step - * @count: length of buffer - * - * command syntax: - * - * opcode:data - */ -static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sched_param schedpar; - struct test_thread_data *td; - char cmdbuf[32]; - int op, dat, tid, ret; - - td = container_of(dev, struct test_thread_data, dev); - tid = td->dev.id; - - /* strings from sysfs write are not 0 terminated! */ - if (count >= sizeof(cmdbuf)) - return -EINVAL; - - /* strip of \n: */ - if (buf[count-1] == '\n') - count--; - if (count < 1) - return -EINVAL; - - memcpy(cmdbuf, buf, count); - cmdbuf[count] = 0; - - if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2) - return -EINVAL; - - switch (op) { - case RTTEST_SCHEDOT: - schedpar.sched_priority = 0; - ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar); - if (ret) - return ret; - set_user_nice(current, 0); - break; - - case RTTEST_SCHEDRT: - schedpar.sched_priority = dat; - ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar); - if (ret) - return ret; - break; - - case RTTEST_SIGNAL: - send_sig(SIGHUP, threads[tid], 0); - break; - - default: - if (td->opcode > 0) - return -EBUSY; - td->opdata = dat; - td->opcode = op; - wake_up_process(threads[tid]); - } - - return count; -} - -/** - * sysfs_test_status - sysfs interface for rt tester - * @dev: thread to query - * @buf: char buffer to be filled with thread status info - */ -static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct test_thread_data *td; - struct task_struct *tsk; - char *curr = buf; - int i; - - td = container_of(dev, struct test_thread_data, dev); - tsk = threads[td->dev.id]; - - spin_lock(&rttest_lock); - - curr += sprintf(curr, - "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:", - td->opcode, td->event, tsk->state, - (MAX_RT_PRIO - 1) - tsk->prio, - (MAX_RT_PRIO - 1) - tsk->normal_prio, - tsk->pi_blocked_on); - - for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--) - curr += sprintf(curr, "%d", td->mutexes[i]); - - spin_unlock(&rttest_lock); - - curr += sprintf(curr, ", T: %p, R: %p\n", tsk, - mutexes[td->dev.id].owner); - - return curr - buf; -} - -static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL); -static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command); - -static struct bus_type rttest_subsys = { - .name = "rttest", - .dev_name = "rttest", -}; - -static int init_test_thread(int id) -{ - thread_data[id].dev.bus = &rttest_subsys; - thread_data[id].dev.id = id; - - threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id); - if (IS_ERR(threads[id])) - return PTR_ERR(threads[id]); - - return device_register(&thread_data[id].dev); -} - -static int init_rttest(void) -{ - int ret, i; - - spin_lock_init(&rttest_lock); - - for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) - rt_mutex_init(&mutexes[i]); - - ret = subsys_system_register(&rttest_subsys, NULL); - if (ret) - return ret; - - for (i = 0; i < MAX_RT_TEST_THREADS; i++) { - ret = init_test_thread(i); - if (ret) - break; - ret = device_create_file(&thread_data[i].dev, &dev_attr_status); - if (ret) - break; - ret = device_create_file(&thread_data[i].dev, &dev_attr_command); - if (ret) - break; - } - - printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" ); - - return ret; -} - -device_initcall(init_rttest); diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c deleted file mode 100644 index 0dd6aec..0000000 --- a/kernel/rtmutex.c +++ /dev/null @@ -1,1060 +0,0 @@ -/* - * RT-Mutexes: simple blocking mutual exclusion locks with PI support - * - * started by Ingo Molnar and Thomas Gleixner. - * - * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner - * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt - * Copyright (C) 2006 Esben Nielsen - * - * See Documentation/rt-mutex-design.txt for details. - */ -#include -#include -#include -#include -#include - -#include "rtmutex_common.h" - -/* - * lock->owner state tracking: - * - * lock->owner holds the task_struct pointer of the owner. Bit 0 - * is used to keep track of the "lock has waiters" state. - * - * owner bit0 - * NULL 0 lock is free (fast acquire possible) - * NULL 1 lock is free and has waiters and the top waiter - * is going to take the lock* - * taskpointer 0 lock is held (fast release possible) - * taskpointer 1 lock is held and has waiters** - * - * The fast atomic compare exchange based acquire and release is only - * possible when bit 0 of lock->owner is 0. - * - * (*) It also can be a transitional state when grabbing the lock - * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, - * we need to set the bit0 before looking at the lock, and the owner may be - * NULL in this small time, hence this can be a transitional state. - * - * (**) There is a small time when bit 0 is set but there are no - * waiters. This can happen when grabbing the lock in the slow path. - * To prevent a cmpxchg of the owner releasing the lock, we need to - * set this bit before looking at the lock. - */ - -static void -rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) -{ - unsigned long val = (unsigned long)owner; - - if (rt_mutex_has_waiters(lock)) - val |= RT_MUTEX_HAS_WAITERS; - - lock->owner = (struct task_struct *)val; -} - -static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) -{ - lock->owner = (struct task_struct *) - ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); -} - -static void fixup_rt_mutex_waiters(struct rt_mutex *lock) -{ - if (!rt_mutex_has_waiters(lock)) - clear_rt_mutex_waiters(lock); -} - -/* - * We can speed up the acquire/release, if the architecture - * supports cmpxchg and if there's no debugging state to be set up - */ -#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) -# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) -static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) -{ - unsigned long owner, *p = (unsigned long *) &lock->owner; - - do { - owner = *p; - } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); -} -#else -# define rt_mutex_cmpxchg(l,c,n) (0) -static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) -{ - lock->owner = (struct task_struct *) - ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); -} -#endif - -/* - * Calculate task priority from the waiter list priority - * - * Return task->normal_prio when the waiter list is empty or when - * the waiter is not allowed to do priority boosting - */ -int rt_mutex_getprio(struct task_struct *task) -{ - if (likely(!task_has_pi_waiters(task))) - return task->normal_prio; - - return min(task_top_pi_waiter(task)->pi_list_entry.prio, - task->normal_prio); -} - -/* - * Adjust the priority of a task, after its pi_waiters got modified. - * - * This can be both boosting and unboosting. task->pi_lock must be held. - */ -static void __rt_mutex_adjust_prio(struct task_struct *task) -{ - int prio = rt_mutex_getprio(task); - - if (task->prio != prio) - rt_mutex_setprio(task, prio); -} - -/* - * Adjust task priority (undo boosting). Called from the exit path of - * rt_mutex_slowunlock() and rt_mutex_slowlock(). - * - * (Note: We do this outside of the protection of lock->wait_lock to - * allow the lock to be taken while or before we readjust the priority - * of task. We do not use the spin_xx_mutex() variants here as we are - * outside of the debug path.) - */ -static void rt_mutex_adjust_prio(struct task_struct *task) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&task->pi_lock, flags); - __rt_mutex_adjust_prio(task); - raw_spin_unlock_irqrestore(&task->pi_lock, flags); -} - -/* - * Max number of times we'll walk the boosting chain: - */ -int max_lock_depth = 1024; - -/* - * Adjust the priority chain. Also used for deadlock detection. - * Decreases task's usage by one - may thus free the task. - * - * @task: the task owning the mutex (owner) for which a chain walk is probably - * needed - * @deadlock_detect: do we have to carry out deadlock detection? - * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck - * things for a task that has just got its priority adjusted, and - * is waiting on a mutex) - * @orig_waiter: rt_mutex_waiter struct for the task that has just donated - * its priority to the mutex owner (can be NULL in the case - * depicted above or if the top waiter is gone away and we are - * actually deboosting the owner) - * @top_task: the current top waiter - * - * Returns 0 or -EDEADLK. - */ -static int rt_mutex_adjust_prio_chain(struct task_struct *task, - int deadlock_detect, - struct rt_mutex *orig_lock, - struct rt_mutex_waiter *orig_waiter, - struct task_struct *top_task) -{ - struct rt_mutex *lock; - struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; - int detect_deadlock, ret = 0, depth = 0; - unsigned long flags; - - detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, - deadlock_detect); - - /* - * The (de)boosting is a step by step approach with a lot of - * pitfalls. We want this to be preemptible and we want hold a - * maximum of two locks per step. So we have to check - * carefully whether things change under us. - */ - again: - if (++depth > max_lock_depth) { - static int prev_max; - - /* - * Print this only once. If the admin changes the limit, - * print a new message when reaching the limit again. - */ - if (prev_max != max_lock_depth) { - prev_max = max_lock_depth; - printk(KERN_WARNING "Maximum lock depth %d reached " - "task: %s (%d)\n", max_lock_depth, - top_task->comm, task_pid_nr(top_task)); - } - put_task_struct(task); - - return deadlock_detect ? -EDEADLK : 0; - } - retry: - /* - * Task can not go away as we did a get_task() before ! - */ - raw_spin_lock_irqsave(&task->pi_lock, flags); - - waiter = task->pi_blocked_on; - /* - * Check whether the end of the boosting chain has been - * reached or the state of the chain has changed while we - * dropped the locks. - */ - if (!waiter) - goto out_unlock_pi; - - /* - * Check the orig_waiter state. After we dropped the locks, - * the previous owner of the lock might have released the lock. - */ - if (orig_waiter && !rt_mutex_owner(orig_lock)) - goto out_unlock_pi; - - /* - * Drop out, when the task has no waiters. Note, - * top_waiter can be NULL, when we are in the deboosting - * mode! - */ - if (top_waiter && (!task_has_pi_waiters(task) || - top_waiter != task_top_pi_waiter(task))) - goto out_unlock_pi; - - /* - * When deadlock detection is off then we check, if further - * priority adjustment is necessary. - */ - if (!detect_deadlock && waiter->list_entry.prio == task->prio) - goto out_unlock_pi; - - lock = waiter->lock; - if (!raw_spin_trylock(&lock->wait_lock)) { - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - cpu_relax(); - goto retry; - } - - /* Deadlock detection */ - if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { - debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); - raw_spin_unlock(&lock->wait_lock); - ret = deadlock_detect ? -EDEADLK : 0; - goto out_unlock_pi; - } - - top_waiter = rt_mutex_top_waiter(lock); - - /* Requeue the waiter */ - plist_del(&waiter->list_entry, &lock->wait_list); - waiter->list_entry.prio = task->prio; - plist_add(&waiter->list_entry, &lock->wait_list); - - /* Release the task */ - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - if (!rt_mutex_owner(lock)) { - /* - * If the requeue above changed the top waiter, then we need - * to wake the new top waiter up to try to get the lock. - */ - - if (top_waiter != rt_mutex_top_waiter(lock)) - wake_up_process(rt_mutex_top_waiter(lock)->task); - raw_spin_unlock(&lock->wait_lock); - goto out_put_task; - } - put_task_struct(task); - - /* Grab the next task */ - task = rt_mutex_owner(lock); - get_task_struct(task); - raw_spin_lock_irqsave(&task->pi_lock, flags); - - if (waiter == rt_mutex_top_waiter(lock)) { - /* Boost the owner */ - plist_del(&top_waiter->pi_list_entry, &task->pi_waiters); - waiter->pi_list_entry.prio = waiter->list_entry.prio; - plist_add(&waiter->pi_list_entry, &task->pi_waiters); - __rt_mutex_adjust_prio(task); - - } else if (top_waiter == waiter) { - /* Deboost the owner */ - plist_del(&waiter->pi_list_entry, &task->pi_waiters); - waiter = rt_mutex_top_waiter(lock); - waiter->pi_list_entry.prio = waiter->list_entry.prio; - plist_add(&waiter->pi_list_entry, &task->pi_waiters); - __rt_mutex_adjust_prio(task); - } - - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - top_waiter = rt_mutex_top_waiter(lock); - raw_spin_unlock(&lock->wait_lock); - - if (!detect_deadlock && waiter != top_waiter) - goto out_put_task; - - goto again; - - out_unlock_pi: - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - out_put_task: - put_task_struct(task); - - return ret; -} - -/* - * Try to take an rt-mutex - * - * Must be called with lock->wait_lock held. - * - * @lock: the lock to be acquired. - * @task: the task which wants to acquire the lock - * @waiter: the waiter that is queued to the lock's wait list. (could be NULL) - */ -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, - struct rt_mutex_waiter *waiter) -{ - /* - * We have to be careful here if the atomic speedups are - * enabled, such that, when - * - no other waiter is on the lock - * - the lock has been released since we did the cmpxchg - * the lock can be released or taken while we are doing the - * checks and marking the lock with RT_MUTEX_HAS_WAITERS. - * - * The atomic acquire/release aware variant of - * mark_rt_mutex_waiters uses a cmpxchg loop. After setting - * the WAITERS bit, the atomic release / acquire can not - * happen anymore and lock->wait_lock protects us from the - * non-atomic case. - * - * Note, that this might set lock->owner = - * RT_MUTEX_HAS_WAITERS in the case the lock is not contended - * any more. This is fixed up when we take the ownership. - * This is the transitional state explained at the top of this file. - */ - mark_rt_mutex_waiters(lock); - - if (rt_mutex_owner(lock)) - return 0; - - /* - * It will get the lock because of one of these conditions: - * 1) there is no waiter - * 2) higher priority than waiters - * 3) it is top waiter - */ - if (rt_mutex_has_waiters(lock)) { - if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) { - if (!waiter || waiter != rt_mutex_top_waiter(lock)) - return 0; - } - } - - if (waiter || rt_mutex_has_waiters(lock)) { - unsigned long flags; - struct rt_mutex_waiter *top; - - raw_spin_lock_irqsave(&task->pi_lock, flags); - - /* remove the queued waiter. */ - if (waiter) { - plist_del(&waiter->list_entry, &lock->wait_list); - task->pi_blocked_on = NULL; - } - - /* - * We have to enqueue the top waiter(if it exists) into - * task->pi_waiters list. - */ - if (rt_mutex_has_waiters(lock)) { - top = rt_mutex_top_waiter(lock); - top->pi_list_entry.prio = top->list_entry.prio; - plist_add(&top->pi_list_entry, &task->pi_waiters); - } - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - } - - /* We got the lock. */ - debug_rt_mutex_lock(lock); - - rt_mutex_set_owner(lock, task); - - rt_mutex_deadlock_account_lock(lock, task); - - return 1; -} - -/* - * Task blocks on lock. - * - * Prepare waiter and propagate pi chain - * - * This must be called with lock->wait_lock held. - */ -static int task_blocks_on_rt_mutex(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task, - int detect_deadlock) -{ - struct task_struct *owner = rt_mutex_owner(lock); - struct rt_mutex_waiter *top_waiter = waiter; - unsigned long flags; - int chain_walk = 0, res; - - raw_spin_lock_irqsave(&task->pi_lock, flags); - __rt_mutex_adjust_prio(task); - waiter->task = task; - waiter->lock = lock; - plist_node_init(&waiter->list_entry, task->prio); - plist_node_init(&waiter->pi_list_entry, task->prio); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) - top_waiter = rt_mutex_top_waiter(lock); - plist_add(&waiter->list_entry, &lock->wait_list); - - task->pi_blocked_on = waiter; - - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - if (!owner) - return 0; - - if (waiter == rt_mutex_top_waiter(lock)) { - raw_spin_lock_irqsave(&owner->pi_lock, flags); - plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); - plist_add(&waiter->pi_list_entry, &owner->pi_waiters); - - __rt_mutex_adjust_prio(owner); - if (owner->pi_blocked_on) - chain_walk = 1; - raw_spin_unlock_irqrestore(&owner->pi_lock, flags); - } - else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) - chain_walk = 1; - - if (!chain_walk) - return 0; - - /* - * The owner can't disappear while holding a lock, - * so the owner struct is protected by wait_lock. - * Gets dropped in rt_mutex_adjust_prio_chain()! - */ - get_task_struct(owner); - - raw_spin_unlock(&lock->wait_lock); - - res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, - task); - - raw_spin_lock(&lock->wait_lock); - - return res; -} - -/* - * Wake up the next waiter on the lock. - * - * Remove the top waiter from the current tasks waiter list and wake it up. - * - * Called with lock->wait_lock held. - */ -static void wakeup_next_waiter(struct rt_mutex *lock) -{ - struct rt_mutex_waiter *waiter; - unsigned long flags; - - raw_spin_lock_irqsave(¤t->pi_lock, flags); - - waiter = rt_mutex_top_waiter(lock); - - /* - * Remove it from current->pi_waiters. We do not adjust a - * possible priority boost right now. We execute wakeup in the - * boosted mode and go back to normal after releasing - * lock->wait_lock. - */ - plist_del(&waiter->pi_list_entry, ¤t->pi_waiters); - - rt_mutex_set_owner(lock, NULL); - - raw_spin_unlock_irqrestore(¤t->pi_lock, flags); - - wake_up_process(waiter->task); -} - -/* - * Remove a waiter from a lock and give up - * - * Must be called with lock->wait_lock held and - * have just failed to try_to_take_rt_mutex(). - */ -static void remove_waiter(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter) -{ - int first = (waiter == rt_mutex_top_waiter(lock)); - struct task_struct *owner = rt_mutex_owner(lock); - unsigned long flags; - int chain_walk = 0; - - raw_spin_lock_irqsave(¤t->pi_lock, flags); - plist_del(&waiter->list_entry, &lock->wait_list); - current->pi_blocked_on = NULL; - raw_spin_unlock_irqrestore(¤t->pi_lock, flags); - - if (!owner) - return; - - if (first) { - - raw_spin_lock_irqsave(&owner->pi_lock, flags); - - plist_del(&waiter->pi_list_entry, &owner->pi_waiters); - - if (rt_mutex_has_waiters(lock)) { - struct rt_mutex_waiter *next; - - next = rt_mutex_top_waiter(lock); - plist_add(&next->pi_list_entry, &owner->pi_waiters); - } - __rt_mutex_adjust_prio(owner); - - if (owner->pi_blocked_on) - chain_walk = 1; - - raw_spin_unlock_irqrestore(&owner->pi_lock, flags); - } - - WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); - - if (!chain_walk) - return; - - /* gets dropped in rt_mutex_adjust_prio_chain()! */ - get_task_struct(owner); - - raw_spin_unlock(&lock->wait_lock); - - rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); - - raw_spin_lock(&lock->wait_lock); -} - -/* - * Recheck the pi chain, in case we got a priority setting - * - * Called from sched_setscheduler - */ -void rt_mutex_adjust_pi(struct task_struct *task) -{ - struct rt_mutex_waiter *waiter; - unsigned long flags; - - raw_spin_lock_irqsave(&task->pi_lock, flags); - - waiter = task->pi_blocked_on; - if (!waiter || waiter->list_entry.prio == task->prio) { - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - return; - } - - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - /* gets dropped in rt_mutex_adjust_prio_chain()! */ - get_task_struct(task); - rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); -} - -/** - * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop - * @lock: the rt_mutex to take - * @state: the state the task should block in (TASK_INTERRUPTIBLE - * or TASK_UNINTERRUPTIBLE) - * @timeout: the pre-initialized and started timer, or NULL for none - * @waiter: the pre-initialized rt_mutex_waiter - * - * lock->wait_lock must be held by the caller. - */ -static int __sched -__rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - struct rt_mutex_waiter *waiter) -{ - int ret = 0; - - for (;;) { - /* Try to acquire the lock: */ - if (try_to_take_rt_mutex(lock, current, waiter)) - break; - - /* - * TASK_INTERRUPTIBLE checks for signals and - * timeout. Ignored otherwise. - */ - if (unlikely(state == TASK_INTERRUPTIBLE)) { - /* Signal pending? */ - if (signal_pending(current)) - ret = -EINTR; - if (timeout && !timeout->task) - ret = -ETIMEDOUT; - if (ret) - break; - } - - raw_spin_unlock(&lock->wait_lock); - - debug_rt_mutex_print_deadlock(waiter); - - schedule_rt_mutex(lock); - - raw_spin_lock(&lock->wait_lock); - set_current_state(state); - } - - return ret; -} - -/* - * Slow path lock function: - */ -static int __sched -rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - int detect_deadlock) -{ - struct rt_mutex_waiter waiter; - int ret = 0; - - debug_rt_mutex_init_waiter(&waiter); - - raw_spin_lock(&lock->wait_lock); - - /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock, current, NULL)) { - raw_spin_unlock(&lock->wait_lock); - return 0; - } - - set_current_state(state); - - /* Setup the timer, when timeout != NULL */ - if (unlikely(timeout)) { - hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); - if (!hrtimer_active(&timeout->timer)) - timeout->task = NULL; - } - - ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock); - - if (likely(!ret)) - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); - - set_current_state(TASK_RUNNING); - - if (unlikely(ret)) - remove_waiter(lock, &waiter); - - /* - * try_to_take_rt_mutex() sets the waiter bit - * unconditionally. We might have to fix that up. - */ - fixup_rt_mutex_waiters(lock); - - raw_spin_unlock(&lock->wait_lock); - - /* Remove pending timer: */ - if (unlikely(timeout)) - hrtimer_cancel(&timeout->timer); - - debug_rt_mutex_free_waiter(&waiter); - - return ret; -} - -/* - * Slow path try-lock function: - */ -static inline int -rt_mutex_slowtrylock(struct rt_mutex *lock) -{ - int ret = 0; - - raw_spin_lock(&lock->wait_lock); - - if (likely(rt_mutex_owner(lock) != current)) { - - ret = try_to_take_rt_mutex(lock, current, NULL); - /* - * try_to_take_rt_mutex() sets the lock waiters - * bit unconditionally. Clean this up. - */ - fixup_rt_mutex_waiters(lock); - } - - raw_spin_unlock(&lock->wait_lock); - - return ret; -} - -/* - * Slow path to release a rt-mutex: - */ -static void __sched -rt_mutex_slowunlock(struct rt_mutex *lock) -{ - raw_spin_lock(&lock->wait_lock); - - debug_rt_mutex_unlock(lock); - - rt_mutex_deadlock_account_unlock(current); - - if (!rt_mutex_has_waiters(lock)) { - lock->owner = NULL; - raw_spin_unlock(&lock->wait_lock); - return; - } - - wakeup_next_waiter(lock); - - raw_spin_unlock(&lock->wait_lock); - - /* Undo pi boosting if necessary: */ - rt_mutex_adjust_prio(current); -} - -/* - * debug aware fast / slowpath lock,trylock,unlock - * - * The atomic acquire/release ops are compiled away, when either the - * architecture does not support cmpxchg or when debugging is enabled. - */ -static inline int -rt_mutex_fastlock(struct rt_mutex *lock, int state, - int detect_deadlock, - int (*slowfn)(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - int detect_deadlock)) -{ - if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { - rt_mutex_deadlock_account_lock(lock, current); - return 0; - } else - return slowfn(lock, state, NULL, detect_deadlock); -} - -static inline int -rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, int detect_deadlock, - int (*slowfn)(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - int detect_deadlock)) -{ - if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { - rt_mutex_deadlock_account_lock(lock, current); - return 0; - } else - return slowfn(lock, state, timeout, detect_deadlock); -} - -static inline int -rt_mutex_fasttrylock(struct rt_mutex *lock, - int (*slowfn)(struct rt_mutex *lock)) -{ - if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { - rt_mutex_deadlock_account_lock(lock, current); - return 1; - } - return slowfn(lock); -} - -static inline void -rt_mutex_fastunlock(struct rt_mutex *lock, - void (*slowfn)(struct rt_mutex *lock)) -{ - if (likely(rt_mutex_cmpxchg(lock, current, NULL))) - rt_mutex_deadlock_account_unlock(current); - else - slowfn(lock); -} - -/** - * rt_mutex_lock - lock a rt_mutex - * - * @lock: the rt_mutex to be locked - */ -void __sched rt_mutex_lock(struct rt_mutex *lock) -{ - might_sleep(); - - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock); -} -EXPORT_SYMBOL_GPL(rt_mutex_lock); - -/** - * rt_mutex_lock_interruptible - lock a rt_mutex interruptible - * - * @lock: the rt_mutex to be locked - * @detect_deadlock: deadlock detection on/off - * - * Returns: - * 0 on success - * -EINTR when interrupted by a signal - * -EDEADLK when the lock would deadlock (when deadlock detection is on) - */ -int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, - int detect_deadlock) -{ - might_sleep(); - - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, - detect_deadlock, rt_mutex_slowlock); -} -EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); - -/** - * rt_mutex_timed_lock - lock a rt_mutex interruptible - * the timeout structure is provided - * by the caller - * - * @lock: the rt_mutex to be locked - * @timeout: timeout structure or NULL (no timeout) - * @detect_deadlock: deadlock detection on/off - * - * Returns: - * 0 on success - * -EINTR when interrupted by a signal - * -ETIMEDOUT when the timeout expired - * -EDEADLK when the lock would deadlock (when deadlock detection is on) - */ -int -rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, - int detect_deadlock) -{ - might_sleep(); - - return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, - detect_deadlock, rt_mutex_slowlock); -} -EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); - -/** - * rt_mutex_trylock - try to lock a rt_mutex - * - * @lock: the rt_mutex to be locked - * - * Returns 1 on success and 0 on contention - */ -int __sched rt_mutex_trylock(struct rt_mutex *lock) -{ - return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); -} -EXPORT_SYMBOL_GPL(rt_mutex_trylock); - -/** - * rt_mutex_unlock - unlock a rt_mutex - * - * @lock: the rt_mutex to be unlocked - */ -void __sched rt_mutex_unlock(struct rt_mutex *lock) -{ - rt_mutex_fastunlock(lock, rt_mutex_slowunlock); -} -EXPORT_SYMBOL_GPL(rt_mutex_unlock); - -/** - * rt_mutex_destroy - mark a mutex unusable - * @lock: the mutex to be destroyed - * - * This function marks the mutex uninitialized, and any subsequent - * use of the mutex is forbidden. The mutex must not be locked when - * this function is called. - */ -void rt_mutex_destroy(struct rt_mutex *lock) -{ - WARN_ON(rt_mutex_is_locked(lock)); -#ifdef CONFIG_DEBUG_RT_MUTEXES - lock->magic = NULL; -#endif -} - -EXPORT_SYMBOL_GPL(rt_mutex_destroy); - -/** - * __rt_mutex_init - initialize the rt lock - * - * @lock: the rt lock to be initialized - * - * Initialize the rt lock to unlocked state. - * - * Initializing of a locked rt lock is not allowed - */ -void __rt_mutex_init(struct rt_mutex *lock, const char *name) -{ - lock->owner = NULL; - raw_spin_lock_init(&lock->wait_lock); - plist_head_init(&lock->wait_list); - - debug_rt_mutex_init(lock, name); -} -EXPORT_SYMBOL_GPL(__rt_mutex_init); - -/** - * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a - * proxy owner - * - * @lock: the rt_mutex to be locked - * @proxy_owner:the task to set as owner - * - * No locking. Caller has to do serializing itself - * Special API call for PI-futex support - */ -void rt_mutex_init_proxy_locked(struct rt_mutex *lock, - struct task_struct *proxy_owner) -{ - __rt_mutex_init(lock, NULL); - debug_rt_mutex_proxy_lock(lock, proxy_owner); - rt_mutex_set_owner(lock, proxy_owner); - rt_mutex_deadlock_account_lock(lock, proxy_owner); -} - -/** - * rt_mutex_proxy_unlock - release a lock on behalf of owner - * - * @lock: the rt_mutex to be locked - * - * No locking. Caller has to do serializing itself - * Special API call for PI-futex support - */ -void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner) -{ - debug_rt_mutex_proxy_unlock(lock); - rt_mutex_set_owner(lock, NULL); - rt_mutex_deadlock_account_unlock(proxy_owner); -} - -/** - * rt_mutex_start_proxy_lock() - Start lock acquisition for another task - * @lock: the rt_mutex to take - * @waiter: the pre-initialized rt_mutex_waiter - * @task: the task to prepare - * @detect_deadlock: perform deadlock detection (1) or not (0) - * - * Returns: - * 0 - task blocked on lock - * 1 - acquired the lock for task, caller should wake it up - * <0 - error - * - * Special API call for FUTEX_REQUEUE_PI support. - */ -int rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task, int detect_deadlock) -{ - int ret; - - raw_spin_lock(&lock->wait_lock); - - if (try_to_take_rt_mutex(lock, task, NULL)) { - raw_spin_unlock(&lock->wait_lock); - return 1; - } - - ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); - - if (ret && !rt_mutex_owner(lock)) { - /* - * Reset the return value. We might have - * returned with -EDEADLK and the owner - * released the lock while we were walking the - * pi chain. Let the waiter sort it out. - */ - ret = 0; - } - - if (unlikely(ret)) - remove_waiter(lock, waiter); - - raw_spin_unlock(&lock->wait_lock); - - debug_rt_mutex_print_deadlock(waiter); - - return ret; -} - -/** - * rt_mutex_next_owner - return the next owner of the lock - * - * @lock: the rt lock query - * - * Returns the next owner of the lock or NULL - * - * Caller has to serialize against other accessors to the lock - * itself. - * - * Special API call for PI-futex support - */ -struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) -{ - if (!rt_mutex_has_waiters(lock)) - return NULL; - - return rt_mutex_top_waiter(lock)->task; -} - -/** - * rt_mutex_finish_proxy_lock() - Complete lock acquisition - * @lock: the rt_mutex we were woken on - * @to: the timeout, null if none. hrtimer should already have - * been started. - * @waiter: the pre-initialized rt_mutex_waiter - * @detect_deadlock: perform deadlock detection (1) or not (0) - * - * Complete the lock acquisition started our behalf by another thread. - * - * Returns: - * 0 - success - * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK - * - * Special API call for PI-futex requeue support - */ -int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter, - int detect_deadlock) -{ - int ret; - - raw_spin_lock(&lock->wait_lock); - - set_current_state(TASK_INTERRUPTIBLE); - - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); - - set_current_state(TASK_RUNNING); - - if (unlikely(ret)) - remove_waiter(lock, waiter); - - /* - * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might - * have to fix that up. - */ - fixup_rt_mutex_waiters(lock); - - raw_spin_unlock(&lock->wait_lock); - - return ret; -} diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h deleted file mode 100644 index a1a1dd0..0000000 --- a/kernel/rtmutex.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * RT-Mutexes: blocking mutual exclusion locks with PI support - * - * started by Ingo Molnar and Thomas Gleixner: - * - * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2006, Timesys Corp., Thomas Gleixner - * - * This file contains macros used solely by rtmutex.c. - * Non-debug version. - */ - -#define rt_mutex_deadlock_check(l) (0) -#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) -#define rt_mutex_deadlock_account_unlock(l) do { } while (0) -#define debug_rt_mutex_init_waiter(w) do { } while (0) -#define debug_rt_mutex_free_waiter(w) do { } while (0) -#define debug_rt_mutex_lock(l) do { } while (0) -#define debug_rt_mutex_proxy_lock(l,p) do { } while (0) -#define debug_rt_mutex_proxy_unlock(l) do { } while (0) -#define debug_rt_mutex_unlock(l) do { } while (0) -#define debug_rt_mutex_init(m, n) do { } while (0) -#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0) -#define debug_rt_mutex_print_deadlock(w) do { } while (0) -#define debug_rt_mutex_detect_deadlock(w,d) (d) -#define debug_rt_mutex_reset_waiter(w) do { } while (0) diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h deleted file mode 100644 index 53a66c8..0000000 --- a/kernel/rtmutex_common.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * RT Mutexes: blocking mutual exclusion locks with PI support - * - * started by Ingo Molnar and Thomas Gleixner: - * - * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2006, Timesys Corp., Thomas Gleixner - * - * This file contains the private data structure and API definitions. - */ - -#ifndef __KERNEL_RTMUTEX_COMMON_H -#define __KERNEL_RTMUTEX_COMMON_H - -#include - -/* - * The rtmutex in kernel tester is independent of rtmutex debugging. We - * call schedule_rt_mutex_test() instead of schedule() for the tasks which - * belong to the tester. That way we can delay the wakeup path of those - * threads to provoke lock stealing and testing of complex boosting scenarios. - */ -#ifdef CONFIG_RT_MUTEX_TESTER - -extern void schedule_rt_mutex_test(struct rt_mutex *lock); - -#define schedule_rt_mutex(_lock) \ - do { \ - if (!(current->flags & PF_MUTEX_TESTER)) \ - schedule(); \ - else \ - schedule_rt_mutex_test(_lock); \ - } while (0) - -#else -# define schedule_rt_mutex(_lock) schedule() -#endif - -/* - * This is the control structure for tasks blocked on a rt_mutex, - * which is allocated on the kernel stack on of the blocked task. - * - * @list_entry: pi node to enqueue into the mutex waiters list - * @pi_list_entry: pi node to enqueue into the mutex owner waiters list - * @task: task reference to the blocked task - */ -struct rt_mutex_waiter { - struct plist_node list_entry; - struct plist_node pi_list_entry; - struct task_struct *task; - struct rt_mutex *lock; -#ifdef CONFIG_DEBUG_RT_MUTEXES - unsigned long ip; - struct pid *deadlock_task_pid; - struct rt_mutex *deadlock_lock; -#endif -}; - -/* - * Various helpers to access the waiters-plist: - */ -static inline int rt_mutex_has_waiters(struct rt_mutex *lock) -{ - return !plist_head_empty(&lock->wait_list); -} - -static inline struct rt_mutex_waiter * -rt_mutex_top_waiter(struct rt_mutex *lock) -{ - struct rt_mutex_waiter *w; - - w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter, - list_entry); - BUG_ON(w->lock != lock); - - return w; -} - -static inline int task_has_pi_waiters(struct task_struct *p) -{ - return !plist_head_empty(&p->pi_waiters); -} - -static inline struct rt_mutex_waiter * -task_top_pi_waiter(struct task_struct *p) -{ - return plist_first_entry(&p->pi_waiters, struct rt_mutex_waiter, - pi_list_entry); -} - -/* - * lock->owner state tracking: - */ -#define RT_MUTEX_HAS_WAITERS 1UL -#define RT_MUTEX_OWNER_MASKALL 1UL - -static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) -{ - return (struct task_struct *) - ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); -} - -/* - * PI-futex support (proxy locking functions, etc.): - */ -extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); -extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, - struct task_struct *proxy_owner); -extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); -extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task, - int detect_deadlock); -extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter, - int detect_deadlock); - -#ifdef CONFIG_DEBUG_RT_MUTEXES -# include "rtmutex-debug.h" -#else -# include "rtmutex.h" -#endif - -#endif -- cgit v1.1 From ed428bfc3caaa4b1e6cd15ea12c90c30291903f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:19:28 +0100 Subject: locking: Move the rwsem code to kernel/locking/ Notably: changed lib/rwsem* targets from lib- to obj-, no idea about the ramifications of that. Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-g0kynfh5feriwc6p3h6kpbw6@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 2 +- kernel/locking/Makefile | 4 +- kernel/locking/rwsem-spinlock.c | 296 ++++++++++++++++++++++++++++++++++++++++ kernel/locking/rwsem-xadd.c | 293 +++++++++++++++++++++++++++++++++++++++ kernel/locking/rwsem.c | 157 +++++++++++++++++++++ kernel/rwsem.c | 157 --------------------- lib/Makefile | 2 - lib/rwsem-spinlock.c | 296 ---------------------------------------- lib/rwsem.c | 293 --------------------------------------- 9 files changed, 750 insertions(+), 750 deletions(-) create mode 100644 kernel/locking/rwsem-spinlock.c create mode 100644 kernel/locking/rwsem-xadd.c create mode 100644 kernel/locking/rwsem.c delete mode 100644 kernel/rwsem.c delete mode 100644 lib/rwsem-spinlock.c delete mode 100644 lib/rwsem.c diff --git a/kernel/Makefile b/kernel/Makefile index 9c2ad18..1aef002 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,7 +8,7 @@ obj-y = fork.o exec_domain.o panic.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ extable.o params.o posix-timers.o \ kthread.o sys_ni.o posix-cpu-timers.o \ - hrtimer.o rwsem.o nsproxy.o \ + hrtimer.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 59f66de..b0e0d73 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o +obj-y += mutex.o semaphore.o rwsem.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg @@ -20,3 +20,5 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o +obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o +obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o \ No newline at end of file diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c new file mode 100644 index 0000000..9be8a91 --- /dev/null +++ b/kernel/locking/rwsem-spinlock.c @@ -0,0 +1,296 @@ +/* rwsem-spinlock.c: R/W semaphores: contention handling functions for + * generic spinlock implementation + * + * Copyright (c) 2001 David Howells (dhowells@redhat.com). + * - Derived partially from idea by Andrea Arcangeli + * - Derived also from comments by Linus + */ +#include +#include +#include + +enum rwsem_waiter_type { + RWSEM_WAITING_FOR_WRITE, + RWSEM_WAITING_FOR_READ +}; + +struct rwsem_waiter { + struct list_head list; + struct task_struct *task; + enum rwsem_waiter_type type; +}; + +int rwsem_is_locked(struct rw_semaphore *sem) +{ + int ret = 1; + unsigned long flags; + + if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { + ret = (sem->activity != 0); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + } + return ret; +} +EXPORT_SYMBOL(rwsem_is_locked); + +/* + * initialise the semaphore + */ +void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held semaphore: + */ + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); + lockdep_init_map(&sem->dep_map, name, key, 0); +#endif + sem->activity = 0; + raw_spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} +EXPORT_SYMBOL(__init_rwsem); + +/* + * handle the lock release when processes blocked on it that can now run + * - if we come here, then: + * - the 'active count' _reached_ zero + * - the 'waiting count' is non-zero + * - the spinlock must be held by the caller + * - woken process blocks are discarded from the list after having task zeroed + * - writers are only woken if wakewrite is non-zero + */ +static inline struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) +{ + struct rwsem_waiter *waiter; + struct task_struct *tsk; + int woken; + + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + + if (waiter->type == RWSEM_WAITING_FOR_WRITE) { + if (wakewrite) + /* Wake up a writer. Note that we do not grant it the + * lock - it will have to acquire it when it runs. */ + wake_up_process(waiter->task); + goto out; + } + + /* grant an infinite number of read locks to the front of the queue */ + woken = 0; + do { + struct list_head *next = waiter->list.next; + + list_del(&waiter->list); + tsk = waiter->task; + smp_mb(); + waiter->task = NULL; + wake_up_process(tsk); + put_task_struct(tsk); + woken++; + if (next == &sem->wait_list) + break; + waiter = list_entry(next, struct rwsem_waiter, list); + } while (waiter->type != RWSEM_WAITING_FOR_WRITE); + + sem->activity += woken; + + out: + return sem; +} + +/* + * wake a single writer + */ +static inline struct rw_semaphore * +__rwsem_wake_one_writer(struct rw_semaphore *sem) +{ + struct rwsem_waiter *waiter; + + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + wake_up_process(waiter->task); + + return sem; +} + +/* + * get a read lock on the semaphore + */ +void __sched __down_read(struct rw_semaphore *sem) +{ + struct rwsem_waiter waiter; + struct task_struct *tsk; + unsigned long flags; + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity++; + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + goto out; + } + + tsk = current; + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + + /* set up my own style of waitqueue */ + waiter.task = tsk; + waiter.type = RWSEM_WAITING_FOR_READ; + get_task_struct(tsk); + + list_add_tail(&waiter.list, &sem->wait_list); + + /* we don't need to touch the semaphore struct anymore */ + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + /* wait to be given the lock */ + for (;;) { + if (!waiter.task) + break; + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } + + tsk->state = TASK_RUNNING; + out: + ; +} + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +int __down_read_trylock(struct rw_semaphore *sem) +{ + unsigned long flags; + int ret = 0; + + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity++; + ret = 1; + } + + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return ret; +} + +/* + * get a write lock on the semaphore + */ +void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) +{ + struct rwsem_waiter waiter; + struct task_struct *tsk; + unsigned long flags; + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + /* set up my own style of waitqueue */ + tsk = current; + waiter.task = tsk; + waiter.type = RWSEM_WAITING_FOR_WRITE; + list_add_tail(&waiter.list, &sem->wait_list); + + /* wait for someone to release the lock */ + for (;;) { + /* + * That is the key to support write lock stealing: allows the + * task already on CPU to get the lock soon rather than put + * itself into sleep and waiting for system woke it or someone + * else in the head of the wait list up. + */ + if (sem->activity == 0) + break; + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + schedule(); + raw_spin_lock_irqsave(&sem->wait_lock, flags); + } + /* got the lock */ + sem->activity = -1; + list_del(&waiter.list); + + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); +} + +void __sched __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +int __down_write_trylock(struct rw_semaphore *sem) +{ + unsigned long flags; + int ret = 0; + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity == 0) { + /* got the lock */ + sem->activity = -1; + ret = 1; + } + + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return ret; +} + +/* + * release a read lock on the semaphore + */ +void __up_read(struct rw_semaphore *sem) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (--sem->activity == 0 && !list_empty(&sem->wait_list)) + sem = __rwsem_wake_one_writer(sem); + + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); +} + +/* + * release a write lock on the semaphore + */ +void __up_write(struct rw_semaphore *sem) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + sem->activity = 0; + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 1); + + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); +} + +/* + * downgrade a write lock into a read lock + * - just wake up any readers at the front of the queue + */ +void __downgrade_write(struct rw_semaphore *sem) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + sem->activity = 1; + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 0); + + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); +} + diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c new file mode 100644 index 0000000..19c5fa9 --- /dev/null +++ b/kernel/locking/rwsem-xadd.c @@ -0,0 +1,293 @@ +/* rwsem.c: R/W semaphores: contention handling functions + * + * Written by David Howells (dhowells@redhat.com). + * Derived from arch/i386/kernel/semaphore.c + * + * Writer lock-stealing by Alex Shi + * and Michel Lespinasse + */ +#include +#include +#include +#include + +/* + * Initialize an rwsem: + */ +void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held semaphore: + */ + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); + lockdep_init_map(&sem->dep_map, name, key, 0); +#endif + sem->count = RWSEM_UNLOCKED_VALUE; + raw_spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + +EXPORT_SYMBOL(__init_rwsem); + +enum rwsem_waiter_type { + RWSEM_WAITING_FOR_WRITE, + RWSEM_WAITING_FOR_READ +}; + +struct rwsem_waiter { + struct list_head list; + struct task_struct *task; + enum rwsem_waiter_type type; +}; + +enum rwsem_wake_type { + RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ + RWSEM_WAKE_READERS, /* Wake readers only */ + RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ +}; + +/* + * handle the lock release when processes blocked on it that can now run + * - if we come here from up_xxxx(), then: + * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) + * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) + * - there must be someone on the queue + * - the spinlock must be held by the caller + * - woken process blocks are discarded from the list after having task zeroed + * - writers are only woken if downgrading is false + */ +static struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) +{ + struct rwsem_waiter *waiter; + struct task_struct *tsk; + struct list_head *next; + long oldcount, woken, loop, adjustment; + + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + if (waiter->type == RWSEM_WAITING_FOR_WRITE) { + if (wake_type == RWSEM_WAKE_ANY) + /* Wake writer at the front of the queue, but do not + * grant it the lock yet as we want other writers + * to be able to steal it. Readers, on the other hand, + * will block as they will notice the queued writer. + */ + wake_up_process(waiter->task); + goto out; + } + + /* Writers might steal the lock before we grant it to the next reader. + * We prefer to do the first reader grant before counting readers + * so we can bail out early if a writer stole the lock. + */ + adjustment = 0; + if (wake_type != RWSEM_WAKE_READ_OWNED) { + adjustment = RWSEM_ACTIVE_READ_BIAS; + try_reader_grant: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { + /* A writer stole the lock. Undo our reader grant. */ + if (rwsem_atomic_update(-adjustment, sem) & + RWSEM_ACTIVE_MASK) + goto out; + /* Last active locker left. Retry waking readers. */ + goto try_reader_grant; + } + } + + /* Grant an infinite number of read locks to the readers at the front + * of the queue. Note we increment the 'active part' of the count by + * the number of readers before waking any processes up. + */ + woken = 0; + do { + woken++; + + if (waiter->list.next == &sem->wait_list) + break; + + waiter = list_entry(waiter->list.next, + struct rwsem_waiter, list); + + } while (waiter->type != RWSEM_WAITING_FOR_WRITE); + + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; + if (waiter->type != RWSEM_WAITING_FOR_WRITE) + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; + + if (adjustment) + rwsem_atomic_add(adjustment, sem); + + next = sem->wait_list.next; + loop = woken; + do { + waiter = list_entry(next, struct rwsem_waiter, list); + next = waiter->list.next; + tsk = waiter->task; + smp_mb(); + waiter->task = NULL; + wake_up_process(tsk); + put_task_struct(tsk); + } while (--loop); + + sem->wait_list.next = next; + next->prev = &sem->wait_list; + + out: + return sem; +} + +/* + * wait for the read lock to be granted + */ +struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) +{ + long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; + struct rwsem_waiter waiter; + struct task_struct *tsk = current; + + /* set up my own style of waitqueue */ + waiter.task = tsk; + waiter.type = RWSEM_WAITING_FOR_READ; + get_task_struct(tsk); + + raw_spin_lock_irq(&sem->wait_lock); + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); + + /* we're now waiting on the lock, but no longer actively locking */ + count = rwsem_atomic_update(adjustment, sem); + + /* If there are no active locks, wake the front queued process(es). + * + * If there are no writers and we are first in the queue, + * wake our own waiter to join the existing active readers ! + */ + if (count == RWSEM_WAITING_BIAS || + (count > RWSEM_WAITING_BIAS && + adjustment != -RWSEM_ACTIVE_READ_BIAS)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + + raw_spin_unlock_irq(&sem->wait_lock); + + /* wait to be given the lock */ + while (true) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!waiter.task) + break; + schedule(); + } + + tsk->state = TASK_RUNNING; + + return sem; +} + +/* + * wait until we successfully acquire the write lock + */ +struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) +{ + long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; + struct rwsem_waiter waiter; + struct task_struct *tsk = current; + + /* set up my own style of waitqueue */ + waiter.task = tsk; + waiter.type = RWSEM_WAITING_FOR_WRITE; + + raw_spin_lock_irq(&sem->wait_lock); + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); + + /* we're now waiting on the lock, but no longer actively locking */ + count = rwsem_atomic_update(adjustment, sem); + + /* If there were already threads queued before us and there are no + * active writers, the lock must be read owned; so we try to wake + * any read locks that were queued ahead of us. */ + if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); + + /* wait until we successfully acquire the lock */ + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + while (true) { + if (!(count & RWSEM_ACTIVE_MASK)) { + /* Try acquiring the write lock. */ + count = RWSEM_ACTIVE_WRITE_BIAS; + if (!list_is_singular(&sem->wait_list)) + count += RWSEM_WAITING_BIAS; + + if (sem->count == RWSEM_WAITING_BIAS && + cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == + RWSEM_WAITING_BIAS) + break; + } + + raw_spin_unlock_irq(&sem->wait_lock); + + /* Block until there are no active lockers. */ + do { + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } while ((count = sem->count) & RWSEM_ACTIVE_MASK); + + raw_spin_lock_irq(&sem->wait_lock); + } + + list_del(&waiter.list); + raw_spin_unlock_irq(&sem->wait_lock); + tsk->state = TASK_RUNNING; + + return sem; +} + +/* + * handle waking up a waiter on the semaphore + * - up_read/up_write has decremented the active part of count if we come here + */ +struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + /* do nothing if list empty */ + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return sem; +} + +/* + * downgrade a write lock into a read lock + * - caller incremented waiting part of count and discovered it still negative + * - just wake up any readers at the front of the queue + */ +struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&sem->wait_lock, flags); + + /* do nothing if list empty */ + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return sem; +} + +EXPORT_SYMBOL(rwsem_down_read_failed); +EXPORT_SYMBOL(rwsem_down_write_failed); +EXPORT_SYMBOL(rwsem_wake); +EXPORT_SYMBOL(rwsem_downgrade_wake); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c new file mode 100644 index 0000000..cfff143 --- /dev/null +++ b/kernel/locking/rwsem.c @@ -0,0 +1,157 @@ +/* kernel/rwsem.c: R/W semaphores, public implementation + * + * Written by David Howells (dhowells@redhat.com). + * Derived from asm-i386/semaphore.h + */ + +#include +#include +#include +#include +#include + +#include + +/* + * lock for reading + */ +void __sched down_read(struct rw_semaphore *sem) +{ + might_sleep(); + rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); + + LOCK_CONTENDED(sem, __down_read_trylock, __down_read); +} + +EXPORT_SYMBOL(down_read); + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +int down_read_trylock(struct rw_semaphore *sem) +{ + int ret = __down_read_trylock(sem); + + if (ret == 1) + rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); + return ret; +} + +EXPORT_SYMBOL(down_read_trylock); + +/* + * lock for writing + */ +void __sched down_write(struct rw_semaphore *sem) +{ + might_sleep(); + rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); + + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); +} + +EXPORT_SYMBOL(down_write); + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +int down_write_trylock(struct rw_semaphore *sem) +{ + int ret = __down_write_trylock(sem); + + if (ret == 1) + rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); + return ret; +} + +EXPORT_SYMBOL(down_write_trylock); + +/* + * release a read lock + */ +void up_read(struct rw_semaphore *sem) +{ + rwsem_release(&sem->dep_map, 1, _RET_IP_); + + __up_read(sem); +} + +EXPORT_SYMBOL(up_read); + +/* + * release a write lock + */ +void up_write(struct rw_semaphore *sem) +{ + rwsem_release(&sem->dep_map, 1, _RET_IP_); + + __up_write(sem); +} + +EXPORT_SYMBOL(up_write); + +/* + * downgrade write lock to read lock + */ +void downgrade_write(struct rw_semaphore *sem) +{ + /* + * lockdep: a downgraded write will live on as a write + * dependency. + */ + __downgrade_write(sem); +} + +EXPORT_SYMBOL(downgrade_write); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +void down_read_nested(struct rw_semaphore *sem, int subclass) +{ + might_sleep(); + rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); + + LOCK_CONTENDED(sem, __down_read_trylock, __down_read); +} + +EXPORT_SYMBOL(down_read_nested); + +void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) +{ + might_sleep(); + rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); + + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); +} + +EXPORT_SYMBOL(_down_write_nest_lock); + +void down_read_non_owner(struct rw_semaphore *sem) +{ + might_sleep(); + + __down_read(sem); +} + +EXPORT_SYMBOL(down_read_non_owner); + +void down_write_nested(struct rw_semaphore *sem, int subclass) +{ + might_sleep(); + rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); + + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); +} + +EXPORT_SYMBOL(down_write_nested); + +void up_read_non_owner(struct rw_semaphore *sem) +{ + __up_read(sem); +} + +EXPORT_SYMBOL(up_read_non_owner); + +#endif + + diff --git a/kernel/rwsem.c b/kernel/rwsem.c deleted file mode 100644 index cfff143..0000000 --- a/kernel/rwsem.c +++ /dev/null @@ -1,157 +0,0 @@ -/* kernel/rwsem.c: R/W semaphores, public implementation - * - * Written by David Howells (dhowells@redhat.com). - * Derived from asm-i386/semaphore.h - */ - -#include -#include -#include -#include -#include - -#include - -/* - * lock for reading - */ -void __sched down_read(struct rw_semaphore *sem) -{ - might_sleep(); - rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); - - LOCK_CONTENDED(sem, __down_read_trylock, __down_read); -} - -EXPORT_SYMBOL(down_read); - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -int down_read_trylock(struct rw_semaphore *sem) -{ - int ret = __down_read_trylock(sem); - - if (ret == 1) - rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); - return ret; -} - -EXPORT_SYMBOL(down_read_trylock); - -/* - * lock for writing - */ -void __sched down_write(struct rw_semaphore *sem) -{ - might_sleep(); - rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); - - LOCK_CONTENDED(sem, __down_write_trylock, __down_write); -} - -EXPORT_SYMBOL(down_write); - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -int down_write_trylock(struct rw_semaphore *sem) -{ - int ret = __down_write_trylock(sem); - - if (ret == 1) - rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); - return ret; -} - -EXPORT_SYMBOL(down_write_trylock); - -/* - * release a read lock - */ -void up_read(struct rw_semaphore *sem) -{ - rwsem_release(&sem->dep_map, 1, _RET_IP_); - - __up_read(sem); -} - -EXPORT_SYMBOL(up_read); - -/* - * release a write lock - */ -void up_write(struct rw_semaphore *sem) -{ - rwsem_release(&sem->dep_map, 1, _RET_IP_); - - __up_write(sem); -} - -EXPORT_SYMBOL(up_write); - -/* - * downgrade write lock to read lock - */ -void downgrade_write(struct rw_semaphore *sem) -{ - /* - * lockdep: a downgraded write will live on as a write - * dependency. - */ - __downgrade_write(sem); -} - -EXPORT_SYMBOL(downgrade_write); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - -void down_read_nested(struct rw_semaphore *sem, int subclass) -{ - might_sleep(); - rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); - - LOCK_CONTENDED(sem, __down_read_trylock, __down_read); -} - -EXPORT_SYMBOL(down_read_nested); - -void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) -{ - might_sleep(); - rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); - - LOCK_CONTENDED(sem, __down_write_trylock, __down_write); -} - -EXPORT_SYMBOL(_down_write_nest_lock); - -void down_read_non_owner(struct rw_semaphore *sem) -{ - might_sleep(); - - __down_read(sem); -} - -EXPORT_SYMBOL(down_read_non_owner); - -void down_write_nested(struct rw_semaphore *sem, int subclass) -{ - might_sleep(); - rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); - - LOCK_CONTENDED(sem, __down_write_trylock, __down_write); -} - -EXPORT_SYMBOL(down_write_nested); - -void up_read_non_owner(struct rw_semaphore *sem) -{ - __up_read(sem); -} - -EXPORT_SYMBOL(up_read_non_owner); - -#endif - - diff --git a/lib/Makefile b/lib/Makefile index bee27e1..ca8cadc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,8 +42,6 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o -lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c deleted file mode 100644 index 9be8a91..0000000 --- a/lib/rwsem-spinlock.c +++ /dev/null @@ -1,296 +0,0 @@ -/* rwsem-spinlock.c: R/W semaphores: contention handling functions for - * generic spinlock implementation - * - * Copyright (c) 2001 David Howells (dhowells@redhat.com). - * - Derived partially from idea by Andrea Arcangeli - * - Derived also from comments by Linus - */ -#include -#include -#include - -enum rwsem_waiter_type { - RWSEM_WAITING_FOR_WRITE, - RWSEM_WAITING_FOR_READ -}; - -struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - enum rwsem_waiter_type type; -}; - -int rwsem_is_locked(struct rw_semaphore *sem) -{ - int ret = 1; - unsigned long flags; - - if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { - ret = (sem->activity != 0); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - } - return ret; -} -EXPORT_SYMBOL(rwsem_is_locked); - -/* - * initialise the semaphore - */ -void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held semaphore: - */ - debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key, 0); -#endif - sem->activity = 0; - raw_spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} -EXPORT_SYMBOL(__init_rwsem); - -/* - * handle the lock release when processes blocked on it that can now run - * - if we come here, then: - * - the 'active count' _reached_ zero - * - the 'waiting count' is non-zero - * - the spinlock must be held by the caller - * - woken process blocks are discarded from the list after having task zeroed - * - writers are only woken if wakewrite is non-zero - */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) -{ - struct rwsem_waiter *waiter; - struct task_struct *tsk; - int woken; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - - if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wakewrite) - /* Wake up a writer. Note that we do not grant it the - * lock - it will have to acquire it when it runs. */ - wake_up_process(waiter->task); - goto out; - } - - /* grant an infinite number of read locks to the front of the queue */ - woken = 0; - do { - struct list_head *next = waiter->list.next; - - list_del(&waiter->list); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); - woken++; - if (next == &sem->wait_list) - break; - waiter = list_entry(next, struct rwsem_waiter, list); - } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - - sem->activity += woken; - - out: - return sem; -} - -/* - * wake a single writer - */ -static inline struct rw_semaphore * -__rwsem_wake_one_writer(struct rw_semaphore *sem) -{ - struct rwsem_waiter *waiter; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - wake_up_process(waiter->task); - - return sem; -} - -/* - * get a read lock on the semaphore - */ -void __sched __down_read(struct rw_semaphore *sem) -{ - struct rwsem_waiter waiter; - struct task_struct *tsk; - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity++; - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - - /* set up my own style of waitqueue */ - waiter.task = tsk; - waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(tsk); - - list_add_tail(&waiter.list, &sem->wait_list); - - /* we don't need to touch the semaphore struct anymore */ - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - /* wait to be given the lock */ - for (;;) { - if (!waiter.task) - break; - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - - tsk->state = TASK_RUNNING; - out: - ; -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -int __down_read_trylock(struct rw_semaphore *sem) -{ - unsigned long flags; - int ret = 0; - - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity++; - ret = 1; - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; -} - -/* - * get a write lock on the semaphore - */ -void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) -{ - struct rwsem_waiter waiter; - struct task_struct *tsk; - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - /* set up my own style of waitqueue */ - tsk = current; - waiter.task = tsk; - waiter.type = RWSEM_WAITING_FOR_WRITE; - list_add_tail(&waiter.list, &sem->wait_list); - - /* wait for someone to release the lock */ - for (;;) { - /* - * That is the key to support write lock stealing: allows the - * task already on CPU to get the lock soon rather than put - * itself into sleep and waiting for system woke it or someone - * else in the head of the wait list up. - */ - if (sem->activity == 0) - break; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - schedule(); - raw_spin_lock_irqsave(&sem->wait_lock, flags); - } - /* got the lock */ - sem->activity = -1; - list_del(&waiter.list); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -void __sched __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -int __down_write_trylock(struct rw_semaphore *sem) -{ - unsigned long flags; - int ret = 0; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->activity == 0) { - /* got the lock */ - sem->activity = -1; - ret = 1; - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; -} - -/* - * release a read lock on the semaphore - */ -void __up_read(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (--sem->activity == 0 && !list_empty(&sem->wait_list)) - sem = __rwsem_wake_one_writer(sem); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -/* - * release a write lock on the semaphore - */ -void __up_write(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - sem->activity = 0; - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 1); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -/* - * downgrade a write lock into a read lock - * - just wake up any readers at the front of the queue - */ -void __downgrade_write(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - sem->activity = 1; - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 0); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - diff --git a/lib/rwsem.c b/lib/rwsem.c deleted file mode 100644 index 19c5fa9..0000000 --- a/lib/rwsem.c +++ /dev/null @@ -1,293 +0,0 @@ -/* rwsem.c: R/W semaphores: contention handling functions - * - * Written by David Howells (dhowells@redhat.com). - * Derived from arch/i386/kernel/semaphore.c - * - * Writer lock-stealing by Alex Shi - * and Michel Lespinasse - */ -#include -#include -#include -#include - -/* - * Initialize an rwsem: - */ -void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held semaphore: - */ - debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key, 0); -#endif - sem->count = RWSEM_UNLOCKED_VALUE; - raw_spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} - -EXPORT_SYMBOL(__init_rwsem); - -enum rwsem_waiter_type { - RWSEM_WAITING_FOR_WRITE, - RWSEM_WAITING_FOR_READ -}; - -struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - enum rwsem_waiter_type type; -}; - -enum rwsem_wake_type { - RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ - RWSEM_WAKE_READERS, /* Wake readers only */ - RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ -}; - -/* - * handle the lock release when processes blocked on it that can now run - * - if we come here from up_xxxx(), then: - * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) - * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) - * - there must be someone on the queue - * - the spinlock must be held by the caller - * - woken process blocks are discarded from the list after having task zeroed - * - writers are only woken if downgrading is false - */ -static struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) -{ - struct rwsem_waiter *waiter; - struct task_struct *tsk; - struct list_head *next; - long oldcount, woken, loop, adjustment; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wake_type == RWSEM_WAKE_ANY) - /* Wake writer at the front of the queue, but do not - * grant it the lock yet as we want other writers - * to be able to steal it. Readers, on the other hand, - * will block as they will notice the queued writer. - */ - wake_up_process(waiter->task); - goto out; - } - - /* Writers might steal the lock before we grant it to the next reader. - * We prefer to do the first reader grant before counting readers - * so we can bail out early if a writer stole the lock. - */ - adjustment = 0; - if (wake_type != RWSEM_WAKE_READ_OWNED) { - adjustment = RWSEM_ACTIVE_READ_BIAS; - try_reader_grant: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { - /* A writer stole the lock. Undo our reader grant. */ - if (rwsem_atomic_update(-adjustment, sem) & - RWSEM_ACTIVE_MASK) - goto out; - /* Last active locker left. Retry waking readers. */ - goto try_reader_grant; - } - } - - /* Grant an infinite number of read locks to the readers at the front - * of the queue. Note we increment the 'active part' of the count by - * the number of readers before waking any processes up. - */ - woken = 0; - do { - woken++; - - if (waiter->list.next == &sem->wait_list) - break; - - waiter = list_entry(waiter->list.next, - struct rwsem_waiter, list); - - } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - - adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; - if (waiter->type != RWSEM_WAITING_FOR_WRITE) - /* hit end of list above */ - adjustment -= RWSEM_WAITING_BIAS; - - if (adjustment) - rwsem_atomic_add(adjustment, sem); - - next = sem->wait_list.next; - loop = woken; - do { - waiter = list_entry(next, struct rwsem_waiter, list); - next = waiter->list.next; - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); - } while (--loop); - - sem->wait_list.next = next; - next->prev = &sem->wait_list; - - out: - return sem; -} - -/* - * wait for the read lock to be granted - */ -struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) -{ - long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; - struct rwsem_waiter waiter; - struct task_struct *tsk = current; - - /* set up my own style of waitqueue */ - waiter.task = tsk; - waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(tsk); - - raw_spin_lock_irq(&sem->wait_lock); - if (list_empty(&sem->wait_list)) - adjustment += RWSEM_WAITING_BIAS; - list_add_tail(&waiter.list, &sem->wait_list); - - /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); - - /* If there are no active locks, wake the front queued process(es). - * - * If there are no writers and we are first in the queue, - * wake our own waiter to join the existing active readers ! - */ - if (count == RWSEM_WAITING_BIAS || - (count > RWSEM_WAITING_BIAS && - adjustment != -RWSEM_ACTIVE_READ_BIAS)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); - - raw_spin_unlock_irq(&sem->wait_lock); - - /* wait to be given the lock */ - while (true) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (!waiter.task) - break; - schedule(); - } - - tsk->state = TASK_RUNNING; - - return sem; -} - -/* - * wait until we successfully acquire the write lock - */ -struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) -{ - long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; - struct rwsem_waiter waiter; - struct task_struct *tsk = current; - - /* set up my own style of waitqueue */ - waiter.task = tsk; - waiter.type = RWSEM_WAITING_FOR_WRITE; - - raw_spin_lock_irq(&sem->wait_lock); - if (list_empty(&sem->wait_list)) - adjustment += RWSEM_WAITING_BIAS; - list_add_tail(&waiter.list, &sem->wait_list); - - /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); - - /* If there were already threads queued before us and there are no - * active writers, the lock must be read owned; so we try to wake - * any read locks that were queued ahead of us. */ - if (count > RWSEM_WAITING_BIAS && - adjustment == -RWSEM_ACTIVE_WRITE_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); - - /* wait until we successfully acquire the lock */ - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - while (true) { - if (!(count & RWSEM_ACTIVE_MASK)) { - /* Try acquiring the write lock. */ - count = RWSEM_ACTIVE_WRITE_BIAS; - if (!list_is_singular(&sem->wait_list)) - count += RWSEM_WAITING_BIAS; - - if (sem->count == RWSEM_WAITING_BIAS && - cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == - RWSEM_WAITING_BIAS) - break; - } - - raw_spin_unlock_irq(&sem->wait_lock); - - /* Block until there are no active lockers. */ - do { - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } while ((count = sem->count) & RWSEM_ACTIVE_MASK); - - raw_spin_lock_irq(&sem->wait_lock); - } - - list_del(&waiter.list); - raw_spin_unlock_irq(&sem->wait_lock); - tsk->state = TASK_RUNNING; - - return sem; -} - -/* - * handle waking up a waiter on the semaphore - * - up_read/up_write has decremented the active part of count if we come here - */ -struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - /* do nothing if list empty */ - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return sem; -} - -/* - * downgrade a write lock into a read lock - * - caller incremented waiting part of count and discovered it still negative - * - just wake up any readers at the front of the queue - */ -struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - /* do nothing if list empty */ - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return sem; -} - -EXPORT_SYMBOL(rwsem_down_read_failed); -EXPORT_SYMBOL(rwsem_down_write_failed); -EXPORT_SYMBOL(rwsem_wake); -EXPORT_SYMBOL(rwsem_downgrade_wake); -- cgit v1.1 From cd4d241d57c99c6b00ef1799ad797d90f75a1da9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Nov 2013 11:51:33 +0100 Subject: locking: Move the lglocks code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-amd6pg1mif6tikbyktfvby3y@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/Makefile | 2 +- kernel/lglock.c | 89 ------------------------------------------------- kernel/locking/Makefile | 2 +- kernel/locking/lglock.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 91 deletions(-) delete mode 100644 kernel/lglock.c create mode 100644 kernel/locking/lglock.c diff --git a/kernel/Makefile b/kernel/Makefile index 1aef002..09a9c94 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o \ kthread.o sys_ni.o posix-cpu-timers.o \ hrtimer.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o groups.o lglock.o smpboot.o + async.o range.o groups.o smpboot.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/lglock.c b/kernel/lglock.c deleted file mode 100644 index 86ae2ae..0000000 --- a/kernel/lglock.c +++ /dev/null @@ -1,89 +0,0 @@ -/* See include/linux/lglock.h for description */ -#include -#include -#include -#include - -/* - * Note there is no uninit, so lglocks cannot be defined in - * modules (but it's fine to use them from there) - * Could be added though, just undo lg_lock_init - */ - -void lg_lock_init(struct lglock *lg, char *name) -{ - LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); -} -EXPORT_SYMBOL(lg_lock_init); - -void lg_local_lock(struct lglock *lg) -{ - arch_spinlock_t *lock; - - preempt_disable(); - lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); - lock = this_cpu_ptr(lg->lock); - arch_spin_lock(lock); -} -EXPORT_SYMBOL(lg_local_lock); - -void lg_local_unlock(struct lglock *lg) -{ - arch_spinlock_t *lock; - - lock_release(&lg->lock_dep_map, 1, _RET_IP_); - lock = this_cpu_ptr(lg->lock); - arch_spin_unlock(lock); - preempt_enable(); -} -EXPORT_SYMBOL(lg_local_unlock); - -void lg_local_lock_cpu(struct lglock *lg, int cpu) -{ - arch_spinlock_t *lock; - - preempt_disable(); - lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); - lock = per_cpu_ptr(lg->lock, cpu); - arch_spin_lock(lock); -} -EXPORT_SYMBOL(lg_local_lock_cpu); - -void lg_local_unlock_cpu(struct lglock *lg, int cpu) -{ - arch_spinlock_t *lock; - - lock_release(&lg->lock_dep_map, 1, _RET_IP_); - lock = per_cpu_ptr(lg->lock, cpu); - arch_spin_unlock(lock); - preempt_enable(); -} -EXPORT_SYMBOL(lg_local_unlock_cpu); - -void lg_global_lock(struct lglock *lg) -{ - int i; - - preempt_disable(); - lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); - for_each_possible_cpu(i) { - arch_spinlock_t *lock; - lock = per_cpu_ptr(lg->lock, i); - arch_spin_lock(lock); - } -} -EXPORT_SYMBOL(lg_global_lock); - -void lg_global_unlock(struct lglock *lg) -{ - int i; - - lock_release(&lg->lock_dep_map, 1, _RET_IP_); - for_each_possible_cpu(i) { - arch_spinlock_t *lock; - lock = per_cpu_ptr(lg->lock, i); - arch_spin_unlock(lock); - } - preempt_enable(); -} -EXPORT_SYMBOL(lg_global_unlock); diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index b0e0d73..bdd313a 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o rwsem.o +obj-y += mutex.o semaphore.o rwsem.o lglock.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c new file mode 100644 index 0000000..86ae2ae --- /dev/null +++ b/kernel/locking/lglock.c @@ -0,0 +1,89 @@ +/* See include/linux/lglock.h for description */ +#include +#include +#include +#include + +/* + * Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, char *name) +{ + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + preempt_disable(); + lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + lock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + preempt_disable(); + lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + lock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock(struct lglock *lg) +{ + int i; + + preempt_disable(); + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + + lock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); -- cgit v1.1 From 32cf7c3c94623514eb882addae307212c1507239 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Nov 2013 18:05:09 +0100 Subject: locking: Move the percpu-rwsem code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-52bjmtty46we26hbfd9sc9iy@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/Makefile | 3 +- kernel/locking/percpu-rwsem.c | 165 ++++++++++++++++++++++++++++++++++++++++++ lib/Makefile | 1 - lib/percpu-rwsem.c | 165 ------------------------------------------ 4 files changed, 167 insertions(+), 167 deletions(-) create mode 100644 kernel/locking/percpu-rwsem.c delete mode 100644 lib/percpu-rwsem.c diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index bdd313a..baab8e5 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -21,4 +21,5 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o -obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o \ No newline at end of file +obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o +obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c new file mode 100644 index 0000000..652a8ee --- /dev/null +++ b/kernel/locking/percpu-rwsem.c @@ -0,0 +1,165 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, + const char *name, struct lock_class_key *rwsem_key) +{ + brw->fast_read_ctr = alloc_percpu(int); + if (unlikely(!brw->fast_read_ctr)) + return -ENOMEM; + + /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ + __init_rwsem(&brw->rw_sem, name, rwsem_key); + atomic_set(&brw->write_ctr, 0); + atomic_set(&brw->slow_read_ctr, 0); + init_waitqueue_head(&brw->write_waitq); + return 0; +} + +void percpu_free_rwsem(struct percpu_rw_semaphore *brw) +{ + free_percpu(brw->fast_read_ctr); + brw->fast_read_ctr = NULL; /* catch use after free bugs */ +} + +/* + * This is the fast-path for down_read/up_read, it only needs to ensure + * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the + * fast per-cpu counter. The writer uses synchronize_sched_expedited() to + * serialize with the preempt-disabled section below. + * + * The nontrivial part is that we should guarantee acquire/release semantics + * in case when + * + * R_W: down_write() comes after up_read(), the writer should see all + * changes done by the reader + * or + * W_R: down_read() comes after up_write(), the reader should see all + * changes done by the writer + * + * If this helper fails the callers rely on the normal rw_semaphore and + * atomic_dec_and_test(), so in this case we have the necessary barriers. + * + * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or + * __this_cpu_add() below can be reordered with any LOAD/STORE done by the + * reader inside the critical section. See the comments in down_write and + * up_write below. + */ +static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) +{ + bool success = false; + + preempt_disable(); + if (likely(!atomic_read(&brw->write_ctr))) { + __this_cpu_add(*brw->fast_read_ctr, val); + success = true; + } + preempt_enable(); + + return success; +} + +/* + * Like the normal down_read() this is not recursive, the writer can + * come after the first percpu_down_read() and create the deadlock. + * + * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, + * percpu_up_read() does rwsem_release(). This pairs with the usage + * of ->rw_sem in percpu_down/up_write(). + */ +void percpu_down_read(struct percpu_rw_semaphore *brw) +{ + might_sleep(); + if (likely(update_fast_ctr(brw, +1))) { + rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); + return; + } + + down_read(&brw->rw_sem); + atomic_inc(&brw->slow_read_ctr); + /* avoid up_read()->rwsem_release() */ + __up_read(&brw->rw_sem); +} + +void percpu_up_read(struct percpu_rw_semaphore *brw) +{ + rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); + + if (likely(update_fast_ctr(brw, -1))) + return; + + /* false-positive is possible but harmless */ + if (atomic_dec_and_test(&brw->slow_read_ctr)) + wake_up_all(&brw->write_waitq); +} + +static int clear_fast_ctr(struct percpu_rw_semaphore *brw) +{ + unsigned int sum = 0; + int cpu; + + for_each_possible_cpu(cpu) { + sum += per_cpu(*brw->fast_read_ctr, cpu); + per_cpu(*brw->fast_read_ctr, cpu) = 0; + } + + return sum; +} + +/* + * A writer increments ->write_ctr to force the readers to switch to the + * slow mode, note the atomic_read() check in update_fast_ctr(). + * + * After that the readers can only inc/dec the slow ->slow_read_ctr counter, + * ->fast_read_ctr is stable. Once the writer moves its sum into the slow + * counter it represents the number of active readers. + * + * Finally the writer takes ->rw_sem for writing and blocks the new readers, + * then waits until the slow counter becomes zero. + */ +void percpu_down_write(struct percpu_rw_semaphore *brw) +{ + /* tell update_fast_ctr() there is a pending writer */ + atomic_inc(&brw->write_ctr); + /* + * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read + * so that update_fast_ctr() can't succeed. + * + * 2. Ensures we see the result of every previous this_cpu_add() in + * update_fast_ctr(). + * + * 3. Ensures that if any reader has exited its critical section via + * fast-path, it executes a full memory barrier before we return. + * See R_W case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + + /* exclude other writers, and block the new readers completely */ + down_write(&brw->rw_sem); + + /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ + atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); + + /* wait for all readers to complete their percpu_up_read() */ + wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); +} + +void percpu_up_write(struct percpu_rw_semaphore *brw) +{ + /* release the lock, but the readers can't use the fast-path */ + up_write(&brw->rw_sem); + /* + * Insert the barrier before the next fast-path in down_read, + * see W_R case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + /* the last writer unblocks update_fast_ctr() */ + atomic_dec(&brw->write_ctr); +} diff --git a/lib/Makefile b/lib/Makefile index ca8cadc..8d3c6293 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c deleted file mode 100644 index 652a8ee..0000000 --- a/lib/percpu-rwsem.c +++ /dev/null @@ -1,165 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, - const char *name, struct lock_class_key *rwsem_key) -{ - brw->fast_read_ctr = alloc_percpu(int); - if (unlikely(!brw->fast_read_ctr)) - return -ENOMEM; - - /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ - __init_rwsem(&brw->rw_sem, name, rwsem_key); - atomic_set(&brw->write_ctr, 0); - atomic_set(&brw->slow_read_ctr, 0); - init_waitqueue_head(&brw->write_waitq); - return 0; -} - -void percpu_free_rwsem(struct percpu_rw_semaphore *brw) -{ - free_percpu(brw->fast_read_ctr); - brw->fast_read_ctr = NULL; /* catch use after free bugs */ -} - -/* - * This is the fast-path for down_read/up_read, it only needs to ensure - * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the - * fast per-cpu counter. The writer uses synchronize_sched_expedited() to - * serialize with the preempt-disabled section below. - * - * The nontrivial part is that we should guarantee acquire/release semantics - * in case when - * - * R_W: down_write() comes after up_read(), the writer should see all - * changes done by the reader - * or - * W_R: down_read() comes after up_write(), the reader should see all - * changes done by the writer - * - * If this helper fails the callers rely on the normal rw_semaphore and - * atomic_dec_and_test(), so in this case we have the necessary barriers. - * - * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or - * __this_cpu_add() below can be reordered with any LOAD/STORE done by the - * reader inside the critical section. See the comments in down_write and - * up_write below. - */ -static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) -{ - bool success = false; - - preempt_disable(); - if (likely(!atomic_read(&brw->write_ctr))) { - __this_cpu_add(*brw->fast_read_ctr, val); - success = true; - } - preempt_enable(); - - return success; -} - -/* - * Like the normal down_read() this is not recursive, the writer can - * come after the first percpu_down_read() and create the deadlock. - * - * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, - * percpu_up_read() does rwsem_release(). This pairs with the usage - * of ->rw_sem in percpu_down/up_write(). - */ -void percpu_down_read(struct percpu_rw_semaphore *brw) -{ - might_sleep(); - if (likely(update_fast_ctr(brw, +1))) { - rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); - return; - } - - down_read(&brw->rw_sem); - atomic_inc(&brw->slow_read_ctr); - /* avoid up_read()->rwsem_release() */ - __up_read(&brw->rw_sem); -} - -void percpu_up_read(struct percpu_rw_semaphore *brw) -{ - rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); - - if (likely(update_fast_ctr(brw, -1))) - return; - - /* false-positive is possible but harmless */ - if (atomic_dec_and_test(&brw->slow_read_ctr)) - wake_up_all(&brw->write_waitq); -} - -static int clear_fast_ctr(struct percpu_rw_semaphore *brw) -{ - unsigned int sum = 0; - int cpu; - - for_each_possible_cpu(cpu) { - sum += per_cpu(*brw->fast_read_ctr, cpu); - per_cpu(*brw->fast_read_ctr, cpu) = 0; - } - - return sum; -} - -/* - * A writer increments ->write_ctr to force the readers to switch to the - * slow mode, note the atomic_read() check in update_fast_ctr(). - * - * After that the readers can only inc/dec the slow ->slow_read_ctr counter, - * ->fast_read_ctr is stable. Once the writer moves its sum into the slow - * counter it represents the number of active readers. - * - * Finally the writer takes ->rw_sem for writing and blocks the new readers, - * then waits until the slow counter becomes zero. - */ -void percpu_down_write(struct percpu_rw_semaphore *brw) -{ - /* tell update_fast_ctr() there is a pending writer */ - atomic_inc(&brw->write_ctr); - /* - * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read - * so that update_fast_ctr() can't succeed. - * - * 2. Ensures we see the result of every previous this_cpu_add() in - * update_fast_ctr(). - * - * 3. Ensures that if any reader has exited its critical section via - * fast-path, it executes a full memory barrier before we return. - * See R_W case in the comment above update_fast_ctr(). - */ - synchronize_sched_expedited(); - - /* exclude other writers, and block the new readers completely */ - down_write(&brw->rw_sem); - - /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ - atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); - - /* wait for all readers to complete their percpu_up_read() */ - wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); -} - -void percpu_up_write(struct percpu_rw_semaphore *brw) -{ - /* release the lock, but the readers can't use the fast-path */ - up_write(&brw->rw_sem); - /* - * Insert the barrier before the next fast-path in down_read, - * see W_R case in the comment above update_fast_ctr(). - */ - synchronize_sched_expedited(); - /* the last writer unblocks update_fast_ctr() */ - atomic_dec(&brw->write_ctr); -} -- cgit v1.1 From 827da44c61419f29ae3be198c342e2147f1a10cb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:58 -0700 Subject: net: Explicitly initialize u64_stats_sync structures for lockdep In order to enable lockdep on seqcount/seqlock structures, we must explicitly initialize any locks. The u64_stats_sync structure, uses a seqcount, and thus we need to introduce a u64_stats_init() function and use it to initialize the structure. This unfortunately adds a lot of fairly trivial initialization code to a number of drivers. But the benefit of ensuring correctness makes this worth while. Because these changes are required for lockdep to be enabled, and the changes are quite trivial, I've not yet split this patch out into 30-some separate patches, as I figured it would be better to get the various maintainers thoughts on how to best merge this change along with the seqcount lockdep enablement. Feedback would be appreciated! Signed-off-by: John Stultz Acked-by: Julian Anastasov Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Eric Dumazet Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Jesse Gross Cc: Mathieu Desnoyers Cc: "Michael S. Tsirkin" Cc: Mirko Lindner Cc: Patrick McHardy Cc: Roger Luethi Cc: Rusty Russell Cc: Simon Horman Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Thomas Petazzoni Cc: Wensong Zhang Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- drivers/net/dummy.c | 6 ++++++ drivers/net/ethernet/emulex/benet/be_main.c | 4 ++++ drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++ drivers/net/ethernet/marvell/mvneta.c | 3 +++ drivers/net/ethernet/marvell/sky2.c | 3 +++ drivers/net/ethernet/neterion/vxge/vxge-main.c | 4 ++++ drivers/net/ethernet/nvidia/forcedeth.c | 2 ++ drivers/net/ethernet/realtek/8139too.c | 3 +++ drivers/net/ethernet/tile/tilepro.c | 2 ++ drivers/net/ethernet/via/via-rhine.c | 3 +++ drivers/net/ifb.c | 5 +++++ drivers/net/loopback.c | 6 ++++++ drivers/net/macvlan.c | 7 +++++++ drivers/net/nlmon.c | 8 ++++++++ drivers/net/team/team.c | 6 ++++++ drivers/net/team/team_mode_loadbalance.c | 9 ++++++++- drivers/net/veth.c | 8 ++++++++ drivers/net/virtio_net.c | 8 ++++++++ drivers/net/vxlan.c | 8 ++++++++ drivers/net/xen-netfront.c | 6 ++++++ include/linux/u64_stats_sync.h | 7 +++++++ net/8021q/vlan_dev.c | 9 ++++++++- net/bridge/br_device.c | 7 +++++++ net/ipv4/af_inet.c | 14 ++++++++++++++ net/ipv4/ip_tunnel.c | 8 +++++++- net/ipv6/addrconf.c | 14 ++++++++++++++ net/ipv6/af_inet6.c | 14 ++++++++++++++ net/ipv6/ip6_gre.c | 15 +++++++++++++++ net/ipv6/ip6_tunnel.c | 7 +++++++ net/ipv6/sit.c | 15 +++++++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 25 ++++++++++++++++++++++--- net/openvswitch/datapath.c | 6 ++++++ net/openvswitch/vport.c | 8 ++++++++ 34 files changed, 253 insertions(+), 6 deletions(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index b710c6b..bd8f84b 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -88,10 +88,16 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev) static int dummy_dev_init(struct net_device *dev) { + int i; dev->dstats = alloc_percpu(struct pcpu_dstats); if (!dev->dstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_dstats *dstats; + dstats = per_cpu_ptr(dev->dstats, i); + u64_stats_init(&dstats->syncp); + } return 0; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 2c38cc4..edd7595 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2047,6 +2047,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) if (status) return status; + u64_stats_init(&txo->stats.sync); + u64_stats_init(&txo->stats.sync_compl); + /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ @@ -2108,6 +2111,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter) if (rc) return rc; + u64_stats_init(&rxo->stats.sync); eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; rc = be_cmd_cq_create(adapter, cq, eq, false, 3); if (rc) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8cf44f2..b6edb93 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1223,6 +1223,9 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; + u64_stats_init(&ring->tx_syncp); + u64_stats_init(&ring->tx_syncp2); + /* assign ring to adapter */ adapter->tx_ring[txr_idx] = ring; @@ -1256,6 +1259,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->rx_ring_count; ring->queue_index = rxr_idx; + u64_stats_init(&ring->rx_syncp); + /* assign ring to adapter */ adapter->rx_ring[rxr_idx] = ring; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0ade0cd..c1750364 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4867,6 +4867,8 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) if (!tx_ring->tx_buffer_info) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); @@ -4949,6 +4951,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) if (!rx_ring->rx_buffer_info) goto err; + u64_stats_init(&rx_ring->syncp); + /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e35bac7..cb4635c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2792,6 +2792,9 @@ static int mvneta_probe(struct platform_device *pdev) pp = netdev_priv(dev); + u64_stats_init(&pp->tx_stats.syncp); + u64_stats_init(&pp->rx_stats.syncp); + pp->weight = MVNETA_RX_POLL_WEIGHT; pp->phy_node = phy_node; pp->phy_interface = phy_mode; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index e09a8c6..339d841 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4763,6 +4763,9 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, sky2->hw = hw; sky2->msg_enable = netif_msg_init(debug, default_msg); + u64_stats_init(&sky2->tx_stats.syncp); + u64_stats_init(&sky2->rx_stats.syncp); + /* Auto speed and flow control */ sky2->flags = SKY2_FLAG_AUTO_SPEED | SKY2_FLAG_AUTO_PAUSE; if (hw->chip_id != CHIP_ID_YUKON_XL) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 5a20eaf..44626ec 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -2072,6 +2072,10 @@ static int vxge_open_vpaths(struct vxgedev *vdev) vdev->config.tx_steering_type; vpath->fifo.ndev = vdev->ndev; vpath->fifo.pdev = vdev->pdev; + + u64_stats_init(&vpath->fifo.stats.syncp); + u64_stats_init(&vpath->ring.stats.syncp); + if (vdev->config.tx_steering_type) vpath->fifo.txq = netdev_get_tx_queue(vdev->ndev, i); diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 098b96d..2d045be 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5619,6 +5619,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) spin_lock_init(&np->lock); spin_lock_init(&np->hwstats_lock); SET_NETDEV_DEV(dev, &pci_dev->dev); + u64_stats_init(&np->swstats_rx_syncp); + u64_stats_init(&np->swstats_tx_syncp); init_timer(&np->oom_kick); np->oom_kick.data = (unsigned long) dev; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 3ccedeb..c40e9848 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -791,6 +791,9 @@ static struct net_device *rtl8139_init_board(struct pci_dev *pdev) pci_set_master (pdev); + u64_stats_init(&tp->rx_stats.syncp); + u64_stats_init(&tp->tx_stats.syncp); + retry: /* PIO bar register comes first. */ bar = !use_io; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 106be47..edb2e12 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -1008,6 +1008,8 @@ static void tile_net_register(void *dev_ptr) info->egress_timer.data = (long)info; info->egress_timer.function = tile_net_handle_egress_timer; + u64_stats_init(&info->stats.syncp); + priv->cpu[my_cpu] = info; /* diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index bdf697b..dd99715 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -987,6 +987,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rp->base = ioaddr; + u64_stats_init(&rp->tx_stats.syncp); + u64_stats_init(&rp->rx_stats.syncp); + /* Get chip registers into a sane state */ rhine_power_init(dev); rhine_hw_init(dev, pioaddr); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index a3bed28..c14d39b 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -265,6 +265,7 @@ MODULE_PARM_DESC(numifbs, "Number of ifb devices"); static int __init ifb_init_one(int index) { struct net_device *dev_ifb; + struct ifb_private *dp; int err; dev_ifb = alloc_netdev(sizeof(struct ifb_private), @@ -273,6 +274,10 @@ static int __init ifb_init_one(int index) if (!dev_ifb) return -ENOMEM; + dp = netdev_priv(dev_ifb); + u64_stats_init(&dp->rsync); + u64_stats_init(&dp->tsync); + dev_ifb->rtnl_link_ops = &ifb_link_ops; err = register_netdevice(dev_ifb); if (err < 0) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a17d85a..ac24c27 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -137,10 +137,16 @@ static const struct ethtool_ops loopback_ethtool_ops = { static int loopback_dev_init(struct net_device *dev) { + int i; dev->lstats = alloc_percpu(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_lstats *lb_stats; + lb_stats = per_cpu_ptr(dev->lstats, i); + u64_stats_init(&lb_stats->syncp); + } return 0; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9bf46bd..0924e51b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -501,6 +501,7 @@ static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); const struct net_device *lowerdev = vlan->lowerdev; + int i; dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); @@ -516,6 +517,12 @@ static int macvlan_init(struct net_device *dev) if (!vlan->pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct macvlan_pcpu_stats *mvlstats; + mvlstats = per_cpu_ptr(vlan->pcpu_stats, i); + u64_stats_init(&mvlstats->syncp); + } + return 0; } diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index b57ce5f..d2bb12b 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -47,8 +47,16 @@ static int nlmon_change_mtu(struct net_device *dev, int new_mtu) static int nlmon_dev_init(struct net_device *dev) { + int i; + dev->lstats = alloc_percpu(struct pcpu_lstats); + for_each_possible_cpu(i) { + struct pcpu_lstats *nlmstats; + nlmstats = per_cpu_ptr(dev->lstats, i); + u64_stats_init(&nlmstats->syncp); + } + return dev->lstats == NULL ? -ENOMEM : 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 50e43e6..6574eb8 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1540,6 +1540,12 @@ static int team_init(struct net_device *dev) if (!team->pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct team_pcpu_stats *team_stats; + team_stats = per_cpu_ptr(team->pcpu_stats, i); + u64_stats_init(&team_stats->syncp); + } + for (i = 0; i < TEAM_PORT_HASHENTRIES; i++) INIT_HLIST_HEAD(&team->en_port_hlist[i]); INIT_LIST_HEAD(&team->port_list); diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 829a9cd..d671fc3 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -570,7 +570,7 @@ static int lb_init(struct team *team) { struct lb_priv *lb_priv = get_lb_priv(team); lb_select_tx_port_func_t *func; - int err; + int i, err; /* set default tx port selector */ func = lb_select_tx_port_get_func("hash"); @@ -588,6 +588,13 @@ static int lb_init(struct team *team) goto err_alloc_pcpu_stats; } + for_each_possible_cpu(i) { + struct lb_pcpu_stats *team_lb_stats; + team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); + u64_stats_init(&team_lb_stats->syncp); + } + + INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh); err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options)); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index eee1f19..46e83e3 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -230,10 +230,18 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu) static int veth_dev_init(struct net_device *dev) { + int i; + dev->vstats = alloc_percpu(struct pcpu_vstats); if (!dev->vstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_vstats *veth_stats; + veth_stats = per_cpu_ptr(dev->vstats, i); + u64_stats_init(&veth_stats->syncp); + } + return 0; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9fbdfcd..ee384f3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1569,6 +1569,14 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->stats == NULL) goto free; + for_each_possible_cpu(i) { + struct virtnet_stats *virtnet_stats; + virtnet_stats = per_cpu_ptr(vi->stats, i); + u64_stats_init(&virtnet_stats->tx_syncp); + u64_stats_init(&virtnet_stats->rx_syncp); + } + + vi->vq_index = alloc_percpu(int); if (vi->vq_index == NULL) goto free_stats; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2ef5b62..01ab64d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1884,11 +1884,19 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_sock *vs; + int i; dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *vxlan_stats; + vxlan_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&vxlan_stats->syncp); + } + + spin_lock(&vn->sock_lock); vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); if (vs) { diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 36808bf..54223ac 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1338,6 +1338,12 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) if (np->stats == NULL) goto exit; + for_each_possible_cpu(i) { + struct netfront_stats *xen_nf_stats; + xen_nf_stats = per_cpu_ptr(np->stats, i); + u64_stats_init(&xen_nf_stats->syncp); + } + /* Initialise tx_skbs as a free chain containing every entry. */ np->tx_skb_freelist = 0; for (i = 0; i < NET_TX_RING_SIZE; i++) { diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 8da8c4e..7bfabd2 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -67,6 +67,13 @@ struct u64_stats_sync { #endif }; + +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +# define u64_stats_init(syncp) seqcount_init(syncp.seq) +#else +# define u64_stats_init(syncp) do { } while (0) +#endif + static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09bf1c3..4deff3e 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -558,7 +558,7 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0; + int subclass = 0, i; netif_carrier_off(dev); @@ -612,6 +612,13 @@ static int vlan_dev_init(struct net_device *dev) if (!vlan_dev_priv(dev)->vlan_pcpu_stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct vlan_pcpu_stats *vlan_stat; + vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + u64_stats_init(&vlan_stat->syncp); + } + + return 0; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ca04163..7893d64 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -88,11 +88,18 @@ out: static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + int i; br->stats = alloc_percpu(struct br_cpu_netstats); if (!br->stats) return -ENOMEM; + for_each_possible_cpu(i) { + struct br_cpu_netstats *br_dev_stats; + br_dev_stats = per_cpu_ptr(br->stats, i); + u64_stats_init(&br_dev_stats->syncp); + } + return 0; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cfeb85c..5f4617e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1518,6 +1518,7 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) return -ENOMEM; + #if SNMP_ARRAY_SZ == 2 ptr[1] = __alloc_percpu(mibsize, align); if (!ptr[1]) { @@ -1561,6 +1562,8 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, sizeof(struct tcp_mib), __alignof__(struct tcp_mib)) < 0) @@ -1569,6 +1572,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet_stats; + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); + u64_stats_init(&af_inet_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); + u64_stats_init(&af_inet_stats->syncp); +#endif + } + if (snmp_mib_init((void __percpu **)net->mib.net_statistics, sizeof(struct linux_mib), __alignof__(struct linux_mib)) < 0) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 63a6d6d..caf01176 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -976,13 +976,19 @@ int ip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - int err; + int i, err; dev->destructor = ip_tunnel_dev_free; dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipt_stats; + ipt_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipt_stats->syncp); + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { free_percpu(dev->tstats); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd3fb30..d62d589 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -281,10 +281,24 @@ static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { + int i; + if (snmp_mib_init((void __percpu **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip; + + for_each_possible_cpu(i) { + struct ipstats_mib *addrconf_stats; + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + u64_stats_init(&addrconf_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); + u64_stats_init(&addrconf_stats->syncp); +#endif + } + + idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device), GFP_KERNEL); if (!idev->stats.icmpv6dev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100..a8f8559 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -719,6 +719,8 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, sizeof(struct udp_mib), __alignof__(struct udp_mib)) < 0) @@ -731,6 +733,18 @@ static int __net_init ipv6_init_mibs(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet6_stats; + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + u64_stats_init(&af_inet6_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); + u64_stats_init(&af_inet6_stats->syncp); +#endif + } + + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bf4a9a0..8acb286 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1252,6 +1252,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1269,6 +1270,13 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tunnel_stats; + ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tunnel_stats->syncp); + } + + return 0; } @@ -1449,6 +1457,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1462,6 +1471,12 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tap_stats; + ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tap_stats->syncp); + } + return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 583b77e..df1fa58 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1494,12 +1494,19 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int i; t->dev = dev; t->net = dev_net(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6_tnl_stats; + ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6_tnl_stats->syncp); + } return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1926945..365dc54 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1310,6 +1310,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1322,6 +1323,12 @@ static int ipip6_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_tunnel_stats; + ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_tunnel_stats->syncp); + } + return 0; } @@ -1331,6 +1338,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1344,6 +1352,13 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_fb_stats; + ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_fb_stats->syncp); + } + dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bd..3825725 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; - unsigned int atype; + unsigned int atype, i; EnterFunction(2); @@ -869,6 +869,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, if (!dest->stats.cpustats) goto err_alloc; + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ip_vs_dest_stats; + ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); + u64_stats_init(&ip_vs_dest_stats->syncp); + } + dest->af = svc->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; @@ -1134,7 +1140,7 @@ static int ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { - int ret = 0; + int ret = 0, i; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; @@ -1184,6 +1190,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, goto out_err; } + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ip_vs_stats; + ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); + u64_stats_init(&ip_vs_stats->syncp); + } + + /* I'm the first user of the service */ atomic_set(&svc->refcnt, 0); @@ -3780,7 +3793,7 @@ static struct notifier_block ip_vs_dst_notifier = { int __net_init ip_vs_control_net_init(struct net *net) { - int idx; + int i, idx; struct netns_ipvs *ipvs = net_ipvs(net); /* Initialize rs_table */ @@ -3799,6 +3812,12 @@ int __net_init ip_vs_control_net_init(struct net *net) if (!ipvs->tot_stats.cpustats) return -ENOMEM; + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *ipvs_tot_stats; + ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); + u64_stats_init(&ipvs_tot_stats->syncp); + } + spin_lock_init(&ipvs->tot_stats.lock); proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2aa13bd..b92553c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1698,6 +1698,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_table; } + for_each_possible_cpu(i) { + struct dp_stats_percpu *dpath_stats; + dpath_stats = per_cpu_ptr(dp->stats_percpu, i); + u64_stats_init(&dpath_stats->sync); + } + dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dp->ports) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6f65dbe..d830a95f 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -118,6 +118,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, { struct vport *vport; size_t alloc_size; + int i; alloc_size = sizeof(struct vport); if (priv_size) { @@ -141,6 +142,13 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, return ERR_PTR(-ENOMEM); } + for_each_possible_cpu(i) { + struct pcpu_tstats *vport_stats; + vport_stats = per_cpu_ptr(vport->percpu_stats, i); + u64_stats_init(&vport_stats->syncp); + } + + spin_lock_init(&vport->stats_lock); return vport; -- cgit v1.1 From 1ca7d67cf5d5a2aef26a8d9afd789006fa098347 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:59 -0700 Subject: seqcount: Add lockdep functionality to seqcount/seqlock structures Currently seqlocks and seqcounts don't support lockdep. After running across a seqcount related deadlock in the timekeeping code, I used a less-refined and more focused variant of this patch to narrow down the cause of the issue. This is a first-pass attempt to properly enable lockdep functionality on seqlocks and seqcounts. Since seqcounts are used in the vdso gettimeofday code, I've provided non-lockdep accessors for those needs. I've also handled one case where there were nested seqlock writers and there may be more edge cases. Comments and feedback would be appreciated! Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Cc: Eric Dumazet Cc: Li Zefan Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: "David S. Miller" Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/vdso/vclock_gettime.c | 8 ++--- fs/dcache.c | 4 +-- fs/fs_struct.c | 2 +- include/linux/init_task.h | 8 ++--- include/linux/lockdep.h | 8 +++-- include/linux/seqlock.h | 79 ++++++++++++++++++++++++++++++++++++++---- mm/filemap_xip.c | 2 +- 7 files changed, 90 insertions(+), 21 deletions(-) diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 72074d5..2ada505 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -178,7 +178,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts) ts->tv_nsec = 0; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->wall_time_snsec; @@ -198,7 +198,7 @@ notrace static int do_monotonic(struct timespec *ts) ts->tv_nsec = 0; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; ns = gtod->monotonic_time_snsec; @@ -214,7 +214,7 @@ notrace static int do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); ts->tv_sec = gtod->wall_time_coarse.tv_sec; ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -225,7 +225,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqcount_begin(>od->seq); + seq = read_seqcount_begin_no_lockdep(>od->seq); ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); diff --git a/fs/dcache.c b/fs/dcache.c index ae6ebb8..f750be2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2574,7 +2574,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target) dentry_lock_for_move(dentry, target); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&target->d_seq); + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ @@ -2706,7 +2706,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry_lock_for_move(anon, dentry); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&anon->d_seq); + write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); dparent = dentry->d_parent; diff --git a/fs/fs_struct.c b/fs/fs_struct.c index d8ac61d..7dca743 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -161,6 +161,6 @@ EXPORT_SYMBOL(current_umask); struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), - .seq = SEQCNT_ZERO, + .seq = SEQCNT_ZERO(init_fs.seq), .umask = 0022, }; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5cd0f09..b0ed422 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -32,10 +32,10 @@ extern struct fs_struct init_fs; #endif #ifdef CONFIG_CPUSETS -#define INIT_CPUSET_SEQ \ - .mems_allowed_seq = SEQCNT_ZERO, +#define INIT_CPUSET_SEQ(tsk) \ + .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), #else -#define INIT_CPUSET_SEQ +#define INIT_CPUSET_SEQ(tsk) #endif #define INIT_SIGNALS(sig) { \ @@ -220,7 +220,7 @@ extern struct task_group root_task_group; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ - INIT_CPUSET_SEQ \ + INIT_CPUSET_SEQ(tsk) \ INIT_VTIME(tsk) \ } diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index cfc2f11..92b1bfc 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -497,6 +497,10 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) #define rwlock_release(l, n, i) lock_release(l, n, i) +#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) +#define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define seqcount_release(l, n, i) lock_release(l, n, i) + #define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define mutex_release(l, n, i) lock_release(l, n, i) @@ -504,11 +508,11 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) -# define rwsem_release(l, n, i) lock_release(l, n, i) +#define rwsem_release(l, n, i) lock_release(l, n, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) -# define lock_map_release(l) lock_release(l, 1, _THIS_IP_) +#define lock_map_release(l) lock_release(l, 1, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 21a2093..1e8a8b6 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -34,6 +34,7 @@ #include #include +#include #include /* @@ -44,10 +45,50 @@ */ typedef struct seqcount { unsigned sequence; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } seqcount_t; -#define SEQCNT_ZERO { 0 } -#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0) +static inline void __seqcount_init(seqcount_t *s, const char *name, + struct lock_class_key *key) +{ + /* + * Make sure we are not reinitializing a held lock: + */ + lockdep_init_map(&s->dep_map, name, key, 0); + s->sequence = 0; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define SEQCOUNT_DEP_MAP_INIT(lockname) \ + .dep_map = { .name = #lockname } \ + +# define seqcount_init(s) \ + do { \ + static struct lock_class_key __key; \ + __seqcount_init((s), #s, &__key); \ + } while (0) + +static inline void seqcount_lockdep_reader_access(const seqcount_t *s) +{ + seqcount_t *l = (seqcount_t *)s; + unsigned long flags; + + local_irq_save(flags); + seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); + seqcount_release(&l->dep_map, 1, _RET_IP_); + local_irq_restore(flags); +} + +#else +# define SEQCOUNT_DEP_MAP_INIT(lockname) +# define seqcount_init(s) __seqcount_init(s, NULL, NULL) +# define seqcount_lockdep_reader_access(x) +#endif + +#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} + /** * __read_seqcount_begin - begin a seq-read critical section (without barrier) @@ -76,6 +117,22 @@ repeat: } /** + * read_seqcount_begin_no_lockdep - start seq-read critical section w/o lockdep + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * read_seqcount_begin_no_lockdep opens a read critical section of the given + * seqcount, but without any lockdep checking. Validity of the critical + * section is tested by checking read_seqcount_retry function. + */ +static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s) +{ + unsigned ret = __read_seqcount_begin(s); + smp_rmb(); + return ret; +} + +/** * read_seqcount_begin - begin a seq-read critical section * @s: pointer to seqcount_t * Returns: count to be passed to read_seqcount_retry @@ -86,9 +143,8 @@ repeat: */ static inline unsigned read_seqcount_begin(const seqcount_t *s) { - unsigned ret = __read_seqcount_begin(s); - smp_rmb(); - return ret; + seqcount_lockdep_reader_access(s); + return read_seqcount_begin_no_lockdep(s); } /** @@ -108,6 +164,8 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) static inline unsigned raw_seqcount_begin(const seqcount_t *s) { unsigned ret = ACCESS_ONCE(s->sequence); + + seqcount_lockdep_reader_access(s); smp_rmb(); return ret & ~1; } @@ -152,14 +210,21 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) * Sequence counter only version assumes that callers are using their * own mutexing. */ -static inline void write_seqcount_begin(seqcount_t *s) +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { s->sequence++; smp_wmb(); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); } static inline void write_seqcount_end(seqcount_t *s) { + seqcount_release(&s->dep_map, 1, _RET_IP_); smp_wmb(); s->sequence++; } @@ -188,7 +253,7 @@ typedef struct { */ #define __SEQLOCK_UNLOCKED(lockname) \ { \ - .seqcount = SEQCNT_ZERO, \ + .seqcount = SEQCNT_ZERO(lockname), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 28fe26b..d8d9fe3 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -26,7 +26,7 @@ * of ZERO_PAGE(), such as /dev/zero */ static DEFINE_MUTEX(xip_sparse_mutex); -static seqcount_t xip_sparse_seq = SEQCNT_ZERO; +static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq); static struct page *__xip_sparse_page; /* called under xip_sparse_mutex */ -- cgit v1.1 From db751fe3ea6880ff5ac5abe60cb7b80deb5a4140 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:52:00 -0700 Subject: cpuset: Fix potential deadlock w/ set_mems_allowed After adding lockdep support to seqlock/seqcount structures, I started seeing the following warning: [ 1.070907] ====================================================== [ 1.072015] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] [ 1.073181] 3.11.0+ #67 Not tainted [ 1.073801] ------------------------------------------------------ [ 1.074882] kworker/u4:2/708 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 1.076088] (&p->mems_allowed_seq){+.+...}, at: [] new_slab+0x5f/0x280 [ 1.077572] [ 1.077572] and this task is already holding: [ 1.078593] (&(&q->__queue_lock)->rlock){..-...}, at: [] blk_execute_rq_nowait+0x53/0xf0 [ 1.080042] which would create a new lock dependency: [ 1.080042] (&(&q->__queue_lock)->rlock){..-...} -> (&p->mems_allowed_seq){+.+...} [ 1.080042] [ 1.080042] but this new dependency connects a SOFTIRQ-irq-safe lock: [ 1.080042] (&(&q->__queue_lock)->rlock){..-...} [ 1.080042] ... which became SOFTIRQ-irq-safe at: [ 1.080042] [] __lock_acquire+0x5b9/0x1db0 [ 1.080042] [] lock_acquire+0x95/0x130 [ 1.080042] [] _raw_spin_lock+0x41/0x80 [ 1.080042] [] scsi_device_unbusy+0x7e/0xd0 [ 1.080042] [] scsi_finish_command+0x32/0xf0 [ 1.080042] [] scsi_softirq_done+0xa1/0x130 [ 1.080042] [] blk_done_softirq+0x73/0x90 [ 1.080042] [] __do_softirq+0x110/0x2f0 [ 1.080042] [] run_ksoftirqd+0x2d/0x60 [ 1.080042] [] smpboot_thread_fn+0x156/0x1e0 [ 1.080042] [] kthread+0xd6/0xe0 [ 1.080042] [] ret_from_fork+0x7c/0xb0 [ 1.080042] [ 1.080042] to a SOFTIRQ-irq-unsafe lock: [ 1.080042] (&p->mems_allowed_seq){+.+...} [ 1.080042] ... which became SOFTIRQ-irq-unsafe at: [ 1.080042] ... [] __lock_acquire+0x613/0x1db0 [ 1.080042] [] lock_acquire+0x95/0x130 [ 1.080042] [] kthreadd+0x82/0x180 [ 1.080042] [] ret_from_fork+0x7c/0xb0 [ 1.080042] [ 1.080042] other info that might help us debug this: [ 1.080042] [ 1.080042] Possible interrupt unsafe locking scenario: [ 1.080042] [ 1.080042] CPU0 CPU1 [ 1.080042] ---- ---- [ 1.080042] lock(&p->mems_allowed_seq); [ 1.080042] local_irq_disable(); [ 1.080042] lock(&(&q->__queue_lock)->rlock); [ 1.080042] lock(&p->mems_allowed_seq); [ 1.080042] [ 1.080042] lock(&(&q->__queue_lock)->rlock); [ 1.080042] [ 1.080042] *** DEADLOCK *** The issue stems from the kthreadd() function calling set_mems_allowed with irqs enabled. While its possibly unlikely for the actual deadlock to trigger, a fix is fairly simple: disable irqs before taking the mems_allowed_seq lock. Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Acked-by: Li Zefan Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: "David S. Miller" Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index cc1b01c..3fe661f 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -110,10 +110,14 @@ static inline bool put_mems_allowed(unsigned int seq) static inline void set_mems_allowed(nodemask_t nodemask) { + unsigned long flags; + task_lock(current); + local_irq_save(flags); write_seqcount_begin(¤t->mems_allowed_seq); current->mems_allowed = nodemask; write_seqcount_end(¤t->mems_allowed_seq); + local_irq_restore(flags); task_unlock(current); } -- cgit v1.1 From 5ac68e7c34a4797aa4ca9615e5a6603bda1abe9b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:52:01 -0700 Subject: ipv6: Fix possible ipv6 seqlock deadlock While enabling lockdep on seqlocks, I ran across the warning below caused by the ipv6 stats being updated in both irq and non-irq context. This patch changes from IP6_INC_STATS_BH to IP6_INC_STATS (suggested by Eric Dumazet) to resolve this problem. [ 11.120383] ================================= [ 11.121024] [ INFO: inconsistent lock state ] [ 11.121663] 3.12.0-rc1+ #68 Not tainted [ 11.122229] --------------------------------- [ 11.122867] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 11.123741] init/4483 [HC0[0]:SC1[3]:HE1:SE0] takes: [ 11.124505] (&stats->syncp.seq#6){+.?...}, at: [] ndisc_send_ns+0xe2/0x130 [ 11.125736] {SOFTIRQ-ON-W} state was registered at: [ 11.126447] [] __lock_acquire+0x5c7/0x1af0 [ 11.127222] [] lock_acquire+0x96/0xd0 [ 11.127925] [] write_seqcount_begin+0x33/0x40 [ 11.128766] [] ip6_dst_lookup_tail+0x3a3/0x460 [ 11.129582] [] ip6_dst_lookup_flow+0x2e/0x80 [ 11.130014] [] ip6_datagram_connect+0x150/0x4e0 [ 11.130014] [] inet_dgram_connect+0x25/0x70 [ 11.130014] [] SYSC_connect+0xa1/0xc0 [ 11.130014] [] SyS_connect+0x11/0x20 [ 11.130014] [] SyS_socketcall+0x12b/0x300 [ 11.130014] [] syscall_call+0x7/0xb [ 11.130014] irq event stamp: 1184 [ 11.130014] hardirqs last enabled at (1184): [] local_bh_enable+0x71/0x110 [ 11.130014] hardirqs last disabled at (1183): [] local_bh_enable+0x3d/0x110 [ 11.130014] softirqs last enabled at (0): [] copy_process.part.42+0x45d/0x11a0 [ 11.130014] softirqs last disabled at (1147): [] irq_exit+0xa5/0xb0 [ 11.130014] [ 11.130014] other info that might help us debug this: [ 11.130014] Possible unsafe locking scenario: [ 11.130014] [ 11.130014] CPU0 [ 11.130014] ---- [ 11.130014] lock(&stats->syncp.seq#6); [ 11.130014] [ 11.130014] lock(&stats->syncp.seq#6); [ 11.130014] [ 11.130014] *** DEADLOCK *** [ 11.130014] [ 11.130014] 3 locks held by init/4483: [ 11.130014] #0: (rcu_read_lock){.+.+..}, at: [] SyS_setpriority+0x4c/0x620 [ 11.130014] #1: (((&ifa->dad_timer))){+.-...}, at: [] call_timer_fn+0x0/0xf0 [ 11.130014] #2: (rcu_read_lock){.+.+..}, at: [] ndisc_send_skb+0x54/0x5d0 [ 11.130014] [ 11.130014] stack backtrace: [ 11.130014] CPU: 0 PID: 4483 Comm: init Not tainted 3.12.0-rc1+ #68 [ 11.130014] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 11.130014] 00000000 00000000 c55e5c10 c1bb0e71 c57128b0 c55e5c4c c1badf79 c1ec1123 [ 11.130014] c1ec1484 00001183 00000000 00000000 00000001 00000003 00000001 00000000 [ 11.130014] c1ec1484 00000004 c5712dcc 00000000 c55e5c84 c10de492 00000004 c10755f2 [ 11.130014] Call Trace: [ 11.130014] [] dump_stack+0x4b/0x66 [ 11.130014] [] print_usage_bug+0x1d3/0x1dd [ 11.130014] [] mark_lock+0x282/0x2f0 [ 11.130014] [] ? kvm_clock_read+0x22/0x30 [ 11.130014] [] ? check_usage_backwards+0x150/0x150 [ 11.130014] [] __lock_acquire+0x584/0x1af0 [ 11.130014] [] ? sched_clock_cpu+0xef/0x190 [ 11.130014] [] ? mark_held_locks+0x8c/0xf0 [ 11.130014] [] lock_acquire+0x96/0xd0 [ 11.130014] [] ? ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ndisc_send_skb+0x293/0x5d0 [ 11.130014] [] ? ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ? mod_timer+0xf2/0x160 [ 11.130014] [] ? addrconf_dad_timer+0xce/0x150 [ 11.130014] [] addrconf_dad_timer+0x10a/0x150 [ 11.130014] [] ? addrconf_dad_completed+0x1c0/0x1c0 [ 11.130014] [] call_timer_fn+0x73/0xf0 [ 11.130014] [] ? __internal_add_timer+0xb0/0xb0 [ 11.130014] [] ? addrconf_dad_completed+0x1c0/0x1c0 [ 11.130014] [] run_timer_softirq+0x141/0x1e0 [ 11.130014] [] ? __do_softirq+0x70/0x1b0 [ 11.130014] [] __do_softirq+0xc0/0x1b0 [ 11.130014] [] irq_exit+0xa5/0xb0 [ 11.130014] [] smp_apic_timer_interrupt+0x35/0x50 [ 11.130014] [] apic_timer_interrupt+0x32/0x38 [ 11.130014] [] ? SyS_setpriority+0xfd/0x620 [ 11.130014] [] ? lock_release+0x9/0x240 [ 11.130014] [] ? SyS_setpriority+0xe7/0x620 [ 11.130014] [] ? _raw_read_unlock+0x1d/0x30 [ 11.130014] [] SyS_setpriority+0x111/0x620 [ 11.130014] [] ? SyS_setpriority+0x4c/0x620 [ 11.130014] [] syscall_call+0x7/0xb Signed-off-by: John Stultz Acked-by: Eric Dumazet Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Mathieu Desnoyers Cc: Patrick McHardy Cc: Steven Rostedt Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-5-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 91fb4e8..ad138b1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -909,7 +909,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; -- cgit v1.1 From 67a6de49bf545c34eb8dee99abbb92d9ea268200 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Nov 2013 08:26:39 +0100 Subject: locking/doc: Update references to kernel/mutex.c Fix this docbook error: >> docproc: kernel/mutex.c: No such file or directory by updating the stale references to kernel/mutex.c. Reported-by: fengguang.wu@intel.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-34pikw1tlsskj65rrt5iusrq@git.kernel.org Signed-off-by: Ingo Molnar --- Documentation/DocBook/kernel-locking.tmpl | 2 +- Documentation/mutex-design.txt | 10 +++++----- include/linux/mutex.h | 2 +- kernel/locking/mutex.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 09e884e..19f2a5a 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1958,7 +1958,7 @@ machines due to caching. Mutex API reference !Iinclude/linux/mutex.h -!Ekernel/mutex.c +!Ekernel/locking/mutex.c diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index 38c10fd..1dfe62c 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -116,11 +116,11 @@ using mutexes at the moment, please let me know if you find any. ] Implementation of mutexes ------------------------- -'struct mutex' is the new mutex type, defined in include/linux/mutex.h -and implemented in kernel/mutex.c. It is a counter-based mutex with a -spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", -0 for "locked" and negative numbers (usually -1) for "locked, potential -waiters queued". +'struct mutex' is the new mutex type, defined in include/linux/mutex.h and +implemented in kernel/locking/mutex.c. It is a counter-based mutex with a +spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for +"locked" and negative numbers (usually -1) for "locked, potential waiters +queued". the APIs of 'struct mutex' have been streamlined: diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bab49da..d318193 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -131,7 +131,7 @@ static inline int mutex_is_locked(struct mutex *lock) } /* - * See kernel/mutex.c for detailed documentation of these APIs. + * See kernel/locking/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d24105b..4dd6e4c 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1,5 +1,5 @@ /* - * kernel/mutex.c + * kernel/locking/mutex.c * * Mutexes: blocking mutual exclusion locks * -- cgit v1.1 From 838cc7b488f89ee642fd8336e8e1b620c8c3ece2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Nov 2013 17:42:30 +0100 Subject: lockdep/proc: Fix lock-time avg computation > kernel/locking/lockdep_proc.c: In function 'seq_lock_time': > >> kernel/locking/lockdep_proc.c:424:23: warning: comparison of distinct pointer types lacks a cast [enabled by default] > > 418 static void seq_lock_time(struct seq_file *m, struct lock_time *lt) > 419 { > 420 seq_printf(m, "%14lu", lt->nr); > 421 seq_time(m, lt->min); > 422 seq_time(m, lt->max); > 423 seq_time(m, lt->total); > > 424 seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); > 425 } My compiler refuses to actually say that; but it looks wrong in that do_div() returns the remainder, not the divisor. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Peter Zijlstra Cc: Davidlohr Bueso Link: http://lkml.kernel.org/r/20131106164230.GE16117@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/locking/lockdep_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 0922065..ef43ac4 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -421,7 +421,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt) seq_time(m, lt->min); seq_time(m, lt->max); seq_time(m, lt->total); - seq_time(m, lt->nr ? do_div(lt->total, lt->nr) : 0); + seq_time(m, lt->nr ? div_s64(lt->total, lt->nr) : 0); } static void seq_stats(struct seq_file *m, struct lock_stat_data *data) -- cgit v1.1 From 5216d530bbd8581c927b250a11533c2a31b57510 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Sat, 9 Nov 2013 00:55:35 +0800 Subject: locking/lockdep: Mark __lockdep_count_forward_deps() as static There are new Sparse warnings: >> kernel/locking/lockdep.c:1235:15: sparse: symbol '__lockdep_count_forward_deps' was not declared. Should it be static? >> kernel/locking/lockdep.c:1261:15: sparse: symbol '__lockdep_count_backward_deps' was not declared. Should it be static? Please consider folding the attached diff :-) Signed-off-by: Fengguang Wu Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/527d1787.ThzXGoUspZWehFDl\%fengguang.wu@intel.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4e8e14c..576ba75 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1232,7 +1232,7 @@ static int noop_count(struct lock_list *entry, void *data) return 0; } -unsigned long __lockdep_count_forward_deps(struct lock_list *this) +static unsigned long __lockdep_count_forward_deps(struct lock_list *this) { unsigned long count = 0; struct lock_list *uninitialized_var(target_entry); @@ -1258,7 +1258,7 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) return ret; } -unsigned long __lockdep_count_backward_deps(struct lock_list *this) +static unsigned long __lockdep_count_backward_deps(struct lock_list *this) { unsigned long count = 0; struct lock_list *uninitialized_var(target_entry); -- cgit v1.1 From 90d3839b90fe379557dae4a44735a6af78f42885 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Nov 2013 19:42:14 -0800 Subject: block: Use u64_stats_init() to initialize seqcounts Now that seqcounts are lockdep enabled objects, we need to explicitly initialize runtime allocated seqcounts so that lockdep can track them. Without this patch, Fengguang was seeing: [ 4.127282] INFO: trying to register non-static key. [ 4.128027] the code is fine but needs lockdep annotation. [ 4.128027] turning off the locking correctness validator. [ 4.128027] CPU: 0 PID: 96 Comm: kworker/u4:1 Not tainted 3.12.0-next-20131108-10601-gbad570d #2 [ 4.128027] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ ... ] [ 4.128027] Call Trace: [ 4.128027] [<7908e744>] ? console_unlock+0x353/0x380 [ 4.128027] [<79dc7cf2>] dump_stack+0x48/0x60 [ 4.128027] [<7908953e>] __lock_acquire.isra.26+0x7e3/0xceb [ 4.128027] [<7908a1c5>] lock_acquire+0x71/0x9a [ 4.128027] [<794079aa>] ? blk_throtl_bio+0x1c3/0x485 [ 4.128027] [<7940658b>] throtl_update_dispatch_stats+0x7c/0x153 [ 4.128027] [<794079aa>] ? blk_throtl_bio+0x1c3/0x485 [ 4.128027] [<794079aa>] blk_throtl_bio+0x1c3/0x485 ... Use u64_stats_init() for all affected data structures, which initializes the seqcount. Reported-and-Tested-by: Fengguang Wu Cc: Vivek Goyal Cc: Jens Axboe Signed-off-by: Peter Zijlstra [ Folded in another fix from the mailing list as well as a fix to that fix. Tweaked commit message. ] Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1384314134-6895-1-git-send-email-john.stultz@linaro.org [ So I actually think that the two SOBs from PeterZ are the right depiction of the patch route. ] Signed-off-by: Ingo Molnar --- block/blk-cgroup.h | 10 ++++++++++ block/blk-throttle.c | 10 ++++++++++ block/cfq-iosched.c | 25 +++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index ae6969a..1610b22 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -402,6 +402,11 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl, #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) +static inline void blkg_stat_init(struct blkg_stat *stat) +{ + u64_stats_init(&stat->syncp); +} + /** * blkg_stat_add - add a value to a blkg_stat * @stat: target blkg_stat @@ -458,6 +463,11 @@ static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) blkg_stat_add(to, blkg_stat_read(from)); } +static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) +{ + u64_stats_init(&rwstat->syncp); +} + /** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 8331aba..0653404 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -256,6 +256,12 @@ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq) } \ } while (0) +static void tg_stats_init(struct tg_stats_cpu *tg_stats) +{ + blkg_rwstat_init(&tg_stats->service_bytes); + blkg_rwstat_init(&tg_stats->serviced); +} + /* * Worker for allocating per cpu stat for tgs. This is scheduled on the * system_wq once there are some groups on the alloc_list waiting for @@ -269,12 +275,16 @@ static void tg_stats_alloc_fn(struct work_struct *work) alloc_stats: if (!stats_cpu) { + int cpu; + stats_cpu = alloc_percpu(struct tg_stats_cpu); if (!stats_cpu) { /* allocation failed, try again after some time */ schedule_delayed_work(dwork, msecs_to_jiffies(10)); return; } + for_each_possible_cpu(cpu) + tg_stats_init(per_cpu_ptr(stats_cpu, cpu)); } spin_lock_irq(&tg_stats_alloc_lock); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 434944c..4d5cec1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1508,6 +1508,29 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) } #ifdef CONFIG_CFQ_GROUP_IOSCHED +static void cfqg_stats_init(struct cfqg_stats *stats) +{ + blkg_rwstat_init(&stats->service_bytes); + blkg_rwstat_init(&stats->serviced); + blkg_rwstat_init(&stats->merged); + blkg_rwstat_init(&stats->service_time); + blkg_rwstat_init(&stats->wait_time); + blkg_rwstat_init(&stats->queued); + + blkg_stat_init(&stats->sectors); + blkg_stat_init(&stats->time); + +#ifdef CONFIG_DEBUG_BLK_CGROUP + blkg_stat_init(&stats->unaccounted_time); + blkg_stat_init(&stats->avg_queue_size_sum); + blkg_stat_init(&stats->avg_queue_size_samples); + blkg_stat_init(&stats->dequeue); + blkg_stat_init(&stats->group_wait_time); + blkg_stat_init(&stats->idle_time); + blkg_stat_init(&stats->empty_time); +#endif +} + static void cfq_pd_init(struct blkcg_gq *blkg) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); @@ -1515,6 +1538,8 @@ static void cfq_pd_init(struct blkcg_gq *blkg) cfq_init_cfqg_base(cfqg); cfqg->weight = blkg->blkcg->cfq_weight; cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; + cfqg_stats_init(&cfqg->stats); + cfqg_stats_init(&cfqg->dead_stats); } static void cfq_pd_offline(struct blkcg_gq *blkg) -- cgit v1.1