From bb34d92f643086d546b49cef680f6f305ed84414 Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang, v2 This is the second resubmission of the posix timer rework patch, posted a few days ago. This includes the changes from the previous resubmittion, which addressed Oleg Nesterov's comments, removing the RCU stuff from the patch and un-inlining the thread_group_cputime() function for SMP. In addition, per Ingo Molnar it simplifies the UP code, consolidating much of it with the SMP version and depending on lower-level SMP/UP handling to take care of the differences. It also cleans up some UP compile errors, moves the scheduler stats-related macros into kernel/sched_stats.h, cleans up a merge error in kernel/fork.c and has a few other minor fixes and cleanups as suggested by Oleg and Ingo. Thanks for the review, guys. Signed-off-by: Frank Mayhar Cc: Roland McGrath Cc: Alexey Dobriyan Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/sched_stats.h | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) (limited to 'kernel/sched_stats.h') diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 8385d43..d6903bd 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -270,3 +270,139 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) #define sched_info_switch(t, next) do { } while (0) #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ +/* + * The following are functions that support scheduler-internal time accounting. + * These functions are generally called at the timer tick. None of this depends + * on CONFIG_SCHEDSTATS. + */ + +#ifdef CONFIG_SMP + +/** + * thread_group_cputime_account_user - Maintain utime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the utime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. + */ +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->utime = cputime_add(times->utime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_system - Maintain stime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the stime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->stime = cputime_add(times->stime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a + * thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of that structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->sum_exec_runtime += ns; + put_cpu_no_resched(); + } +} + +#else /* CONFIG_SMP */ + +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); +} + +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); +} + +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + tgtimes->totals->sum_exec_runtime += ns; +} + +#endif /* CONFIG_SMP */ + +/* + * These are the generic time-accounting routines that use the above + * functions. They are the functions actually called by the scheduler. + */ +static inline void account_group_user_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_user(&sig->cputime, cputime); +} + +static inline void account_group_system_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_system(&sig->cputime, cputime); +} + +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_exec_runtime(&sig->cputime, ns); +} -- cgit v1.1 From 7086efe1c1536f6bc160e7d60a9bfd645b91f279 Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang, v3 - fix UP lockup - another set of UP/SMP cleanups and simplifications Signed-off-by: Frank Mayhar Signed-off-by: Ingo Molnar --- kernel/sched_stats.h | 126 ++++++++++++++++----------------------------------- 1 file changed, 38 insertions(+), 88 deletions(-) (limited to 'kernel/sched_stats.h') diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index d6903bd..b8c1569 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -276,133 +276,83 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) * on CONFIG_SCHEDSTATS. */ -#ifdef CONFIG_SMP - /** - * thread_group_cputime_account_user - Maintain utime for a thread group. + * account_group_user_time - Maintain utime for a thread group. * - * @tgtimes: Pointer to thread_group_cputime structure. - * @cputime: Time value by which to increment the utime field of that - * structure. + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the utime field of the + * thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the utime field there. */ -static inline void thread_group_cputime_account_user( - struct thread_group_cputime *tgtimes, - cputime_t cputime) +static inline void account_group_user_time(struct task_struct *tsk, + cputime_t cputime) { - if (tgtimes->totals) { + struct signal_struct *sig; + + sig = tsk->signal; + if (unlikely(!sig)) + return; + if (sig->cputime.totals) { struct task_cputime *times; - times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times = per_cpu_ptr(sig->cputime.totals, get_cpu()); times->utime = cputime_add(times->utime, cputime); put_cpu_no_resched(); } } /** - * thread_group_cputime_account_system - Maintain stime for a thread group. + * account_group_system_time - Maintain stime for a thread group. * - * @tgtimes: Pointer to thread_group_cputime structure. - * @cputime: Time value by which to increment the stime field of that - * structure. + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the stime field of the + * thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the stime field there. */ -static inline void thread_group_cputime_account_system( - struct thread_group_cputime *tgtimes, - cputime_t cputime) +static inline void account_group_system_time(struct task_struct *tsk, + cputime_t cputime) { - if (tgtimes->totals) { + struct signal_struct *sig; + + sig = tsk->signal; + if (unlikely(!sig)) + return; + if (sig->cputime.totals) { struct task_cputime *times; - times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times = per_cpu_ptr(sig->cputime.totals, get_cpu()); times->stime = cputime_add(times->stime, cputime); put_cpu_no_resched(); } } /** - * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a - * thread group. + * account_group_exec_runtime - Maintain exec runtime for a thread group. * - * @tgtimes: Pointer to thread_group_cputime structure. + * @tsk: Pointer to task structure. * @ns: Time value by which to increment the sum_exec_runtime field - * of that structure. + * of the thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the sum_exec_runtime field there. */ -static inline void thread_group_cputime_account_exec_runtime( - struct thread_group_cputime *tgtimes, - unsigned long long ns) +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) { - if (tgtimes->totals) { + struct signal_struct *sig; + + sig = tsk->signal; + if (unlikely(!sig)) + return; + if (sig->cputime.totals) { struct task_cputime *times; - times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times = per_cpu_ptr(sig->cputime.totals, get_cpu()); times->sum_exec_runtime += ns; put_cpu_no_resched(); } } - -#else /* CONFIG_SMP */ - -static inline void thread_group_cputime_account_user( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); -} - -static inline void thread_group_cputime_account_system( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); -} - -static inline void thread_group_cputime_account_exec_runtime( - struct thread_group_cputime *tgtimes, - unsigned long long ns) -{ - tgtimes->totals->sum_exec_runtime += ns; -} - -#endif /* CONFIG_SMP */ - -/* - * These are the generic time-accounting routines that use the above - * functions. They are the functions actually called by the scheduler. - */ -static inline void account_group_user_time(struct task_struct *tsk, - cputime_t cputime) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_user(&sig->cputime, cputime); -} - -static inline void account_group_system_time(struct task_struct *tsk, - cputime_t cputime) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_system(&sig->cputime, cputime); -} - -static inline void account_group_exec_runtime(struct task_struct *tsk, - unsigned long long ns) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_exec_runtime(&sig->cputime, ns); -} -- cgit v1.1 From c851c8676bd7ae456e9b3af8e6bb2c434eddcc75 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 17 Oct 2008 18:17:46 +0800 Subject: sched: fix the wrong mask_len If NR_CPUS isn't a multiple of 32, we get a truncated string of sched domains by catting /proc/schedstat. This is caused by the wrong mask_len. This patch fixes it. Signed-off-by: Miao Xie Cc: Signed-off-by: Ingo Molnar --- kernel/sched_stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_stats.h') diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 8385d43..81365b3 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -9,7 +9,7 @@ static int show_schedstat(struct seq_file *seq, void *v) { int cpu; - int mask_len = NR_CPUS/32 * 9; + int mask_len = (NR_CPUS/32 + 1) * 9; char *mask_str = kmalloc(mask_len, GFP_KERNEL); if (mask_str == NULL) -- cgit v1.1 From b968905292eaa52b25abb7b3e6c0841dac9f03ae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Oct 2008 12:58:19 +0200 Subject: sched: fix the wrong mask_len, cleanup Clean up the division in show_schedstat(). Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_stats.h') diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 81365b3..6757925 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -9,7 +9,7 @@ static int show_schedstat(struct seq_file *seq, void *v) { int cpu; - int mask_len = (NR_CPUS/32 + 1) * 9; + int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; char *mask_str = kmalloc(mask_len, GFP_KERNEL); if (mask_str == NULL) -- cgit v1.1 From b5aadf7f14c1acc94956aa257e018e9de3881f41 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Oct 2008 13:23:43 +0400 Subject: proc: move /proc/schedstat boilerplate to kernel/sched_stats.h Signed-off-by: Alexey Dobriyan --- kernel/sched_stats.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/sched_stats.h') diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index b8c1569..3d14ce2 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -90,13 +90,20 @@ static int schedstat_open(struct inode *inode, struct file *file) return res; } -const struct file_operations proc_schedstat_operations = { +static const struct file_operations proc_schedstat_operations = { .open = schedstat_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; +static int __init proc_schedstat_init(void) +{ + proc_create("schedstat", 0, NULL, &proc_schedstat_operations); + return 0; +} +module_init(proc_schedstat_init); + /* * Expects runqueue lock to be held for atomicity of update */ -- cgit v1.1 From ad133ba3dc283300e5b62b5b7211d2f39fbf6ee7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Nov 2008 15:39:47 +0100 Subject: sched, signals: fix the racy usage of ->signal in account_group_xxx/run_posix_cpu_timers Impact: fix potential NULL dereference Contrary to ad474caca3e2a0550b7ce0706527ad5ab389a4d4 changelog, other acct_group_xxx() helpers can be called after exit_notify() by timer tick. Thanks to Roland for pointing out this. Somehow I missed this simple fact when I read the original patch, and I am afraid I confused Frank during the discussion. Sorry. Fortunately, these helpers work with current, we can check ->exit_state to ensure that ->signal can't go away under us. Also, add the comment and compiler barrier to account_group_exec_runtime(), to make sure we load ->signal only once. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar --- kernel/sched_stats.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'kernel/sched_stats.h') diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index ee71bec..7dbf72a 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -298,9 +298,11 @@ static inline void account_group_user_time(struct task_struct *tsk, { struct signal_struct *sig; - sig = tsk->signal; - if (unlikely(!sig)) + /* tsk == current, ensure it is safe to use ->signal */ + if (unlikely(tsk->exit_state)) return; + + sig = tsk->signal; if (sig->cputime.totals) { struct task_cputime *times; @@ -325,9 +327,11 @@ static inline void account_group_system_time(struct task_struct *tsk, { struct signal_struct *sig; - sig = tsk->signal; - if (unlikely(!sig)) + /* tsk == current, ensure it is safe to use ->signal */ + if (unlikely(tsk->exit_state)) return; + + sig = tsk->signal; if (sig->cputime.totals) { struct task_cputime *times; @@ -353,8 +357,11 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, struct signal_struct *sig; sig = tsk->signal; + /* see __exit_signal()->task_rq_unlock_wait() */ + barrier(); if (unlikely(!sig)) return; + if (sig->cputime.totals) { struct task_cputime *times; -- cgit v1.1