From c9d557c19f94df42db78d4a5de4d25feee694bad Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Jan 2009 14:33:30 -0800 Subject: rcu: fix bug in rcutorture system-shutdown code This patch fixes an rcutorture bug found by Eric Sesterhenn that resulted in oopses in response to "rmmod rcutorture". The problem was in some new code that attempted to handle the case where a system is shut down while rcutorture is still running, for example, when rcutorture is built into the kernel so that it cannot be removed. The fix causes the rcutorture threads to "park" in an schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT) rather than trying to get them to terminate cleanly. Concurrent shutdown and rmmod is illegal. I believe that this is 2.6.29 material, as it is used in some testing setups. For reference, here are the rcutorture operating modes: CONFIG_RCU_TORTURE_TEST=m This is the normal rcutorture build. Use "modprobe rcutorture" (with optional arguments) to start, and "rmmod rcutorture" to stop. If you shut the system down without doing the rmmod, you should see console output like: rcutorture thread rcu_torture_writer parking due to system shutdown One for each rcutorture kthread. CONFIG_RCU_TORTURE_TEST=y CONFIG_RCU_TORTURE_TEST_RUNNABLE=n Use this if you want rcutorture built in, but don't want the test to start running during early boot. To start the torturing: echo 1 > /proc/sys/kernel/rcutorture_runnable To stop the torturing, s/1/0/ You will get "parking" console messages as noted above when you shut the system down. CONFIG_RCU_TORTURE_TEST=y CONFIG_RCU_TORTURE_TEST_RUNNABLE=y Same as above, except that the torturing starts during early boot. Only for the stout of heart and strong of stomach. The same /proc entry noted above may be used to control the test. Located-by: Eric Sesterhenn Tested-by: Eric Sesterhenn Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/rcutorture.c | 113 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 45 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 1cff28d..7c4142a 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -136,29 +136,47 @@ static int stutter_pause_test = 0; #endif int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; -#define FULLSTOP_SHUTDOWN 1 /* Bail due to system shutdown/panic. */ -#define FULLSTOP_CLEANUP 2 /* Orderly shutdown. */ -static int fullstop; /* stop generating callbacks at test end. */ -DEFINE_MUTEX(fullstop_mutex); /* protect fullstop transitions and */ - /* spawning of kthreads. */ +/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ + +#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ +#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ +#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ +static int fullstop = FULLSTOP_RMMOD; +DEFINE_MUTEX(fullstop_mutex); /* Protect fullstop transitions and spawning */ + /* of kthreads. */ /* - * Detect and respond to a signal-based shutdown. + * Detect and respond to a system shutdown. */ static int rcutorture_shutdown_notify(struct notifier_block *unused1, unsigned long unused2, void *unused3) { - if (fullstop) - return NOTIFY_DONE; mutex_lock(&fullstop_mutex); - if (!fullstop) + if (fullstop == FULLSTOP_DONTSTOP) fullstop = FULLSTOP_SHUTDOWN; + else + printk(KERN_WARNING /* but going down anyway, so... */ + "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); return NOTIFY_DONE; } /* + * Absorb kthreads into a kernel function that won't return, so that + * they won't ever access module text or data again. + */ +static void rcutorture_shutdown_absorb(char *title) +{ + if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { + printk(KERN_NOTICE + "rcutorture thread %s parking due to system shutdown\n", + title); + schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); + } +} + +/* * Allocate an element from the rcu_tortures pool. */ static struct rcu_torture * @@ -219,13 +237,14 @@ rcu_random(struct rcu_random_state *rrsp) } static void -rcu_stutter_wait(void) +rcu_stutter_wait(char *title) { - while ((stutter_pause_test || !rcutorture_runnable) && !fullstop) { + while (stutter_pause_test || !rcutorture_runnable) { if (rcutorture_runnable) schedule_timeout_interruptible(1); else schedule_timeout_interruptible(round_jiffies_relative(HZ)); + rcutorture_shutdown_absorb(title); } } @@ -287,7 +306,7 @@ rcu_torture_cb(struct rcu_head *p) int i; struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); - if (fullstop) { + if (fullstop != FULLSTOP_DONTSTOP) { /* Test is ending, just drop callbacks on the floor. */ /* The next initialization will pick up the pieces. */ return; @@ -619,10 +638,11 @@ rcu_torture_writer(void *arg) } rcu_torture_current_version++; oldbatch = cur_ops->completed(); - rcu_stutter_wait(); - } while (!kthread_should_stop() && !fullstop); + rcu_stutter_wait("rcu_torture_writer"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); - while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) + rcutorture_shutdown_absorb("rcu_torture_writer"); + while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); return 0; } @@ -643,11 +663,12 @@ rcu_torture_fakewriter(void *arg) schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); udelay(rcu_random(&rand) & 0x3ff); cur_ops->sync(); - rcu_stutter_wait(); - } while (!kthread_should_stop() && !fullstop); + rcu_stutter_wait("rcu_torture_fakewriter"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); - while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) + rcutorture_shutdown_absorb("rcu_torture_fakewriter"); + while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); return 0; } @@ -752,12 +773,13 @@ rcu_torture_reader(void *arg) preempt_enable(); cur_ops->readunlock(idx); schedule(); - rcu_stutter_wait(); - } while (!kthread_should_stop() && !fullstop); + rcu_stutter_wait("rcu_torture_reader"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); + rcutorture_shutdown_absorb("rcu_torture_reader"); if (irqreader && cur_ops->irqcapable) del_timer_sync(&t); - while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) + while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); return 0; } @@ -854,7 +876,8 @@ rcu_torture_stats(void *arg) do { schedule_timeout_interruptible(stat_interval * HZ); rcu_torture_stats_print(); - } while (!kthread_should_stop() && !fullstop); + rcutorture_shutdown_absorb("rcu_torture_stats"); + } while (!kthread_should_stop()); VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); return 0; } @@ -866,52 +889,49 @@ static int rcu_idle_cpu; /* Force all torture tasks off this CPU */ */ static void rcu_torture_shuffle_tasks(void) { - cpumask_var_t tmp_mask; + cpumask_t tmp_mask; int i; - if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) - BUG(); - - cpumask_setall(tmp_mask); + cpus_setall(tmp_mask); get_online_cpus(); /* No point in shuffling if there is only one online CPU (ex: UP) */ - if (num_online_cpus() == 1) - goto out; + if (num_online_cpus() == 1) { + put_online_cpus(); + return; + } if (rcu_idle_cpu != -1) - cpumask_clear_cpu(rcu_idle_cpu, tmp_mask); + cpu_clear(rcu_idle_cpu, tmp_mask); - set_cpus_allowed_ptr(current, tmp_mask); + set_cpus_allowed_ptr(current, &tmp_mask); if (reader_tasks) { for (i = 0; i < nrealreaders; i++) if (reader_tasks[i]) set_cpus_allowed_ptr(reader_tasks[i], - tmp_mask); + &tmp_mask); } if (fakewriter_tasks) { for (i = 0; i < nfakewriters; i++) if (fakewriter_tasks[i]) set_cpus_allowed_ptr(fakewriter_tasks[i], - tmp_mask); + &tmp_mask); } if (writer_task) - set_cpus_allowed_ptr(writer_task, tmp_mask); + set_cpus_allowed_ptr(writer_task, &tmp_mask); if (stats_task) - set_cpus_allowed_ptr(stats_task, tmp_mask); + set_cpus_allowed_ptr(stats_task, &tmp_mask); if (rcu_idle_cpu == -1) rcu_idle_cpu = num_online_cpus() - 1; else rcu_idle_cpu--; -out: put_online_cpus(); - free_cpumask_var(tmp_mask); } /* Shuffle tasks across CPUs, with the intent of allowing each CPU in the @@ -925,7 +945,8 @@ rcu_torture_shuffle(void *arg) do { schedule_timeout_interruptible(shuffle_interval * HZ); rcu_torture_shuffle_tasks(); - } while (!kthread_should_stop() && !fullstop); + rcutorture_shutdown_absorb("rcu_torture_shuffle"); + } while (!kthread_should_stop()); VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); return 0; } @@ -940,10 +961,11 @@ rcu_torture_stutter(void *arg) do { schedule_timeout_interruptible(stutter * HZ); stutter_pause_test = 1; - if (!kthread_should_stop() && !fullstop) + if (!kthread_should_stop()) schedule_timeout_interruptible(stutter * HZ); stutter_pause_test = 0; - } while (!kthread_should_stop() && !fullstop); + rcutorture_shutdown_absorb("rcu_torture_stutter"); + } while (!kthread_should_stop()); VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping"); return 0; } @@ -970,15 +992,16 @@ rcu_torture_cleanup(void) int i; mutex_lock(&fullstop_mutex); - if (!fullstop) { - /* If being signaled, let it happen, then exit. */ + if (fullstop == FULLSTOP_SHUTDOWN) { + printk(KERN_WARNING /* but going down anyway, so... */ + "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); - schedule_timeout_interruptible(10 * HZ); + schedule_timeout_uninterruptible(10); if (cur_ops->cb_barrier != NULL) cur_ops->cb_barrier(); return; } - fullstop = FULLSTOP_CLEANUP; + fullstop = FULLSTOP_RMMOD; mutex_unlock(&fullstop_mutex); unregister_reboot_notifier(&rcutorture_nb); if (stutter_task) { @@ -1078,7 +1101,7 @@ rcu_torture_init(void) else nrealreaders = 2 * num_online_cpus(); rcu_torture_print_module_parms("Start of test"); - fullstop = 0; + fullstop = FULLSTOP_DONTSTOP; /* Set up the freelist. */ -- cgit v1.1 From 62ea9ceb17a74bc7544211bfeecf4170c554ac4f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 11 Jan 2009 01:04:16 +0100 Subject: cpumask: fix CONFIG_NUMA=y sched.c Impact: fix panic on ia64 with NR_CPUS=1024 struct sched_domain is now a dangling structure; where we really want static ones, we need to use static_sched_domain. (As the FIXME in this file says, cpumask_var_t would be better, but this code is hairy enough without trying to add initialization code to the right places). Reported-by: Mike Travis Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- kernel/sched.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index deb5ac8..f0c0a81 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7282,10 +7282,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, * groups, so roll our own. Now each node has its own list of groups which * gets dynamically allocated. */ -static DEFINE_PER_CPU(struct sched_domain, node_domains); +static DEFINE_PER_CPU(struct static_sched_domain, node_domains); static struct sched_group ***sched_group_nodes_bycpu; -static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); +static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains); static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, @@ -7560,7 +7560,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_NUMA if (cpumask_weight(cpu_map) > SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { - sd = &per_cpu(allnodes_domains, i); + sd = &per_cpu(allnodes_domains, i).sd; SD_INIT(sd, ALLNODES); set_domain_attribute(sd, attr); cpumask_copy(sched_domain_span(sd), cpu_map); @@ -7570,7 +7570,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, } else p = NULL; - sd = &per_cpu(node_domains, i); + sd = &per_cpu(node_domains, i).sd; SD_INIT(sd, NODE); set_domain_attribute(sd, attr); sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); @@ -7688,7 +7688,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, for_each_cpu(j, nodemask) { struct sched_domain *sd; - sd = &per_cpu(node_domains, j); + sd = &per_cpu(node_domains, j).sd; sd->groups = sg; } sg->__cpu_power = 0; -- cgit v1.1 From 805194c35b91999b139e4d6b6145f4f84fd4c814 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 10 Jan 2009 15:43:15 +0800 Subject: sched: partly revert "sched debug: remove NULL checking in print_cfs_rt_rq()" Impact: avoid accessing NULL tg.css->cgroup In commit 0a0db8f5c9d4bbb9bbfcc2b6cb6bce2d0ef4d73d, I removed checking NULL tg.css->cgroup, but I realized I was wrong when I found reading /proc/sched_debug can race with cgroup_create(). Signed-off-by: Li Zefan Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 4293cfa..16eeba4e 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -145,6 +145,19 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_unlock_irqrestore(&tasklist_lock, flags); } +#if defined(CONFIG_CGROUP_SCHED) && \ + (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)) +static void task_group_path(struct task_group *tg, char *buf, int buflen) +{ + /* may be NULL if the underlying cgroup isn't fully-created yet */ + if (!tg->css.cgroup) { + buf[0] = '\0'; + return; + } + cgroup_path(tg->css.cgroup, buf, buflen); +} +#endif + void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, @@ -154,10 +167,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) unsigned long flags; #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) - char path[128] = ""; + char path[128]; struct task_group *tg = cfs_rq->tg; - cgroup_path(tg->css.cgroup, path, sizeof(path)); + task_group_path(tg, path, sizeof(path)); SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); #elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) @@ -208,10 +221,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) - char path[128] = ""; + char path[128]; struct task_group *tg = rt_rq->tg; - cgroup_path(tg->css.cgroup, path, sizeof(path)); + task_group_path(tg, path, sizeof(path)); SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); #else -- cgit v1.1 From 53ce3d9564908794ae7dd32969089b57df5fc098 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 9 Jan 2009 12:27:08 -0800 Subject: smp_call_function_single(): be slightly less stupid If you do smp_call_function_single(expression-with-side-effects, ...) then expression-with-side-effects never gets evaluated on UP builds. As always, implementing it in C is the correct thing to do. While we're there, uninline it for size and possible header dependency reasons. And create a new kernel/up.c, as a place in which to put uniprocessor-specific code and storage. It should mirror kernel/smp.c. Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/Makefile | 6 +++++- kernel/up.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 kernel/up.c (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index 2921d90..2aebc4c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -40,7 +40,11 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o -obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o +ifeq ($(CONFIG_USE_GENERIC_SMP_HELPERS),y) +obj-y += smp.o +else +obj-y += up.o +endif obj-$(CONFIG_SMP) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o diff --git a/kernel/up.c b/kernel/up.c new file mode 100644 index 0000000..ce62cc9 --- /dev/null +++ b/kernel/up.c @@ -0,0 +1,18 @@ +/* + * Uniprocessor-only support functions. The counterpart to kernel/smp.c + */ + +#include +#include +#include + +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int wait) +{ + WARN_ON(cpuid != 0); + local_irq_disable(); + (func)(info); + local_irq_enable(); + return 0; +} +EXPORT_SYMBOL(smp_call_function_single); -- cgit v1.1 From 93423b8665f43a0c7a006a1d5be048b99db56d32 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 11 Jan 2009 05:15:21 +0100 Subject: smp_call_function_single(): be slightly less stupid, fix Impact: build fix on Alpha kernel/up.c: In function 'smp_call_function_single': kernel/up.c:12: error: 'cpuid' undeclared (first use in this function) kernel/up.c:12: error: (Each undeclared identifier is reported only once kernel/up.c:12: error: for each function it appears in.) The typo didnt show up on x86 because 'cpuid' happens to be a function address as well ... Signed-off-by: Ingo Molnar --- kernel/up.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/up.c b/kernel/up.c index ce62cc9..c04b9dc 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -9,10 +9,12 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, int wait) { - WARN_ON(cpuid != 0); + WARN_ON(cpu != 0); + local_irq_disable(); (func)(info); local_irq_enable(); + return 0; } EXPORT_SYMBOL(smp_call_function_single); -- cgit v1.1 From fd2ab30b65e961b974ae0bc71e0d47d6b35e0968 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Sun, 11 Jan 2009 01:04:22 -0800 Subject: kernel/sched.c: add missing forward declaration for 'double_rq_lock' Impact: build fix on certain configs Added 'double_rq_lock' forward declaration, allowing double_rq_lock to be used in _double_lock_balance(). Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar --- kernel/sched.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index f0c0a81..8be2c13 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch); DEFINE_TRACE(sched_migrate_task); #ifdef CONFIG_SMP + +static void double_rq_lock(struct rq *rq1, struct rq *rq2); + /* * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) * Since cpu_power is a 'constant', we can use a reciprocal divide. -- cgit v1.1 From 01e3eb82278bf45221fc38b391bc5ee0f6a314d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 13:00:50 +0100 Subject: Revert "sched: improve preempt debugging" This reverts commit 7317d7b87edb41a9135e30be1ec3f7ef817c53dd. This has been reported (and bisected) by Alexey Zaytsev and Kamalesh Babulal to produce annoying warnings during bootup on both x86 and powerpc. kernel_locked() is not a valid test in IRQ context (we update the BKL's ->lock_depth and the preempt count separately and non-atomicalyy), so we cannot put it into the generic preempt debugging checks which can run in IRQ contexts too. Reported-and-bisected-by: Alexey Zaytsev Reported-and-bisected-by: Kamalesh Babulal Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 8be2c13..3b630d8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4440,7 +4440,7 @@ void __kprobes sub_preempt_count(int val) /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) return; /* * Is the spinlock portion underflowing? -- cgit v1.1 From 6e96281412f2f757abe623e08a9577e2bbd3402f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 16:04:37 +0100 Subject: smp_call_function_single(): be slightly less stupid, fix #2 fix m68k build failure: tip/kernel/up.c: In function 'smp_call_function_single': tip/kernel/up.c:16: error: dereferencing pointer to incomplete type make[2]: *** [kernel/up.o] Error 1 Signed-off-by: Ingo Molnar --- kernel/up.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/up.c b/kernel/up.c index c04b9dc..1ff27a2 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -2,6 +2,7 @@ * Uniprocessor-only support functions. The counterpart to kernel/smp.c */ +#include #include #include #include -- cgit v1.1 From 37a76bd4f1b716949fc38a6842e89f0ccb8384d0 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 11 Jan 2009 15:35:01 +0000 Subject: async: fix __lowest_in_progress() At 37000 feet somewhere near Greenland I woke up from a half-sleep with the realisation that __lowest_in_progress() is buggy. After landing I checked and there were indeed 2 problems with it; this patch fixes both: * The order of the list checks was wrong * The locking was not correct. Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- kernel/async.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/async.c b/kernel/async.c index f286e9f..608b32b 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -90,12 +90,12 @@ extern int initcall_debug; static async_cookie_t __lowest_in_progress(struct list_head *running) { struct async_entry *entry; - if (!list_empty(&async_pending)) { - entry = list_first_entry(&async_pending, + if (!list_empty(running)) { + entry = list_first_entry(running, struct async_entry, list); return entry->cookie; - } else if (!list_empty(running)) { - entry = list_first_entry(running, + } else if (!list_empty(&async_pending)) { + entry = list_first_entry(&async_pending, struct async_entry, list); return entry->cookie; } else { @@ -104,6 +104,17 @@ static async_cookie_t __lowest_in_progress(struct list_head *running) } } + +static async_cookie_t lowest_in_progress(struct list_head *running) +{ + unsigned long flags; + async_cookie_t ret; + + spin_lock_irqsave(&async_lock, flags); + ret = __lowest_in_progress(running); + spin_unlock_irqrestore(&async_lock, flags); + return ret; +} /* * pick the first pending entry and run it */ @@ -229,7 +240,7 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r starttime = ktime_get(); } - wait_event(async_done, __lowest_in_progress(running) >= cookie); + wait_event(async_done, lowest_in_progress(running) >= cookie); if (initcall_debug && system_state == SYSTEM_BOOTING) { endtime = ktime_get(); -- cgit v1.1 From 2ed7c03ec17779afb4fcfa3b8c61df61bd4879ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:54 +0100 Subject: [CVE-2009-0029] Convert all system calls to return a long Convert all system calls to return a long. This should be a NOP since all converted types should have the same size anyway. With the exception of sys_exit_group which returned void. But that doesn't matter since the system call doesn't return. Signed-off-by: Heiko Carstens --- kernel/exit.c | 4 +++- kernel/signal.c | 2 +- kernel/timer.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index c7740fa..fac9b04 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1182,9 +1182,11 @@ do_group_exit(int exit_code) * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. */ -asmlinkage void sys_exit_group(int error_code) +asmlinkage long sys_exit_group(int error_code) { do_group_exit((error_code & 0xff) << 8); + /* NOTREACHED */ + return 0; } static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) diff --git a/kernel/signal.c b/kernel/signal.c index 3152ac3..856a547 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2559,7 +2559,7 @@ sys_ssetmask(int newmask) /* * For backwards compatibility. Functionality superseded by sigaction. */ -asmlinkage unsigned long +asmlinkage long sys_signal(int sig, __sighandler_t handler) { struct k_sigaction new_sa, old_sa; diff --git a/kernel/timer.c b/kernel/timer.c index dee3f64..7b8697d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks) * For backwards compatibility? This can be done in libc so Alpha * and all newer ports shouldn't need it. */ -asmlinkage unsigned long sys_alarm(unsigned int seconds) +asmlinkage long sys_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } -- cgit v1.1 From f627a741d24f12955fa2d9f8831c3b12860635bd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:58 +0100 Subject: [CVE-2009-0029] Make sys_syslog a conditional system call Remove the -ENOSYS implementation for !CONFIG_PRINTK and use the cond_syscall infrastructure instead. Acked-by: Kyle McMartin Signed-off-by: Heiko Carstens --- kernel/printk.c | 5 ----- kernel/sys_ni.c | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 7015733..e48cf33 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -742,11 +742,6 @@ EXPORT_SYMBOL(vprintk); #else -asmlinkage long sys_syslog(int type, char __user *buf, int len) -{ - return -ENOSYS; -} - static void call_console_drivers(unsigned start, unsigned end) { } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e14a232..27dad29 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -131,6 +131,7 @@ cond_syscall(sys_io_destroy); cond_syscall(sys_io_submit); cond_syscall(sys_io_cancel); cond_syscall(sys_io_getevents); +cond_syscall(sys_syslog); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); -- cgit v1.1 From 58fd3aa288939d3097fa04505b25c2f5e6e144d1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:03 +0100 Subject: [CVE-2009-0029] System call wrappers part 01 Signed-off-by: Heiko Carstens --- kernel/hrtimer.c | 4 ++-- kernel/sys.c | 2 +- kernel/time.c | 14 +++++++------- kernel/timer.c | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1455b76..2dc30c5 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1467,8 +1467,8 @@ out: return ret; } -asmlinkage long -sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) +SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, + struct timespec __user *, rmtp) { struct timespec tu; diff --git a/kernel/sys.c b/kernel/sys.c index 763c3c1..37165e5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -919,7 +919,7 @@ void do_sys_times(struct tms *tms) tms->tms_cstime = cputime_to_clock_t(cstime); } -asmlinkage long sys_times(struct tms __user * tbuf) +SYSCALL_DEFINE1(times, struct tms __user *, tbuf) { if (tbuf) { struct tms tmp; diff --git a/kernel/time.c b/kernel/time.c index 4886e3c..2951194 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL(sys_tz); * why not move it into the appropriate arch directory (for those * architectures that need it). */ -asmlinkage long sys_time(time_t __user * tloc) +SYSCALL_DEFINE1(time, time_t __user *, tloc) { time_t i = get_seconds(); @@ -79,7 +79,7 @@ asmlinkage long sys_time(time_t __user * tloc) * architectures that need it). */ -asmlinkage long sys_stime(time_t __user *tptr) +SYSCALL_DEFINE1(stime, time_t __user *, tptr) { struct timespec tv; int err; @@ -99,8 +99,8 @@ asmlinkage long sys_stime(time_t __user *tptr) #endif /* __ARCH_WANT_SYS_TIME */ -asmlinkage long sys_gettimeofday(struct timeval __user *tv, - struct timezone __user *tz) +SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, + struct timezone __user *, tz) { if (likely(tv != NULL)) { struct timeval ktv; @@ -184,8 +184,8 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz) return 0; } -asmlinkage long sys_settimeofday(struct timeval __user *tv, - struct timezone __user *tz) +SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, + struct timezone __user *, tz) { struct timeval user_tv; struct timespec new_ts; @@ -205,7 +205,7 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv, return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } -asmlinkage long sys_adjtimex(struct timex __user *txc_p) +SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ int ret; diff --git a/kernel/timer.c b/kernel/timer.c index 7b8697d..76041df 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks) * For backwards compatibility? This can be done in libc so Alpha * and all newer ports shouldn't need it. */ -asmlinkage long sys_alarm(unsigned int seconds) +SYSCALL_DEFINE1(alarm, unsigned int, seconds) { return alarm_setitimer(seconds); } @@ -1152,7 +1152,7 @@ asmlinkage long sys_alarm(unsigned int seconds) * * This is SMP safe as current->tgid does not change. */ -asmlinkage long sys_getpid(void) +SYSCALL_DEFINE0(getpid) { return task_tgid_vnr(current); } @@ -1308,7 +1308,7 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) EXPORT_SYMBOL(schedule_timeout_uninterruptible); /* Thread ID - the internal kernel "pid" */ -asmlinkage long sys_gettid(void) +SYSCALL_DEFINE0(gettid) { return task_pid_vnr(current); } -- cgit v1.1 From dbf040d9d1cbf1ef6250bdb095c5c118950bcde8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:04 +0100 Subject: [CVE-2009-0029] System call wrappers part 02 Signed-off-by: Heiko Carstens --- kernel/sys.c | 10 +++++----- kernel/timer.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 37165e5..4c33555 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -756,7 +756,7 @@ error: return retval; } -asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) +SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid) { const struct cred *cred = current_cred(); int retval; @@ -814,7 +814,7 @@ error: return retval; } -asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) +SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid) { const struct cred *cred = current_cred(); int retval; @@ -1015,7 +1015,7 @@ out: return err; } -asmlinkage long sys_getpgid(pid_t pid) +SYSCALL_DEFINE1(getpgid, pid_t, pid) { struct task_struct *p; struct pid *grp; @@ -1045,14 +1045,14 @@ out: #ifdef __ARCH_WANT_SYS_GETPGRP -asmlinkage long sys_getpgrp(void) +SYSCALL_DEFINE0(getpgrp) { return sys_getpgid(0); } #endif -asmlinkage long sys_getsid(pid_t pid) +SYSCALL_DEFINE1(getsid, pid_t, pid) { struct task_struct *p; struct pid *sid; diff --git a/kernel/timer.c b/kernel/timer.c index 76041df..14a5153 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1163,7 +1163,7 @@ SYSCALL_DEFINE0(getpid) * value of ->real_parent under rcu_read_lock(), see * release_task()->call_rcu(delayed_put_task_struct). */ -asmlinkage long sys_getppid(void) +SYSCALL_DEFINE0(getppid) { int pid; @@ -1174,25 +1174,25 @@ asmlinkage long sys_getppid(void) return pid; } -asmlinkage long sys_getuid(void) +SYSCALL_DEFINE0(getuid) { /* Only we change this so SMP safe */ return current_uid(); } -asmlinkage long sys_geteuid(void) +SYSCALL_DEFINE0(geteuid) { /* Only we change this so SMP safe */ return current_euid(); } -asmlinkage long sys_getgid(void) +SYSCALL_DEFINE0(getgid) { /* Only we change this so SMP safe */ return current_gid(); } -asmlinkage long sys_getegid(void) +SYSCALL_DEFINE0(getegid) { /* Only we change this so SMP safe */ return current_egid(); -- cgit v1.1 From ae1251ab785f6da87219df8352ffdac68bba23e4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:05 +0100 Subject: [CVE-2009-0029] System call wrappers part 03 Signed-off-by: Heiko Carstens --- kernel/sys.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 4c33555..ace9ced 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -478,7 +478,7 @@ void ctrl_alt_del(void) * SMP: There are not races, the GIDs are checked only by filesystem * operations (as far as semantic preservation is concerned). */ -asmlinkage long sys_setregid(gid_t rgid, gid_t egid) +SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) { const struct cred *old; struct cred *new; @@ -529,7 +529,7 @@ error: * * SMP: Same implicit races as above. */ -asmlinkage long sys_setgid(gid_t gid) +SYSCALL_DEFINE1(setgid, gid_t, gid) { const struct cred *old; struct cred *new; @@ -597,7 +597,7 @@ static int set_user(struct cred *new) * 100% compatible with BSD. A program which uses just setuid() will be * 100% compatible with POSIX with saved IDs. */ -asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) +SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) { const struct cred *old; struct cred *new; @@ -661,7 +661,7 @@ error: * will allow a root program to temporarily drop privileges and be able to * regain them by swapping the real and effective uid. */ -asmlinkage long sys_setuid(uid_t uid) +SYSCALL_DEFINE1(setuid, uid_t, uid) { const struct cred *old; struct cred *new; @@ -705,7 +705,7 @@ error: * This function implements a generic ability to update ruid, euid, * and suid. This allows you to implement the 4.4 compatible seteuid(). */ -asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) +SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) { const struct cred *old; struct cred *new; @@ -771,7 +771,7 @@ SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __u /* * Same as above, but for rgid, egid, sgid. */ -asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) +SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) { const struct cred *old; struct cred *new; @@ -833,7 +833,7 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __u * whatever uid it wants to). It normally shadows "euid", except when * explicitly set by setfsuid() or for access.. */ -asmlinkage long sys_setfsuid(uid_t uid) +SYSCALL_DEFINE1(setfsuid, uid_t, uid) { const struct cred *old; struct cred *new; @@ -870,7 +870,7 @@ change_okay: /* * Samma på svenska.. */ -asmlinkage long sys_setfsgid(gid_t gid) +SYSCALL_DEFINE1(setfsgid, gid_t, gid) { const struct cred *old; struct cred *new; @@ -1311,7 +1311,7 @@ int set_current_groups(struct group_info *group_info) EXPORT_SYMBOL(set_current_groups); -asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) +SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist) { const struct cred *cred = current_cred(); int i; -- cgit v1.1 From b290ebe2c46d01b742b948ce03f09e8a3efb9a92 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:06 +0100 Subject: [CVE-2009-0029] System call wrappers part 04 Signed-off-by: Heiko Carstens --- kernel/acct.c | 2 +- kernel/capability.c | 4 ++-- kernel/exec_domain.c | 3 +-- kernel/itimer.c | 2 +- kernel/signal.c | 7 +++---- kernel/sys.c | 6 +++--- 6 files changed, 11 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/acct.c b/kernel/acct.c index d57b7cb..7afa315 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -277,7 +277,7 @@ static int acct_on(char *name) * should be written. If the filename is NULL, accounting will be * shutdown. */ -asmlinkage long sys_acct(const char __user *name) +SYSCALL_DEFINE1(acct, const char __user *, name) { int error; diff --git a/kernel/capability.c b/kernel/capability.c index 688926e..4e17041 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -161,7 +161,7 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, * * Returns 0 on success and < 0 on error. */ -asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) +SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) { int ret = 0; pid_t pid; @@ -235,7 +235,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) * * Returns 0 on success and < 0 on error. */ -asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) +SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) { struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i, tocopy; diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 0511716..667c841 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -209,8 +209,7 @@ static int __init proc_execdomains_init(void) module_init(proc_execdomains_init); #endif -asmlinkage long -sys_personality(u_long personality) +SYSCALL_DEFINE1(personality, u_long, personality) { u_long old = current->personality; diff --git a/kernel/itimer.c b/kernel/itimer.c index db7c358..7e0663e 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -100,7 +100,7 @@ int do_getitimer(int which, struct itimerval *value) return 0; } -asmlinkage long sys_getitimer(int which, struct itimerval __user *value) +SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value) { int error = -EFAULT; struct itimerval get_buffer; diff --git a/kernel/signal.c b/kernel/signal.c index 856a547..3fe08ea 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2434,8 +2434,7 @@ out: #ifdef __ARCH_WANT_SYS_SIGPENDING -asmlinkage long -sys_sigpending(old_sigset_t __user *set) +SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) { return do_sigpending(set, sizeof(*set)); } @@ -2446,8 +2445,8 @@ sys_sigpending(old_sigset_t __user *set) /* Some platforms have their own version with special arguments others support only sys_rt_sigprocmask. */ -asmlinkage long -sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset) +SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, + old_sigset_t __user *, oset) { int error; old_sigset_t old_set, new_set; diff --git a/kernel/sys.c b/kernel/sys.c index ace9ced..cbe4502 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -944,7 +944,7 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf) * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. * LBT 04.03.94 */ -asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) +SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) { struct task_struct *p; struct task_struct *group_leader = current->group_leader; @@ -1080,7 +1080,7 @@ out: return retval; } -asmlinkage long sys_setsid(void) +SYSCALL_DEFINE0(setsid) { struct task_struct *group_leader = current->group_leader; struct pid *sid = task_pid(group_leader); @@ -1340,7 +1340,7 @@ out: * without another task interfering. */ -asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) +SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) { struct group_info *group_info; int retval; -- cgit v1.1 From 362e9c07c7220c0a78c88826fc0d2bf7e4a4bb68 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:07 +0100 Subject: [CVE-2009-0029] System call wrappers part 05 Signed-off-by: Heiko Carstens --- kernel/itimer.c | 5 ++--- kernel/posix-timers.c | 43 +++++++++++++++++++------------------------ 2 files changed, 21 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/itimer.c b/kernel/itimer.c index 7e0663e..6a5fe93 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -260,9 +260,8 @@ unsigned int alarm_setitimer(unsigned int seconds) return it_old.it_value.tv_sec; } -asmlinkage long sys_setitimer(int which, - struct itimerval __user *value, - struct itimerval __user *ovalue) +SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, + struct itimerval __user *, ovalue) { struct itimerval set_buffer, get_buffer; int error; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 887c637..052ec4d 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -477,10 +477,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) /* Create a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_create(const clockid_t which_clock, - struct sigevent __user *timer_event_spec, - timer_t __user * created_timer_id) +SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, + struct sigevent __user *, timer_event_spec, + timer_t __user *, created_timer_id) { struct k_itimer *new_timer; int error, new_timer_id; @@ -661,8 +660,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) } /* Get the time remaining on a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) +SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, + struct itimerspec __user *, setting) { struct k_itimer *timr; struct itimerspec cur_setting; @@ -691,8 +690,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) * the call back to do_schedule_next_timer(). So all we need to do is * to pick up the frozen overrun. */ -asmlinkage long -sys_timer_getoverrun(timer_t timer_id) +SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { struct k_itimer *timr; int overrun; @@ -760,10 +758,9 @@ common_timer_set(struct k_itimer *timr, int flags, } /* Set a POSIX.1b interval timer */ -asmlinkage long -sys_timer_settime(timer_t timer_id, int flags, - const struct itimerspec __user *new_setting, - struct itimerspec __user *old_setting) +SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, + const struct itimerspec __user *, new_setting, + struct itimerspec __user *, old_setting) { struct k_itimer *timr; struct itimerspec new_spec, old_spec; @@ -816,8 +813,7 @@ static inline int timer_delete_hook(struct k_itimer *timer) } /* Delete a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_delete(timer_t timer_id) +SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) { struct k_itimer *timer; unsigned long flags; @@ -903,8 +899,8 @@ int do_posix_clock_nonanosleep(const clockid_t clock, int flags, } EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); -asmlinkage long sys_clock_settime(const clockid_t which_clock, - const struct timespec __user *tp) +SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, + const struct timespec __user *, tp) { struct timespec new_tp; @@ -916,8 +912,8 @@ asmlinkage long sys_clock_settime(const clockid_t which_clock, return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); } -asmlinkage long -sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) +SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, + struct timespec __user *,tp) { struct timespec kernel_tp; int error; @@ -933,8 +929,8 @@ sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) } -asmlinkage long -sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) +SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, + struct timespec __user *, tp) { struct timespec rtn_tp; int error; @@ -963,10 +959,9 @@ static int common_nsleep(const clockid_t which_clock, int flags, which_clock); } -asmlinkage long -sys_clock_nanosleep(const clockid_t which_clock, int flags, - const struct timespec __user *rqtp, - struct timespec __user *rmtp) +SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, + const struct timespec __user *, rqtp, + struct timespec __user *, rmtp) { struct timespec t; -- cgit v1.1 From 5add95d4f7cf08f6f62510f19576992912387501 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:08 +0100 Subject: [CVE-2009-0029] System call wrappers part 06 Signed-off-by: Heiko Carstens --- kernel/sched.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 8be2c13..1a0fdfa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5126,7 +5126,7 @@ int can_nice(const struct task_struct *p, const int nice) * sys_setpriority is a more generic, but much slower function that * does similar things. */ -asmlinkage long sys_nice(int increment) +SYSCALL_DEFINE1(nice, int, increment) { long nice, retval; @@ -5433,8 +5433,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @policy: new policy. * @param: structure containing the new RT priority. */ -asmlinkage long -sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) +SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, + struct sched_param __user *, param) { /* negative values for policy are not valid */ if (policy < 0) @@ -5448,7 +5448,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @pid: the pid in question. * @param: structure containing the new RT priority. */ -asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) { return do_sched_setscheduler(pid, -1, param); } @@ -5457,7 +5457,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) * sys_sched_getscheduler - get the policy (scheduling class) of a thread * @pid: the pid in question. */ -asmlinkage long sys_sched_getscheduler(pid_t pid) +SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) { struct task_struct *p; int retval; @@ -5482,7 +5482,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid) * @pid: the pid in question. * @param: structure containing the RT priority. */ -asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) { struct sched_param lp; struct task_struct *p; @@ -5600,8 +5600,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to the new cpu mask */ -asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { cpumask_var_t new_mask; int retval; @@ -5648,8 +5648,8 @@ out_unlock: * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask */ -asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { int ret; cpumask_var_t mask; @@ -5678,7 +5678,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, * This function yields the current CPU to other tasks. If there are no * other threads running on this CPU then this function will return. */ -asmlinkage long sys_sched_yield(void) +SYSCALL_DEFINE0(sched_yield) { struct rq *rq = this_rq_lock(); @@ -5819,7 +5819,7 @@ long __sched io_schedule_timeout(long timeout) * this syscall returns the maximum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_max(int policy) +SYSCALL_DEFINE1(sched_get_priority_max, int, policy) { int ret = -EINVAL; @@ -5844,7 +5844,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) * this syscall returns the minimum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_min(int policy) +SYSCALL_DEFINE1(sched_get_priority_min, int, policy) { int ret = -EINVAL; -- cgit v1.1 From 754fe8d297bfae7b77f7ce866e2fb0c5fb186506 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:09 +0100 Subject: [CVE-2009-0029] System call wrappers part 07 Signed-off-by: Heiko Carstens --- kernel/exit.c | 8 ++++---- kernel/kexec.c | 5 ++--- kernel/sched.c | 4 ++-- kernel/signal.c | 2 +- kernel/sys.c | 7 ++++--- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index fac9b04..08895df 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1141,7 +1141,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code) EXPORT_SYMBOL(complete_and_exit); -asmlinkage long sys_exit(int error_code) +SYSCALL_DEFINE1(exit, int, error_code) { do_exit((error_code&0xff)<<8); } @@ -1182,7 +1182,7 @@ do_group_exit(int exit_code) * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. */ -asmlinkage long sys_exit_group(int error_code) +SYSCALL_DEFINE1(exit_group, int, error_code) { do_group_exit((error_code & 0xff) << 8); /* NOTREACHED */ @@ -1795,8 +1795,8 @@ asmlinkage long sys_waitid(int which, pid_t upid, return ret; } -asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, - int options, struct rusage __user *ru) +SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, + int, options, struct rusage __user *, ru) { struct pid *pid = NULL; enum pid_type type; diff --git a/kernel/kexec.c b/kernel/kexec.c index 3fb855a..8a6d7b0 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -934,9 +934,8 @@ struct kimage *kexec_crash_image; static DEFINE_MUTEX(kexec_mutex); -asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags) +SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, + struct kexec_segment __user *, segments, unsigned long, flags) { struct kimage **dest_image, *image; int result; diff --git a/kernel/sched.c b/kernel/sched.c index 1a0fdfa..65c0203 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,8 +5869,8 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -asmlinkage -long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) +SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, + struct timespec __user *, interval) { struct task_struct *p; unsigned int time_slice; diff --git a/kernel/signal.c b/kernel/signal.c index 3fe08ea..41f32e0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1961,7 +1961,7 @@ EXPORT_SYMBOL(unblock_all_signals); * System call entry points. */ -asmlinkage long sys_restart_syscall(void) +SYSCALL_DEFINE0(restart_syscall) { struct restart_block *restart = ¤t_thread_info()->restart_block; return restart->fn(restart); diff --git a/kernel/sys.c b/kernel/sys.c index cbe4502..39b192b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -143,7 +143,7 @@ out: return error; } -asmlinkage long sys_setpriority(int which, int who, int niceval) +SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) { struct task_struct *g, *p; struct user_struct *user; @@ -208,7 +208,7 @@ out: * has been offset by 20 (ie it returns 40..1 instead of -20..19) * to stay compatible. */ -asmlinkage long sys_getpriority(int which, int who) +SYSCALL_DEFINE2(getpriority, int, which, int, who) { struct task_struct *g, *p; struct user_struct *user; @@ -355,7 +355,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off); * * reboot doesn't sync: do that yourself before calling this. */ -asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg) +SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, + void __user *, arg) { char buffer[256]; -- cgit v1.1 From 17da2bd90abf428523de0fb98f7075e00e3ed42e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:10 +0100 Subject: [CVE-2009-0029] System call wrappers part 08 Signed-off-by: Heiko Carstens --- kernel/exit.c | 7 +++---- kernel/fork.c | 2 +- kernel/futex.c | 6 +++--- kernel/module.c | 10 ++++------ kernel/sched.c | 2 +- kernel/signal.c | 18 +++++++----------- 6 files changed, 19 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 08895df..f80dec3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1754,9 +1754,8 @@ end: return retval; } -asmlinkage long sys_waitid(int which, pid_t upid, - struct siginfo __user *infop, int options, - struct rusage __user *ru) +SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, + infop, int, options, struct rusage __user *, ru) { struct pid *pid = NULL; enum pid_type type; @@ -1833,7 +1832,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, * sys_waitpid() remains for compatibility. waitpid() should be * implemented by calling sys_wait4() from libc.a. */ -asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options) +SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) { return sys_wait4(pid, stat_addr, options, NULL); } diff --git a/kernel/fork.c b/kernel/fork.c index 1d68f12..8eb37d3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -901,7 +901,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) clear_freeze_flag(p); } -asmlinkage long sys_set_tid_address(int __user *tidptr) +SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; diff --git a/kernel/futex.c b/kernel/futex.c index 002aa18..e86931d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1978,9 +1978,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, } -asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct timespec __user *utime, u32 __user *uaddr2, - u32 val3) +SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct timespec __user *, utime, u32 __user *, uaddr2, + u32, val3) { struct timespec ts; ktime_t t, *tp = NULL; diff --git a/kernel/module.c b/kernel/module.c index c9332c9..e8b51d4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -743,8 +743,8 @@ static void wait_for_zero_refcount(struct module *mod) mutex_lock(&module_mutex); } -asmlinkage long -sys_delete_module(const char __user *name_user, unsigned int flags) +SYSCALL_DEFINE2(delete_module, const char __user *, name_user, + unsigned int, flags) { struct module *mod; char name[MODULE_NAME_LEN]; @@ -2296,10 +2296,8 @@ static noinline struct module *load_module(void __user *umod, } /* This is where the real work happens */ -asmlinkage long -sys_init_module(void __user *umod, - unsigned long len, - const char __user *uargs) +SYSCALL_DEFINE3(init_module, void __user *, umod, + unsigned long, len, const char __user *, uargs) { struct module *mod; int ret = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 65c0203..eb1931e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,7 +5869,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, +SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, struct timespec __user *, interval) { struct task_struct *p; diff --git a/kernel/signal.c b/kernel/signal.c index 41f32e0..278cc87 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2014,8 +2014,8 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) return error; } -asmlinkage long -sys_rt_sigprocmask(int how, sigset_t __user *set, sigset_t __user *oset, size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, + sigset_t __user *, oset, size_t, sigsetsize) { int error = -EINVAL; sigset_t old_set, new_set; @@ -2074,8 +2074,7 @@ out: return error; } -asmlinkage long -sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize) +SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) { return do_sigpending(set, sigsetsize); } @@ -2146,11 +2145,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) #endif -asmlinkage long -sys_rt_sigtimedwait(const sigset_t __user *uthese, - siginfo_t __user *uinfo, - const struct timespec __user *uts, - size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, + siginfo_t __user *, uinfo, const struct timespec __user *, uts, + size_t, sigsetsize) { int ret, sig; sigset_t these; @@ -2223,8 +2220,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese, return ret; } -asmlinkage long -sys_kill(pid_t pid, int sig) +SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct siginfo info; -- cgit v1.1 From a5f8fa9e9ba5ef3305e147f41ad6e1e84ac1f0bd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:11 +0100 Subject: [CVE-2009-0029] System call wrappers part 09 Signed-off-by: Heiko Carstens --- kernel/signal.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 278cc87..e233392 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2279,7 +2279,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) * exists but it's not belonging to the target process anymore. This * method solves the problem of threads exiting and PIDs getting reused. */ -asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig) +SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0 || tgid <= 0) @@ -2291,8 +2291,7 @@ asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig) /* * Send a signal to only one task, even if it's a CLONE_THREAD task. */ -asmlinkage long -sys_tkill(pid_t pid, int sig) +SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0) @@ -2301,8 +2300,8 @@ sys_tkill(pid_t pid, int sig) return do_tkill(0, pid, sig); } -asmlinkage long -sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo) +SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, + siginfo_t __user *, uinfo) { siginfo_t info; @@ -2526,15 +2525,13 @@ out: /* * For backwards compatibility. Functionality superseded by sigprocmask. */ -asmlinkage long -sys_sgetmask(void) +SYSCALL_DEFINE0(sgetmask) { /* SMP safe */ return current->blocked.sig[0]; } -asmlinkage long -sys_ssetmask(int newmask) +SYSCALL_DEFINE1(ssetmask, int, newmask) { int old; @@ -2554,8 +2551,7 @@ sys_ssetmask(int newmask) /* * For backwards compatibility. Functionality superseded by sigaction. */ -asmlinkage long -sys_signal(int sig, __sighandler_t handler) +SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) { struct k_sigaction new_sa, old_sa; int ret; @@ -2572,8 +2568,7 @@ sys_signal(int sig, __sighandler_t handler) #ifdef __ARCH_WANT_SYS_PAUSE -asmlinkage long -sys_pause(void) +SYSCALL_DEFINE0(pause) { current->state = TASK_INTERRUPTIBLE; schedule(); -- cgit v1.1 From ca013e945b1ba5828b151ee646946f1297b67a4c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:19 +0100 Subject: [CVE-2009-0029] System call wrappers part 17 Signed-off-by: Heiko Carstens --- kernel/uid16.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/uid16.c b/kernel/uid16.c index 2460c31..37f48c0 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -17,7 +17,7 @@ #include -asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ @@ -25,7 +25,7 @@ asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gi return ret; } -asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ @@ -33,7 +33,7 @@ asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_g return ret; } -asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) { long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ -- cgit v1.1 From a6b42e83f249aad723589b2bdf6d1dfb2b0997c8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:20 +0100 Subject: [CVE-2009-0029] System call wrappers part 18 Signed-off-by: Heiko Carstens --- kernel/uid16.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/uid16.c b/kernel/uid16.c index 37f48c0..221894e 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -41,7 +41,7 @@ SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) return ret; } -asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) +SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid) { long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); /* avoid REGPARM breakage on x86: */ @@ -49,7 +49,7 @@ asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) return ret; } -asmlinkage long sys_setgid16(old_gid_t gid) +SYSCALL_DEFINE1(setgid16, old_gid_t, gid) { long ret = sys_setgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ @@ -57,7 +57,7 @@ asmlinkage long sys_setgid16(old_gid_t gid) return ret; } -asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) +SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid) { long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); /* avoid REGPARM breakage on x86: */ @@ -65,7 +65,7 @@ asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) return ret; } -asmlinkage long sys_setuid16(old_uid_t uid) +SYSCALL_DEFINE1(setuid16, old_uid_t, uid) { long ret = sys_setuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ @@ -73,7 +73,7 @@ asmlinkage long sys_setuid16(old_uid_t uid) return ret; } -asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) +SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid) { long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), low2highuid(suid)); @@ -82,7 +82,7 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) return ret; } -asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) +SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruid, old_uid_t __user *, euid, old_uid_t __user *, suid) { const struct cred *cred = current_cred(); int retval; @@ -94,7 +94,7 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, return retval; } -asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) +SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid) { long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), low2highgid(sgid)); @@ -103,7 +103,8 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) return ret; } -asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) + +SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgid, old_gid_t __user *, egid, old_gid_t __user *, sgid) { const struct cred *cred = current_cred(); int retval; @@ -115,7 +116,7 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, return retval; } -asmlinkage long sys_setfsuid16(old_uid_t uid) +SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid) { long ret = sys_setfsuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ @@ -123,7 +124,7 @@ asmlinkage long sys_setfsuid16(old_uid_t uid) return ret; } -asmlinkage long sys_setfsgid16(old_gid_t gid) +SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid) { long ret = sys_setfsgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ -- cgit v1.1 From 003d7ab479168132a2b2c6700fe682b08f08ab0c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:21 +0100 Subject: [CVE-2009-0029] System call wrappers part 19 Signed-off-by: Heiko Carstens --- kernel/uid16.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/uid16.c b/kernel/uid16.c index 221894e..0314501 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -162,7 +162,7 @@ static int groups16_from_user(struct group_info *group_info, return 0; } -asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) +SYSCALL_DEFINE2(getgroups16, int, gidsetsize, old_gid_t __user *, grouplist) { const struct cred *cred = current_cred(); int i; @@ -185,7 +185,7 @@ out: return i; } -asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) +SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) { struct group_info *group_info; int retval; @@ -210,22 +210,22 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) return retval; } -asmlinkage long sys_getuid16(void) +SYSCALL_DEFINE0(getuid16) { return high2lowuid(current_uid()); } -asmlinkage long sys_geteuid16(void) +SYSCALL_DEFINE0(geteuid16) { return high2lowuid(current_euid()); } -asmlinkage long sys_getgid16(void) +SYSCALL_DEFINE0(getgid16) { return high2lowgid(current_gid()); } -asmlinkage long sys_getegid16(void) +SYSCALL_DEFINE0(getegid16) { return high2lowgid(current_egid()); } -- cgit v1.1 From 5a8a82b1d306a325d899b67715618413657efda4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:25 +0100 Subject: [CVE-2009-0029] System call wrappers part 23 Signed-off-by: Heiko Carstens --- kernel/sys.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 39b192b..5292f21 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1406,7 +1406,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name) return errno; } -asmlinkage long sys_sethostname(char __user *name, int len) +SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) { int errno; char tmp[__NEW_UTS_LEN]; @@ -1430,7 +1430,7 @@ asmlinkage long sys_sethostname(char __user *name, int len) #ifdef __ARCH_WANT_SYS_GETHOSTNAME -asmlinkage long sys_gethostname(char __user *name, int len) +SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) { int i, errno; struct new_utsname *u; @@ -1455,7 +1455,7 @@ asmlinkage long sys_gethostname(char __user *name, int len) * Only setdomainname; getdomainname can be implemented by calling * uname() */ -asmlinkage long sys_setdomainname(char __user *name, int len) +SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) { int errno; char tmp[__NEW_UTS_LEN]; -- cgit v1.1 From e48fbb699f82ef1e80bd7126046394d2dc9ca7e6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:26 +0100 Subject: [CVE-2009-0029] System call wrappers part 24 Signed-off-by: Heiko Carstens --- kernel/sys.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 5292f21..70ffa84 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1395,7 +1395,7 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); -asmlinkage long sys_newuname(struct new_utsname __user * name) +SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { int errno = 0; @@ -1478,7 +1478,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) return errno; } -asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) { if (resource >= RLIM_NLIMITS) return -EINVAL; @@ -1497,7 +1497,8 @@ asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) * Back compatibility for getrlimit. Needed for some apps. */ -asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, + struct rlimit __user *, rlim) { struct rlimit x; if (resource >= RLIM_NLIMITS) @@ -1515,7 +1516,7 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r #endif -asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit new_rlim, *old_rlim; int retval; @@ -1688,7 +1689,7 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru) return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; } -asmlinkage long sys_getrusage(int who, struct rusage __user *ru) +SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) { if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && who != RUSAGE_THREAD) @@ -1696,7 +1697,7 @@ asmlinkage long sys_getrusage(int who, struct rusage __user *ru) return getrusage(current, who, ru); } -asmlinkage long sys_umask(int mask) +SYSCALL_DEFINE1(umask, int, mask) { mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); return mask; -- cgit v1.1 From c4ea37c26a691ad0b7e86aa5884aab27830e95c9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:28 +0100 Subject: [CVE-2009-0029] System call wrappers part 26 Signed-off-by: Heiko Carstens --- kernel/sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 70ffa84..59aadcd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1703,8 +1703,8 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } -asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) +SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, + unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; unsigned char comm[sizeof(me->comm)]; -- cgit v1.1 From 1e7bfb2134dfec37ce04fb3a4ca89299e892d10c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:29 +0100 Subject: [CVE-2009-0029] System call wrappers part 27 Signed-off-by: Heiko Carstens --- kernel/printk.c | 2 +- kernel/ptrace.c | 2 +- kernel/sysctl.c | 4 ++-- kernel/timer.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index e48cf33..69188f2 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -382,7 +382,7 @@ out: return error; } -asmlinkage long sys_syslog(int type, char __user *buf, int len) +SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) { return do_syslog(type, buf, len); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 29dc700..c9cf48b 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -574,7 +574,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) #define arch_ptrace_attach(child) do { } while (0) #endif -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) { struct task_struct *child; long ret; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 89d7443..3e38b74 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1688,7 +1688,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol return error; } -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) { struct __sysctl_args tmp; int error; @@ -2989,7 +2989,7 @@ int sysctl_ms_jiffies(struct ctl_table *table, #else /* CONFIG_SYSCTL_SYSCALL */ -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) { struct __sysctl_args tmp; int error; diff --git a/kernel/timer.c b/kernel/timer.c index 14a5153..13dd64f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1400,7 +1400,7 @@ out: return 0; } -asmlinkage long sys_sysinfo(struct sysinfo __user *info) +SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) { struct sysinfo val; -- cgit v1.1 From 6559eed8ca7db0531a207cd80be5e28cd6f213c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:32 +0100 Subject: [CVE-2009-0029] System call wrappers part 30 Signed-off-by: Heiko Carstens --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 8eb37d3..bf0cef8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1603,7 +1603,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp * constructed. Here we are modifying the current, active, * task_struct. */ -asmlinkage long sys_unshare(unsigned long unshare_flags) +SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; -- cgit v1.1 From 836f92adf121f806e9beb5b6b88bd5c9c4ea3f24 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:33 +0100 Subject: [CVE-2009-0029] System call wrappers part 31 Signed-off-by: Heiko Carstens --- kernel/futex.c | 11 +++++------ kernel/sys.c | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index e86931d..f89d373 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1733,9 +1733,8 @@ pi_faulted: * @head: pointer to the list-head * @len: length of the list-head, as userspace expects */ -asmlinkage long -sys_set_robust_list(struct robust_list_head __user *head, - size_t len) +SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, + size_t, len) { if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -1756,9 +1755,9 @@ sys_set_robust_list(struct robust_list_head __user *head, * @head_ptr: pointer to a list-head pointer, the kernel fills it in * @len_ptr: pointer to a length field, the kernel fills in the header size */ -asmlinkage long -sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, - size_t __user *len_ptr) +SYSCALL_DEFINE3(get_robust_list, int, pid, + struct robust_list_head __user * __user *, head_ptr, + size_t __user *, len_ptr) { struct robust_list_head __user *head; unsigned long ret; diff --git a/kernel/sys.c b/kernel/sys.c index 59aadcd..e7dc0e1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1817,8 +1817,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return error; } -asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, - struct getcpu_cache __user *unused) +SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, + struct getcpu_cache __user *, unused) { int err = 0; int cpu = raw_smp_processor_id(); -- cgit v1.1 From d4e82042c4cfa87a7d51710b71f568fe80132551 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:34 +0100 Subject: [CVE-2009-0029] System call wrappers part 32 Signed-off-by: Heiko Carstens --- kernel/signal.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index e233392..e737597 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2491,11 +2491,10 @@ out: #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ #ifdef __ARCH_WANT_SYS_RT_SIGACTION -asmlinkage long -sys_rt_sigaction(int sig, - const struct sigaction __user *act, - struct sigaction __user *oact, - size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigaction, int, sig, + const struct sigaction __user *, act, + struct sigaction __user *, oact, + size_t, sigsetsize) { struct k_sigaction new_sa, old_sa; int ret = -EINVAL; @@ -2578,7 +2577,7 @@ SYSCALL_DEFINE0(pause) #endif #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND -asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) +SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) { sigset_t newset; -- cgit v1.1 From 9316fcacb89c59fe556c48587ac02cd7f5d38045 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Jan 2009 09:35:44 -0800 Subject: kernel/up.c: omit it if SMP=y, USE_GENERIC_SMP_HELPERS=n Fix the sparc build - we were including `up.o' on SMP builds, when CONFIG_USE_GENERIC_SMP_HELPERS=n. Tested-by: Robert Reif Fixed-by: Robert Reif Cc: David Miller Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index 2aebc4c..170a921 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -40,9 +40,8 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o -ifeq ($(CONFIG_USE_GENERIC_SMP_HELPERS),y) -obj-y += smp.o -else +obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o +ifneq ($(CONFIG_SMP),y) obj-y += up.o endif obj-$(CONFIG_SMP) += spinlock.o -- cgit v1.1 From 98a4826b99bc4bcc34c604b2fc4fcf4d771600ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2009 10:56:32 +0100 Subject: sched: fix bandwidth validation for UID grouping Impact: make rt-limit tunables work again Mark Glines reported: > I've got an issue on x86-64 where I can't configure the system to allow > RT tasks for a non-root user. > > In 2.6.26.5, I was able to do the following to set things up nicely: > echo 450000 >/sys/kernel/uids/0/cpu_rt_runtime > echo 450000 >/sys/kernel/uids/1000/cpu_rt_runtime > > Seems like every value I try to echo into the /sys files returns EINVAL. For UID grouping we initialize the root group with infinite bandwidth which by default is actually more than the global limit, therefore the bandwidth check always fails. Because the root group is a phantom group (for UID grouping) we cannot runtime adjust it, therefore we let it reflect the global bandwidth settings. Reported-by: Mark Glines Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 3b630d8..ed62d1c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9050,6 +9050,13 @@ static int tg_schedulable(struct task_group *tg, void *data) runtime = d->rt_runtime; } +#ifdef CONFIG_USER_SCHED + if (tg == &root_task_group) { + period = global_rt_period(); + runtime = global_rt_runtime(); + } +#endif + /* * Cannot have more runtime than the period. */ -- cgit v1.1 From cce7ade803699463ecc62a065ca522004f7ccb3d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:37 +0100 Subject: sched: SCHED_IDLE weight change Increase the SCHED_IDLE weight from 2 to 3, this gives much more stable vruntime numbers. time advanced in 100ms: weight=2 64765.988352 67012.881408 88501.412352 weight=3 35496.181411 34130.971298 35497.411573 Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index ed62d1c..6acfb3c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1323,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) * slice expiry etc. */ -#define WEIGHT_IDLEPRIO 2 -#define WMULT_IDLEPRIO (1 << 31) +#define WEIGHT_IDLEPRIO 3 +#define WMULT_IDLEPRIO 1431655765 /* * Nice levels are multiplicative, with a gentle 10% change for every -- cgit v1.1 From 6bc912b71b6f33b041cfde93ca3f019cbaa852bc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:38 +0100 Subject: sched: SCHED_OTHER vs SCHED_IDLE isolation Stronger SCHED_IDLE isolation: - no SCHED_IDLE buddies - never let SCHED_IDLE preempt on wakeup - always preempt SCHED_IDLE on wakeup - limit SLEEPER fairness for SCHED_IDLE. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 8e1352c..cdebd80 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -677,9 +677,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) unsigned long thresh = sysctl_sched_latency; /* - * convert the sleeper threshold into virtual time + * Convert the sleeper threshold into virtual time. + * SCHED_IDLE is a special sub-class. We care about + * fairness only relative to other SCHED_IDLE tasks, + * all of which have the same weight. */ - if (sched_feat(NORMALIZED_SLEEPER)) + if (sched_feat(NORMALIZED_SLEEPER) && + task_of(se)->policy != SCHED_IDLE) thresh = calc_delta_fair(thresh, se); vruntime -= thresh; @@ -1340,14 +1344,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) static void set_last_buddy(struct sched_entity *se) { - for_each_sched_entity(se) - cfs_rq_of(se)->last = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; + } } static void set_next_buddy(struct sched_entity *se) { - for_each_sched_entity(se) - cfs_rq_of(se)->next = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; + } } /* @@ -1393,12 +1401,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) return; /* - * Batch tasks do not preempt (their preemption is driven by + * Batch and idle tasks do not preempt (their preemption is driven by * the tick): */ - if (unlikely(p->policy == SCHED_BATCH)) + if (unlikely(p->policy != SCHED_NORMAL)) return; + /* Idle tasks are by definition preempted by everybody. */ + if (unlikely(curr->policy == SCHED_IDLE)) { + resched_task(curr); + return; + } + if (!sched_feat(WAKEUP_PREEMPT)) return; -- cgit v1.1 From e17036dac189dd034c092a91df56aa740db7146d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:39 +0100 Subject: sched: fix update_min_vruntime Impact: fix SCHED_IDLE latency problems OK, so we have 1 running task A (which is obviously curr and the tree is equally obviously empty). 'A' nicely chugs along, doing its thing, carrying min_vruntime along as it goes. Then some whacko speed freak SCHED_IDLE task gets inserted due to SMP balancing, which is very likely far right, in that case update_curr update_min_vruntime cfs_rq->rb_leftmost := true (the crazy task sitting in a tree) vruntime = se->vruntime and voila, min_vruntime is waaay right of where it ought to be. OK, so why did I write it like that to begin with... Aah, yes. Say we've just dequeued current schedule deactivate_task(prev) dequeue_entity update_min_vruntime Then we'll set vruntime = cfs_rq->min_vruntime; we find !cfs_rq->curr, but do find someone in the tree. Then we _must_ do vruntime = se->vruntime, because vruntime = min_vruntime(vruntime := cfs_rq->min_vruntime, se->vruntime) will not advance vruntime, and cause lags the other way around (which we fixed with that initial patch: 1af5f730fc1bf7c62ec9fb2d307206e18bf40a69 (sched: more accurate min_vruntime accounting). Signed-off-by: Peter Zijlstra Tested-by: Mike Galbraith Acked-by: Mike Galbraith Cc: Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index cdebd80..16b419b 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -283,7 +283,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) struct sched_entity, run_node); - if (vruntime == cfs_rq->min_vruntime) + if (!cfs_rq->curr) vruntime = se->vruntime; else vruntime = min_vruntime(vruntime, se->vruntime); -- cgit v1.1 From 88fc241f54459ac3d86c5e13b449730199f66061 Mon Sep 17 00:00:00 2001 From: Doug Chapman Date: Thu, 15 Jan 2009 10:38:56 -0800 Subject: [IA64] dump stack on kernel unaligned warnings Often the cause of kernel unaligned access warnings is not obvious from just the ip displayed in the warning. This adds the option via proc to dump the stack in addition to the warning. The default is off (just display the 1 line warning). To enable the stack to be shown: echo 1 > /proc/sys/kernel/unaligned-dump-stack Signed-off-by: Doug Chapman Signed-off-by: Tony Luck --- kernel/sysctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e38b74..368d163 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -144,6 +144,7 @@ extern int acct_parm[]; #ifdef CONFIG_IA64 extern int no_unaligned_warning; +extern int unaligned_dump_stack; #endif #ifdef CONFIG_RT_MUTEXES @@ -781,6 +782,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "unaligned-dump-stack", + .data = &unaligned_dump_stack, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #ifdef CONFIG_DETECT_SOFTLOCKUP { -- cgit v1.1 From 6272d68cc6a5f90c6b1a2228cf0f67b895305d17 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 15 Jan 2009 17:17:15 +0100 Subject: sched: sched_slice() fixlet Mike's change: 0a582440f "sched: fix sched_slice())" broke group scheduling by forgetting to reload cfs_rq on each loop. This patch fixes aim7 regression and specjbb2005 regression becomes less than 1.5% on 8-core stokley. Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra Tested-by: Jayson King Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 16b419b..5cc1c16 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -429,7 +429,10 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); for_each_sched_entity(se) { - struct load_weight *load = &cfs_rq->load; + struct load_weight *load; + + cfs_rq = cfs_rq_of(se); + load = &cfs_rq->load; if (unlikely(!se->on_rq)) { struct load_weight lw = cfs_rq->load; -- cgit v1.1 From 45ce80fb6b6f9594d1396d44dd7e7c02d596fef8 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:50:59 -0800 Subject: cgroups: consolidate cgroup documents Move Documentation/cpusets.txt and Documentation/controllers/* to Documentation/cgroups/ Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 647c77a..a856788 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -568,7 +568,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * load balancing domains (sched domains) as specified by that partial * partition. * - * See "What is sched_load_balance" in Documentation/cpusets.txt + * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt * for a background explanation of this. * * Does not return errors, on the theory that the callers of this -- cgit v1.1 From 6ae301e85c9c58d2f430a8a7057ce488b7ff76df Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jan 2009 13:51:01 -0800 Subject: resources: fix parameter name and kernel-doc Fix __request_region() parameter kernel-doc notation and parameter name: Warning(linux-2.6.28-git10//kernel/resource.c:627): No description found for parameter 'flags' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index ca6a153..fd5d7d5 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -620,6 +620,7 @@ resource_size_t resource_alignment(struct resource *res) * @start: resource start address * @n: resource region size * @name: reserving caller's ID string + * @flags: IO resource flags */ struct resource * __request_region(struct resource *parent, resource_size_t start, resource_size_t n, -- cgit v1.1 From 33f1d7ecc6cffff3c618a02295de969ebbacd95d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Jan 2009 21:14:04 +0100 Subject: PM: Fix freezer compilation if PM_SLEEP is unset Freezer fails to compile if with the following configuration settings: CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y CONFIG_MODULES=y CONFIG_FREEZER=y CONFIG_PM=y CONFIG_PM_SLEEP=n Fix this by making process.o compilation depend on CONFIG_FREEZER. Reported-by: Cheng Renquan Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Len Brown --- kernel/power/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 597823b..d7a1016 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -4,7 +4,8 @@ EXTRA_CFLAGS += -DDEBUG endif obj-y := main.o -obj-$(CONFIG_PM_SLEEP) += process.o console.o +obj-$(CONFIG_PM_SLEEP) += console.o +obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o -- cgit v1.1 From 5a4ccaf37ffece09ef33f1cfec67efa8ee56f967 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 21:15:32 +0100 Subject: kprobes: check CONFIG_FREEZER instead of CONFIG_PM Check CONFIG_FREEZER instead of CONFIG_PM because kprobe booster depends on freeze_processes() and thaw_processes() when CONFIG_PREEMPT=y. This fixes a linkage error which occurs when CONFIG_PREEMPT=y, CONFIG_PM=y and CONFIG_FREEZER=n. Reported-by: Cheng Renquan Signed-off-by: Masami Hiramatsu Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Signed-off-by: Len Brown --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1b9cbdc..7ba8cd9 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -123,7 +123,7 @@ static int collect_garbage_slots(void); static int __kprobes check_safety(void) { int ret = 0; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM) +#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER) ret = freeze_processes(); if (ret == 0) { struct task_struct *p, *q; -- cgit v1.1