diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 3 | ||||
-rw-r--r-- | kernel/cred.c | 6 | ||||
-rw-r--r-- | kernel/events/core.c | 67 | ||||
-rw-r--r-- | kernel/fork.c | 1 | ||||
-rw-r--r-- | kernel/irq/generic-chip.c | 4 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 37 | ||||
-rw-r--r-- | kernel/irq/manage.c | 17 | ||||
-rw-r--r-- | kernel/lockdep.c | 8 | ||||
-rw-r--r-- | kernel/power/Kconfig | 4 | ||||
-rw-r--r-- | kernel/printk.c | 8 | ||||
-rw-r--r-- | kernel/sched.c | 43 | ||||
-rw-r--r-- | kernel/sys.c | 53 | ||||
-rw-r--r-- | kernel/sys_ni.c | 1 | ||||
-rw-r--r-- | kernel/sysctl_binary.c | 2 | ||||
-rw-r--r-- | kernel/sysctl_check.c | 2 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 18 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 2 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 21 | ||||
-rw-r--r-- | kernel/workqueue.c | 7 |
19 files changed, 230 insertions, 74 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index d06467f..eca595e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o range.o jump_label.o + async.o range.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER @@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += events/ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is diff --git a/kernel/cred.c b/kernel/cred.c index 174fa84..8ef31f5 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -508,10 +508,8 @@ int commit_creds(struct cred *new) key_fsgid_changed(task); /* do it - * - What if a process setreuid()'s and this brings the - * new uid over his NPROC rlimit? We can check this now - * cheaply with the new uid cache, so if it matters - * we should be checking for it. -DaveM + * RLIMIT_NPROC limits on user->processes have already been checked + * in set_user(). */ alter_cred_subscribers(new, 2); if (new->user != old->user) diff --git a/kernel/events/core.c b/kernel/events/core.c index b8785e2..0f85778 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -399,14 +399,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_restore(flags); } -static inline void perf_cgroup_sched_out(struct task_struct *task) +static inline void perf_cgroup_sched_out(struct task_struct *task, + struct task_struct *next) { - perf_cgroup_switch(task, PERF_CGROUP_SWOUT); + struct perf_cgroup *cgrp1; + struct perf_cgroup *cgrp2 = NULL; + + /* + * we come here when we know perf_cgroup_events > 0 + */ + cgrp1 = perf_cgroup_from_task(task); + + /* + * next is NULL when called from perf_event_enable_on_exec() + * that will systematically cause a cgroup_switch() + */ + if (next) + cgrp2 = perf_cgroup_from_task(next); + + /* + * only schedule out current cgroup events if we know + * that we are switching to a different cgroup. Otherwise, + * do no touch the cgroup events. + */ + if (cgrp1 != cgrp2) + perf_cgroup_switch(task, PERF_CGROUP_SWOUT); } -static inline void perf_cgroup_sched_in(struct task_struct *task) +static inline void perf_cgroup_sched_in(struct task_struct *prev, + struct task_struct *task) { - perf_cgroup_switch(task, PERF_CGROUP_SWIN); + struct perf_cgroup *cgrp1; + struct perf_cgroup *cgrp2 = NULL; + + /* + * we come here when we know perf_cgroup_events > 0 + */ + cgrp1 = perf_cgroup_from_task(task); + + /* prev can never be NULL */ + cgrp2 = perf_cgroup_from_task(prev); + + /* + * only need to schedule in cgroup events if we are changing + * cgroup during ctxsw. Cgroup events were not scheduled + * out of ctxsw out if that was not the case. + */ + if (cgrp1 != cgrp2) + perf_cgroup_switch(task, PERF_CGROUP_SWIN); } static inline int perf_cgroup_connect(int fd, struct perf_event *event, @@ -518,11 +558,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) { } -static inline void perf_cgroup_sched_out(struct task_struct *task) +static inline void perf_cgroup_sched_out(struct task_struct *task, + struct task_struct *next) { } -static inline void perf_cgroup_sched_in(struct task_struct *task) +static inline void perf_cgroup_sched_in(struct task_struct *prev, + struct task_struct *task) { } @@ -1988,7 +2030,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * cgroup event are system-wide mode only */ if (atomic_read(&__get_cpu_var(perf_cgroup_events))) - perf_cgroup_sched_out(task); + perf_cgroup_sched_out(task, next); } static void task_ctx_sched_out(struct perf_event_context *ctx) @@ -2153,7 +2195,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, * accessing the event control register. If a NMI hits, then it will * keep the event running. */ -void __perf_event_task_sched_in(struct task_struct *task) +void __perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { struct perf_event_context *ctx; int ctxn; @@ -2171,7 +2214,7 @@ void __perf_event_task_sched_in(struct task_struct *task) * cgroup event are system-wide mode only */ if (atomic_read(&__get_cpu_var(perf_cgroup_events))) - perf_cgroup_sched_in(task); + perf_cgroup_sched_in(prev, task); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -2427,7 +2470,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) * ctxswin cgroup events which are already scheduled * in. */ - perf_cgroup_sched_out(current); + perf_cgroup_sched_out(current, NULL); raw_spin_lock(&ctx->lock); task_ctx_sched_out(ctx); @@ -3353,8 +3396,8 @@ static int perf_event_index(struct perf_event *event) } static void calc_timer_values(struct perf_event *event, - u64 *running, - u64 *enabled) + u64 *enabled, + u64 *running) { u64 now, ctx_time; diff --git a/kernel/fork.c b/kernel/fork.c index e7ceaca..8e6b6f4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1111,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_cred->user != INIT_USER) goto bad_fork_free; } + current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 3a2cab4..e38544d 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); for (i = gc->irq_base; msk; msk >>= 1, i++) { - if (!msk & 0x01) + if (!(msk & 0x01)) continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) @@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, raw_spin_unlock(&gc_lock); for (; msk; msk >>= 1, i++) { - if (!msk & 0x01) + if (!(msk & 0x01)) continue; /* Remove handler first. That will mask the irq line */ diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4c60a50..039b889 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { } static inline int desc_node(struct irq_desc *desc) { return 0; } #endif -static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) +static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, + struct module *owner) { int cpu; @@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) desc->irq_count = 0; desc->irqs_unhandled = 0; desc->name = NULL; + desc->owner = owner; for_each_possible_cpu(cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; desc_smp_init(desc, node); @@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc) static inline void free_masks(struct irq_desc *desc) { } #endif -static struct irq_desc *alloc_desc(int irq, int node) +static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; gfp_t gfp = GFP_KERNEL; @@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node) raw_spin_lock_init(&desc->lock); lockdep_set_class(&desc->lock, &irq_desc_lock_class); - desc_set_defaults(irq, desc, node); + desc_set_defaults(irq, desc, node, owner); return desc; @@ -173,13 +175,14 @@ static void free_desc(unsigned int irq) kfree(desc); } -static int alloc_descs(unsigned int start, unsigned int cnt, int node) +static int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { struct irq_desc *desc; int i; for (i = 0; i < cnt; i++) { - desc = alloc_desc(start + i, node); + desc = alloc_desc(start + i, node, owner); if (!desc) goto err; mutex_lock(&sparse_irq_lock); @@ -227,7 +230,7 @@ int __init early_irq_init(void) nr_irqs = initcnt; for (i = 0; i < initcnt; i++) { - desc = alloc_desc(i, node); + desc = alloc_desc(i, node, NULL); set_bit(i, allocated_irqs); irq_insert_desc(i, desc); } @@ -261,7 +264,7 @@ int __init early_irq_init(void) alloc_masks(&desc[i], GFP_KERNEL, node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - desc_set_defaults(i, &desc[i], node); + desc_set_defaults(i, &desc[i], node, NULL); } return arch_early_irq_init(); } @@ -276,8 +279,16 @@ static void free_desc(unsigned int irq) dynamic_irq_cleanup(irq); } -static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) +static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { + u32 i; + + for (i = 0; i < cnt; i++) { + struct irq_desc *desc = irq_to_desc(start + i); + + desc->owner = owner; + } return start; } @@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @from: Start the search from this irq number * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated + * @owner: Owning module (can be NULL) * * Returns the first irq number or error code */ int __ref -irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) +__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner) { int start, ret; @@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) bitmap_set(allocated_irqs, start, cnt); mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node); + return alloc_descs(start, cnt, node, owner); err: mutex_unlock(&sparse_irq_lock); return ret; } -EXPORT_SYMBOL_GPL(irq_alloc_descs); +EXPORT_SYMBOL_GPL(__irq_alloc_descs); /** * irq_reserve_irqs - mark irqs allocated @@ -440,7 +453,7 @@ void dynamic_irq_cleanup(unsigned int irq) unsigned long flags; raw_spin_lock_irqsave(&desc->lock, flags); - desc_set_defaults(irq, desc, desc_node(desc)); + desc_set_defaults(irq, desc, desc_node(desc), NULL); raw_spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a7840ae..9b956fa 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (desc->irq_data.chip == &no_irq_chip) return -ENOSYS; + if (!try_module_get(desc->owner)) + return -ENODEV; /* * Some drivers like serial.c use request_irq() heavily, * so we have to be careful not to interfere with a @@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ nested = irq_settings_is_nested_thread(desc); if (nested) { - if (!new->thread_fn) - return -EINVAL; + if (!new->thread_fn) { + ret = -EINVAL; + goto out_mput; + } /* * Replace the primary handler which was provided from * the driver for non nested interrupt handling by the @@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) t = kthread_create(irq_thread, new, "irq/%d-%s", irq, new->name); - if (IS_ERR(t)) - return PTR_ERR(t); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto out_mput; + } /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code @@ -1095,6 +1101,8 @@ out_thread: kthread_stop(t); put_task_struct(t); } +out_mput: + module_put(desc->owner); return ret; } @@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) put_task_struct(action->thread); } + module_put(desc->owner); return action; } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8c24294..91d67ce 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3111,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) if (!class) class = look_up_lock_class(lock, 0); - if (DEBUG_LOCKS_WARN_ON(!class)) + /* + * If look_up_lock_class() failed to find a class, we're trying + * to test if we hold a lock that has never yet been acquired. + * Clearly if the lock hasn't been acquired _ever_, we're not + * holding it either, so report failure. + */ + if (!class) return 0; if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b1914cb9..3744c59 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -231,3 +231,7 @@ config PM_CLK config PM_GENERIC_DOMAINS bool depends on PM + +config PM_GENERIC_DOMAINS_RUNTIME + def_bool y + depends on PM_RUNTIME && PM_GENERIC_DOMAINS diff --git a/kernel/printk.c b/kernel/printk.c index 37dff34..28a40d8 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file) return 0; /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ if (capable(CAP_SYS_ADMIN)) { - WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated).\n"); + printk_once(KERN_WARNING "%s (%d): " + "Attempt to access syslog with CAP_SYS_ADMIN " + "but no CAP_SYSLOG (deprecated).\n", + current->comm, task_pid_nr(current)); return 0; } return -EPERM; @@ -1602,7 +1604,7 @@ static int __init printk_late_init(void) struct console *con; for_each_console(con) { - if (con->flags & CON_BOOT) { + if (!keep_bootcon && con->flags & CON_BOOT) { printk(KERN_INFO "turn off boot console %s%d\n", con->name, con->index); unregister_console(con); diff --git a/kernel/sched.c b/kernel/sched.c index ccacdbd..ec5f472 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_disable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ - perf_event_task_sched_in(current); + perf_event_task_sched_in(prev, current); #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_enable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ @@ -4279,9 +4279,9 @@ pick_next_task(struct rq *rq) } /* - * schedule() is the main scheduler function. + * __schedule() is the main scheduler function. */ -asmlinkage void __sched schedule(void) +static void __sched __schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; @@ -4322,16 +4322,6 @@ need_resched: if (to_wakeup) try_to_wake_up_local(to_wakeup); } - - /* - * If we are going to sleep and we have plugged IO - * queued, make sure to submit it to avoid deadlocks. - */ - if (blk_needs_flush_plug(prev)) { - raw_spin_unlock(&rq->lock); - blk_schedule_flush_plug(prev); - raw_spin_lock(&rq->lock); - } } switch_count = &prev->nvcsw; } @@ -4369,6 +4359,26 @@ need_resched: if (need_resched()) goto need_resched; } + +static inline void sched_submit_work(struct task_struct *tsk) +{ + if (!tsk->state) + return; + /* + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. + */ + if (blk_needs_flush_plug(tsk)) + blk_schedule_flush_plug(tsk); +} + +asmlinkage void schedule(void) +{ + struct task_struct *tsk = current; + + sched_submit_work(tsk); + __schedule(); +} EXPORT_SYMBOL(schedule); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER @@ -4435,7 +4445,7 @@ asmlinkage void __sched notrace preempt_schedule(void) do { add_preempt_count_notrace(PREEMPT_ACTIVE); - schedule(); + __schedule(); sub_preempt_count_notrace(PREEMPT_ACTIVE); /* @@ -4463,7 +4473,7 @@ asmlinkage void __sched preempt_schedule_irq(void) do { add_preempt_count(PREEMPT_ACTIVE); local_irq_enable(); - schedule(); + __schedule(); local_irq_disable(); sub_preempt_count(PREEMPT_ACTIVE); @@ -5588,7 +5598,7 @@ static inline int should_resched(void) static void __cond_resched(void) { add_preempt_count(PREEMPT_ACTIVE); - schedule(); + __schedule(); sub_preempt_count(PREEMPT_ACTIVE); } @@ -7443,6 +7453,7 @@ static void __sdt_free(const struct cpumask *cpu_map) struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); if (sd && (sd->flags & SD_OVERLAP)) free_sched_groups(sd->groups, 0); + kfree(*per_cpu_ptr(sdd->sd, j)); kfree(*per_cpu_ptr(sdd->sg, j)); kfree(*per_cpu_ptr(sdd->sgp, j)); } diff --git a/kernel/sys.c b/kernel/sys.c index a101ba3..18ee1d2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -37,6 +37,8 @@ #include <linux/fs_struct.h> #include <linux/gfp.h> #include <linux/syscore_ops.h> +#include <linux/version.h> +#include <linux/ctype.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -44,6 +46,8 @@ #include <linux/user_namespace.h> #include <linux/kmsg_dump.h> +/* Move somewhere else to avoid recompiling? */ +#include <generated/utsrelease.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -621,11 +625,18 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + /* + * We don't fail in case of NPROC limit excess here because too many + * poorly written programs don't check set*uid() return code, assuming + * it never fails if called by root. We may still enforce NPROC limit + * for programs doing set*uid()+execve() by harmlessly deferring the + * failure to the execve() stage. + */ if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && - new_user != INIT_USER) { - free_uid(new_user); - return -EAGAIN; - } + new_user != INIT_USER) + current->flags |= PF_NPROC_EXCEEDED; + else + current->flags &= ~PF_NPROC_EXCEEDED; free_uid(new->user); new->user = new_user; @@ -1154,6 +1165,34 @@ DECLARE_RWSEM(uts_sem); #define override_architecture(name) 0 #endif +/* + * Work around broken programs that cannot handle "Linux 3.0". + * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 + */ +static int override_release(char __user *release, int len) +{ + int ret = 0; + char buf[len]; + + if (current->personality & UNAME26) { + char *rest = UTS_RELEASE; + int ndots = 0; + unsigned v; + + while (*rest) { + if (*rest == '.' && ++ndots >= 3) + break; + if (!isdigit(*rest) && *rest != '.') + break; + rest++; + } + v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; + snprintf(buf, len, "2.6.%u%s", v, rest); + ret = copy_to_user(release, buf, len); + } + return ret; +} + SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { int errno = 0; @@ -1163,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) errno = -EFAULT; up_read(&uts_sem); + if (!errno && override_release(name->release, sizeof(name->release))) + errno = -EFAULT; if (!errno && override_architecture(name)) errno = -EFAULT; return errno; @@ -1184,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) error = -EFAULT; up_read(&uts_sem); + if (!error && override_release(name->release, sizeof(name->release))) + error = -EFAULT; if (!error && override_architecture(name)) error = -EFAULT; return error; @@ -1218,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) if (!error && override_architecture(name)) error = -EFAULT; + if (!error && override_release(name->release, sizeof(name->release))) + error = -EFAULT; return error ? -EFAULT : 0; } #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 62cbc88..a9a5de0 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void) return -ENOSYS; } -cond_syscall(sys_nfsservctl); cond_syscall(sys_quotactl); cond_syscall(sys32_quotactl); cond_syscall(sys_acct); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 3b8e028..e8bffbe 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1,6 +1,6 @@ #include <linux/stat.h> #include <linux/sysctl.h> -#include "../fs/xfs/linux-2.6/xfs_sysctl.h" +#include "../fs/xfs/xfs_sysctl.h" #include <linux/sunrpc/debug.h> #include <linux/string.h> #include <net/ip_vs.h> diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 4e4932a..362da65 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -1,6 +1,6 @@ #include <linux/stat.h> #include <linux/sysctl.h> -#include "../fs/xfs/linux-2.6/xfs_sysctl.h" +#include "../fs/xfs/xfs_sysctl.h" #include <linux/sunrpc/debug.h> #include <linux/string.h> #include <net/ip_vs.h> diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 59f369f..ea5e1a9 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer) static void alarm_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { + memset(cur_setting, 0, sizeof(struct itimerspec)); + cur_setting->it_interval = ktime_to_timespec(timr->it.alarmtimer.period); cur_setting->it_value = @@ -479,11 +481,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; - /* Save old values */ - old_setting->it_interval = - ktime_to_timespec(timr->it.alarmtimer.period); - old_setting->it_value = - ktime_to_timespec(timr->it.alarmtimer.node.expires); + /* + * XXX HACK! Currently we can DOS a system if the interval + * period on alarmtimers is too small. Cap the interval here + * to 100us and solve this properly in a future patch! -jstultz + */ + if ((new_setting->it_interval.tv_sec == 0) && + (new_setting->it_interval.tv_nsec < 100000)) + new_setting->it_interval.tv_nsec = 100000; + + if (old_setting) + alarm_timer_get(timr, old_setting); /* If the timer was already set, cancel it */ alarm_cancel(&timr->it.alarmtimer); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2ad39e5..cd31345 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED power:power_frequency This is for userspace compatibility and will vanish after 5 kernel iterations, - namely 2.6.41. + namely 3.1. config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6957aa2..7c910a5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); + what |= MASK_TC_BIT(rw, FLUSH); + what |= MASK_TC_BIT(rw, FUA); pid = tsk->pid; if (act_log_check(bt, what, sector, pid)) @@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) goto out; } + if (tc & BLK_TC_FLUSH) + rwbs[i++] = 'F'; + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; else if (tc & BLK_TC_WRITE) @@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) else rwbs[i++] = 'N'; + if (tc & BLK_TC_FUA) + rwbs[i++] = 'F'; if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (tc & BLK_TC_BARRIER) - rwbs[i++] = 'B'; if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; if (tc & BLK_TC_META) @@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); static int blk_log_action_classic(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; @@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) static int blk_log_action(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); @@ -1561,7 +1566,7 @@ static const struct { } mask_maps[] = { { BLK_TC_READ, "read" }, { BLK_TC_WRITE, "write" }, - { BLK_TC_BARRIER, "barrier" }, + { BLK_TC_FLUSH, "flush" }, { BLK_TC_SYNC, "sync" }, { BLK_TC_QUEUE, "queue" }, { BLK_TC_REQUEUE, "requeue" }, @@ -1573,6 +1578,7 @@ static const struct { { BLK_TC_META, "meta" }, { BLK_TC_DISCARD, "discard" }, { BLK_TC_DRV_DATA, "drv_data" }, + { BLK_TC_FUA, "fua" }, }; static int blk_trace_str2mask(const char *str) @@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) { int i = 0; + if (rw & REQ_FLUSH) + rwbs[i++] = 'F'; + if (rw & WRITE) rwbs[i++] = 'W'; else if (rw & REQ_DISCARD) @@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) else rwbs[i++] = 'N'; + if (rw & REQ_FUA) + rwbs[i++] = 'F'; if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; if (rw & REQ_SYNC) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 25fb1b0..1783aab 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2412,8 +2412,13 @@ reflush: for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + bool drained; - if (!cwq->nr_active && list_empty(&cwq->delayed_works)) + spin_lock_irq(&cwq->gcwq->lock); + drained = !cwq->nr_active && list_empty(&cwq->delayed_works); + spin_unlock_irq(&cwq->gcwq->lock); + + if (drained) continue; if (++flush_cnt == 10 || |