Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile      |   1
-rw-r--r--  kernel/cgroup.c      |   9
-rw-r--r--  kernel/cpu_acct.c    | 186
-rw-r--r--  kernel/irq/handle.c  |   8
-rw-r--r--  kernel/marker.c      |  41
-rw-r--r--  kernel/params.c      |  15
-rw-r--r--  kernel/pid.c         |   2
-rw-r--r--  kernel/power/disk.c  |  12
-rw-r--r--  kernel/resource.c    |   2
-rw-r--r--  kernel/sched.c       |  14
-rw-r--r--  kernel/sysctl.c      |   4
-rw-r--r--  kernel/taskstats.c   |  36
12 files changed, 71 insertions(+), 259 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile index f60afe7..dfa9695 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -40,7 +40,6 @@ obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o obj-$(CONFIG_CPUSETS) += cpuset.o -obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3fe21e1..1a3c239 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1,6 +1,4 @@ /* - * kernel/cgroup.c - * * Generic process-grouping system. * * Based originally on the cpuset system, extracted by Paul Menage @@ -2200,7 +2198,8 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; struct list_head *l; - printk(KERN_ERR "Initializing cgroup subsys %s\n", ss->name); + + printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); /* Create the top cgroup state for this subsystem */ ss->root = &rootnode; @@ -2273,7 +2272,7 @@ int __init cgroup_init_early(void) BUG_ON(!ss->create); BUG_ON(!ss->destroy); if (ss->subsys_id != i) { - printk(KERN_ERR "Subsys %s id == %d\n", + printk(KERN_ERR "cgroup: Subsys %s id == %d\n", ss->name, ss->subsys_id); BUG(); } @@ -2605,7 +2604,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename)); if (IS_ERR(dentry)) { printk(KERN_INFO - "Couldn't allocate dentry for %s: %ld\n", nodename, + "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename, PTR_ERR(dentry)); ret = PTR_ERR(dentry); goto out_release; diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c deleted file mode 100644 index 731e47e..0000000 --- a/kernel/cpu_acct.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * kernel/cpu_acct.c - CPU accounting cgroup subsystem - * - * Copyright (C) Google Inc, 2006 - * - * Developed by Paul Menage (menage@google.com) and Balbir Singh - * (balbir@in.ibm.com) - * - */ - -/* - * Example cgroup subsystem for reporting total CPU usage of tasks in a - * cgroup, along with percentage load over a time interval - */ - -#include <linux/module.h> -#include <linux/cgroup.h> -#include <linux/fs.h> -#include <linux/rcupdate.h> - -#include <asm/div64.h> - -struct cpuacct { - struct cgroup_subsys_state css; - spinlock_t lock; - /* total time used by this class */ - cputime64_t time; - - /* time when next load calculation occurs */ - u64 next_interval_check; - - /* time used in current period */ - cputime64_t current_interval_time; - - /* time used in last period */ - cputime64_t last_interval_time; -}; - -struct cgroup_subsys cpuacct_subsys; - -static inline struct cpuacct *cgroup_ca(struct cgroup *cont) -{ - return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id), - struct cpuacct, css); -} - -static inline struct cpuacct *task_ca(struct task_struct *task) -{ - return container_of(task_subsys_state(task, cpuacct_subsys_id), - struct cpuacct, css); -} - -#define INTERVAL (HZ * 10) - -static inline u64 next_interval_boundary(u64 now) -{ - /* calculate the next interval boundary beyond the - * current time */ - do_div(now, INTERVAL); - return (now + 1) * INTERVAL; -} - -static struct cgroup_subsys_state *cpuacct_create( - struct cgroup_subsys *ss, struct cgroup *cont) -{ - struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); - - if (!ca) - return ERR_PTR(-ENOMEM); - spin_lock_init(&ca->lock); - ca->next_interval_check = 
next_interval_boundary(get_jiffies_64()); - return &ca->css; -} - -static void cpuacct_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) -{ - kfree(cgroup_ca(cont)); -} - -/* Lazily update the load calculation if necessary. Called with ca locked */ -static void cpuusage_update(struct cpuacct *ca) -{ - u64 now = get_jiffies_64(); - - /* If we're not due for an update, return */ - if (ca->next_interval_check > now) - return; - - if (ca->next_interval_check <= (now - INTERVAL)) { - /* If it's been more than an interval since the last - * check, then catch up - the last interval must have - * been zero load */ - ca->last_interval_time = 0; - ca->next_interval_check = next_interval_boundary(now); - } else { - /* If a steal takes the last interval time negative, - * then we just ignore it */ - if ((s64)ca->current_interval_time > 0) - ca->last_interval_time = ca->current_interval_time; - else - ca->last_interval_time = 0; - ca->next_interval_check += INTERVAL; - } - ca->current_interval_time = 0; -} - -static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft) -{ - struct cpuacct *ca = cgroup_ca(cont); - u64 time; - - spin_lock_irq(&ca->lock); - cpuusage_update(ca); - time = cputime64_to_jiffies64(ca->time); - spin_unlock_irq(&ca->lock); - - /* Convert 64-bit jiffies to seconds */ - time *= 1000; - do_div(time, HZ); - return time; -} - -static u64 load_read(struct cgroup *cont, struct cftype *cft) -{ - struct cpuacct *ca = cgroup_ca(cont); - u64 time; - - /* Find the time used in the previous interval */ - spin_lock_irq(&ca->lock); - cpuusage_update(ca); - time = cputime64_to_jiffies64(ca->last_interval_time); - spin_unlock_irq(&ca->lock); - - /* Convert time to a percentage, to give the load in the - * previous period */ - time *= 100; - do_div(time, INTERVAL); - - return time; -} - -static struct cftype files[] = { - { - .name = "usage", - .read_uint = cpuusage_read, - }, - { - .name = "load", - .read_uint = load_read, - } -}; - -static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont) -{ - return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); -} - -void cpuacct_charge(struct task_struct *task, cputime_t cputime) -{ - - struct cpuacct *ca; - unsigned long flags; - - if (!cpuacct_subsys.active) - return; - rcu_read_lock(); - ca = task_ca(task); - if (ca) { - spin_lock_irqsave(&ca->lock, flags); - cpuusage_update(ca); - ca->time = cputime64_add(ca->time, cputime); - ca->current_interval_time = - cputime64_add(ca->current_interval_time, cputime); - spin_unlock_irqrestore(&ca->lock, flags); - } - rcu_read_unlock(); -} - -struct cgroup_subsys cpuacct_subsys = { - .name = "cpuacct", - .create = cpuacct_create, - .destroy = cpuacct_destroy, - .populate = cpuacct_populate, - .subsys_id = cpuacct_subsys_id, -}; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index e391cbb..dc335ad 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -178,9 +178,11 @@ fastcall unsigned int __do_IRQ(unsigned int irq) */ if (desc->chip->ack) desc->chip->ack(irq); - action_ret = handle_IRQ_event(irq, desc->action); - if (!noirqdebug) - note_interrupt(irq, desc, action_ret); + if (likely(!(desc->status & IRQ_DISABLED))) { + action_ret = handle_IRQ_event(irq, desc->action); + if (!noirqdebug) + note_interrupt(irq, desc, action_ret); + } desc->chip->end(irq); return 1; } diff --git a/kernel/marker.c b/kernel/marker.c index ccb48d9..5323cfa 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -28,7 +28,7 @@ extern struct marker __start___markers[]; extern struct marker 
__stop___markers[]; /* - * module_mutex nests inside markers_mutex. Markers mutex protects the builtin + * markers_mutex nests inside module_mutex. Markers mutex protects the builtin * and module markers, the hash table and deferred_sync. */ static DEFINE_MUTEX(markers_mutex); @@ -257,7 +257,6 @@ static void disable_marker(struct marker *elem) * @refcount: number of references left to the given probe_module (out) * * Updates the probe callback corresponding to a range of markers. - * Must be called with markers_mutex held. */ void marker_update_probe_range(struct marker *begin, struct marker *end, struct module *probe_module, @@ -266,6 +265,7 @@ void marker_update_probe_range(struct marker *begin, struct marker *iter; struct marker_entry *mark_entry; + mutex_lock(&markers_mutex); for (iter = begin; iter < end; iter++) { mark_entry = get_marker(iter->name); if (mark_entry && mark_entry->refcount) { @@ -281,6 +281,7 @@ void marker_update_probe_range(struct marker *begin, disable_marker(iter); } } + mutex_unlock(&markers_mutex); } /* @@ -293,7 +294,6 @@ static void marker_update_probes(struct module *probe_module) { int refcount = 0; - mutex_lock(&markers_mutex); /* Core kernel markers */ marker_update_probe_range(__start___markers, __stop___markers, probe_module, &refcount); @@ -303,7 +303,6 @@ static void marker_update_probes(struct module *probe_module) synchronize_sched(); deferred_sync = 0; } - mutex_unlock(&markers_mutex); } /** @@ -320,7 +319,7 @@ int marker_probe_register(const char *name, const char *format, marker_probe_func *probe, void *private) { struct marker_entry *entry; - int ret = 0, need_update = 0; + int ret = 0; mutex_lock(&markers_mutex); entry = get_marker(name); @@ -335,11 +334,11 @@ int marker_probe_register(const char *name, const char *format, ret = add_marker(name, format, probe, private); if (ret) goto end; - need_update = 1; + mutex_unlock(&markers_mutex); + marker_update_probes(NULL); + return ret; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(NULL); return ret; } EXPORT_SYMBOL_GPL(marker_probe_register); @@ -355,7 +354,6 @@ void *marker_probe_unregister(const char *name) struct module *probe_module; struct marker_entry *entry; void *private; - int need_update = 0; mutex_lock(&markers_mutex); entry = get_marker(name); @@ -368,11 +366,11 @@ void *marker_probe_unregister(const char *name) probe_module = __module_text_address((unsigned long)entry->probe); private = remove_marker(name); deferred_sync = 1; - need_update = 1; + mutex_unlock(&markers_mutex); + marker_update_probes(probe_module); + return private; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(probe_module); return private; } EXPORT_SYMBOL_GPL(marker_probe_unregister); @@ -392,7 +390,6 @@ void *marker_probe_unregister_private_data(void *private) struct marker_entry *entry; int found = 0; unsigned int i; - int need_update = 0; mutex_lock(&markers_mutex); for (i = 0; i < MARKER_TABLE_SIZE; i++) { @@ -414,11 +411,11 @@ iter_end: probe_module = __module_text_address((unsigned long)entry->probe); private = remove_marker(entry->name); deferred_sync = 1; - need_update = 1; + mutex_unlock(&markers_mutex); + marker_update_probes(probe_module); + return private; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(probe_module); return private; } EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); @@ -434,7 +431,7 @@ EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); int marker_arm(const char *name) { struct 
marker_entry *entry; - int ret = 0, need_update = 0; + int ret = 0; mutex_lock(&markers_mutex); entry = get_marker(name); @@ -447,11 +444,9 @@ int marker_arm(const char *name) */ if (entry->refcount++) goto end; - need_update = 1; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(NULL); + marker_update_probes(NULL); return ret; } EXPORT_SYMBOL_GPL(marker_arm); @@ -467,7 +462,7 @@ EXPORT_SYMBOL_GPL(marker_arm); int marker_disarm(const char *name) { struct marker_entry *entry; - int ret = 0, need_update = 0; + int ret = 0; mutex_lock(&markers_mutex); entry = get_marker(name); @@ -486,11 +481,9 @@ int marker_disarm(const char *name) ret = -EPERM; goto end; } - need_update = 1; end: mutex_unlock(&markers_mutex); - if (need_update) - marker_update_probes(NULL); + marker_update_probes(NULL); return ret; } EXPORT_SYMBOL_GPL(marker_disarm); diff --git a/kernel/params.c b/kernel/params.c index 16f269e..2a4c514 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -592,19 +592,16 @@ static void __init param_sysfs_builtin(void) for (i=0; i < __stop___param - __start___param; i++) { char *dot; - size_t kplen; + size_t max_name_len; kp = &__start___param[i]; - kplen = strlen(kp->name); + max_name_len = + min_t(size_t, MAX_KBUILD_MODNAME, strlen(kp->name)); - /* We do not handle args without periods. */ - if (kplen > MAX_KBUILD_MODNAME) { - DEBUGP("kernel parameter name is too long: %s\n", kp->name); - continue; - } - dot = memchr(kp->name, '.', kplen); + dot = memchr(kp->name, '.', max_name_len); if (!dot) { - DEBUGP("couldn't find period in %s\n", kp->name); + DEBUGP("couldn't find period in first %d characters " + "of %s\n", MAX_KBUILD_MODNAME, kp->name); continue; } name_len = dot - kp->name; diff --git a/kernel/pid.c b/kernel/pid.c index d1db36b..f815455 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -537,6 +537,7 @@ err_alloc: return NULL; } +#ifdef CONFIG_PID_NS static struct pid_namespace *create_pid_namespace(int level) { struct pid_namespace *ns; @@ -621,6 +622,7 @@ void free_pid_ns(struct kref *kref) if (parent != NULL) put_pid_ns(parent); } +#endif /* CONFIG_PID_NS */ void zap_pid_ns_processes(struct pid_namespace *pid_ns) { diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 8b15f77..05b6479 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -456,7 +456,17 @@ static int software_resume(void) int error; unsigned int flags; - mutex_lock(&pm_mutex); + /* + * name_to_dev_t() below takes a sysfs buffer mutex when sysfs + * is configured into the kernel. Since the regular hibernate + * trigger path is via sysfs which takes a buffer mutex before + * calling hibernate functions (which take pm_mutex) this can + * cause lockdep to complain about a possible ABBA deadlock + * which cannot happen since we're in the boot code here and + * sysfs can't be invoked yet. Therefore, we use a subclass + * here to avoid lockdep complaining. 
+ */ + mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); if (!swsusp_resume_device) { if (!strlen(resume_file)) { mutex_unlock(&pm_mutex); diff --git a/kernel/resource.c b/kernel/resource.c index a358142..2eb553d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -277,7 +277,7 @@ walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg, int ret = -1; res.start = (u64) start_pfn << PAGE_SHIFT; res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1; - res.flags = IORESOURCE_MEM; + res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) { pfn = (unsigned long)(res.start >> PAGE_SHIFT); diff --git a/kernel/sched.c b/kernel/sched.c index b18f231..4fb3532 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -52,7 +52,6 @@ #include <linux/cpu.h> #include <linux/cpuset.h> #include <linux/percpu.h> -#include <linux/cpu_acct.h> #include <linux/kthread.h> #include <linux/seq_file.h> #include <linux/sysctl.h> @@ -3338,13 +3337,9 @@ void account_user_time(struct task_struct *p, cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; cputime64_t tmp; - struct rq *rq = this_rq(); p->utime = cputime_add(p->utime, cputime); - if (p != rq->idle) - cpuacct_charge(p, cputime); - /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); if (TASK_NICE(p) > 0) @@ -3408,10 +3403,9 @@ void account_system_time(struct task_struct *p, int hardirq_offset, cpustat->irq = cputime64_add(cpustat->irq, tmp); else if (softirq_count()) cpustat->softirq = cputime64_add(cpustat->softirq, tmp); - else if (p != rq->idle) { + else if (p != rq->idle) cpustat->system = cputime64_add(cpustat->system, tmp); - cpuacct_charge(p, cputime); - } else if (atomic_read(&rq->nr_iowait) > 0) + else if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else cpustat->idle = cputime64_add(cpustat->idle, tmp); @@ -3447,10 +3441,8 @@ void account_steal_time(struct task_struct *p, cputime_t steal) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else cpustat->idle = cputime64_add(cpustat->idle, tmp); - } else { + } else cpustat->steal = cputime64_add(cpustat->steal, tmp); - cpuacct_charge(p, -tmp); - } } /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3a1744f..0deed82 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2620,6 +2620,10 @@ static int deprecated_sysctl_warning(struct __sysctl_args *args) int name[CTL_MAXNAME]; int i; + /* Check args->nlen. 
*/ + if (args->nlen < 0 || args->nlen > CTL_MAXNAME) + return -ENOTDIR; + /* Read in the sysctl name for better debug message logging */ for (i = 0; i < args->nlen; i++) if (get_user(name[i], args->name + i)) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 354e74b..07e86a8 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -398,31 +398,31 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); file = fget_light(fd, &fput_needed); - if (file) { - size = nla_total_size(sizeof(struct cgroupstats)); + if (!file) + return 0; - rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, - size); - if (rc < 0) - goto err; + size = nla_total_size(sizeof(struct cgroupstats)); - na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, - sizeof(struct cgroupstats)); - stats = nla_data(na); - memset(stats, 0, sizeof(*stats)); + rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, + size); + if (rc < 0) + goto err; - rc = cgroupstats_build(stats, file->f_dentry); - if (rc < 0) - goto err; + na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, + sizeof(struct cgroupstats)); + stats = nla_data(na); + memset(stats, 0, sizeof(*stats)); - fput_light(file, fput_needed); - return send_reply(rep_skb, info->snd_pid); + rc = cgroupstats_build(stats, file->f_dentry); + if (rc < 0) { + nlmsg_free(rep_skb); + goto err; } + rc = send_reply(rep_skb, info->snd_pid); + err: - if (file) - fput_light(file, fput_needed); - nlmsg_free(rep_skb); + fput_light(file, fput_needed); return rc; } |