diff options
-rw-r--r-- | Documentation/vm/slabinfo.c | 27 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux_64.lds.S | 6 | ||||
-rw-r--r-- | arch/x86/vdso/vdso.S | 10 | ||||
-rw-r--r-- | drivers/acpi/processor_thermal.c | 30 | ||||
-rw-r--r-- | drivers/atm/nicstar.c | 2 | ||||
-rw-r--r-- | drivers/char/drm/r128_cce.c | 2 | ||||
-rw-r--r-- | drivers/char/tty_io.c | 2 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 129 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_stats.c | 6 | ||||
-rw-r--r-- | include/linux/mm_types.h | 5 | ||||
-rw-r--r-- | include/linux/slub_def.h | 16 | ||||
-rw-r--r-- | init/do_mounts_md.c | 2 | ||||
-rw-r--r-- | init/do_mounts_rd.c | 2 | ||||
-rw-r--r-- | kernel/cpuset.c | 3 | ||||
-rw-r--r-- | kernel/hrtimer.c | 19 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 2 | ||||
-rw-r--r-- | kernel/ptrace.c | 7 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 2 | ||||
-rw-r--r-- | mm/slob.c | 3 | ||||
-rw-r--r-- | mm/slub.c | 481 |
22 files changed, 445 insertions, 323 deletions
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index 22d7e3e..d3ce295 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c @@ -31,7 +31,7 @@ struct slabinfo { int hwcache_align, object_size, objs_per_slab; int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; - unsigned long partial, objects, slabs; + unsigned long partial, objects, slabs, objects_partial, objects_total; unsigned long alloc_fastpath, alloc_slowpath; unsigned long free_fastpath, free_slowpath; unsigned long free_frozen, free_add_partial, free_remove_partial; @@ -540,7 +540,8 @@ void slabcache(struct slabinfo *s) return; store_size(size_str, slab_size(s)); - snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs); + snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs, + s->partial, s->cpu_slabs); if (!line++) first_line(); @@ -776,7 +777,6 @@ void totals(void) unsigned long used; unsigned long long wasted; unsigned long long objwaste; - long long objects_in_partial_slabs; unsigned long percentage_partial_slabs; unsigned long percentage_partial_objs; @@ -790,18 +790,11 @@ void totals(void) wasted = size - used; objwaste = s->slab_size - s->object_size; - objects_in_partial_slabs = s->objects - - (s->slabs - s->partial - s ->cpu_slabs) * - s->objs_per_slab; - - if (objects_in_partial_slabs < 0) - objects_in_partial_slabs = 0; - percentage_partial_slabs = s->partial * 100 / s->slabs; if (percentage_partial_slabs > 100) percentage_partial_slabs = 100; - percentage_partial_objs = objects_in_partial_slabs * 100 + percentage_partial_objs = s->objects_partial * 100 / s->objects; if (percentage_partial_objs > 100) @@ -823,8 +816,8 @@ void totals(void) min_objects = s->objects; if (used < min_used) min_used = used; - if (objects_in_partial_slabs < min_partobj) - min_partobj = objects_in_partial_slabs; + if (s->objects_partial < min_partobj) + min_partobj = s->objects_partial; if (percentage_partial_slabs < min_ppart) min_ppart = percentage_partial_slabs; if (percentage_partial_objs < min_ppartobj) @@ -848,8 +841,8 @@ void totals(void) max_objects = s->objects; if (used > max_used) max_used = used; - if (objects_in_partial_slabs > max_partobj) - max_partobj = objects_in_partial_slabs; + if (s->objects_partial > max_partobj) + max_partobj = s->objects_partial; if (percentage_partial_slabs > max_ppart) max_ppart = percentage_partial_slabs; if (percentage_partial_objs > max_ppartobj) @@ -864,7 +857,7 @@ void totals(void) total_objects += s->objects; total_used += used; - total_partobj += objects_in_partial_slabs; + total_partobj += s->objects_partial; total_ppart += percentage_partial_slabs; total_ppartobj += percentage_partial_objs; @@ -1160,6 +1153,8 @@ void read_slab_dir(void) slab->hwcache_align = get_obj("hwcache_align"); slab->object_size = get_obj("object_size"); slab->objects = get_obj("objects"); + slab->objects_partial = get_obj("objects_partial"); + slab->objects_total = get_obj("objects_total"); slab->objs_per_slab = get_obj("objs_per_slab"); slab->order = get_obj("order"); slab->partial = get_obj("partial"); diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index e2d870d..8db8f73 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -339,6 +339,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; + unsigned int cached_freq; dprintk("get_cur_freq_on_cpu (%d)\n", cpu); @@ -347,7 +348,16 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return 0; } + cached_freq = data->freq_table[data->acpi_data->state].frequency; freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); + if (freq != cached_freq) { + /* + * The dreaded BIOS frequency change behind our back. + * Force set the frequency on next target call. + */ + data->resume = 1; + } + dprintk("cur freq = %u\n", freq); return freq; diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index b7ab3c3..fad3674 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -209,12 +209,6 @@ SECTIONS EXIT_DATA } -/* vdso blob that is mapped into user space */ - vdso_start = . ; - .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) } - . = ALIGN(PAGE_SIZE); - vdso_end = .; - #ifdef CONFIG_BLK_DEV_INITRD . = ALIGN(PAGE_SIZE); __initramfs_start = .; diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 4b1620a..1d3aa6b 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S @@ -1,2 +1,10 @@ - .section ".vdso","a" +#include <linux/init.h> + +__INITDATA + + .globl vdso_start, vdso_end +vdso_start: .incbin "arch/x86/vdso/vdso.so" +vdso_end: + +__FINIT diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 9cb43f5..649ae99 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -97,7 +97,7 @@ static int acpi_processor_apply_limit(struct acpi_processor *pr) #define CPUFREQ_THERMAL_MIN_STEP 0 #define CPUFREQ_THERMAL_MAX_STEP 3 -static unsigned int cpufreq_thermal_reduction_pctg[NR_CPUS]; +static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg); static unsigned int acpi_thermal_cpufreq_is_init = 0; static int cpu_has_cpufreq(unsigned int cpu) @@ -113,9 +113,9 @@ static int acpi_thermal_cpufreq_increase(unsigned int cpu) if (!cpu_has_cpufreq(cpu)) return -ENODEV; - if (cpufreq_thermal_reduction_pctg[cpu] < + if (per_cpu(cpufreq_thermal_reduction_pctg, cpu) < CPUFREQ_THERMAL_MAX_STEP) { - cpufreq_thermal_reduction_pctg[cpu]++; + per_cpu(cpufreq_thermal_reduction_pctg, cpu)++; cpufreq_update_policy(cpu); return 0; } @@ -128,14 +128,14 @@ static int acpi_thermal_cpufreq_decrease(unsigned int cpu) if (!cpu_has_cpufreq(cpu)) return -ENODEV; - if (cpufreq_thermal_reduction_pctg[cpu] > + if (per_cpu(cpufreq_thermal_reduction_pctg, cpu) > (CPUFREQ_THERMAL_MIN_STEP + 1)) - cpufreq_thermal_reduction_pctg[cpu]--; + per_cpu(cpufreq_thermal_reduction_pctg, cpu)--; else - cpufreq_thermal_reduction_pctg[cpu] = 0; + per_cpu(cpufreq_thermal_reduction_pctg, cpu) = 0; cpufreq_update_policy(cpu); /* We reached max freq again and can leave passive mode */ - return !cpufreq_thermal_reduction_pctg[cpu]; + return !per_cpu(cpufreq_thermal_reduction_pctg, cpu); } static int acpi_thermal_cpufreq_notifier(struct notifier_block *nb, @@ -147,9 +147,10 @@ static int acpi_thermal_cpufreq_notifier(struct notifier_block *nb, if (event != CPUFREQ_ADJUST) goto out; - max_freq = - (policy->cpuinfo.max_freq * - (100 - cpufreq_thermal_reduction_pctg[policy->cpu] * 20)) / 100; + max_freq = ( + policy->cpuinfo.max_freq * + (100 - per_cpu(cpufreq_thermal_reduction_pctg, policy->cpu) * 20) + ) / 100; cpufreq_verify_within_limits(policy, 0, max_freq); @@ -174,7 +175,7 @@ static int cpufreq_get_cur_state(unsigned int cpu) if (!cpu_has_cpufreq(cpu)) return 0; - return cpufreq_thermal_reduction_pctg[cpu]; + return per_cpu(cpufreq_thermal_reduction_pctg, cpu); } static int cpufreq_set_cur_state(unsigned int cpu, int state) @@ -182,7 +183,7 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) if (!cpu_has_cpufreq(cpu)) return 0; - cpufreq_thermal_reduction_pctg[cpu] = state; + per_cpu(cpufreq_thermal_reduction_pctg, cpu) = state; cpufreq_update_policy(cpu); return 0; } @@ -191,8 +192,9 @@ void acpi_thermal_cpufreq_init(void) { int i; - for (i = 0; i < NR_CPUS; i++) - cpufreq_thermal_reduction_pctg[i] = 0; + for (i = 0; i < nr_cpu_ids; i++) + if (cpu_present(i)) + per_cpu(cpufreq_thermal_reduction_pctg, i) = 0; i = cpufreq_register_notifier(&acpi_thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 38c769f..3da804b 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -415,7 +415,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) card->pcidev = pcidev; membase = pci_resource_start(pcidev, 1); card->membase = ioremap(membase, NS_IOREMAP_SIZE); - if (card->membase == 0) + if (!card->membase) { printk("nicstar%d: can't ioremap() membase.\n",i); error = 3; diff --git a/drivers/char/drm/r128_cce.c b/drivers/char/drm/r128_cce.c index f36adbd..c31afbd 100644 --- a/drivers/char/drm/r128_cce.c +++ b/drivers/char/drm/r128_cce.c @@ -817,7 +817,7 @@ static struct drm_buf *r128_freelist_get(struct drm_device * dev) for (i = 0; i < dma->buf_count; i++) { buf = dma->buflist[i]; buf_priv = buf->dev_private; - if (buf->file_priv == 0) + if (!buf->file_priv) return buf; } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 4d3c701..98b65a2 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1180,7 +1180,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) if (*str == ',') str++; if (*str == '\0') - str = 0; + str = NULL; if (tty_line >= 0 && tty_line <= p->num && p->poll_init && !p->poll_init(p, tty_line, str)) { diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 35a26a3..d3575f5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -118,9 +118,11 @@ static void handle_update(struct work_struct *work); static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list); static struct srcu_notifier_head cpufreq_transition_notifier_list; +static bool init_cpufreq_transition_notifier_list_called; static int __init init_cpufreq_transition_notifier_list(void) { srcu_init_notifier_head(&cpufreq_transition_notifier_list); + init_cpufreq_transition_notifier_list_called = true; return 0; } pure_initcall(init_cpufreq_transition_notifier_list); @@ -216,7 +218,7 @@ static void cpufreq_debug_disable_ratelimit(void) } void cpufreq_debug_printk(unsigned int type, const char *prefix, - const char *fmt, ...) + const char *fmt, ...) { char s[256]; va_list args; @@ -378,7 +380,7 @@ static struct cpufreq_governor *__find_governor(const char *str_governor) /** * cpufreq_parse_governor - parse a governor string */ -static int cpufreq_parse_governor (char *str_governor, unsigned int *policy, +static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, struct cpufreq_governor **governor) { int err = -EINVAL; @@ -446,7 +448,7 @@ extern struct sysdev_class cpu_sysdev_class; #define show_one(file_name, object) \ static ssize_t show_##file_name \ -(struct cpufreq_policy * policy, char *buf) \ +(struct cpufreq_policy *policy, char *buf) \ { \ return sprintf (buf, "%u\n", policy->object); \ } @@ -465,7 +467,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, */ #define store_one(file_name, object) \ static ssize_t store_##file_name \ -(struct cpufreq_policy * policy, const char *buf, size_t count) \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ { \ unsigned int ret = -EINVAL; \ struct cpufreq_policy new_policy; \ @@ -490,8 +492,8 @@ store_one(scaling_max_freq,max); /** * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware */ -static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, - char *buf) +static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, + char *buf) { unsigned int cur_freq = __cpufreq_get(policy->cpu); if (!cur_freq) @@ -503,8 +505,7 @@ static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, /** * show_scaling_governor - show the current policy for the specified CPU */ -static ssize_t show_scaling_governor (struct cpufreq_policy * policy, - char *buf) +static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) { if(policy->policy == CPUFREQ_POLICY_POWERSAVE) return sprintf(buf, "powersave\n"); @@ -519,8 +520,8 @@ static ssize_t show_scaling_governor (struct cpufreq_policy * policy, /** * store_scaling_governor - store policy for the specified CPU */ -static ssize_t store_scaling_governor (struct cpufreq_policy * policy, - const char *buf, size_t count) +static ssize_t store_scaling_governor(struct cpufreq_policy *policy, + const char *buf, size_t count) { unsigned int ret = -EINVAL; char str_governor[16]; @@ -554,7 +555,7 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy, /** * show_scaling_driver - show the cpufreq driver currently loaded */ -static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf) +static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) { return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); } @@ -562,8 +563,8 @@ static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf) /** * show_scaling_available_governors - show the available CPUfreq governors */ -static ssize_t show_scaling_available_governors (struct cpufreq_policy *policy, - char *buf) +static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, + char *buf) { ssize_t i = 0; struct cpufreq_governor *t; @@ -585,7 +586,7 @@ out: /** * show_affected_cpus - show the CPUs affected by each transition */ -static ssize_t show_affected_cpus (struct cpufreq_policy * policy, char *buf) +static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) { ssize_t i = 0; unsigned int cpu; @@ -602,7 +603,7 @@ static ssize_t show_affected_cpus (struct cpufreq_policy * policy, char *buf) } static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, - const char *buf, size_t count) + const char *buf, size_t count) { unsigned int freq = 0; unsigned int ret; @@ -651,7 +652,7 @@ define_one_rw(scaling_max_freq); define_one_rw(scaling_governor); define_one_rw(scaling_setspeed); -static struct attribute * default_attrs[] = { +static struct attribute *default_attrs[] = { &cpuinfo_min_freq.attr, &cpuinfo_max_freq.attr, &scaling_min_freq.attr, @@ -667,10 +668,10 @@ static struct attribute * default_attrs[] = { #define to_policy(k) container_of(k,struct cpufreq_policy,kobj) #define to_attr(a) container_of(a,struct freq_attr,attr) -static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf) +static ssize_t show(struct kobject *kobj, struct attribute *attr ,char *buf) { - struct cpufreq_policy * policy = to_policy(kobj); - struct freq_attr * fattr = to_attr(attr); + struct cpufreq_policy *policy = to_policy(kobj); + struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; policy = cpufreq_cpu_get(policy->cpu); if (!policy) @@ -691,11 +692,11 @@ no_policy: return ret; } -static ssize_t store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) { - struct cpufreq_policy * policy = to_policy(kobj); - struct freq_attr * fattr = to_attr(attr); + struct cpufreq_policy *policy = to_policy(kobj); + struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; policy = cpufreq_cpu_get(policy->cpu); if (!policy) @@ -716,9 +717,9 @@ no_policy: return ret; } -static void cpufreq_sysfs_release(struct kobject * kobj) +static void cpufreq_sysfs_release(struct kobject *kobj) { - struct cpufreq_policy * policy = to_policy(kobj); + struct cpufreq_policy *policy = to_policy(kobj); dprintk("last reference is dropped\n"); complete(&policy->kobj_unregister); } @@ -740,7 +741,7 @@ static struct kobj_type ktype_cpufreq = { * * Adds the cpufreq interface for a CPU device. */ -static int cpufreq_add_dev (struct sys_device * sys_dev) +static int cpufreq_add_dev(struct sys_device *sys_dev) { unsigned int cpu = sys_dev->id; int ret = 0; @@ -800,7 +801,6 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) ret = cpufreq_driver->init(policy); if (ret) { dprintk("initialization failed\n"); - unlock_policy_rwsem_write(cpu); goto err_out; } policy->user_policy.min = policy->cpuinfo.min_freq; @@ -823,7 +823,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) /* check for existing affected CPUs. They may not be aware * of it due to CPU Hotplug. */ - managed_policy = cpufreq_cpu_get(j); + managed_policy = cpufreq_cpu_get(j); // FIXME: Where is this released? What about error paths? if (unlikely(managed_policy)) { /* Set proper policy_cpu */ @@ -842,14 +842,11 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) ret = sysfs_create_link(&sys_dev->kobj, &managed_policy->kobj, "cpufreq"); - if (ret) { - unlock_policy_rwsem_write(cpu); + if (ret) goto err_out_driver_exit; - } cpufreq_debug_enable_ratelimit(); ret = 0; - unlock_policy_rwsem_write(cpu); goto err_out_driver_exit; /* call driver->exit() */ } } @@ -859,33 +856,26 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) /* prepare interface data */ ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, "cpufreq"); - if (ret) { - unlock_policy_rwsem_write(cpu); + if (ret) goto err_out_driver_exit; - } + /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; while ((drv_attr) && (*drv_attr)) { ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); - if (ret) { - unlock_policy_rwsem_write(cpu); + if (ret) goto err_out_driver_exit; - } drv_attr++; } - if (cpufreq_driver->get){ + if (cpufreq_driver->get) { ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); - if (ret) { - unlock_policy_rwsem_write(cpu); + if (ret) goto err_out_driver_exit; - } } - if (cpufreq_driver->target){ + if (cpufreq_driver->target) { ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); - if (ret) { - unlock_policy_rwsem_write(cpu); + if (ret) goto err_out_driver_exit; - } } spin_lock_irqsave(&cpufreq_driver_lock, flags); @@ -907,10 +897,8 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) cpu_sys_dev = get_cpu_sysdev(j); ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, "cpufreq"); - if (ret) { - unlock_policy_rwsem_write(cpu); + if (ret) goto err_out_unregister; - } } policy->governor = NULL; /* to assure that the starting sequence is @@ -950,6 +938,7 @@ err_out_driver_exit: cpufreq_driver->exit(policy); err_out: + unlock_policy_rwsem_write(cpu); kfree(policy); nomem_out: @@ -967,7 +956,7 @@ module_out: * Caller should already have policy_rwsem in write mode for this CPU. * This routine frees the rwsem before returning. */ -static int __cpufreq_remove_dev (struct sys_device * sys_dev) +static int __cpufreq_remove_dev(struct sys_device *sys_dev) { unsigned int cpu = sys_dev->id; unsigned long flags; @@ -1071,7 +1060,7 @@ static int __cpufreq_remove_dev (struct sys_device * sys_dev) } -static int cpufreq_remove_dev (struct sys_device * sys_dev) +static int cpufreq_remove_dev(struct sys_device *sys_dev) { unsigned int cpu = sys_dev->id; int retval; @@ -1138,7 +1127,7 @@ unsigned int cpufreq_quick_get(unsigned int cpu) cpufreq_cpu_put(policy); } - return (ret_freq); + return ret_freq; } EXPORT_SYMBOL(cpufreq_quick_get); @@ -1149,7 +1138,7 @@ static unsigned int __cpufreq_get(unsigned int cpu) unsigned int ret_freq = 0; if (!cpufreq_driver->get) - return (ret_freq); + return ret_freq; ret_freq = cpufreq_driver->get(cpu); @@ -1163,7 +1152,7 @@ static unsigned int __cpufreq_get(unsigned int cpu) } } - return (ret_freq); + return ret_freq; } /** @@ -1190,7 +1179,7 @@ unsigned int cpufreq_get(unsigned int cpu) out_policy: cpufreq_cpu_put(policy); out: - return (ret_freq); + return ret_freq; } EXPORT_SYMBOL(cpufreq_get); @@ -1199,7 +1188,7 @@ EXPORT_SYMBOL(cpufreq_get); * cpufreq_suspend - let the low level driver prepare for suspend */ -static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg) +static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) { int cpu = sysdev->id; int ret = 0; @@ -1221,22 +1210,18 @@ static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg) return -EINVAL; /* only handle each CPU group once */ - if (unlikely(cpu_policy->cpu != cpu)) { - cpufreq_cpu_put(cpu_policy); - return 0; - } + if (unlikely(cpu_policy->cpu != cpu)) + goto out; if (cpufreq_driver->suspend) { ret = cpufreq_driver->suspend(cpu_policy, pmsg); if (ret) { printk(KERN_ERR "cpufreq: suspend failed in ->suspend " "step on CPU %u\n", cpu_policy->cpu); - cpufreq_cpu_put(cpu_policy); - return ret; + goto out; } } - if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) goto out; @@ -1270,7 +1255,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg) out: cpufreq_cpu_put(cpu_policy); - return 0; + return ret; } /** @@ -1281,7 +1266,7 @@ out: * 3.) schedule call cpufreq_update_policy() ASAP as interrupts are * restored. */ -static int cpufreq_resume(struct sys_device * sysdev) +static int cpufreq_resume(struct sys_device *sysdev) { int cpu = sysdev->id; int ret = 0; @@ -1302,18 +1287,15 @@ static int cpufreq_resume(struct sys_device * sysdev) return -EINVAL; /* only handle each CPU group once */ - if (unlikely(cpu_policy->cpu != cpu)) { - cpufreq_cpu_put(cpu_policy); - return 0; - } + if (unlikely(cpu_policy->cpu != cpu)) + goto fail; if (cpufreq_driver->resume) { ret = cpufreq_driver->resume(cpu_policy); if (ret) { printk(KERN_ERR "cpufreq: resume failed in ->resume " "step on CPU %u\n", cpu_policy->cpu); - cpufreq_cpu_put(cpu_policy); - return ret; + goto fail; } } @@ -1353,6 +1335,7 @@ static int cpufreq_resume(struct sys_device * sysdev) out: schedule_work(&cpu_policy->update); +fail: cpufreq_cpu_put(cpu_policy); return ret; } @@ -1386,6 +1369,8 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) { int ret; + WARN_ON(!init_cpufreq_transition_notifier_list_called); + switch (list) { case CPUFREQ_TRANSITION_NOTIFIER: ret = srcu_notifier_chain_register( @@ -1848,7 +1833,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_debug_enable_ratelimit(); } - return (ret); + return ret; } EXPORT_SYMBOL_GPL(cpufreq_register_driver); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 070421a..ef09e06 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -114,7 +114,7 @@ show_trans_table(struct cpufreq_policy *policy, char *buf) stat->freq_table[i]); } if (len >= PAGE_SIZE) - return len; + return PAGE_SIZE; len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -131,8 +131,12 @@ show_trans_table(struct cpufreq_policy *policy, char *buf) len += snprintf(buf + len, PAGE_SIZE - len, "%9u ", stat->trans_table[i*stat->max_state+j]); } + if (len >= PAGE_SIZE) + break; len += snprintf(buf + len, PAGE_SIZE - len, "\n"); } + if (len >= PAGE_SIZE) + return PAGE_SIZE; return len; } CPUFREQ_STATDEVICE_ATTR(trans_table,0444,show_trans_table); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 29adaa78..e2bae8d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -42,7 +42,10 @@ struct page { * to show when page is mapped * & limit reverse map searches. */ - unsigned int inuse; /* SLUB: Nr of objects */ + struct { /* SLUB */ + u16 inuse; + u16 objects; + }; }; union { struct { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 79d59c9..71e43a1 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -29,6 +29,7 @@ enum stat_item { DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */ DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */ DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */ + ORDER_FALLBACK, /* Number of times fallback was necessary */ NR_SLUB_STAT_ITEMS }; struct kmem_cache_cpu { @@ -48,11 +49,21 @@ struct kmem_cache_node { struct list_head partial; #ifdef CONFIG_SLUB_DEBUG atomic_long_t nr_slabs; + atomic_long_t total_objects; struct list_head full; #endif }; /* + * Word size structure that can be atomically updated or read and that + * contains both the order and the number of objects that a slab of the + * given order would contain. + */ +struct kmem_cache_order_objects { + unsigned long x; +}; + +/* * Slab cache management. */ struct kmem_cache { @@ -61,7 +72,7 @@ struct kmem_cache { int size; /* The size of an object including meta data */ int objsize; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ - int order; /* Current preferred allocation order */ + struct kmem_cache_order_objects oo; /* * Avoid an extra cache line for UP, SMP and for the node local to @@ -70,7 +81,8 @@ struct kmem_cache { struct kmem_cache_node local_node; /* Allocation and freeing of slabs */ - int objects; /* Number of objects in slab */ + struct kmem_cache_order_objects max; + struct kmem_cache_order_objects min; gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ void (*ctor)(struct kmem_cache *, void *); diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index 753dc54..7473b0c 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -133,7 +133,7 @@ static void __init md_setup_drive(void) else dev = MKDEV(MD_MAJOR, minor); create_dev(name, dev); - for (i = 0; i < MD_SB_DISKS && devname != 0; i++) { + for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { char *p; char comp_name[64]; u32 rdev; diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 3ac5904..46dfd64 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -212,7 +212,7 @@ int __init rd_load_image(char *from) } buf = kmalloc(BLOCK_SIZE, GFP_KERNEL); - if (buf == 0) { + if (!buf) { printk(KERN_ERR "RAMDISK: could not allocate buffer\n"); goto done; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 024888b..48a976c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1265,7 +1265,8 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont, return -E2BIG; /* +1 for nul-terminator */ - if ((buffer = kmalloc(nbytes + 1, GFP_KERNEL)) == 0) + buffer = kmalloc(nbytes + 1, GFP_KERNEL); + if (!buffer) return -ENOMEM; if (copy_from_user(buffer, userbuf, nbytes)) { diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e379ef0..dea4c91 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -590,7 +590,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, list_add_tail(&timer->cb_entry, &base->cpu_base->cb_pending); timer->state = HRTIMER_STATE_PENDING; - raise_softirq(HRTIMER_SOFTIRQ); return 1; default: BUG(); @@ -633,6 +632,11 @@ static int hrtimer_switch_to_hres(void) return 1; } +static inline void hrtimer_raise_softirq(void) +{ + raise_softirq(HRTIMER_SOFTIRQ); +} + #else static inline int hrtimer_hres_active(void) { return 0; } @@ -651,6 +655,7 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, { return 0; } +static inline void hrtimer_raise_softirq(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -850,7 +855,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret; + int ret, raise; base = lock_hrtimer_base(timer, &flags); @@ -884,8 +889,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) enqueue_hrtimer(timer, new_base, new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); + /* + * The timer may be expired and moved to the cb_pending + * list. We can not raise the softirq with base lock held due + * to a possible deadlock with runqueue lock. + */ + raise = timer->state == HRTIMER_STATE_PENDING; + unlock_hrtimer_base(timer, &flags); + if (raise) + hrtimer_raise_softirq(); + return ret; } EXPORT_SYMBOL_GPL(hrtimer_start); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6d792b6..5ca37fa 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -92,7 +92,7 @@ static struct pid_namespace *create_pid_namespace(int level) atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); for (i = 1; i < PIDMAP_ENTRIES; i++) { - ns->pidmap[i].page = 0; + ns->pidmap[i].page = NULL; atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 67e392e..dac4b4e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -612,7 +612,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) return (copied == sizeof(data)) ? 0 : -EIO; } -#ifdef CONFIG_COMPAT +#if defined CONFIG_COMPAT && defined __ARCH_WANT_COMPAT_SYS_PTRACE #include <linux/compat.h> int compat_ptrace_request(struct task_struct *child, compat_long_t request, @@ -667,7 +667,6 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, return ret; } -#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, compat_long_t addr, compat_long_t data) { @@ -710,6 +709,4 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, unlock_kernel(); return ret; } -#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ - -#endif /* CONFIG_COMPAT */ +#endif /* CONFIG_COMPAT && __ARCH_WANT_COMPAT_SYS_PTRACE */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index df28c17..2c37c67 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -204,7 +204,7 @@ static struct page *alloc_fresh_huge_page_node(int nid) if (page) { if (arch_prepare_hugepage(page)) { __free_pages(page, HUGETLB_PAGE_ORDER); - return 0; + return NULL; } set_compound_page_dtor(page, free_huge_page); spin_lock(&hugetlb_lock); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c4ba85c..b17dca7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -29,6 +29,8 @@ #include <asm/tlbflush.h> +#include "internal.h" + /* add this memory to iomem resource */ static struct resource *register_memory_resource(u64 start, u64 size) { @@ -533,7 +533,8 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, { struct kmem_cache *c; - c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1); + c = slob_alloc(sizeof(struct kmem_cache), + flags, ARCH_KMALLOC_MINALIGN, -1); if (c) { c->name = name; @@ -149,25 +149,6 @@ static inline void ClearSlabDebug(struct page *page) /* Enable to test recovery from slab corruption on boot */ #undef SLUB_RESILIENCY_TEST -#if PAGE_SHIFT <= 12 - -/* - * Small page size. Make sure that we do not fragment memory - */ -#define DEFAULT_MAX_ORDER 1 -#define DEFAULT_MIN_OBJECTS 4 - -#else - -/* - * Large page machines are customarily able to handle larger - * page orders. - */ -#define DEFAULT_MAX_ORDER 2 -#define DEFAULT_MIN_OBJECTS 8 - -#endif - /* * Mininum number of partial slabs. These will be left on the partial * lists even if they are empty. kmem_cache_shrink may reclaim them. @@ -204,8 +185,6 @@ static inline void ClearSlabDebug(struct page *page) /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000 /* Poison object */ #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ -#define __KMALLOC_CACHE 0x20000000 /* objects freed using kfree */ -#define __PAGE_ALLOC_FALLBACK 0x10000000 /* Allow fallback to page alloc */ static int kmem_size = sizeof(struct kmem_cache); @@ -296,7 +275,7 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; base = page_address(page); - if (object < base || object >= base + s->objects * s->size || + if (object < base || object >= base + page->objects * s->size || (object - base) % s->size) { return 0; } @@ -322,8 +301,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) } /* Loop over all objects in a slab */ -#define for_each_object(__p, __s, __addr) \ - for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\ +#define for_each_object(__p, __s, __addr, __objects) \ + for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ __p += (__s)->size) /* Scan freelist */ @@ -336,6 +315,26 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) return (p - addr) / s->size; } +static inline struct kmem_cache_order_objects oo_make(int order, + unsigned long size) +{ + struct kmem_cache_order_objects x = { + (order << 16) + (PAGE_SIZE << order) / size + }; + + return x; +} + +static inline int oo_order(struct kmem_cache_order_objects x) +{ + return x.x >> 16; +} + +static inline int oo_objects(struct kmem_cache_order_objects x) +{ + return x.x & ((1 << 16) - 1); +} + #ifdef CONFIG_SLUB_DEBUG /* * Debug settings: @@ -446,8 +445,8 @@ static void print_tracking(struct kmem_cache *s, void *object) static void print_page_info(struct page *page) { - printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n", - page, page->inuse, page->freelist, page->flags); + printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", + page, page->objects, page->inuse, page->freelist, page->flags); } @@ -647,6 +646,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) p + off, POISON_INUSE, s->size - off); } +/* Check the pad bytes at the end of a slab page */ static int slab_pad_check(struct kmem_cache *s, struct page *page) { u8 *start; @@ -659,20 +659,20 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) return 1; start = page_address(page); - end = start + (PAGE_SIZE << s->order); - length = s->objects * s->size; - remainder = end - (start + length); + length = (PAGE_SIZE << compound_order(page)); + end = start + length; + remainder = length % s->size; if (!remainder) return 1; - fault = check_bytes(start + length, POISON_INUSE, remainder); + fault = check_bytes(end - remainder, POISON_INUSE, remainder); if (!fault) return 1; while (end > fault && end[-1] == POISON_INUSE) end--; slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); - print_section("Padding", start, length); + print_section("Padding", end - remainder, remainder); restore_bytes(s, "slab padding", POISON_INUSE, start, end); return 0; @@ -734,15 +734,24 @@ static int check_object(struct kmem_cache *s, struct page *page, static int check_slab(struct kmem_cache *s, struct page *page) { + int maxobj; + VM_BUG_ON(!irqs_disabled()); if (!PageSlab(page)) { slab_err(s, page, "Not a valid slab page"); return 0; } - if (page->inuse > s->objects) { + + maxobj = (PAGE_SIZE << compound_order(page)) / s->size; + if (page->objects > maxobj) { + slab_err(s, page, "objects %u > max %u", + s->name, page->objects, maxobj); + return 0; + } + if (page->inuse > page->objects) { slab_err(s, page, "inuse %u > max %u", - s->name, page->inuse, s->objects); + s->name, page->inuse, page->objects); return 0; } /* Slab_pad_check fixes things up after itself */ @@ -759,8 +768,9 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) int nr = 0; void *fp = page->freelist; void *object = NULL; + unsigned long max_objects; - while (fp && nr <= s->objects) { + while (fp && nr <= page->objects) { if (fp == search) return 1; if (!check_valid_pointer(s, page, fp)) { @@ -772,7 +782,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) } else { slab_err(s, page, "Freepointer corrupt"); page->freelist = NULL; - page->inuse = s->objects; + page->inuse = page->objects; slab_fix(s, "Freelist cleared"); return 0; } @@ -783,10 +793,20 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) nr++; } - if (page->inuse != s->objects - nr) { + max_objects = (PAGE_SIZE << compound_order(page)) / s->size; + if (max_objects > 65535) + max_objects = 65535; + + if (page->objects != max_objects) { + slab_err(s, page, "Wrong number of objects. Found %d but " + "should be %d", page->objects, max_objects); + page->objects = max_objects; + slab_fix(s, "Number of objects adjusted."); + } + if (page->inuse != page->objects - nr) { slab_err(s, page, "Wrong object count. Counter is %d but " - "counted were %d", page->inuse, s->objects - nr); - page->inuse = s->objects - nr; + "counted were %d", page->inuse, page->objects - nr); + page->inuse = page->objects - nr; slab_fix(s, "Object count adjusted."); } return search == NULL; @@ -840,7 +860,7 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node) return atomic_long_read(&n->nr_slabs); } -static inline void inc_slabs_node(struct kmem_cache *s, int node) +static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) { struct kmem_cache_node *n = get_node(s, node); @@ -850,14 +870,17 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node) * dilemma by deferring the increment of the count during * bootstrap (see early_kmem_cache_node_alloc). */ - if (!NUMA_BUILD || n) + if (!NUMA_BUILD || n) { atomic_long_inc(&n->nr_slabs); + atomic_long_add(objects, &n->total_objects); + } } -static inline void dec_slabs_node(struct kmem_cache *s, int node) +static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) { struct kmem_cache_node *n = get_node(s, node); atomic_long_dec(&n->nr_slabs); + atomic_long_sub(objects, &n->total_objects); } /* Object debug checks for alloc/free paths */ @@ -905,7 +928,7 @@ bad: * as used avoids touching the remaining objects. */ slab_fix(s, "Marking all objects used"); - page->inuse = s->objects; + page->inuse = page->objects; page->freelist = NULL; } return 0; @@ -1055,31 +1078,52 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize, static inline unsigned long slabs_node(struct kmem_cache *s, int node) { return 0; } -static inline void inc_slabs_node(struct kmem_cache *s, int node) {} -static inline void dec_slabs_node(struct kmem_cache *s, int node) {} +static inline void inc_slabs_node(struct kmem_cache *s, int node, + int objects) {} +static inline void dec_slabs_node(struct kmem_cache *s, int node, + int objects) {} #endif + /* * Slab allocation and freeing */ +static inline struct page *alloc_slab_page(gfp_t flags, int node, + struct kmem_cache_order_objects oo) +{ + int order = oo_order(oo); + + if (node == -1) + return alloc_pages(flags, order); + else + return alloc_pages_node(node, flags, order); +} + static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { struct page *page; - int pages = 1 << s->order; + struct kmem_cache_order_objects oo = s->oo; flags |= s->allocflags; - if (node == -1) - page = alloc_pages(flags, s->order); - else - page = alloc_pages_node(node, flags, s->order); - - if (!page) - return NULL; + page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node, + oo); + if (unlikely(!page)) { + oo = s->min; + /* + * Allocation may have failed due to fragmentation. + * Try a lower order alloc if possible + */ + page = alloc_slab_page(flags, node, oo); + if (!page) + return NULL; + stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); + } + page->objects = oo_objects(oo); mod_zone_page_state(page_zone(page), (s->flags & SLAB_RECLAIM_ACCOUNT) ? NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, - pages); + 1 << oo_order(oo)); return page; } @@ -1106,7 +1150,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) goto out; - inc_slabs_node(s, page_to_nid(page)); + inc_slabs_node(s, page_to_nid(page), page->objects); page->slab = s; page->flags |= 1 << PG_slab; if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | @@ -1116,10 +1160,10 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) start = page_address(page); if (unlikely(s->flags & SLAB_POISON)) - memset(start, POISON_INUSE, PAGE_SIZE << s->order); + memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page)); last = start; - for_each_object(p, s, start) { + for_each_object(p, s, start, page->objects) { setup_object(s, page, last); set_freepointer(s, last, p); last = p; @@ -1135,13 +1179,15 @@ out: static void __free_slab(struct kmem_cache *s, struct page *page) { - int pages = 1 << s->order; + int order = compound_order(page); + int pages = 1 << order; if (unlikely(SlabDebug(page))) { void *p; slab_pad_check(s, page); - for_each_object(p, s, page_address(page)) + for_each_object(p, s, page_address(page), + page->objects) check_object(s, page, p, 0); ClearSlabDebug(page); } @@ -1153,7 +1199,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlab(page); reset_page_mapcount(page); - __free_pages(page, s->order); + __free_pages(page, order); } static void rcu_free_slab(struct rcu_head *h) @@ -1179,7 +1225,7 @@ static void free_slab(struct kmem_cache *s, struct page *page) static void discard_slab(struct kmem_cache *s, struct page *page) { - dec_slabs_node(s, page_to_nid(page)); + dec_slabs_node(s, page_to_nid(page), page->objects); free_slab(s, page); } @@ -1515,7 +1561,7 @@ load_freelist: goto debug; c->freelist = object[c->offset]; - c->page->inuse = s->objects; + c->page->inuse = c->page->objects; c->page->freelist = NULL; c->node = page_to_nid(c->page); unlock_out: @@ -1552,27 +1598,6 @@ new_slab: c->page = new; goto load_freelist; } - - /* - * No memory available. - * - * If the slab uses higher order allocs but the object is - * smaller than a page size then we can fallback in emergencies - * to the page allocator via kmalloc_large. The page allocator may - * have failed to obtain a higher order page and we can try to - * allocate a single page if the object fits into a single page. - * That is only possible if certain conditions are met that are being - * checked when a slab is created. - */ - if (!(gfpflags & __GFP_NORETRY) && - (s->flags & __PAGE_ALLOC_FALLBACK)) { - if (gfpflags & __GFP_WAIT) - local_irq_enable(); - object = kmalloc_large(s->objsize, gfpflags); - if (gfpflags & __GFP_WAIT) - local_irq_disable(); - return object; - } return NULL; debug: if (!alloc_debug_processing(s, c->page, object, addr)) @@ -1773,8 +1798,8 @@ static struct page *get_object_page(const void *x) * take the list_lock. */ static int slub_min_order; -static int slub_max_order = DEFAULT_MAX_ORDER; -static int slub_min_objects = DEFAULT_MIN_OBJECTS; +static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; +static int slub_min_objects; /* * Merge control. If this is set then no merging of slab caches will occur. @@ -1789,7 +1814,7 @@ static int slub_nomerge; * system components. Generally order 0 allocations should be preferred since * order 0 does not cause fragmentation in the page allocator. Larger objects * be problematic to put into order 0 slabs because there may be too much - * unused space left. We go to a higher order if more than 1/8th of the slab + * unused space left. We go to a higher order if more than 1/16th of the slab * would be wasted. * * In order to reach satisfactory performance we must ensure that a minimum @@ -1814,6 +1839,9 @@ static inline int slab_order(int size, int min_objects, int rem; int min_order = slub_min_order; + if ((PAGE_SIZE << min_order) / size > 65535) + return get_order(size * 65535) - 1; + for (order = max(min_order, fls(min_objects * size - 1) - PAGE_SHIFT); order <= max_order; order++) { @@ -1848,8 +1876,10 @@ static inline int calculate_order(int size) * we reduce the minimum objects required in a slab. */ min_objects = slub_min_objects; + if (!min_objects) + min_objects = 4 * (fls(nr_cpu_ids) + 1); while (min_objects > 1) { - fraction = 8; + fraction = 16; while (fraction >= 4) { order = slab_order(size, min_objects, slub_max_order, fraction); @@ -2091,7 +2121,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, init_tracking(kmalloc_caches, n); #endif init_kmem_cache_node(n); - inc_slabs_node(kmalloc_caches, node); + inc_slabs_node(kmalloc_caches, node, page->objects); /* * lockdep requires consistent irq usage for each lock @@ -2167,11 +2197,12 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) * calculate_sizes() determines the order and the distribution of data within * a slab object. */ -static int calculate_sizes(struct kmem_cache *s) +static int calculate_sizes(struct kmem_cache *s, int forced_order) { unsigned long flags = s->flags; unsigned long size = s->objsize; unsigned long align = s->align; + int order; /* * Round up object size to the next word boundary. We can only @@ -2255,26 +2286,16 @@ static int calculate_sizes(struct kmem_cache *s) */ size = ALIGN(size, align); s->size = size; + if (forced_order >= 0) + order = forced_order; + else + order = calculate_order(size); - if ((flags & __KMALLOC_CACHE) && - PAGE_SIZE / size < slub_min_objects) { - /* - * Kmalloc cache that would not have enough objects in - * an order 0 page. Kmalloc slabs can fallback to - * page allocator order 0 allocs so take a reasonably large - * order that will allows us a good number of objects. - */ - s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER); - s->flags |= __PAGE_ALLOC_FALLBACK; - s->allocflags |= __GFP_NOWARN; - } else - s->order = calculate_order(size); - - if (s->order < 0) + if (order < 0) return 0; s->allocflags = 0; - if (s->order) + if (order) s->allocflags |= __GFP_COMP; if (s->flags & SLAB_CACHE_DMA) @@ -2286,9 +2307,12 @@ static int calculate_sizes(struct kmem_cache *s) /* * Determine the number of objects per slab */ - s->objects = (PAGE_SIZE << s->order) / size; + s->oo = oo_make(order, size); + s->min = oo_make(get_order(size), size); + if (oo_objects(s->oo) > oo_objects(s->max)) + s->max = s->oo; - return !!s->objects; + return !!oo_objects(s->oo); } @@ -2304,7 +2328,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, s->align = align; s->flags = kmem_cache_flags(size, flags, name, ctor); - if (!calculate_sizes(s)) + if (!calculate_sizes(s, -1)) goto error; s->refcount = 1; @@ -2321,7 +2345,7 @@ error: if (flags & SLAB_PANIC) panic("Cannot create slab %s size=%lu realsize=%u " "order=%u offset=%u flags=%lx\n", - s->name, (unsigned long)size, s->size, s->order, + s->name, (unsigned long)size, s->size, oo_order(s->oo), s->offset, flags); return 0; } @@ -2367,26 +2391,52 @@ const char *kmem_cache_name(struct kmem_cache *s) } EXPORT_SYMBOL(kmem_cache_name); +static void list_slab_objects(struct kmem_cache *s, struct page *page, + const char *text) +{ +#ifdef CONFIG_SLUB_DEBUG + void *addr = page_address(page); + void *p; + DECLARE_BITMAP(map, page->objects); + + bitmap_zero(map, page->objects); + slab_err(s, page, "%s", text); + slab_lock(page); + for_each_free_object(p, s, page->freelist) + set_bit(slab_index(p, s, addr), map); + + for_each_object(p, s, addr, page->objects) { + + if (!test_bit(slab_index(p, s, addr), map)) { + printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n", + p, p - addr); + print_tracking(s, p); + } + } + slab_unlock(page); +#endif +} + /* - * Attempt to free all slabs on a node. Return the number of slabs we - * were unable to free. + * Attempt to free all partial slabs on a node. */ -static int free_list(struct kmem_cache *s, struct kmem_cache_node *n, - struct list_head *list) +static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) { - int slabs_inuse = 0; unsigned long flags; struct page *page, *h; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry_safe(page, h, list, lru) + list_for_each_entry_safe(page, h, &n->partial, lru) { if (!page->inuse) { list_del(&page->lru); discard_slab(s, page); - } else - slabs_inuse++; + n->nr_partial--; + } else { + list_slab_objects(s, page, + "Objects remaining on kmem_cache_close()"); + } + } spin_unlock_irqrestore(&n->list_lock, flags); - return slabs_inuse; } /* @@ -2403,8 +2453,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); - n->nr_partial -= free_list(s, n, &n->partial); - if (slabs_node(s, node)) + free_partial(s, n); + if (n->nr_partial || slabs_node(s, node)) return 1; } free_kmem_cache_nodes(s); @@ -2422,8 +2472,11 @@ void kmem_cache_destroy(struct kmem_cache *s) if (!s->refcount) { list_del(&s->list); up_write(&slub_lock); - if (kmem_cache_close(s)) - WARN_ON(1); + if (kmem_cache_close(s)) { + printk(KERN_ERR "SLUB %s: %s called for cache that " + "still has objects.\n", s->name, __func__); + dump_stack(); + } sysfs_slab_remove(s); } else up_write(&slub_lock); @@ -2482,7 +2535,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, down_write(&slub_lock); if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, - flags | __KMALLOC_CACHE, NULL)) + flags, NULL)) goto panic; list_add(&s->list, &slab_caches); @@ -2730,8 +2783,9 @@ int kmem_cache_shrink(struct kmem_cache *s) struct kmem_cache_node *n; struct page *page; struct page *t; + int objects = oo_objects(s->max); struct list_head *slabs_by_inuse = - kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL); + kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL); unsigned long flags; if (!slabs_by_inuse) @@ -2744,7 +2798,7 @@ int kmem_cache_shrink(struct kmem_cache *s) if (!n->nr_partial) continue; - for (i = 0; i < s->objects; i++) + for (i = 0; i < objects; i++) INIT_LIST_HEAD(slabs_by_inuse + i); spin_lock_irqsave(&n->list_lock, flags); @@ -2776,7 +2830,7 @@ int kmem_cache_shrink(struct kmem_cache *s) * Rebuild the partial list with the slabs filled up most * first and the least used slabs at the end. */ - for (i = s->objects - 1; i >= 0; i--) + for (i = objects - 1; i >= 0; i--) list_splice(slabs_by_inuse + i, n->partial.prev); spin_unlock_irqrestore(&n->list_lock, flags); @@ -2997,9 +3051,6 @@ static int slab_unmergeable(struct kmem_cache *s) if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) return 1; - if ((s->flags & __PAGE_ALLOC_FALLBACK)) - return 1; - if (s->ctor) return 1; @@ -3192,7 +3243,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, } #if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO) -static unsigned long count_partial(struct kmem_cache_node *n) +static unsigned long count_partial(struct kmem_cache_node *n, + int (*get_count)(struct page *)) { unsigned long flags; unsigned long x = 0; @@ -3200,10 +3252,25 @@ static unsigned long count_partial(struct kmem_cache_node *n) spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->partial, lru) - x += page->inuse; + x += get_count(page); spin_unlock_irqrestore(&n->list_lock, flags); return x; } + +static int count_inuse(struct page *page) +{ + return page->inuse; +} + +static int count_total(struct page *page) +{ + return page->objects; +} + +static int count_free(struct page *page) +{ + return page->objects - page->inuse; +} #endif #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) @@ -3218,7 +3285,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, return 0; /* Now we know that a valid freelist exists */ - bitmap_zero(map, s->objects); + bitmap_zero(map, page->objects); for_each_free_object(p, s, page->freelist) { set_bit(slab_index(p, s, addr), map); @@ -3226,7 +3293,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, return 0; } - for_each_object(p, s, addr) + for_each_object(p, s, addr, page->objects) if (!test_bit(slab_index(p, s, addr), map)) if (!check_object(s, page, p, 1)) return 0; @@ -3292,7 +3359,7 @@ static long validate_slab_cache(struct kmem_cache *s) { int node; unsigned long count = 0; - unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) * + unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * sizeof(unsigned long), GFP_KERNEL); if (!map) @@ -3495,14 +3562,14 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s, struct page *page, enum track_item alloc) { void *addr = page_address(page); - DECLARE_BITMAP(map, s->objects); + DECLARE_BITMAP(map, page->objects); void *p; - bitmap_zero(map, s->objects); + bitmap_zero(map, page->objects); for_each_free_object(p, s, page->freelist) set_bit(slab_index(p, s, addr), map); - for_each_object(p, s, addr) + for_each_object(p, s, addr, page->objects) if (!test_bit(slab_index(p, s, addr), map)) add_location(t, s, get_track(s, p, alloc)); } @@ -3592,22 +3659,23 @@ static int list_locations(struct kmem_cache *s, char *buf, } enum slab_stat_type { - SL_FULL, - SL_PARTIAL, - SL_CPU, - SL_OBJECTS + SL_ALL, /* All slabs */ + SL_PARTIAL, /* Only partially allocated slabs */ + SL_CPU, /* Only slabs used for cpu caches */ + SL_OBJECTS, /* Determine allocated objects not slabs */ + SL_TOTAL /* Determine object capacity not slabs */ }; -#define SO_FULL (1 << SL_FULL) +#define SO_ALL (1 << SL_ALL) #define SO_PARTIAL (1 << SL_PARTIAL) #define SO_CPU (1 << SL_CPU) #define SO_OBJECTS (1 << SL_OBJECTS) +#define SO_TOTAL (1 << SL_TOTAL) static ssize_t show_slab_objects(struct kmem_cache *s, char *buf, unsigned long flags) { unsigned long total = 0; - int cpu; int node; int x; unsigned long *nodes; @@ -3618,56 +3686,60 @@ static ssize_t show_slab_objects(struct kmem_cache *s, return -ENOMEM; per_cpu = nodes + nr_node_ids; - for_each_possible_cpu(cpu) { - struct page *page; - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + if (flags & SO_CPU) { + int cpu; - if (!c) - continue; + for_each_possible_cpu(cpu) { + struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - page = c->page; - node = c->node; - if (node < 0) - continue; - if (page) { - if (flags & SO_CPU) { - if (flags & SO_OBJECTS) - x = page->inuse; + if (!c || c->node < 0) + continue; + + if (c->page) { + if (flags & SO_TOTAL) + x = c->page->objects; + else if (flags & SO_OBJECTS) + x = c->page->inuse; else x = 1; + total += x; - nodes[node] += x; + nodes[c->node] += x; } - per_cpu[node]++; + per_cpu[c->node]++; } } - for_each_node_state(node, N_NORMAL_MEMORY) { - struct kmem_cache_node *n = get_node(s, node); + if (flags & SO_ALL) { + for_each_node_state(node, N_NORMAL_MEMORY) { + struct kmem_cache_node *n = get_node(s, node); + + if (flags & SO_TOTAL) + x = atomic_long_read(&n->total_objects); + else if (flags & SO_OBJECTS) + x = atomic_long_read(&n->total_objects) - + count_partial(n, count_free); - if (flags & SO_PARTIAL) { - if (flags & SO_OBJECTS) - x = count_partial(n); else - x = n->nr_partial; + x = atomic_long_read(&n->nr_slabs); total += x; nodes[node] += x; } - if (flags & SO_FULL) { - int full_slabs = atomic_long_read(&n->nr_slabs) - - per_cpu[node] - - n->nr_partial; + } else if (flags & SO_PARTIAL) { + for_each_node_state(node, N_NORMAL_MEMORY) { + struct kmem_cache_node *n = get_node(s, node); - if (flags & SO_OBJECTS) - x = full_slabs * s->objects; + if (flags & SO_TOTAL) + x = count_partial(n, count_total); + else if (flags & SO_OBJECTS) + x = count_partial(n, count_inuse); else - x = full_slabs; + x = n->nr_partial; total += x; nodes[node] += x; } } - x = sprintf(buf, "%lu", total); #ifdef CONFIG_NUMA for_each_node_state(node, N_NORMAL_MEMORY) @@ -3682,14 +3754,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s, static int any_slab_objects(struct kmem_cache *s) { int node; - int cpu; - - for_each_possible_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - - if (c && c->page) - return 1; - } for_each_online_node(node) { struct kmem_cache_node *n = get_node(s, node); @@ -3697,7 +3761,7 @@ static int any_slab_objects(struct kmem_cache *s) if (!n) continue; - if (n->nr_partial || atomic_long_read(&n->nr_slabs)) + if (atomic_read(&n->total_objects)) return 1; } return 0; @@ -3739,15 +3803,27 @@ SLAB_ATTR_RO(object_size); static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", s->objects); + return sprintf(buf, "%d\n", oo_objects(s->oo)); } SLAB_ATTR_RO(objs_per_slab); +static ssize_t order_store(struct kmem_cache *s, + const char *buf, size_t length) +{ + int order = simple_strtoul(buf, NULL, 10); + + if (order > slub_max_order || order < slub_min_order) + return -EINVAL; + + calculate_sizes(s, order); + return length; +} + static ssize_t order_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", s->order); + return sprintf(buf, "%d\n", oo_order(s->oo)); } -SLAB_ATTR_RO(order); +SLAB_ATTR(order); static ssize_t ctor_show(struct kmem_cache *s, char *buf) { @@ -3768,7 +3844,7 @@ SLAB_ATTR_RO(aliases); static ssize_t slabs_show(struct kmem_cache *s, char *buf) { - return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); + return show_slab_objects(s, buf, SO_ALL); } SLAB_ATTR_RO(slabs); @@ -3786,10 +3862,22 @@ SLAB_ATTR_RO(cpu_slabs); static ssize_t objects_show(struct kmem_cache *s, char *buf) { - return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); + return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS); } SLAB_ATTR_RO(objects); +static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) +{ + return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS); +} +SLAB_ATTR_RO(objects_partial); + +static ssize_t total_objects_show(struct kmem_cache *s, char *buf) +{ + return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); +} +SLAB_ATTR_RO(total_objects); + static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); @@ -3869,7 +3957,7 @@ static ssize_t red_zone_store(struct kmem_cache *s, s->flags &= ~SLAB_RED_ZONE; if (buf[0] == '1') s->flags |= SLAB_RED_ZONE; - calculate_sizes(s); + calculate_sizes(s, -1); return length; } SLAB_ATTR(red_zone); @@ -3888,7 +3976,7 @@ static ssize_t poison_store(struct kmem_cache *s, s->flags &= ~SLAB_POISON; if (buf[0] == '1') s->flags |= SLAB_POISON; - calculate_sizes(s); + calculate_sizes(s, -1); return length; } SLAB_ATTR(poison); @@ -3907,7 +3995,7 @@ static ssize_t store_user_store(struct kmem_cache *s, s->flags &= ~SLAB_STORE_USER; if (buf[0] == '1') s->flags |= SLAB_STORE_USER; - calculate_sizes(s); + calculate_sizes(s, -1); return length; } SLAB_ATTR(store_user); @@ -4038,7 +4126,7 @@ STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); - +STAT_ATTR(ORDER_FALLBACK, order_fallback); #endif static struct attribute *slab_attrs[] = { @@ -4047,6 +4135,8 @@ static struct attribute *slab_attrs[] = { &objs_per_slab_attr.attr, &order_attr.attr, &objects_attr.attr, + &objects_partial_attr.attr, + &total_objects_attr.attr, &slabs_attr.attr, &partial_attr.attr, &cpu_slabs_attr.attr, @@ -4089,6 +4179,7 @@ static struct attribute *slab_attrs[] = { &deactivate_to_head_attr.attr, &deactivate_to_tail_attr.attr, &deactivate_remote_frees_attr.attr, + &order_fallback_attr.attr, #endif NULL }; @@ -4375,7 +4466,8 @@ static int s_show(struct seq_file *m, void *p) unsigned long nr_partials = 0; unsigned long nr_slabs = 0; unsigned long nr_inuse = 0; - unsigned long nr_objs; + unsigned long nr_objs = 0; + unsigned long nr_free = 0; struct kmem_cache *s; int node; @@ -4389,14 +4481,15 @@ static int s_show(struct seq_file *m, void *p) nr_partials += n->nr_partial; nr_slabs += atomic_long_read(&n->nr_slabs); - nr_inuse += count_partial(n); + nr_objs += atomic_long_read(&n->total_objects); + nr_free += count_partial(n, count_free); } - nr_objs = nr_slabs * s->objects; - nr_inuse += (nr_slabs - nr_partials) * s->objects; + nr_inuse = nr_objs - nr_free; seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse, - nr_objs, s->size, s->objects, (1 << s->order)); + nr_objs, s->size, oo_objects(s->oo), + (1 << oo_order(s->oo))); seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0); seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs, 0UL); |