From fa5ec8a1f66f3c2a3af723abcf8085509c9ee682 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 7 Jul 2009 00:14:14 -0700 Subject: slub: add option to disable higher order debugging slabs When debugging is enabled, slub requires that additional metadata be stored in slabs for certain options: SLAB_RED_ZONE, SLAB_POISON, and SLAB_STORE_USER. Consequently, it may require that the minimum possible slab order needed to allocate a single object be greater when using these options. The most notable example is for objects that are PAGE_SIZE bytes in size. Higher minimum slab orders may cause page allocation failures when oom or under heavy fragmentation. This patch adds a new slub_debug option, which disables debugging by default for caches that would have resulted in higher minimum orders: slub_debug=O When this option is used on systems with 4K pages, kmalloc-4096, for example, will not have debugging enabled by default even if CONFIG_SLUB_DEBUG_ON is defined because it would have resulted in an order-1 minimum slab order. Reported-by: Larry Finger Tested-by: Larry Finger Cc: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg --- mm/slub.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index a9201d8..466089c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -142,6 +142,13 @@ SLAB_POISON | SLAB_STORE_USER) /* + * Debugging flags that require metadata to be stored in the slab, up to + * DEBUG_SIZE in size. 
+ */ +#define DEBUG_SIZE_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) +#define DEBUG_SIZE (3 * sizeof(void *) + 2 * sizeof(struct track)) + +/* * Set of flags that will prevent slab merging */ #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ @@ -326,6 +333,7 @@ static int slub_debug; #endif static char *slub_debug_slabs; +static int disable_higher_order_debug; /* * Object debugging @@ -977,6 +985,15 @@ static int __init setup_slub_debug(char *str) */ goto check_slabs; + if (tolower(*str) == 'o') { + /* + * Avoid enabling debugging on caches if its minimum order + * would increase as a result. + */ + disable_higher_order_debug = 1; + goto out; + } + slub_debug = 0; if (*str == '-') /* @@ -1023,13 +1040,27 @@ static unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, void (*ctor)(void *)) { + int debug_flags = slub_debug; + /* * Enable debugging if selected on the kernel commandline. */ - if (slub_debug && (!slub_debug_slabs || - strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) - flags |= slub_debug; + if (debug_flags) { + if (slub_debug_slabs && + strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))) + goto out; + + /* + * Disable debugging that increases slab size if the minimum + * slab order would have increased as a result. 
+ */ + if (disable_higher_order_debug && + get_order(objsize + DEBUG_SIZE) > get_order(objsize)) + debug_flags &= ~DEBUG_SIZE_FLAGS; + flags |= debug_flags; + } +out: return flags; } #else @@ -1561,6 +1592,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) "default order: %d, min order: %d\n", s->name, s->objsize, s->size, oo_order(s->oo), oo_order(s->min)); + if (oo_order(s->min) > get_order(s->objsize)) + printk(KERN_WARNING " %s debugging increased min order, use " + "slub_debug=O to disable.\n", s->name); + for_each_online_node(node) { struct kmem_cache_node *n = get_node(s, node); unsigned long nr_slabs; -- cgit v1.1 From 3de472138a138008b534d9587593ba83390e330a Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 27 Jul 2009 18:30:35 -0700 Subject: slub: use size and objsize orders to disable debug flags This patch moves the masking of debugging flags which increase a cache's min order due to metadata when `slub_debug=O' is used from kmem_cache_flags() to kmem_cache_open(). Instead of defining the maximum metadata size increase in a preprocessor macro, this approach uses the cache's ->size and ->objsize members to determine if the min order increased due to debugging options. If so, the flags specified in the more appropriately named DEBUG_METADATA_FLAGS are masked off. This approach was suggested by Christoph Lameter . Cc: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg --- mm/slub.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index 466089c..a465c0a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -142,11 +142,11 @@ SLAB_POISON | SLAB_STORE_USER) /* - * Debugging flags that require metadata to be stored in the slab, up to - * DEBUG_SIZE in size. + * Debugging flags that require metadata to be stored in the slab. 
These get + * disabled when slub_debug=O is used and a cache's min order increases with + * metadata. */ -#define DEBUG_SIZE_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) -#define DEBUG_SIZE (3 * sizeof(void *) + 2 * sizeof(struct track)) +#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) /* * Set of flags that will prevent slab merging @@ -1040,27 +1040,13 @@ static unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, void (*ctor)(void *)) { - int debug_flags = slub_debug; - /* * Enable debugging if selected on the kernel commandline. */ - if (debug_flags) { - if (slub_debug_slabs && - strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))) - goto out; - - /* - * Disable debugging that increases slab size if the minimum - * slab order would have increased as a result. - */ - if (disable_higher_order_debug && - get_order(objsize + DEBUG_SIZE) > get_order(objsize)) - debug_flags &= ~DEBUG_SIZE_FLAGS; + if (slub_debug && (!slub_debug_slabs || + !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) + flags |= slub_debug; - flags |= debug_flags; - } -out: return flags; } #else @@ -2488,6 +2474,18 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (!calculate_sizes(s, -1)) goto error; + if (disable_higher_order_debug) { + /* + * Disable debugging flags that store metadata if the min slab + * order increased. + */ + if (get_order(s->size) > get_order(s->objsize)) { + s->flags &= ~DEBUG_METADATA_FLAGS; + s->offset = 0; + if (!calculate_sizes(s, -1)) + goto error; + } + } /* * The larger the object size is, the more pages we want on the partial -- cgit v1.1 From dcb0ce1bdf39581bcd0cffc3d487fb20667977cd Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Thu, 30 Jul 2009 11:28:11 +0800 Subject: slub: change kmem_cache->align to record the real alignment kmem_cache->align records the original align parameter value specified by users. 
Function calculate_alignment might change it based on cache line size. So change kmem_cache->align correspondingly. Signed-off-by: Zhang Yanmin Signed-off-by: Pekka Enberg --- mm/slub.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index a465c0a..801fe4b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2422,6 +2422,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * on bootup. */ align = calculate_alignment(flags, align, s->objsize); + s->align = align; /* * SLUB stores one object immediately after another beginning from -- cgit v1.1 From bbff2e433e80fae72c8d00d482927d52ec19ba33 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 6 Aug 2009 11:36:25 +0300 Subject: slab: remove duplicate kmem_cache_init_late() declarations kmem_cache_init_late() has been declared in slab.h CC: Nick Piggin CC: Matt Mackall CC: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Pekka Enberg --- mm/slob.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'mm') diff --git a/mm/slob.c b/mm/slob.c index 9641da3..837ebd6 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -692,3 +692,8 @@ void __init kmem_cache_init(void) { slob_ready = 1; } + +void __init kmem_cache_init_late(void) +{ + /* Nothing to do */ +} -- cgit v1.1 From cf5d11317e8f2671d3115622aec76274a40f4fc2 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 18 Aug 2009 19:11:40 +0300 Subject: SLUB: Drop write permission to /proc/slabinfo SLUB does not support writes to /proc/slabinfo so there should not be write permission to do that either. 
Signed-off-by: WANG Cong Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index 801fe4b..e16c9fb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4758,7 +4758,7 @@ static const struct file_operations proc_slabinfo_operations = { static int __init slab_proc_init(void) { - proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); + proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations); return 0; } module_init(slab_proc_init); -- cgit v1.1 From 5086c389cb897c7ad66c1cacd1abb5ffebaa74b2 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Wed, 19 Aug 2009 21:44:13 +0300 Subject: SLUB: Fix some coding style issues Signed-off-by: WANG Cong Signed-off-by: Pekka Enberg --- mm/slub.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index b9f1491..e4c3e3a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1109,8 +1109,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) } if (kmemcheck_enabled - && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) - { + && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { int pages = 1 << oo_order(oo); kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); @@ -2001,7 +2000,7 @@ static inline int calculate_order(int size) return order; fraction /= 2; } - min_objects --; + min_objects--; } /* -- cgit v1.1 From acdfcd04d9df7d084ff752f82afad6ed4ad5f363 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Fri, 28 Aug 2009 14:28:54 +0300 Subject: SLUB: fix ARCH_KMALLOC_MINALIGN cases 64 and 256 If the minalign is 64 bytes, then the 96 byte cache should not be created because it would conflict with the 128 byte cache. If the minalign is 256 bytes, patching the size_index table should not result in a buffer overrun. The calculation "(i - 1) / 8" used to access size_index[] is moved to a separate function as suggested by Christoph Lameter. 
Acked-by: Christoph Lameter Signed-off-by: Aaro Koskinen Signed-off-by: Pekka Enberg --- mm/slub.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index e16c9fb..be493bd 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2825,6 +2825,11 @@ static s8 size_index[24] = { 2 /* 192 */ }; +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + static struct kmem_cache *get_slab(size_t size, gfp_t flags) { int index; @@ -2833,7 +2838,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) if (!size) return ZERO_SIZE_PTR; - index = size_index[(size - 1) / 8]; + index = size_index[size_index_elem(size)]; } else index = fls(size - 1); @@ -3188,10 +3193,12 @@ void __init kmem_cache_init(void) slab_state = PARTIAL; /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 64) { + if (KMALLOC_MIN_SIZE <= 32) { create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_NOWAIT); caches++; + } + if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_NOWAIT); caches++; @@ -3218,17 +3225,28 @@ void __init kmem_cache_init(void) BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) - size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } - if (KMALLOC_MIN_SIZE == 128) { + if (KMALLOC_MIN_SIZE == 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + } else if (KMALLOC_MIN_SIZE == 128) { /* * The 192 byte sized cache is not used if the alignment * is 128 byte. Redirect kmalloc to use the 256 byte cache * instead. 
*/ for (i = 128 + 8; i <= 192; i += 8) - size_index[(i - 1) / 8] = 8; + size_index[size_index_elem(i)] = 8; } slab_state = UP; -- cgit v1.1 From 5788d8ad6c113c589eeaaa48a173adbbe6b1cb3d Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Wed, 22 Jul 2009 11:28:53 +0800 Subject: slub: release kobject if sysfs_create_group failed in sysfs_slab_add When CONFIG_SLUB_DEBUG is enabled, sysfs_slab_add should unlink and put the kobject if sysfs_create_group failed. Otherwise, sysfs_slab_add returns an error and kmem_cache s is then freed, so the memory of s->kobj is leaked. Acked-by: Christoph Lameter Signed-off-by: Xiaotian Feng Signed-off-by: Pekka Enberg --- mm/slub.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index be493bd..d73f771 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4593,8 +4593,11 @@ static int sysfs_slab_add(struct kmem_cache *s) } err = sysfs_create_group(&s->kobj, &slab_attr_group); - if (err) + if (err) { + kobject_del(&s->kobj); + kobject_put(&s->kobj); return err; + } kobject_uevent(&s->kobj, KOBJ_ADD); if (!unmergeable) { /* Setup first alias */ -- cgit v1.1 From 8a3d271deb0cc9c2fc47317d8e431046382939c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Sep 2009 16:08:06 +0200 Subject: slub: fix slab_pad_check() When SLAB_POISON is used and slab_pad_check() finds an overwrite of the slab padding, we call restore_bytes() on the whole slab, not only on the padding. Acked-by: Christoph Lameter Reported-by: Zdenek Kabelac Signed-off-by: Eric Dumazet Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index d73f771..b1cb2df 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -655,7 +655,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) slab_err(s, page, "Padding overwritten. 
0x%p-0x%p", fault, end - 1); print_section("Padding", end - remainder, remainder); - restore_bytes(s, "slab padding", POISON_INUSE, start, end); + restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); return 0; } -- cgit v1.1