From 62f75532b583c03840f31e40386ce2df73be9ca0 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 14 Apr 2008 18:50:44 +0300 Subject: slub: Initialize per-cpu stats As spotted by kmemcheck, we need to initialize the per-CPU ->stat array before using it. [kmem_cache_cpu structures are usually allocated from arrays defined via DEFINE_PER_CPU that are zeroed so we have not noticed this so far --cl]. Reported-by: Vegard Nossum Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index acc975f..15a7a0d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1886,6 +1886,9 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, c->node = 0; c->offset = s->offset / sizeof(void *); c->objsize = s->objsize; +#ifdef CONFIG_SLUB_STATS + memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned)); +#endif } static void init_kmem_cache_node(struct kmem_cache_node *n) -- cgit v1.1 From 4097d6017576a5e138f442f5e3c393ad00d10f58 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 18:51:18 +0300 Subject: slub: Reduce #ifdef ZONE_DMA by moving kmalloc_caches_dma near dma logic Move the definition of kmalloc_caches_dma() into a later #ifdef CONFIG_ZONE_DMA. This saves one #ifdef and leaves us with a total of two #ifdefs for dma slab support. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 15a7a0d..3df6d5b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2412,10 +2412,6 @@ EXPORT_SYMBOL(kmem_cache_destroy); struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); -#ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; -#endif - static int __init setup_slub_min_order(char *str) { get_option(&str, &slub_min_order); @@ -2475,6 +2471,7 @@ panic: } #ifdef CONFIG_ZONE_DMA +static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; static void sysfs_add_func(struct work_struct *w) { -- cgit v1.1 From 5b06c853ad447636e31d105e95c48ae9abb6bfb5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 18:51:34 +0300 Subject: slub: Deal with config variable dependencies count_partial() is used by both slabinfo and the sysfs proc support. Move the function directly before the beginning of the sysfs code so that it can be easily found. Rework the preprocessor conditional to take into account that slub sysfs support depends on CONFIG_SYSFS *and* CONFIG_SLUB_DEBUG. Make CONFIG_SLUB_STATS depend on CONFIG_SLUB_DEBUG and CONFIG_SYSFS. There is no point of keeping statistics if no one can restrive them. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- lib/Kconfig.debug | 2 +- mm/slub.c | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0796c1a..eef557d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -211,7 +211,7 @@ config SLUB_DEBUG_ON config SLUB_STATS default n bool "Enable SLUB performance statistics" - depends on SLUB + depends on SLUB && SLUB_DEBUG && SYSFS help SLUB statistics are useful to debug SLUBs allocation behavior in order find ways to optimize the allocator. This should never be diff --git a/mm/slub.c b/mm/slub.c index 3df6d5b..3fcdcf7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2688,21 +2688,6 @@ void kfree(const void *x) } EXPORT_SYMBOL(kfree); -#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SLABINFO) -static unsigned long count_partial(struct kmem_cache_node *n) -{ - unsigned long flags; - unsigned long x = 0; - struct page *page; - - spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->partial, lru) - x += page->inuse; - spin_unlock_irqrestore(&n->list_lock, flags); - return x; -} -#endif - /* * kmem_cache_shrink removes empty slabs from the partial lists and sorts * the remaining slabs by the number of items in use. The slabs with the @@ -3181,6 +3166,21 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return slab_alloc(s, gfpflags, node, caller); } +#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO) +static unsigned long count_partial(struct kmem_cache_node *n) +{ + unsigned long flags; + unsigned long x = 0; + struct page *page; + + spin_lock_irqsave(&n->list_lock, flags); + list_for_each_entry(page, &n->partial, lru) + x += page->inuse; + spin_unlock_irqrestore(&n->list_lock, flags); + return x; +} +#endif + #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) static int validate_slab(struct kmem_cache *s, struct page *page, unsigned long *map) -- cgit v1.1 From 50ef37b96c11e76625067ae413dc54585ea22585 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 18:52:05 +0300 Subject: slub: Fixes to per cpu stat output in sysfs Only output per cpu stats if the kernel is build for SMP. Use a capital "C" as a leading character for the processor number (same as the numa statistics that also use a capital letter "N"). Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 3fcdcf7..23e5ee7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3979,10 +3979,12 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) len = sprintf(buf, "%lu", sum); +#ifdef CONFIG_SMP for_each_online_cpu(cpu) { if (data[cpu] && len < PAGE_SIZE - 20) - len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]); + len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]); } +#endif kfree(data); return len + sprintf(buf + len, "\n"); } -- cgit v1.1 From 49bd5221ce8fb55d12c04a3ffd375201c5bbfb7a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 18:52:18 +0300 Subject: slub: Move map/flag clearing to __free_slab __free_slab does some diagnostics. The resetting of mapcount etc in discard_slab() can interfere with debug processing. So move the reset immediately before the page is freed. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 23e5ee7..f924cff 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1125,6 +1125,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page) NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, -pages); + __ClearPageSlab(page); + reset_page_mapcount(page); __free_pages(page, s->order); } @@ -1154,8 +1156,6 @@ static void discard_slab(struct kmem_cache *s, struct page *page) struct kmem_cache_node *n = get_node(s, page_to_nid(page)); atomic_long_dec(&n->nr_slabs); - reset_page_mapcount(page); - __ClearPageSlab(page); free_slab(s, page); } -- cgit v1.1 From 0f389ec63077521166f071e1e970aed36147fd45 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 18:53:02 +0300 Subject: slub: No need for per node slab counters if !SLUB_DEBUG The per node counters are used mainly for showing data through the sysfs API. If that API is not compiled in then there is no point in keeping track of this data. Disable counters for the number of slabs and the number of total slabs if !SLUB_DEBUG. Incrementing the per node counters is also accessing a potentially contended cacheline so this could actually be a performance benefit to embedded systems. SLABINFO support is also affected. It now must depends on SLUB_DEBUG (which is on by default). Patch also avoids a check for a NULL kmem_cache_node pointer in new_slab() if the system is not compiled with NUMA support. [penberg@cs.helsinki.fi: fix oops and move ->nr_slabs into CONFIG_SLUB_DEBUG] Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 +- init/Kconfig | 2 +- mm/slub.c | 51 +++++++++++++++++++++++++++++++++++++----------- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index b00c1c7..79d59c9 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -45,9 +45,9 @@ struct kmem_cache_cpu { struct kmem_cache_node { spinlock_t list_lock; /* Protect partial list and nr_partial */ unsigned long nr_partial; - atomic_long_t nr_slabs; struct list_head partial; #ifdef CONFIG_SLUB_DEBUG + atomic_long_t nr_slabs; struct list_head full; #endif }; diff --git a/init/Kconfig b/init/Kconfig index a97924b..7fccf09 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -763,7 +763,7 @@ endmenu # General setup config SLABINFO bool depends on PROC_FS - depends on SLAB || SLUB + depends on SLAB || SLUB_DEBUG default y config RT_MUTEXES diff --git a/mm/slub.c b/mm/slub.c index f924cff..7f8aaa2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -837,6 +837,35 @@ static void remove_full(struct kmem_cache *s, struct page *page) spin_unlock(&n->list_lock); } +/* Tracking of the number of slabs for debugging purposes */ +static inline unsigned long slabs_node(struct kmem_cache *s, int node) +{ + struct kmem_cache_node *n = get_node(s, node); + + return atomic_long_read(&n->nr_slabs); +} + +static inline void inc_slabs_node(struct kmem_cache *s, int node) +{ + struct kmem_cache_node *n = get_node(s, node); + + /* + * May be called early in order to allocate a slab for the + * kmem_cache_node structure. Solve the chicken-egg + * dilemma by deferring the increment of the count during + * bootstrap (see early_kmem_cache_node_alloc). + */ + if (!NUMA_BUILD || n) + atomic_long_inc(&n->nr_slabs); +} +static inline void dec_slabs_node(struct kmem_cache *s, int node) +{ + struct kmem_cache_node *n = get_node(s, node); + + atomic_long_dec(&n->nr_slabs); +} + +/* Object debug checks for alloc/free paths */ static void setup_object_debug(struct kmem_cache *s, struct page *page, void *object) { @@ -1028,6 +1057,11 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize, return flags; } #define slub_debug 0 + +static inline unsigned long slabs_node(struct kmem_cache *s, int node) + { return 0; } +static inline void inc_slabs_node(struct kmem_cache *s, int node) {} +static inline void dec_slabs_node(struct kmem_cache *s, int node) {} #endif /* * Slab allocation and freeing @@ -1066,7 +1100,6 @@ static void setup_object(struct kmem_cache *s, struct page *page, static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) { struct page *page; - struct kmem_cache_node *n; void *start; void *last; void *p; @@ -1078,9 +1111,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) goto out; - n = get_node(s, page_to_nid(page)); - if (n) - atomic_long_inc(&n->nr_slabs); + inc_slabs_node(s, page_to_nid(page)); page->slab = s; page->flags |= 1 << PG_slab; if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | @@ -1153,9 +1184,7 @@ static void free_slab(struct kmem_cache *s, struct page *page) static void discard_slab(struct kmem_cache *s, struct page *page) { - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - - atomic_long_dec(&n->nr_slabs); + dec_slabs_node(s, page_to_nid(page)); free_slab(s, page); } @@ -1894,10 +1923,10 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; - atomic_long_set(&n->nr_slabs, 0); spin_lock_init(&n->list_lock); INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG + atomic_long_set(&n->nr_slabs, 0); INIT_LIST_HEAD(&n->full); #endif } @@ -2066,7 +2095,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, init_tracking(kmalloc_caches, n); #endif init_kmem_cache_node(n); - atomic_long_inc(&n->nr_slabs); + inc_slabs_node(kmalloc_caches, node); /* * lockdep requires consistent irq usage for each lock @@ -2379,7 +2408,7 @@ static inline int kmem_cache_close(struct kmem_cache *s) struct kmem_cache_node *n = get_node(s, node); n->nr_partial -= free_list(s, n, &n->partial); - if (atomic_long_read(&n->nr_slabs)) + if (slabs_node(s, node)) return 1; } free_kmem_cache_nodes(s); @@ -2801,7 +2830,7 @@ static void slab_mem_offline_callback(void *arg) * and offline_pages() function shoudn't call this * callback. So, we must fail. */ - BUG_ON(atomic_long_read(&n->nr_slabs)); + BUG_ON(slabs_node(s, offline_node)); s->node[offline_node] = NULL; kmem_cache_free(kmalloc_caches, n); -- cgit v1.1