diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/mremap.c | 2 | ||||
-rw-r--r-- | mm/oom_kill.c | 8 | ||||
-rw-r--r-- | mm/page_alloc.c | 22 | ||||
-rw-r--r-- | mm/slab.c | 59 | ||||
-rw-r--r-- | mm/swap_state.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 27 |
8 files changed, 93 insertions, 31 deletions
diff --git a/mm/memory.c b/mm/memory.c index 7e2a4b1..c1e14c9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -503,7 +503,7 @@ again: return -ENOMEM; src_pte = pte_offset_map_nested(src_pmd, addr); src_ptl = pte_lockptr(src_mm, src_pmd); - spin_lock(src_ptl); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); do { /* diff --git a/mm/mremap.c b/mm/mremap.c index 1903bdf..7c15cf3 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -97,7 +97,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_pte = pte_offset_map_nested(new_pmd, new_addr); new_ptl = pte_lockptr(mm, new_pmd); if (new_ptl != old_ptl) - spin_lock(new_ptl); + spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) { diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d46ed0f..b9af136 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -225,7 +225,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that * we select a process with CAP_SYS_RAW_IO set). */ -static void __oom_kill_task(task_t *p, const char *message) +static void __oom_kill_task(struct task_struct *p, const char *message) { if (p->pid == 1) { WARN_ON(1); @@ -255,10 +255,10 @@ static void __oom_kill_task(task_t *p, const char *message) force_sig(SIGKILL, p); } -static int oom_kill_task(task_t *p, const char *message) +static int oom_kill_task(struct task_struct *p, const char *message) { struct mm_struct *mm; - task_t * g, * q; + struct task_struct *g, *q; mm = p->mm; @@ -316,7 +316,7 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, */ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { - task_t *p; + struct task_struct *p; unsigned long points = 0; if (printk_ratelimit()) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e792a5..54a4f53 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->spanned_pages = size; zone->present_pages = realsize; +#ifdef CONFIG_NUMA + zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) + / 100; +#endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); spin_lock_init(&zone->lru_lock); @@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, return 0; } +#ifdef CONFIG_NUMA +int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + int rc; + + rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (rc) + return rc; + + for_each_zone(zone) + zone->min_unmapped_ratio = (zone->present_pages * + sysctl_min_unmapped_ratio) / 100; + return 0; +} +#endif + /* * lowmem_reserve_ratio_sysctl_handler - just a wrapper around * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() @@ -1021,7 +1021,8 @@ static void drain_alien_cache(struct kmem_cache *cachep, } } -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +static inline int cache_free_alien(struct kmem_cache *cachep, void *objp, + int nesting) { struct slab *slabp = virt_to_slab(objp); int nodeid = slabp->nodeid; @@ -1039,7 +1040,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) STATS_INC_NODEFREES(cachep); if (l3->alien && l3->alien[nodeid]) { alien = l3->alien[nodeid]; - spin_lock(&alien->lock); + spin_lock_nested(&alien->lock, nesting); if (unlikely(alien->avail == alien->limit)) { STATS_INC_ACOVERFLOW(cachep); __drain_alien_cache(cachep, alien, nodeid); @@ -1068,7 +1069,8 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) { } -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +static inline int cache_free_alien(struct kmem_cache *cachep, void *objp, + int nesting) { return 0; } @@ -1272,6 +1274,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, local_irq_disable(); memcpy(ptr, list, sizeof(struct kmem_list3)); + /* + * Do not assume that spinlocks can be initialized via memcpy: + */ + spin_lock_init(&ptr->list_lock); + MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->nodelists[nodeid] = ptr; local_irq_enable(); @@ -1398,7 +1405,7 @@ void __init kmem_cache_init(void) } /* 4) Replace the bootstrap head arrays */ { - void *ptr; + struct array_cache *ptr; ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); @@ -1406,6 +1413,11 @@ void __init kmem_cache_init(void) BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); memcpy(ptr, cpu_cache_get(&cache_cache), sizeof(struct arraycache_init)); + /* + * Do not assume that spinlocks can be initialized via memcpy: + */ + spin_lock_init(&ptr->lock); + cache_cache.array[smp_processor_id()] = ptr; local_irq_enable(); @@ -1416,6 +1428,11 @@ void __init kmem_cache_init(void) != &initarray_generic.cache); memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), sizeof(struct arraycache_init)); + /* + * Do not assume that spinlocks can be initialized via memcpy: + */ + spin_lock_init(&ptr->lock); + malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = ptr; local_irq_enable(); @@ -1743,6 +1760,8 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) } #endif +static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting); + /** * slab_destroy - destroy and release all objects in a slab * @cachep: cache pointer being destroyed @@ -1766,8 +1785,17 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) call_rcu(&slab_rcu->head, kmem_rcu_free); } else { kmem_freepages(cachep, addr); - if (OFF_SLAB(cachep)) - kmem_cache_free(cachep->slabp_cache, slabp); + if (OFF_SLAB(cachep)) { + unsigned long flags; + + /* + * lockdep: we may nest inside an already held + * ac->lock, so pass in a nesting flag: + */ + local_irq_save(flags); + __cache_free(cachep->slabp_cache, slabp, 1); + local_irq_restore(flags); + } } } @@ -3072,7 +3100,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, if (slabp->inuse == 0) { if (l3->free_objects > l3->free_limit) { l3->free_objects -= cachep->num; + /* + * It is safe to drop the lock. The slab is + * no longer linked to the cache. cachep + * cannot disappear - we are using it and + * all destruction of caches must be + * serialized properly by the user. + */ + spin_unlock(&l3->list_lock); slab_destroy(cachep, slabp); + spin_lock(&l3->list_lock); } else { list_add(&slabp->list, &l3->slabs_free); } @@ -3098,7 +3135,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) #endif check_irq_off(); l3 = cachep->nodelists[node]; - spin_lock(&l3->list_lock); + spin_lock_nested(&l3->list_lock, SINGLE_DEPTH_NESTING); if (l3->shared) { struct array_cache *shared_array = l3->shared; int max = shared_array->limit - shared_array->avail; @@ -3141,14 +3178,14 @@ free_done: * Release an obj back to its cache. If the obj has a constructed state, it must * be in this state _before_ it is released. Called with disabled ints. */ -static inline void __cache_free(struct kmem_cache *cachep, void *objp) +static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting) { struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); - if (cache_free_alien(cachep, objp)) + if (cache_free_alien(cachep, objp, nesting)) return; if (likely(ac->avail < ac->limit)) { @@ -3387,7 +3424,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) BUG_ON(virt_to_cache(objp) != cachep); local_irq_save(flags); - __cache_free(cachep, objp); + __cache_free(cachep, objp, 0); local_irq_restore(flags); } EXPORT_SYMBOL(kmem_cache_free); @@ -3412,7 +3449,7 @@ void kfree(const void *objp) kfree_debugcheck(objp); c = virt_to_cache(objp); debug_check_no_locks_freed(objp, obj_size(c)); - __cache_free(c, (void *)objp); + __cache_free(c, (void *)objp, 0); local_irq_restore(flags); } EXPORT_SYMBOL(kfree); diff --git a/mm/swap_state.c b/mm/swap_state.c index fccbd9b..5f7cf2a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -38,7 +38,7 @@ static struct backing_dev_info swap_backing_dev_info = { struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .tree_lock = RW_LOCK_UNLOCKED, + .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock), .a_ops = &swap_aops, .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), .backing_dev_info = &swap_backing_dev_info, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 35f8553..7b45079 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -330,6 +330,8 @@ void __vunmap(void *addr, int deallocate_pages) return; } + debug_check_no_locks_freed(addr, area->size); + if (deallocate_pages) { int i; diff --git a/mm/vmscan.c b/mm/vmscan.c index ff2ebe9..5d4c4d0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1503,10 +1503,6 @@ module_init(kswapd_init) * * If non-zero call zone_reclaim when the number of free pages falls below * the watermarks. - * - * In the future we may add flags to the mode. However, the page allocator - * should only have to check that zone_reclaim_mode != 0 before calling - * zone_reclaim(). */ int zone_reclaim_mode __read_mostly; @@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly; #define ZONE_RECLAIM_PRIORITY 4 /* + * Percentage of pages in a zone that must be unmapped for zone_reclaim to + * occur. + */ +int sysctl_min_unmapped_ratio = 1; + +/* * Try to free up some pages from this zone through reclaim. */ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) @@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int node_id; /* - * Do not reclaim if there are not enough reclaimable pages in this - * zone that would satify this allocations. + * Zone reclaim reclaims unmapped file backed pages. * - * All unmapped pagecache pages are reclaimable. - * - * Both counters may be temporarily off a bit so we use - * SWAP_CLUSTER_MAX as the boundary. It may also be good to - * leave a few frequently used unmapped pagecache pages around. + * A small portion of unmapped file backed pages is needed for + * file I/O otherwise pages read by file I/O will be immediately + * thrown out if the zone is overallocated. So we do not reclaim + * if less than a specified percentage of the zone is used by + * unmapped file backed pages. */ if (zone_page_state(zone, NR_FILE_PAGES) - - zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX) - return 0; + zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) + return 0; /* * Avoid concurrent zone reclaims, do not reclaim in a zone that does |