Diffstat (limited to 'mm')
-rw-r--r--   mm/bootmem.c          |  9
-rw-r--r--   mm/filemap.c          | 32
-rw-r--r--   mm/highmem.c          | 15
-rw-r--r--   mm/madvise.c          |  3
-rw-r--r--   mm/mempolicy.c        |  1
-rw-r--r--   mm/migrate.c          |  8
-rw-r--r--   mm/mmap.c             | 43
-rw-r--r--   mm/nommu.c            | 18
-rw-r--r--   mm/oom_kill.c         | 71
-rw-r--r--   mm/page-writeback.c   |  7
-rw-r--r--   mm/page_alloc.c       | 82
-rw-r--r--   mm/shmem.c            |  3
-rw-r--r--   mm/slab.c             | 53
-rw-r--r--   mm/slob.c             | 10
-rw-r--r--   mm/swap_state.c       |  3
-rw-r--r--   mm/vmalloc.c          |  3
-rw-r--r--   mm/vmscan.c           |  2
17 files changed, 243 insertions, 120 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d3e3bd2..d213fed 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -401,7 +401,7 @@ unsigned long __init free_all_bootmem (void)
 	return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
 {
 	bootmem_data_t *bdata;
 	void *ptr;
@@ -409,7 +409,14 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
 	list_for_each_entry(bdata, &bdata_list, list)
 		if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
 			return(ptr);
+	return NULL;
+}
+void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+{
+	void *mem = __alloc_bootmem_nopanic(size,align,goal);
+	if (mem)
+		return mem;
 
 	/*
 	 * Whoops, we cannot satisfy the allocation request.
 	 */
diff --git a/mm/filemap.c b/mm/filemap.c
index 3ef2073..fd57442 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -697,6 +697,38 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 	return ret;
 }
 
+/**
+ * find_get_pages_contig - gang contiguous pagecache lookup
+ * @mapping:	The address_space to search
+ * @index:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages_contig() works exactly like find_get_pages(), except
+ * that the returned number of pages are guaranteed to be contiguous.
+ *
+ * find_get_pages_contig() returns the number of pages which were found.
+ */
+unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
+			       unsigned int nr_pages, struct page **pages)
+{
+	unsigned int i;
+	unsigned int ret;
+
+	read_lock_irq(&mapping->tree_lock);
+	ret = radix_tree_gang_lookup(&mapping->page_tree,
+				(void **)pages, index, nr_pages);
+	for (i = 0; i < ret; i++) {
+		if (pages[i]->mapping == NULL || pages[i]->index != index)
+			break;
+
+		page_cache_get(pages[i]);
+		index++;
+	}
+	read_unlock_irq(&mapping->tree_lock);
+	return i;
+}
+
 /*
  * Like find_get_pages, except we only return pages which are tagged with
  * `tag'.   We update *index to index the next page for the traversal.
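A minimal caller-side sketch of the new helper (illustrative only, not part of the commit; process_page() is a hypothetical consumer). Every page returned by find_get_pages_contig() carries a reference that the caller must drop:

	static void walk_contig_range(struct address_space *mapping,
				      pgoff_t start, unsigned int nr)
	{
		struct page *pages[16];
		unsigned int found, i;

		while (nr) {
			unsigned int chunk = min_t(unsigned int, nr, 16);

			/* stops early at the first hole in the file */
			found = find_get_pages_contig(mapping, start, chunk, pages);
			if (!found)
				break;
			for (i = 0; i < found; i++) {
				process_page(pages[i]);		/* hypothetical */
				page_cache_release(pages[i]);	/* drop lookup ref */
			}
			start += found;
			nr -= found;
		}
	}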
diff --git a/mm/highmem.c b/mm/highmem.c
index 55885f6..9b274fdf 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -74,8 +74,7 @@ static void flush_all_zero_pkmaps(void)
 		pkmap_count[i] = 0;
 
 		/* sanity check */
-		if (pte_none(pkmap_page_table[i]))
-			BUG();
+		BUG_ON(pte_none(pkmap_page_table[i]));
 
 		/*
 		 * Don't need an atomic fetch-and-clear op here;
@@ -158,8 +157,7 @@ void fastcall *kmap_high(struct page *page)
 	if (!vaddr)
 		vaddr = map_new_virtual(page);
 	pkmap_count[PKMAP_NR(vaddr)]++;
-	if (pkmap_count[PKMAP_NR(vaddr)] < 2)
-		BUG();
+	BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
 	spin_unlock(&kmap_lock);
 	return (void*) vaddr;
 }
@@ -174,8 +172,7 @@ void fastcall kunmap_high(struct page *page)
 
 	spin_lock(&kmap_lock);
 	vaddr = (unsigned long)page_address(page);
-	if (!vaddr)
-		BUG();
+	BUG_ON(!vaddr);
 	nr = PKMAP_NR(vaddr);
 
 	/*
@@ -220,8 +217,7 @@ static __init int init_emergency_pool(void)
 		return 0;
 
 	page_pool = mempool_create_page_pool(POOL_SIZE, 0);
-	if (!page_pool)
-		BUG();
+	BUG_ON(!page_pool);
 	printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
 
 	return 0;
@@ -264,8 +260,7 @@ int init_emergency_isa_pool(void)
 
 	isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
 				       mempool_free_pages, (void *) 0);
-	if (!isa_page_pool)
-		BUG();
+	BUG_ON(!isa_page_pool);
 
 	printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
 	return 0;
diff --git a/mm/madvise.c b/mm/madvise.c
index af3d573..4e19615 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -168,6 +168,9 @@ static long madvise_remove(struct vm_area_struct *vma,
 			return -EINVAL;
 	}
 
+	if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
+		return -EACCES;
+
 	mapping = vma->vm_file->f_mapping;
 
 	offset = (loff_t)(start - vma->vm_start)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index dec8249..8778f58 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1761,7 +1761,6 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
 		md->mapcount_max = count;
 
 	md->node[page_to_nid(page)]++;
-	cond_resched();
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/migrate.c b/mm/migrate.c
index 09f6e4a..d444229 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -16,8 +16,7 @@
 #include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
-#include <linux/buffer_head.h>	/* for try_to_release_page(),
-					buffer_heads_over_limit */
+#include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
 #include <linux/pagevec.h>
 #include <linux/rmap.h>
@@ -28,8 +27,6 @@
 
 #include "internal.h"
 
-#include "internal.h"
-
 /* The maximum number of pages to take off the LRU for migration */
 #define MIGRATE_CHUNK_SIZE 256
 
@@ -176,7 +173,6 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
-EXPORT_SYMBOL(swap_page);
 
 /*
  * Remove references for a page and establish the new page with the correct
@@ -234,7 +230,7 @@ int migrate_page_remove_references(struct page *newpage,
 	if (!page_mapping(page) || page_count(page) != nr_refs ||
 			*radix_pointer != page) {
 		write_unlock_irq(&mapping->tree_lock);
-		return 1;
+		return -EAGAIN;
 	}
 
 	/*
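One user-visible consequence of the madvise.c hunk above: MADV_REMOVE now returns EACCES unless the mapping is both shared and writable. A hedged userspace sketch (fd and len are assumed valid):

	#include <sys/mman.h>

	static int punch_whole_mapping(int fd, size_t len)
	{
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, 0);

		if (p == MAP_FAILED)
			return -1;
		/* Succeeds for this mapping; a MAP_PRIVATE or read-only
		 * mapping now fails with EACCES. */
		return madvise(p, len, MADV_REMOVE);
	}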
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -121,14 +121,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 		 * only call if we're about to fail.
 		 */
 		n = nr_free_pages();
+
+		/*
+		 * Leave reserved pages. The pages are not for anonymous pages.
+		 */
+		if (n <= totalreserve_pages)
+			goto error;
+		else
+			n -= totalreserve_pages;
+
+		/*
+		 * Leave the last 3% for root
+		 */
 		if (!cap_sys_admin)
 			n -= n / 32;
 		free += n;
 
 		if (free > pages)
 			return 0;
-		vm_unacct_memory(pages);
-		return -ENOMEM;
+
+		goto error;
 	}
 
 	allowed = (totalram_pages - hugetlb_total_pages())
@@ -150,7 +162,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	 */
 	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
-
+error:
 	vm_unacct_memory(pages);
 	return -ENOMEM;
 
@@ -220,6 +232,17 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 
 	if (brk < mm->end_code)
 		goto out;
+
+	/*
+	 * Check against rlimit here. If this check is done later after the test
+	 * of oldbrk with newbrk then it can escape the test and let the data
+	 * segment grow beyond its set limit the in case where the limit is
+	 * not page aligned -Ram Gupta
+	 */
+	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+		goto out;
+
 	newbrk = PAGE_ALIGN(brk);
 	oldbrk = PAGE_ALIGN(mm->brk);
 	if (oldbrk == newbrk)
@@ -232,11 +255,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 			goto out;
 	}
 
-	/* Check against rlimit.. */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
-	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
-		goto out;
-
 	/* Check against existing mmap mappings. */
 	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
 		goto out;
@@ -294,8 +312,7 @@ void validate_mm(struct mm_struct *mm)
 	i = browse_rb(&mm->mm_rb);
 	if (i != mm->map_count)
 		printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
-	if (bug)
-		BUG();
+	BUG_ON(bug);
 }
 #else
 #define validate_mm(mm) do { } while (0)
@@ -432,8 +449,7 @@ __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 	struct rb_node ** rb_link, * rb_parent;
 
 	__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
-	if (__vma && __vma->vm_start < vma->vm_end)
-		BUG();
+	BUG_ON(__vma && __vma->vm_start < vma->vm_end);
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	mm->map_count++;
 }
@@ -813,8 +829,7 @@ try_prev:
 	 * (e.g. stash info in next's anon_vma_node when assigning
 	 * an anon_vma, or when trying vma_merge). Another time.
 	 */
-	if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
-		BUG();
+	BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
 	if (!near)
 		goto none;
 
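Why the sys_brk() hunk above moves the RLIMIT_DATA check before the page-alignment comparison: a request that stays inside the current last page takes the oldbrk == newbrk early exit, which previously bypassed the limit check entirely. A worked example with illustrative numbers (PAGE_SIZE = 4096):

	start_data = 0, RLIMIT_DATA = 5000 (not page aligned)
	current brk = 4500  ->  oldbrk = PAGE_ALIGN(4500) = 8192
	request brk = 5500  ->  newbrk = PAGE_ALIGN(5500) = 8192

With the old ordering, oldbrk == newbrk set mm->brk = 5500 without ever consulting the 5000-byte limit; checking up front closes that gap.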
diff --git a/mm/nommu.c b/mm/nommu.c
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1147,14 +1147,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 		 * only call if we're about to fail.
 		 */
 		n = nr_free_pages();
+
+		/*
+		 * Leave reserved pages. The pages are not for anonymous pages.
+		 */
+		if (n <= totalreserve_pages)
+			goto error;
+		else
+			n -= totalreserve_pages;
+
+		/*
+		 * Leave the last 3% for root
+		 */
 		if (!cap_sys_admin)
 			n -= n / 32;
 		free += n;
 
 		if (free > pages)
 			return 0;
-		vm_unacct_memory(pages);
-		return -ENOMEM;
+
+		goto error;
 	}
 
 	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
@@ -1175,7 +1187,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	 */
 	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
-
+error:
 	vm_unacct_memory(pages);
 	return -ENOMEM;
 
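The effect of the new totalreserve_pages term in both OVERCOMMIT_GUESS paths above, with made-up numbers: if nr_free_pages() returns 10000 while totalreserve_pages is 1500, only 10000 - 1500 = 8500 pages count as usable, since pages held back by the lowmem reserves and pages_high watermarks can never back anonymous memory; a non-root caller then keeps 8500 - 8500/32 = 8235 of those. A request exceeding the resulting estimate now takes the common error path instead of being granted against memory the allocator will refuse to hand out.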
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 78747af..042e643 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -46,15 +46,25 @@
 unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
 	unsigned long points, cpu_time, run_time, s;
-	struct list_head *tsk;
+	struct mm_struct *mm;
+	struct task_struct *child;
 
-	if (!p->mm)
+	task_lock(p);
+	mm = p->mm;
+	if (!mm) {
+		task_unlock(p);
 		return 0;
+	}
 
 	/*
 	 * The memory size of the process is the basis for the badness.
 	 */
-	points = p->mm->total_vm;
+	points = mm->total_vm;
+
+	/*
+	 * After this unlock we can no longer dereference local variable `mm'
+	 */
+	task_unlock(p);
 
 	/*
 	 * Processes which fork a lot of child processes are likely
@@ -64,11 +74,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * child is eating the vast majority of memory, adding only half
 	 * to the parents will make the child our kill candidate of choice.
 	 */
-	list_for_each(tsk, &p->children) {
-		struct task_struct *chld;
-		chld = list_entry(tsk, struct task_struct, sibling);
-		if (chld->mm != p->mm && chld->mm)
-			points += chld->mm->total_vm/2 + 1;
+	list_for_each_entry(child, &p->children, sibling) {
+		task_lock(child);
+		if (child->mm != mm && child->mm)
+			points += child->mm->total_vm/2 + 1;
+		task_unlock(child);
 	}
 
 	/*
@@ -244,17 +254,24 @@ static void __oom_kill_task(task_t *p, const char *message)
 	force_sig(SIGKILL, p);
 }
 
-static struct mm_struct *oom_kill_task(task_t *p, const char *message)
+static int oom_kill_task(task_t *p, const char *message)
 {
-	struct mm_struct *mm = get_task_mm(p);
+	struct mm_struct *mm;
 	task_t * g, * q;
 
-	if (!mm)
-		return NULL;
-	if (mm == &init_mm) {
-		mmput(mm);
-		return NULL;
-	}
+	mm = p->mm;
+
+	/* WARNING: mm may not be dereferenced since we did not obtain its
+	 * value from get_task_mm(p). This is OK since all we need to do is
+	 * compare mm to q->mm below.
+	 *
+	 * Furthermore, even if mm contains a non-NULL value, p->mm may
+	 * change to NULL at any time since we do not hold task_lock(p).
+	 * However, this is of no concern to us.
+	 */
+
+	if (mm == NULL || mm == &init_mm)
+		return 1;
 
 	__oom_kill_task(p, message);
 	/*
@@ -266,13 +283,12 @@ static struct mm_struct *oom_kill_task(task_t *p, const char *message)
 			__oom_kill_task(q, message);
 	while_each_thread(g, q);
 
-	return mm;
+	return 0;
 }
 
-static struct mm_struct *oom_kill_process(struct task_struct *p,
-				unsigned long points, const char *message)
+static int oom_kill_process(struct task_struct *p, unsigned long points,
+		const char *message)
 {
-	struct mm_struct *mm;
 	struct task_struct *c;
 	struct list_head *tsk;
 
@@ -283,9 +299,8 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
 		c = list_entry(tsk, struct task_struct, sibling);
 		if (c->mm == p->mm)
 			continue;
-		mm = oom_kill_task(c, message);
-		if (mm)
-			return mm;
+		if (!oom_kill_task(c, message))
+			return 0;
 	}
 	return oom_kill_task(p, message);
 }
@@ -300,7 +315,6 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	struct mm_struct *mm = NULL;
 	task_t *p;
 	unsigned long points = 0;
 
@@ -320,12 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 	 */
 	switch (constrained_alloc(zonelist, gfp_mask)) {
 	case CONSTRAINT_MEMORY_POLICY:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory (MPOL_BIND)");
 		break;
 
 	case CONSTRAINT_CPUSET:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory in cpuset");
 		break;
 
@@ -347,8 +361,7 @@ retry:
 			panic("Out of memory and no killable processes...\n");
 		}
 
-		mm = oom_kill_process(p, points, "Out of memory");
-		if (!mm)
+		if (oom_kill_process(p, points, "Out of memory"))
 			goto retry;
 
 		break;
@@ -357,8 +370,6 @@ retry:
 out:
 	read_unlock(&tasklist_lock);
 	cpuset_unlock();
-	if (mm)
-		mmput(mm);
 
 	/*
 	 * Give "p" a good chance of killing itself before we
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 893d767..75d7f48 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -72,13 +72,12 @@ int dirty_background_ratio = 10;
 int vm_dirty_ratio = 40;
 
 /*
- * The interval between `kupdate'-style writebacks, in centiseconds
- * (hundredths of a second)
+ * The interval between `kupdate'-style writebacks, in jiffies
 */
 int dirty_writeback_interval = 5 * HZ;
 
 /*
- * The longest number of centiseconds for which data is allowed to remain dirty
+ * The longest number of jiffies for which data is allowed to remain dirty
 */
 int dirty_expire_interval = 30 * HZ;
 
@@ -258,7 +257,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
  * @mapping: address_space which was dirtied
- * @nr_pages: number of pages which the caller has just dirtied
+ * @nr_pages_dirtied: number of pages which the caller has just dirtied
  *
  * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1..ea77c99 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
@@ -151,7 +152,8 @@ static void bad_page(struct page *page)
 			1 << PG_reclaim |
 			1 << PG_slab    |
 			1 << PG_swapcache |
-			1 << PG_writeback );
+			1 << PG_writeback |
+			1 << PG_buddy );
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -230,18 +232,20 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 * zone->lock is already acquired when we use these.
 * So, we don't need atomic page->flags operations here.
 */
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
 	return page_private(page);
 }
 
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
 	set_page_private(page, order);
-	__SetPagePrivate(page);
+	__SetPageBuddy(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
-	__ClearPagePrivate(page);
+	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 }
 
@@ -280,11 +284,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
 * This function checks whether a page is free && is the buddy
 * we can do coalesce a page and its buddy if
 * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
 *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
 */
@@ -293,11 +299,11 @@ static inline int page_is_buddy(struct page *page, int order)
 		return 0;
 #endif
 
-	if (PagePrivate(page)           &&
-	    (page_order(page) == order) &&
-	    page_count(page) == 0)
-		return 1;
-	return 0;
+	if (PageBuddy(page) && page_order(page) == order) {
+		BUG_ON(page_count(page) != 0);
+		return 1;
+	}
+	return 0;
 }
 
@@ -313,7 +319,7 @@ static inline int page_is_buddy(struct page *page, int order)
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
 * order is recorded in page_private(page) field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other.  That is, if we allocate a small block, and both were
@@ -376,7 +382,8 @@ static inline int free_pages_check(struct page *page)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -524,7 +531,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 			1 << PG_slab    |
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 
 	/*
@@ -1954,7 +1962,7 @@ static inline void free_zone_pagesets(int cpu)
 	}
 }
 
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
+static int pageset_cpuup_callback(struct notifier_block *nfb,
 		unsigned long action,
 		void *hcpu)
 {
@@ -2472,6 +2480,38 @@ void __init page_alloc_init(void)
 }
 
 /*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ *	or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+	struct pglist_data *pgdat;
+	unsigned long reserve_pages = 0;
+	int i, j;
+
+	for_each_online_pgdat(pgdat) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+			unsigned long max = 0;
+
+			/* Find valid and maximum lowmem_reserve in the zone */
+			for (j = i; j < MAX_NR_ZONES; j++) {
+				if (zone->lowmem_reserve[j] > max)
+					max = zone->lowmem_reserve[j];
+			}
+
+			/* we treat pages_high as reserved pages. */
+			max += zone->pages_high;
+
+			if (max > zone->present_pages)
+				max = zone->present_pages;
+			reserve_pages += max;
+		}
+	}
+	totalreserve_pages = reserve_pages;
+}
+
+/*
 * setup_per_zone_lowmem_reserve - called whenever
 *	sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
 *	has a correct pages reserved value, so an adequate number of
@@ -2502,6 +2542,9 @@ static void setup_per_zone_lowmem_reserve(void)
 			}
 		}
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
@@ -2556,6 +2599,9 @@ void setup_per_zone_pages_min(void)
 		zone->pages_high = zone->pages_min + tmp / 2;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
diff --git a/mm/shmem.c b/mm/shmem.c
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -46,6 +46,8 @@
 #include <linux/mempolicy.h>
 #include <linux/namei.h>
 #include <linux/ctype.h>
+#include <linux/migrate.h>
+
 #include <asm/uaccess.h>
 #include <asm/div64.h>
 #include <asm/pgtable.h>
@@ -2173,6 +2175,7 @@ static struct address_space_operations shmem_aops = {
 	.prepare_write	= shmem_prepare_write,
 	.commit_write	= simple_commit_write,
 #endif
+	.migratepage	= migrate_page,
 };
 
 static struct file_operations shmem_file_operations = {
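The one-line shmem.c hook above is the general opt-in pattern for page migration: any address_space whose pages need no special handling can point .migratepage at the generic migrate_page(). An illustrative sketch for a hypothetical filesystem (example_readpage and example_writepage are placeholders, not real kernel symbols):

	static struct address_space_operations example_aops = {
		.readpage	= example_readpage,	/* hypothetical */
		.writepage	= example_writepage,	/* hypothetical */
		.migratepage	= migrate_page,		/* generic, no buffers */
	};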
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -420,6 +420,7 @@ struct kmem_cache {
 	unsigned long max_freeable;
 	unsigned long node_allocs;
 	unsigned long node_frees;
+	unsigned long node_overflow;
 	atomic_t allochit;
 	atomic_t allocmiss;
 	atomic_t freehit;
@@ -465,6 +466,7 @@ struct kmem_cache {
 #define	STATS_INC_ERR(x)	((x)->errors++)
 #define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
 #define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
+#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
 #define	STATS_SET_FREEABLE(x, i)					\
 	do {								\
 		if ((x)->max_freeable < i)				\
@@ -484,6 +486,7 @@ struct kmem_cache {
 #define	STATS_INC_ERR(x)	do { } while (0)
 #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
 #define	STATS_INC_NODEFREES(x)	do { } while (0)
+#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
 #define	STATS_SET_FREEABLE(x, i) do { } while (0)
 #define STATS_INC_ALLOCHIT(x)	do { } while (0)
 #define STATS_INC_ALLOCMISS(x)	do { } while (0)
@@ -976,7 +979,8 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 		 * That way we could avoid the overhead of putting the objects
 		 * into the free lists and getting them back later.
 		 */
-		transfer_objects(rl3->shared, ac, ac->limit);
+		if (rl3->shared)
+			transfer_objects(rl3->shared, ac, ac->limit);
 
 		free_block(cachep, ac->entry, ac->avail, node);
 		ac->avail = 0;
@@ -1033,7 +1037,7 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
 
 #endif
 
-static int __devinit cpuup_callback(struct notifier_block *nfb,
+static int cpuup_callback(struct notifier_block *nfb,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1297,8 +1301,7 @@ void __init kmem_cache_init(void)
 		if (cache_cache.num)
 			break;
 	}
-	if (!cache_cache.num)
-		BUG();
+	BUG_ON(!cache_cache.num);
 	cache_cache.gfporder = order;
 	cache_cache.colour = left_over / cache_cache.colour_off;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
@@ -1454,7 +1457,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	int i;
 
 	flags |= cachep->gfpflags;
+#ifndef CONFIG_MMU
+	/* nommu uses slab's for process anonymous memory allocations, so
+	 * requires __GFP_COMP to properly refcount higher order allocations"
+	 */
+	page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder);
+#else
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+#endif
 	if (!page)
 		return NULL;
 	addr = page_address(page);
@@ -1974,8 +1984,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 * Always checks flags, a caller might be expecting debug support which
 	 * isn't available.
 	 */
-	if (flags & ~CREATE_MASK)
-		BUG();
+	BUG_ON(flags & ~CREATE_MASK);
 
 	/*
 	 * Check that size is in terms of words.  This is needed to avoid
@@ -2206,8 +2215,7 @@ static int __node_shrink(struct kmem_cache *cachep, int node)
 
 		slabp = list_entry(l3->slabs_free.prev, struct slab, list);
 #if DEBUG
-		if (slabp->inuse)
-			BUG();
+		BUG_ON(slabp->inuse);
 #endif
 		list_del(&slabp->list);
 
@@ -2248,8 +2256,7 @@ static int __cache_shrink(struct kmem_cache *cachep)
 */
 int kmem_cache_shrink(struct kmem_cache *cachep)
 {
-	if (!cachep || in_interrupt())
-		BUG();
+	BUG_ON(!cachep || in_interrupt());
 
 	return __cache_shrink(cachep);
 }
@@ -2277,8 +2284,7 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
 	int i;
 	struct kmem_list3 *l3;
 
-	if (!cachep || in_interrupt())
-		BUG();
+	BUG_ON(!cachep || in_interrupt());
 
 	/* Don't let CPUs to come and go */
 	lock_cpu_hotplug();
@@ -2323,13 +2329,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 
 /* Get the memory for a slab management obj. */
 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
-				   int colour_off, gfp_t local_flags)
+				   int colour_off, gfp_t local_flags,
+				   int nodeid)
 {
 	struct slab *slabp;
 
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+					      local_flags, nodeid);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2339,6 +2347,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
+	slabp->nodeid = nodeid;
 
 	return slabp;
 }
@@ -2477,8 +2486,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 * Be lazy and only check for valid flags here,  keeping it out of the
 	 * critical path in kmem_cache_alloc().
 	 */
-	if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
-		BUG();
+	BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW));
 	if (flags & SLAB_NO_GROW)
 		return 0;
 
@@ -2525,7 +2533,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		goto failed;
 
 	/* Get slab management. */
-	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
 	if (!slabp)
 		goto opps1;
 
@@ -3086,9 +3094,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 		if (l3->alien && l3->alien[nodeid]) {
 			alien = l3->alien[nodeid];
 			spin_lock(&alien->lock);
-			if (unlikely(alien->avail == alien->limit))
+			if (unlikely(alien->avail == alien->limit)) {
+				STATS_INC_ACOVERFLOW(cachep);
 				__drain_alien_cache(cachep, alien, nodeid);
+			}
 			alien->entry[alien->avail++] = objp;
 			spin_unlock(&alien->lock);
 		} else {
@@ -3766,7 +3776,7 @@ static void print_slabinfo_header(struct seq_file *m)
 	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
 #if STATS
 	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
-			"<error> <maxfreeable> <nodeallocs> <remotefrees>");
+			"<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
 	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
 #endif
 	seq_putc(m, '\n');
@@ -3880,11 +3890,12 @@ static int s_show(struct seq_file *m, void *p)
 		unsigned long max_freeable = cachep->max_freeable;
 		unsigned long node_allocs = cachep->node_allocs;
 		unsigned long node_frees = cachep->node_frees;
+		unsigned long overflows = cachep->node_overflow;
 
 		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-				%4lu %4lu %4lu %4lu", allocs, high, grown,
+				%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
 				reaped, errors, max_freeable, node_allocs,
-				node_frees);
+				node_frees, overflows);
 	}
 	/* cpu stats */
 	{
diff --git a/mm/slob.c b/mm/slob.c
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -354,9 +354,7 @@ void *__alloc_percpu(size_t size)
 	if (!pdata)
 		return NULL;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_possible_cpu(i) {
 		pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
@@ -383,11 +381,9 @@ free_percpu(const void *objp)
 	int i;
 	struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_possible_cpu(i)
 		kfree(p->ptrs[i]);
-	}
+
 	kfree(p);
 }
 EXPORT_SYMBOL(free_percpu);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d7af296..e0e1583 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -148,8 +148,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
 	swp_entry_t entry;
 	int err;
 
-	if (!PageLocked(page))
-		BUG();
+	BUG_ON(!PageLocked(page));
 
 	for (;;) {
 		entry = get_swap_page();
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 729eb3e..c0504f1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -321,8 +321,7 @@ void __vunmap(void *addr, int deallocate_pages)
 		int i;
 
 		for (i = 0; i < area->nr_pages; i++) {
-			if (unlikely(!area->pages[i]))
-				BUG();
+			BUG_ON(!area->pages[i]);
 			__free_page(area->pages[i]);
 		}
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index acdf001..4649a63 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1328,7 +1328,7 @@ repeat:
    not required for correctness.  So if the last cpu in a node goes
    away, we get changed to run anywhere: as the first one comes back,
    restore their cpu bindings. */
-static int __devinit cpu_callback(struct notifier_block *nfb,
+static int cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
 {
 	pg_data_t *pgdat;
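Finally, the slob.c hunks above apply a cleanup idiom used tree-wide in this period; a before/after sketch (do_something() is a hypothetical per-cpu action):

	int cpu;

	/* open-coded form being removed */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!cpu_possible(cpu))
			continue;
		do_something(cpu);
	}

	/* equivalent iterator */
	for_each_possible_cpu(cpu)
		do_something(cpu);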